diff --git a/Makefile b/Makefile
index 527e3ab..16d2d60 100644
--- a/Makefile
+++ b/Makefile
@@ -6,7 +6,7 @@ BINARY_NAME=local-ai
 GOLLAMA_VERSION?=c03e8adbc45c866e0f6d876af1887d6b01d57eb4
 GPT4ALL_REPO?=https://github.com/go-skynet/gpt4all
 GPT4ALL_VERSION?=3657f9417e17edf378c27d0a9274a1bf41caa914
-GOGPT2_VERSION?=abf038a7d8efa4eefdc7c891f05ad33d4e59e49d
+GOGPT2_VERSION?=6a10572
 RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
 RWKV_VERSION?=07166da10cb2a9e8854395a4f210464dcea76e47
 WHISPER_CPP_VERSION?=bf2449dfae35a46b2cd92ab22661ce81a48d4993
diff --git a/README.md b/README.md
index a46b6dd..04f43d3 100644
--- a/README.md
+++ b/README.md
@@ -92,6 +92,28 @@ It should also be compatible with StableLM and GPTNeoX ggml models (untested).
 
 Depending on the model you are attempting to run might need more RAM or CPU resources. Check out also [here](https://github.com/ggerganov/llama.cpp#memorydisk-requirements) for `ggml` based backends. `rwkv` is less expensive on resources.
 
+### Feature support matrix
+
+<details>
+
+| Backend         | Compatible models     | Completion/Chat endpoint | Audio transcription | Embeddings support                | Token stream support | GitHub                                     | Bindings                                  |
+|-----------------|-----------------------|--------------------------|---------------------|-----------------------------------|----------------------|--------------------------------------------|-------------------------------------------|
+| llama           | Vicuna, Alpaca, LLaMa | yes                      | no                  | yes (doesn't seem to be accurate) | yes                  | https://github.com/ggerganov/llama.cpp     | https://github.com/go-skynet/go-llama.cpp |
+| gpt4all-llama   | Vicuna, Alpaca, LLaMa | yes                      | no                  | no                                | yes                  | https://github.com/nomic-ai/gpt4all        | https://github.com/go-skynet/gpt4all      |
+| gpt4all-mpt     | MPT                   | yes                      | no                  | no                                | yes                  | https://github.com/nomic-ai/gpt4all        | https://github.com/go-skynet/gpt4all      |
+| gpt4all-j       | GPT4ALL-J             | yes                      | no                  | no                                | yes                  | https://github.com/nomic-ai/gpt4all        | https://github.com/go-skynet/gpt4all      |
+| gpt2            | GPT/NeoX, Cerebras    | yes                      | no                  | no                                | no                   | https://github.com/ggerganov/ggml          | https://github.com/go-skynet/go-gpt2.cpp  |
+| dolly           | Dolly                 | yes                      | no                  | no                                | no                   | https://github.com/ggerganov/ggml          | https://github.com/go-skynet/go-gpt2.cpp  |
+| redpajama       | RedPajama             | yes                      | no                  | no                                | no                   | https://github.com/ggerganov/ggml          | https://github.com/go-skynet/go-gpt2.cpp  |
+| stableLM        | StableLM GPT/NeoX     | yes                      | no                  | no                                | no                   | https://github.com/ggerganov/ggml          | https://github.com/go-skynet/go-gpt2.cpp  |
+| starcoder       | Starcoder             | yes                      | no                  | no                                | no                   | https://github.com/ggerganov/ggml          | https://github.com/go-skynet/go-gpt2.cpp  |
+| bloomz          | Bloom                 | yes                      | no                  | no                                | no                   | https://github.com/NouamaneTazi/bloomz.cpp | https://github.com/go-skynet/bloomz.cpp   |
+| rwkv            | RWKV                  | yes                      | no                  | no                                | yes                  | https://github.com/saharNooby/rwkv.cpp     | https://github.com/donomii/go-rwkv.cpp    |
+| bert-embeddings | bert                  | no                       | no                  | yes                               | no                   | https://github.com/skeskinen/bert.cpp      | https://github.com/go-skynet/go-bert.cpp  |
+| whisper         | whisper               | no                       | yes                 | no                                | no                   | https://github.com/ggerganov/whisper.cpp   | https://github.com/ggerganov/whisper.cpp  |
+
+</details>
+
 ## Usage
 
 > `LocalAI` comes by default as a container image. You can check out all the available images with corresponding tags [here](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest).
diff --git a/api/api_test.go b/api/api_test.go
index 639f18d..de9fc34 100644
--- a/api/api_test.go
+++ b/api/api_test.go
@@ -79,7 +79,7 @@ var _ = Describe("API test", func() {
 		It("returns errors", func() {
 			_, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: "abcdedfghikl"})
 			Expect(err).To(HaveOccurred())
-			Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: could not load model - all backends returned error: 9 errors occurred:"))
+			Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: could not load model - all backends returned error: 10 errors occurred:"))
 		})
 	})
diff --git a/api/prediction.go b/api/prediction.go
index 8c381c9..b128e7e 100644
--- a/api/prediction.go
+++ b/api/prediction.go
@@ -199,6 +199,30 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback
 			return response, nil
 		}
+	case *gpt2.Starcoder:
+		fn = func() (string, error) {
+			// Generate the prediction using the language model
+			predictOptions := []gpt2.PredictOption{
+				gpt2.SetTemperature(c.Temperature),
+				gpt2.SetTopP(c.TopP),
+				gpt2.SetTopK(c.TopK),
+				gpt2.SetTokens(c.Maxtokens),
+				gpt2.SetThreads(c.Threads),
+			}
+
+			if c.Batch != 0 {
+				predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
+			}
+
+			if c.Seed != 0 {
+				predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
+			}
+
+			return model.Predict(
+				s,
+				predictOptions...,
+			)
+		}
 	case *gpt2.RedPajama:
 		fn = func() (string, error) {
 			// Generate the prediction using the language model
diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go
index 9e48ae4..686dc38 100644
--- a/pkg/model/initializers.go
+++ b/pkg/model/initializers.go
@@ -20,6 +20,7 @@ const tokenizerSuffix = ".tokenizer.json"
 const (
 	LlamaBackend = "llama"
 	BloomzBackend = "bloomz"
+	StarcoderBackend = "starcoder"
 	StableLMBackend = "stablelm"
 	DollyBackend = "dolly"
 	RedPajamaBackend = "redpajama"
@@ -45,6 +46,11 @@ var backends []string = []string{
 	DollyBackend,
 	RedPajamaBackend,
 	BertEmbeddingsBackend,
+	StarcoderBackend,
+}
+
+var starCoder = func(modelFile string) (interface{}, error) {
+	return gpt2.NewStarcoder(modelFile)
 }
 
 var redPajama = func(modelFile string) (interface{}, error) {
@@ -110,6 +116,8 @@ func (ml *ModelLoader) BackendLoader(backendString string, modelFile string, lla
 	case RedPajamaBackend:
 		return ml.LoadModel(modelFile, redPajama)
 	case Gpt2Backend:
 		return ml.LoadModel(modelFile, gpt2LM)
+	case StarcoderBackend:
+		return ml.LoadModel(modelFile, starCoder)
 	case Gpt4AllLlamaBackend:
 		return ml.LoadModel(modelFile, gpt4allLM(gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.LLaMAType)))
 	case Gpt4AllMptBackend:
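A quick way to exercise the new `starcoder` backend end to end is the same `go-openai` client that `api_test.go` already uses. The sketch below is illustrative and not part of the patch: it assumes a LocalAI instance listening on `127.0.0.1:8080`, and `starcoder-model` is a hypothetical name that would have to match a ggml model file (or model config) in the models directory the server was started with.

```go
package main

import (
	"context"
	"fmt"
	"log"

	openai "github.com/sashabaranov/go-openai"
)

func main() {
	// Point the OpenAI client at the local instance instead of api.openai.com.
	cfg := openai.DefaultConfig("")
	cfg.BaseURL = "http://127.0.0.1:8080/v1"
	client := openai.NewClientWithConfig(cfg)

	// "starcoder-model" is a placeholder: it has to match a model available
	// to the running LocalAI instance.
	resp, err := client.CreateCompletion(context.Background(), openai.CompletionRequest{
		Model:  "starcoder-model",
		Prompt: "def fibonacci(n):",
	})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(resp.Choices[0].Text)
}
```

Note that because the new backend is appended to the `backends` slice, a request for a model that no backend can load now accumulates one more failure, which is why the expected count in `api_test.go` moves from 9 to 10 errors.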