Add support for gptneox/replit (#238)

token_berts
Ettore Di Giacinto 2 years ago committed by GitHub
parent fd1df4e971
commit 8250391e49
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 2
      Makefile
  2. 4
      README.md
  3. 2
      api/api_test.go
  4. 48
      api/prediction.go
  5. 16
      pkg/model/initializers.go

@ -6,7 +6,7 @@ BINARY_NAME=local-ai
GOLLAMA_VERSION?=70593fccbe4b01dedaab805b0f25cb58192c7b38 GOLLAMA_VERSION?=70593fccbe4b01dedaab805b0f25cb58192c7b38
GPT4ALL_REPO?=https://github.com/go-skynet/gpt4all GPT4ALL_REPO?=https://github.com/go-skynet/gpt4all
GPT4ALL_VERSION?=3657f9417e17edf378c27d0a9274a1bf41caa914 GPT4ALL_VERSION?=3657f9417e17edf378c27d0a9274a1bf41caa914
GOGPT2_VERSION?=6a10572a23328e18a62cfdb45e4a3c8ddbe75f25 GOGPT2_VERSION?=92421a8cf61ed6e03babd9067af292b094cb1307
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=07166da10cb2a9e8854395a4f210464dcea76e47 RWKV_VERSION?=07166da10cb2a9e8854395a4f210464dcea76e47
WHISPER_CPP_VERSION?=bf2449dfae35a46b2cd92ab22661ce81a48d4993 WHISPER_CPP_VERSION?=bf2449dfae35a46b2cd92ab22661ce81a48d4993

@ -92,7 +92,7 @@ It should also be compatible with StableLM and GPTNeoX ggml models (untested).
Depending on the model you are attempting to run might need more RAM or CPU resources. Check out also [here](https://github.com/ggerganov/llama.cpp#memorydisk-requirements) for `ggml` based backends. `rwkv` is less expensive on resources. Depending on the model you are attempting to run might need more RAM or CPU resources. Check out also [here](https://github.com/ggerganov/llama.cpp#memorydisk-requirements) for `ggml` based backends. `rwkv` is less expensive on resources.
### Feature support matrix ### Model compatibility table
<details> <details>
@ -106,6 +106,8 @@ Depending on the model you are attempting to run might need more RAM or CPU reso
| dolly | Dolly | yes | no | no | no | https://github.com/ggerganov/ggml | https://github.com/go-skynet/go-gpt2.cpp | | dolly | Dolly | yes | no | no | no | https://github.com/ggerganov/ggml | https://github.com/go-skynet/go-gpt2.cpp |
| redpajama | RedPajama | yes | no | no | no | https://github.com/ggerganov/ggml | https://github.com/go-skynet/go-gpt2.cpp | | redpajama | RedPajama | yes | no | no | no | https://github.com/ggerganov/ggml | https://github.com/go-skynet/go-gpt2.cpp |
| stableLM | StableLM GPT/NeoX | yes | no | no | no | https://github.com/ggerganov/ggml | https://github.com/go-skynet/go-gpt2.cpp | | stableLM | StableLM GPT/NeoX | yes | no | no | no | https://github.com/ggerganov/ggml | https://github.com/go-skynet/go-gpt2.cpp |
| replit | Replit | yes | no | no | no | https://github.com/ggerganov/ggml | https://github.com/go-skynet/go-gpt2.cpp |
| gptneox | GPT NeoX | yes | no | no | no | https://github.com/ggerganov/ggml | https://github.com/go-skynet/go-gpt2.cpp |
| starcoder | Starcoder | yes | no | no | no | https://github.com/ggerganov/ggml | https://github.com/go-skynet/go-gpt2.cpp | | starcoder | Starcoder | yes | no | no | no | https://github.com/ggerganov/ggml | https://github.com/go-skynet/go-gpt2.cpp |
| bloomz | Bloom | yes | no | no | no | https://github.com/NouamaneTazi/bloomz.cpp | https://github.com/go-skynet/bloomz.cpp | | bloomz | Bloom | yes | no | no | no | https://github.com/NouamaneTazi/bloomz.cpp | https://github.com/go-skynet/bloomz.cpp |
| rwkv | RWKV | yes | no | no | yes | https://github.com/saharNooby/rwkv.cpp | https://github.com/donomii/go-rwkv.cpp | | rwkv | RWKV | yes | no | no | yes | https://github.com/saharNooby/rwkv.cpp | https://github.com/donomii/go-rwkv.cpp |

@ -80,7 +80,7 @@ var _ = Describe("API test", func() {
It("returns errors", func() { It("returns errors", func() {
_, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: "abcdedfghikl"}) _, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: "abcdedfghikl"})
Expect(err).To(HaveOccurred()) Expect(err).To(HaveOccurred())
Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: could not load model - all backends returned error: 10 errors occurred:")) Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: could not load model - all backends returned error: 12 errors occurred:"))
}) })
PIt("transcribes audio", func() { PIt("transcribes audio", func() {
resp, err := client.CreateTranscription( resp, err := client.CreateTranscription(

@ -199,6 +199,54 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback
return response, nil return response, nil
} }
case *gpt2.GPTNeoX:
fn = func() (string, error) {
// Generate the prediction using the language model
predictOptions := []gpt2.PredictOption{
gpt2.SetTemperature(c.Temperature),
gpt2.SetTopP(c.TopP),
gpt2.SetTopK(c.TopK),
gpt2.SetTokens(c.Maxtokens),
gpt2.SetThreads(c.Threads),
}
if c.Batch != 0 {
predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
}
if c.Seed != 0 {
predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
}
return model.Predict(
s,
predictOptions...,
)
}
case *gpt2.Replit:
fn = func() (string, error) {
// Generate the prediction using the language model
predictOptions := []gpt2.PredictOption{
gpt2.SetTemperature(c.Temperature),
gpt2.SetTopP(c.TopP),
gpt2.SetTopK(c.TopK),
gpt2.SetTokens(c.Maxtokens),
gpt2.SetThreads(c.Threads),
}
if c.Batch != 0 {
predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
}
if c.Seed != 0 {
predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
}
return model.Predict(
s,
predictOptions...,
)
}
case *gpt2.Starcoder: case *gpt2.Starcoder:
fn = func() (string, error) { fn = func() (string, error) {
// Generate the prediction using the language model // Generate the prediction using the language model

@ -24,6 +24,8 @@ const (
StableLMBackend = "stablelm" StableLMBackend = "stablelm"
DollyBackend = "dolly" DollyBackend = "dolly"
RedPajamaBackend = "redpajama" RedPajamaBackend = "redpajama"
GPTNeoXBackend = "gptneox"
ReplitBackend = "replit"
Gpt2Backend = "gpt2" Gpt2Backend = "gpt2"
Gpt4AllLlamaBackend = "gpt4all-llama" Gpt4AllLlamaBackend = "gpt4all-llama"
Gpt4AllMptBackend = "gpt4all-mpt" Gpt4AllMptBackend = "gpt4all-mpt"
@ -45,6 +47,8 @@ var backends []string = []string{
StableLMBackend, StableLMBackend,
DollyBackend, DollyBackend,
RedPajamaBackend, RedPajamaBackend,
GPTNeoXBackend,
ReplitBackend,
BertEmbeddingsBackend, BertEmbeddingsBackend,
StarcoderBackend, StarcoderBackend,
} }
@ -61,6 +65,14 @@ var dolly = func(modelFile string) (interface{}, error) {
return gpt2.NewDolly(modelFile) return gpt2.NewDolly(modelFile)
} }
// gptNeoX is the loader hook for the "gptneox" backend: it constructs a
// GPT-NeoX model from the given model file via the go-gpt2 bindings.
// Returned as interface{} so the generic ModelLoader can dispatch on type.
var gptNeoX = func(modelFile string) (interface{}, error) {
return gpt2.NewGPTNeoX(modelFile)
}
// replit is the loader hook for the "replit" backend: it constructs a
// Replit model from the given model file via the go-gpt2 bindings.
// Returned as interface{} so the generic ModelLoader can dispatch on type.
var replit = func(modelFile string) (interface{}, error) {
return gpt2.NewReplit(modelFile)
}
var stableLM = func(modelFile string) (interface{}, error) { var stableLM = func(modelFile string) (interface{}, error) {
return gpt2.NewStableLM(modelFile) return gpt2.NewStableLM(modelFile)
} }
@ -116,6 +128,10 @@ func (ml *ModelLoader) BackendLoader(backendString string, modelFile string, lla
return ml.LoadModel(modelFile, redPajama) return ml.LoadModel(modelFile, redPajama)
case Gpt2Backend: case Gpt2Backend:
return ml.LoadModel(modelFile, gpt2LM) return ml.LoadModel(modelFile, gpt2LM)
case GPTNeoXBackend:
return ml.LoadModel(modelFile, gptNeoX)
case ReplitBackend:
return ml.LoadModel(modelFile, replit)
case StarcoderBackend: case StarcoderBackend:
return ml.LoadModel(modelFile, starCoder) return ml.LoadModel(modelFile, starCoder)
case Gpt4AllLlamaBackend: case Gpt4AllLlamaBackend:

Loading…
Cancel
Save