diff --git a/.github/workflows/bump_deps.yaml b/.github/workflows/bump_deps.yaml index 6aa7aa4..a10d2af 100644 --- a/.github/workflows/bump_deps.yaml +++ b/.github/workflows/bump_deps.yaml @@ -30,6 +30,9 @@ jobs: - repository: "go-skynet/bloomz.cpp" variable: "BLOOMZ_VERSION" branch: "main" + - repository: "go-skynet/gpt4all" + variable: "GPT4ALL_VERSION" + branch: "main" runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 diff --git a/Makefile b/Makefile index 768eeea..1ed894c 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,8 @@ GOVET=$(GOCMD) vet BINARY_NAME=local-ai GOLLAMA_VERSION?=c03e8adbc45c866e0f6d876af1887d6b01d57eb4 -GOGPT4ALLJ_VERSION?=1f7bff57f66cb7062e40d0ac3abd2217815e5109 +GPT4ALL_REPO?=https://github.com/go-skynet/gpt4all +GPT4ALL_VERSION?=3657f9417e17edf378c27d0a9274a1bf41caa914 GOGPT2_VERSION?=abf038a7d8efa4eefdc7c891f05ad33d4e59e49d RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=07166da10cb2a9e8854395a4f210464dcea76e47 @@ -19,8 +20,8 @@ WHITE := $(shell tput -Txterm setaf 7) CYAN := $(shell tput -Txterm setaf 6) RESET := $(shell tput -Txterm sgr0) -C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz -LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz +C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/gpt4all/gpt4all-bindings/golang/:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz +LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/gpt4all/gpt4all-bindings/golang/:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz # Use this if you want to set the default behavior ifndef BUILD_TYPE @@ -37,19 +38,26 @@ endif all: help -## GPT4ALL-J -go-gpt4all-j: - git clone --recurse-submodules 
https://github.com/go-skynet/go-gpt4all-j.cpp go-gpt4all-j - cd go-gpt4all-j && git checkout -b build $(GOGPT4ALLJ_VERSION) && git submodule update --init --recursive --depth 1 +## GPT4ALL +gpt4all: + git clone --recurse-submodules $(GPT4ALL_REPO) gpt4all + cd gpt4all && git checkout -b build $(GPT4ALL_VERSION) && git submodule update --init --recursive --depth 1 # This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml.. - @find ./go-gpt4all-j -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} + - @find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} + - @find ./go-gpt4all-j -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} + - @find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gptj_/g' {} + - @find ./go-gpt4all-j -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gptj_/g' {} + - @find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gptj_/g' {} + - @find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/void replace/void json_gptj_replace/g' {} + - @find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/::replace/::json_gptj_replace/g' {} + + @find ./gpt4all -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} + + @find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} + + @find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} + + @find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gptj_/g' {} + + @find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gptj_/g' {} + + @find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/set_console_color/set_gptj_console_color/g' {} + + @find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/set_console_color/set_gptj_console_color/g' {} + + @find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} + + @find ./gpt4all -type f -name "*.go" -exec sed -i'' -e 
's/llama_/gptjllama_/g' {} + + @find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} + + @find ./gpt4all -type f -name "*.txt" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} + + @find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gptj_/g' {} + + @find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/void replace/void json_gptj_replace/g' {} + + @find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/::replace/::json_gptj_replace/g' {} + + mv ./gpt4all/gpt4all-backend/llama.cpp/llama_util.h ./gpt4all/gpt4all-backend/llama.cpp/gptjllama_util.h ## BERT embeddings go-bert: @@ -85,8 +93,8 @@ bloomz/libbloomz.a: bloomz go-bert/libgobert.a: go-bert $(MAKE) -C go-bert libgobert.a -go-gpt4all-j/libgptj.a: go-gpt4all-j - $(MAKE) -C go-gpt4all-j $(GENERIC_PREFIX)libgptj.a +gpt4all/gpt4all-bindings/golang/libgpt4all.a: gpt4all + $(MAKE) -C gpt4all/gpt4all-bindings/golang/ $(GENERIC_PREFIX)libgpt4all.a ## CEREBRAS GPT go-gpt2: @@ -119,20 +127,20 @@ go-llama/libbinding.a: go-llama replace: $(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama - $(GOCMD) mod edit -replace github.com/go-skynet/go-gpt4all-j.cpp=$(shell pwd)/go-gpt4all-j + $(GOCMD) mod edit -replace github.com/nomic/gpt4all/gpt4all-bindings/golang=$(shell pwd)/gpt4all/gpt4all-bindings/golang $(GOCMD) mod edit -replace github.com/go-skynet/go-gpt2.cpp=$(shell pwd)/go-gpt2 $(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(shell pwd)/go-rwkv $(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(shell pwd)/whisper.cpp $(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(shell pwd)/go-bert $(GOCMD) mod edit -replace github.com/go-skynet/bloomz.cpp=$(shell pwd)/bloomz -prepare-sources: go-llama go-gpt2 go-gpt4all-j go-rwkv whisper.cpp go-bert bloomz +prepare-sources: go-llama go-gpt2 gpt4all go-rwkv whisper.cpp go-bert bloomz replace $(GOCMD) mod download ## GENERIC rebuild: ## Rebuilds the project $(MAKE) -C go-llama 
clean - $(MAKE) -C go-gpt4all-j clean + $(MAKE) -C gpt4all/gpt4all-bindings/golang/ clean $(MAKE) -C go-gpt2 clean $(MAKE) -C go-rwkv clean $(MAKE) -C whisper.cpp clean @@ -140,11 +148,11 @@ rebuild: ## Rebuilds the project $(MAKE) -C bloomz clean $(MAKE) build -prepare: prepare-sources go-llama/libbinding.a go-gpt4all-j/libgptj.a go-bert/libgobert.a go-gpt2/libgpt2.a go-rwkv/librwkv.a whisper.cpp/libwhisper.a bloomz/libbloomz.a replace ## Prepares for building +prepare: prepare-sources gpt4all/gpt4all-bindings/golang/libgpt4all.a go-llama/libbinding.a go-bert/libgobert.a go-gpt2/libgpt2.a go-rwkv/librwkv.a whisper.cpp/libwhisper.a bloomz/libbloomz.a ## Prepares for building clean: ## Remove build related file rm -fr ./go-llama - rm -rf ./go-gpt4all-j + rm -rf ./gpt4all rm -rf ./go-gpt2 rm -rf ./go-rwkv rm -rf ./go-bert @@ -156,7 +164,7 @@ clean: ## Remove build related file build: prepare ## Build the project $(info ${GREEN}I local-ai build info:${RESET}) $(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET}) - C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) build -o $(BINARY_NAME) ./ + C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) build -x -o $(BINARY_NAME) ./ generic-build: ## Build the project using generic BUILD_TYPE="generic" $(MAKE) build diff --git a/api/api_test.go b/api/api_test.go index 639f18d..de9fc34 100644 --- a/api/api_test.go +++ b/api/api_test.go @@ -79,7 +79,7 @@ var _ = Describe("API test", func() { It("returns errors", func() { _, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: "abcdedfghikl"}) Expect(err).To(HaveOccurred()) - Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: could not load model - all backends returned error: 9 errors occurred:")) + Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: could not load model - all backends returned error: 10 errors occurred:")) }) }) diff --git 
a/api/prediction.go b/api/prediction.go index b705f66..8c381c9 100644 --- a/api/prediction.go +++ b/api/prediction.go @@ -11,8 +11,8 @@ import ( "github.com/go-skynet/bloomz.cpp" bert "github.com/go-skynet/go-bert.cpp" gpt2 "github.com/go-skynet/go-gpt2.cpp" - gptj "github.com/go-skynet/go-gpt4all-j.cpp" llama "github.com/go-skynet/go-llama.cpp" + gpt4all "github.com/nomic/gpt4all/gpt4all-bindings/golang" ) // mutex still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784 @@ -315,29 +315,35 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback predictOptions..., ) } - case *gptj.GPTJ: + case *gpt4all.Model: + supportStreams = true + fn = func() (string, error) { - // Generate the prediction using the language model - predictOptions := []gptj.PredictOption{ - gptj.SetTemperature(c.Temperature), - gptj.SetTopP(c.TopP), - gptj.SetTopK(c.TopK), - gptj.SetTokens(c.Maxtokens), - gptj.SetThreads(c.Threads), + if tokenCallback != nil { + model.SetTokenCallback(tokenCallback) } - if c.Batch != 0 { - predictOptions = append(predictOptions, gptj.SetBatch(c.Batch)) + // Generate the prediction using the language model + predictOptions := []gpt4all.PredictOption{ + gpt4all.SetTemperature(c.Temperature), + gpt4all.SetTopP(c.TopP), + gpt4all.SetTopK(c.TopK), + gpt4all.SetTokens(c.Maxtokens), } - if c.Seed != 0 { - predictOptions = append(predictOptions, gptj.SetSeed(c.Seed)) + if c.Batch != 0 { + predictOptions = append(predictOptions, gpt4all.SetBatch(c.Batch)) } - return model.Predict( + str, er := model.Predict( s, predictOptions..., ) + // Seems that if we don't free the callback explicitly we leave functions registered (that might try to send on closed channels) + // For instance otherwise the API returns: {"error":{"code":500,"message":"send on closed channel","type":""}} + // after a stream event has occurred + model.SetTokenCallback(nil) + return str, er } case *llama.LLama: supportStreams = true diff --git a/go.mod b/go.mod 
index 3152709..7edbf72 100644 --- a/go.mod +++ b/go.mod @@ -49,6 +49,7 @@ require ( github.com/mattn/go-colorable v0.1.13 // indirect github.com/mattn/go-isatty v0.0.18 // indirect github.com/mattn/go-runewidth v0.0.14 // indirect + github.com/nomic/gpt4all/gpt4all-bindings/golang v0.0.0-00010101000000-000000000000 github.com/philhofer/fwd v1.1.2 // indirect github.com/rivo/uniseg v0.2.0 // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect diff --git a/pkg/model/loader.go b/pkg/model/loader.go index 2542248..fe82b86 100644 --- a/pkg/model/loader.go +++ b/pkg/model/loader.go @@ -15,9 +15,9 @@ import ( bloomz "github.com/go-skynet/bloomz.cpp" bert "github.com/go-skynet/go-bert.cpp" gpt2 "github.com/go-skynet/go-gpt2.cpp" - gptj "github.com/go-skynet/go-gpt4all-j.cpp" llama "github.com/go-skynet/go-llama.cpp" "github.com/hashicorp/go-multierror" + gpt4all "github.com/nomic/gpt4all/gpt4all-bindings/golang" "github.com/rs/zerolog/log" ) @@ -26,7 +26,7 @@ type ModelLoader struct { mu sync.Mutex // TODO: this needs generics models map[string]*llama.LLama - gptmodels map[string]*gptj.GPTJ + gptmodels map[string]*gpt4all.Model gpt2models map[string]*gpt2.GPT2 gptstablelmmodels map[string]*gpt2.StableLM dollymodels map[string]*gpt2.Dolly @@ -42,7 +42,7 @@ func NewModelLoader(modelPath string) *ModelLoader { return &ModelLoader{ ModelPath: modelPath, gpt2models: make(map[string]*gpt2.GPT2), - gptmodels: make(map[string]*gptj.GPTJ), + gptmodels: make(map[string]*gpt4all.Model), gptstablelmmodels: make(map[string]*gpt2.StableLM), dollymodels: make(map[string]*gpt2.Dolly), redpajama: make(map[string]*gpt2.RedPajama), @@ -328,7 +328,7 @@ func (ml *ModelLoader) LoadGPT2Model(modelName string) (*gpt2.GPT2, error) { return model, err } -func (ml *ModelLoader) LoadGPTJModel(modelName string) (*gptj.GPTJ, error) { +func (ml *ModelLoader) LoadGPT4AllModel(modelName string, opts ...gpt4all.ModelOption) (*gpt4all.Model, error) { ml.mu.Lock() defer ml.mu.Unlock() 
@@ -346,7 +346,7 @@ func (ml *ModelLoader) LoadGPTJModel(modelName string) (*gptj.GPTJ, error) { modelFile := filepath.Join(ml.ModelPath, modelName) log.Debug().Msgf("Loading model in memory from file: %s", modelFile) - model, err := gptj.New(modelFile) + model, err := gpt4all.New(modelFile, opts...) if err != nil { return nil, err } @@ -470,8 +470,12 @@ func (ml *ModelLoader) BackendLoader(backendString string, modelFile string, lla return ml.LoadRedPajama(modelFile) case "gpt2": return ml.LoadGPT2Model(modelFile) - case "gptj": - return ml.LoadGPTJModel(modelFile) + case "gpt4all-llama": + return ml.LoadGPT4AllModel(modelFile, gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.LLaMAType)) + case "gpt4all-mpt": + return ml.LoadGPT4AllModel(modelFile, gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.MPTType)) + case "gpt4all-j": + return ml.LoadGPT4AllModel(modelFile, gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.GPTJType)) case "bert-embeddings": return ml.LoadBERT(modelFile) case "rwkv": @@ -514,7 +518,7 @@ func (ml *ModelLoader) GreedyLoader(modelFile string, llamaOpts []llama.ModelOpt err = multierror.Append(err, modelerr) } - model, modelerr = ml.LoadGPTJModel(modelFile) + model, modelerr = ml.LoadGPT4AllModel(modelFile, gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.GPTJType)) if modelerr == nil { updateModels(model) return model, nil @@ -522,7 +526,7 @@ func (ml *ModelLoader) GreedyLoader(modelFile string, llamaOpts []llama.ModelOpt err = multierror.Append(err, modelerr) } - model, modelerr = ml.LoadGPT2Model(modelFile) + model, modelerr = ml.LoadGPT4AllModel(modelFile, gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.LLaMAType)) if modelerr == nil { updateModels(model) return model, nil @@ -530,7 +534,7 @@ func (ml *ModelLoader) GreedyLoader(modelFile string, llamaOpts []llama.ModelOpt err = multierror.Append(err, modelerr) } - model, modelerr = ml.LoadStableLMModel(modelFile) + model, 
modelerr = ml.LoadGPT4AllModel(modelFile, gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.MPTType)) if modelerr == nil { updateModels(model) return model, nil @@ -538,7 +542,7 @@ func (ml *ModelLoader) GreedyLoader(modelFile string, llamaOpts []llama.ModelOpt err = multierror.Append(err, modelerr) } - model, modelerr = ml.LoadDollyModel(modelFile) + model, modelerr = ml.LoadGPT2Model(modelFile) if modelerr == nil { updateModels(model) return model, nil @@ -546,7 +550,7 @@ func (ml *ModelLoader) GreedyLoader(modelFile string, llamaOpts []llama.ModelOpt err = multierror.Append(err, modelerr) } - model, modelerr = ml.LoadRedPajama(modelFile) + model, modelerr = ml.LoadStableLMModel(modelFile) if modelerr == nil { updateModels(model) return model, nil @@ -554,13 +558,29 @@ func (ml *ModelLoader) GreedyLoader(modelFile string, llamaOpts []llama.ModelOpt err = multierror.Append(err, modelerr) } - model, modelerr = ml.LoadBloomz(modelFile) + model, modelerr = ml.LoadDollyModel(modelFile) + if modelerr == nil { + updateModels(model) + return model, nil + } else { + err = multierror.Append(err, modelerr) + } + + model, modelerr = ml.LoadRedPajama(modelFile) if modelerr == nil { updateModels(model) return model, nil } else { err = multierror.Append(err, modelerr) } + // Do not autoload bloomz + //model, modelerr = ml.LoadBloomz(modelFile) + //if modelerr == nil { + // updateModels(model) + // return model, nil + //} else { + // err = multierror.Append(err, modelerr) + //} model, modelerr = ml.LoadRWKV(modelFile, modelFile+tokenizerSuffix, threads) if modelerr == nil {