diff --git a/.github/workflows/bump_deps.yaml b/.github/workflows/bump_deps.yaml index 51fd12c..b464b0d 100644 --- a/.github/workflows/bump_deps.yaml +++ b/.github/workflows/bump_deps.yaml @@ -13,7 +13,7 @@ jobs: variable: "GOLLAMA_VERSION" branch: "master" - repository: "go-skynet/go-llama.cpp" - variable: "GOLLAMA_MASTER_VERSION" + variable: "GOLLAMA_GRAMMAR_VERSION" branch: "master" - repository: "go-skynet/go-ggml-transformers.cpp" variable: "GOGGMLTRANSFORMERS_VERSION" diff --git a/Makefile b/Makefile index 1e07437..5813ba2 100644 --- a/Makefile +++ b/Makefile @@ -5,16 +5,16 @@ BINARY_NAME=local-ai # llama.cpp versions # Temporarly pinned to https://github.com/go-skynet/go-llama.cpp/pull/124 -GOLLAMA_VERSION?=cb8d7cd4cb95725a04504a9e3a26dd72a12b69ac +GOLLAMA_VERSION?=c90272fdb693fc8d6faf20e1e9a5481c453318e8 -GOLLAMA_MASTER_VERSION?=c90272fdb693fc8d6faf20e1e9a5481c453318e8 +GOLLAMA_GRAMMAR_VERSION?=cb8d7cd4cb95725a04504a9e3a26dd72a12b69ac # Temporary set a specific version of llama.cpp # containing: https://github.com/ggerganov/llama.cpp/pull/1773 and # rebased on top of master. # This pin can be dropped when the PR above is merged, and go-llama has merged changes as well # Set empty to use the version pinned by go-llama -LLAMA_CPP_REPO?=https://github.com/mudler/llama.cpp -LLAMA_CPP_VERSION?=48ce8722a05a018681634af801fd0fd45b3a87cc +LLAMA_CPP_GRAMMAR_REPO?=https://github.com/mudler/llama.cpp +LLAMA_CPP_GRAMMAR_VERSION?=48ce8722a05a018681634af801fd0fd45b3a87cc # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all @@ -201,29 +201,29 @@ whisper.cpp/libwhisper.a: whisper.cpp go-llama: git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama cd go-llama && git checkout -b build $(GOLLAMA_VERSION) && git submodule update --init --recursive --depth 1 -ifneq ($(LLAMA_CPP_REPO),) - cd go-llama && rm -rf llama.cpp && git clone $(LLAMA_CPP_REPO) llama.cpp && cd llama.cpp && git checkout -b build $(LLAMA_CPP_VERSION) && git submodule update --init --recursive --depth 1 -endif -go-llama-master: - git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama-master - cd go-llama-master && git checkout -b build $(GOLLAMA_MASTER_VERSION) && git submodule update --init --recursive --depth 1 +go-llama-grammar: + git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama-grammar + cd go-llama-grammar && git checkout -b build $(GOLLAMA_GRAMMAR_VERSION) && git submodule update --init --recursive --depth 1 +ifneq ($(LLAMA_CPP_GRAMMAR_REPO),) + cd go-llama-grammar && rm -rf llama.cpp && git clone $(LLAMA_CPP_GRAMMAR_REPO) llama.cpp && cd llama.cpp && git checkout -b build $(LLAMA_CPP_GRAMMAR_VERSION) && git submodule update --init --recursive --depth 1 +endif go-llama/libbinding.a: go-llama $(MAKE) -C go-llama BUILD_TYPE=$(BUILD_TYPE) libbinding.a -go-llama-master/libbinding.a: go-llama-master - $(MAKE) -C go-llama-master BUILD_TYPE=$(BUILD_TYPE) libbinding.a +go-llama-grammar/libbinding.a: go-llama-grammar + $(MAKE) -C go-llama-grammar BUILD_TYPE=$(BUILD_TYPE) libbinding.a go-piper/libpiper_binding.a: $(MAKE) -C go-piper libpiper_binding.a example/main -get-sources: go-llama go-ggllm go-llama-master go-ggml-transformers gpt4all go-piper go-rwkv whisper.cpp go-bert bloomz go-stable-diffusion +get-sources: go-llama go-ggllm go-llama-grammar go-ggml-transformers gpt4all go-piper go-rwkv whisper.cpp go-bert bloomz go-stable-diffusion touch $@ replace: $(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama - $(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp-master=$(shell pwd)/go-llama-master + $(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp-grammar=$(shell pwd)/go-llama-grammar $(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(shell pwd)/gpt4all/gpt4all-bindings/golang $(GOCMD) mod edit -replace github.com/go-skynet/go-ggml-transformers.cpp=$(shell pwd)/go-ggml-transformers $(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(shell pwd)/go-rwkv @@ -241,7 +241,7 @@ prepare-sources: get-sources replace rebuild: ## Rebuilds the project $(GOCMD) clean -cache $(MAKE) -C go-llama clean - $(MAKE) -C go-llama-master clean + $(MAKE) -C go-llama-grammar clean $(MAKE) -C gpt4all/gpt4all-bindings/golang/ clean $(MAKE) -C go-ggml-transformers clean $(MAKE) -C go-rwkv clean @@ -371,9 +371,9 @@ backend-assets/grpc/llama: backend-assets/grpc go-llama/libbinding.a CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-llama LIBRARY_PATH=$(shell pwd)/go-llama \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama ./cmd/grpc/llama/ -backend-assets/grpc/llama-master: backend-assets/grpc go-llama-master/libbinding.a - CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-llama-master LIBRARY_PATH=$(shell pwd)/go-llama-master \ - $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-master ./cmd/grpc/llama-master/ +backend-assets/grpc/llama-grammar: backend-assets/grpc go-llama-grammar/libbinding.a + CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-llama-grammar LIBRARY_PATH=$(shell pwd)/go-llama-grammar \ + $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-grammar ./cmd/grpc/llama-grammar/ backend-assets/grpc/gpt4all: backend-assets/grpc backend-assets/gpt4all gpt4all/gpt4all-bindings/golang/libgpt4all.a CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(shell pwd)/gpt4all/gpt4all-bindings/golang/ \ @@ -438,4 +438,4 @@ backend-assets/grpc/whisper: backend-assets/grpc whisper.cpp/libwhisper.a CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/whisper.cpp LIBRARY_PATH=$(shell pwd)/whisper.cpp \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./cmd/grpc/whisper/ -grpcs: prepare backend-assets/grpc/langchain-huggingface backend-assets/grpc/llama-master backend-assets/grpc/falcon-ggml backend-assets/grpc/bert-embeddings backend-assets/grpc/falcon backend-assets/grpc/bloomz backend-assets/grpc/llama backend-assets/grpc/gpt4all backend-assets/grpc/dolly backend-assets/grpc/gpt2 backend-assets/grpc/gptj backend-assets/grpc/gptneox backend-assets/grpc/mpt backend-assets/grpc/replit backend-assets/grpc/starcoder backend-assets/grpc/rwkv backend-assets/grpc/whisper $(OPTIONAL_GRPC) \ No newline at end of file +grpcs: prepare backend-assets/grpc/langchain-huggingface backend-assets/grpc/llama-grammar backend-assets/grpc/falcon-ggml backend-assets/grpc/bert-embeddings backend-assets/grpc/falcon backend-assets/grpc/bloomz backend-assets/grpc/llama backend-assets/grpc/gpt4all backend-assets/grpc/dolly backend-assets/grpc/gpt2 backend-assets/grpc/gptj backend-assets/grpc/gptneox backend-assets/grpc/mpt backend-assets/grpc/replit backend-assets/grpc/starcoder backend-assets/grpc/rwkv backend-assets/grpc/whisper $(OPTIONAL_GRPC) \ No newline at end of file diff --git a/api/api_test.go b/api/api_test.go index 06e978b..6970a8f 100644 --- a/api/api_test.go +++ b/api/api_test.go @@ -291,7 +291,7 @@ var _ = Describe("API test", func() { response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{ URL: "github:go-skynet/model-gallery/openllama_3b.yaml", Name: "openllama_3b", - Overrides: map[string]string{}, + Overrides: map[string]string{"backend": "llama-grammar"}, }) Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response)) diff --git a/cmd/grpc/llama-master/main.go b/cmd/grpc/llama-grammar/main.go similarity index 85% rename from cmd/grpc/llama-master/main.go rename to cmd/grpc/llama-grammar/main.go index 51cd00f..4653eb2 100644 --- a/cmd/grpc/llama-master/main.go +++ b/cmd/grpc/llama-grammar/main.go @@ -7,7 +7,7 @@ package main import ( "flag" - llama "github.com/go-skynet/LocalAI/pkg/grpc/llm/llama-master" + llama "github.com/go-skynet/LocalAI/pkg/grpc/llm/llama-grammar" grpc "github.com/go-skynet/LocalAI/pkg/grpc" ) diff --git a/go.mod b/go.mod index f56ce00..d0e5194 100644 --- a/go.mod +++ b/go.mod @@ -39,7 +39,7 @@ require ( require ( github.com/dlclark/regexp2 v1.8.1 // indirect github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 // indirect - github.com/go-skynet/go-llama.cpp-master v0.0.0-20230703203849-ffa57fbc3a12 // indirect + github.com/go-skynet/go-llama.cpp-grammar v0.0.0-20230703203849-ffa57fbc3a12 // indirect github.com/golang/protobuf v1.5.3 // indirect github.com/golang/snappy v0.0.2 // indirect github.com/klauspost/pgzip v1.2.5 // indirect diff --git a/pkg/grpc/llm/llama-master/llama.go b/pkg/grpc/llm/llama-grammar/llama.go similarity index 97% rename from pkg/grpc/llm/llama-master/llama.go rename to pkg/grpc/llm/llama-grammar/llama.go index 43ae25f..515a24c 100644 --- a/pkg/grpc/llm/llama-master/llama.go +++ b/pkg/grpc/llm/llama-grammar/llama.go @@ -7,7 +7,7 @@ import ( "github.com/go-skynet/LocalAI/pkg/grpc/base" pb "github.com/go-skynet/LocalAI/pkg/grpc/proto" - "github.com/go-skynet/go-llama.cpp-master" + "github.com/go-skynet/go-llama.cpp-grammar" ) type LLM struct { @@ -71,6 +71,8 @@ func buildPredictOptions(opts *pb.PredictOptions) []llama.PredictOption { predictOptions = append(predictOptions, llama.EnablePromptCacheRO) } + predictOptions = append(predictOptions, llama.WithGrammar(opts.Grammar)) + // Expected absolute path if opts.PromptCachePath != "" { predictOptions = append(predictOptions, llama.SetPathPromptCache(opts.PromptCachePath)) diff --git a/pkg/grpc/llm/llama/llama.go b/pkg/grpc/llm/llama/llama.go index 82063b7..421eb0c 100644 --- a/pkg/grpc/llm/llama/llama.go +++ b/pkg/grpc/llm/llama/llama.go @@ -71,8 +71,6 @@ func buildPredictOptions(opts *pb.PredictOptions) []llama.PredictOption { predictOptions = append(predictOptions, llama.EnablePromptCacheRO) } - predictOptions = append(predictOptions, llama.WithGrammar(opts.Grammar)) - // Expected absolute path if opts.PromptCachePath != "" { predictOptions = append(predictOptions, llama.SetPathPromptCache(opts.PromptCachePath)) diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go index 9d33a6e..53fc684 100644 --- a/pkg/model/initializers.go +++ b/pkg/model/initializers.go @@ -37,7 +37,7 @@ const ( Gpt4All = "gpt4all" FalconBackend = "falcon" FalconGGMLBackend = "falcon-ggml" - LlamaMasterBackend = "llama-master" + LlamaGrammarBackend = "llama-grammar" BertEmbeddingsBackend = "bert-embeddings" RwkvBackend = "rwkv" @@ -54,7 +54,7 @@ var AutoLoadBackends []string = []string{ FalconBackend, GPTNeoXBackend, BertEmbeddingsBackend, - LlamaMasterBackend, + LlamaGrammarBackend, FalconGGMLBackend, GPTJBackend, Gpt2Backend, @@ -185,7 +185,7 @@ func (ml *ModelLoader) BackendLoader(opts ...Option) (model *grpc.Client, err er backend := strings.ToLower(o.backendString) switch backend { - case LlamaBackend, LlamaMasterBackend, GPTJBackend, DollyBackend, + case LlamaBackend, LlamaGrammarBackend, GPTJBackend, DollyBackend, MPTBackend, Gpt2Backend, FalconBackend, GPTNeoXBackend, ReplitBackend, StarcoderBackend, BloomzBackend, RwkvBackend, LCHuggingFaceBackend, BertEmbeddingsBackend, FalconGGMLBackend, StableDiffusionBackend, WhisperBackend: