From f8ee20991c832c8eeea0e7cd244b37b9d5692d07 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Wed, 10 May 2023 15:20:21 +0200
Subject: [PATCH] feat: add bert.cpp embeddings (#222)

---
 .github/workflows/bump_deps.yaml              |  3 ++
 Makefile                                      | 25 ++++++++--
 README.md                                     | 13 +++--
 api/api_test.go                               |  2 +-
 api/prediction.go                             |  9 ++++
 examples/query_data/README.md                 |  9 ++--
 examples/query_data/models/embeddings.yaml    | 16 +-----
 examples/query_data/models/gpt-3.5-turbo.yaml |  5 +-
 examples/query_data/models/wizardlm.tmpl      |  3 --
 examples/query_data/query.py                  |  2 +-
 examples/query_data/store.py                  |  6 +--
 go.mod                                        |  1 +
 go.sum                                        | 13 +----
 pkg/model/loader.go                           | 50 ++++++++++++++++++-
 14 files changed, 104 insertions(+), 53 deletions(-)
 delete mode 100644 examples/query_data/models/wizardlm.tmpl

diff --git a/.github/workflows/bump_deps.yaml b/.github/workflows/bump_deps.yaml
index aaa5671..c889fab 100644
--- a/.github/workflows/bump_deps.yaml
+++ b/.github/workflows/bump_deps.yaml
@@ -24,6 +24,9 @@ jobs:
       - repository: "ggerganov/whisper.cpp"
         variable: "WHISPER_CPP_VERSION"
         branch: "master"
+      - repository: "go-skynet/go-bert.cpp"
+        variable: "BERT_VERSION"
+        branch: "master"
   runs-on: ubuntu-latest
   steps:
   - uses: actions/checkout@v3
diff --git a/Makefile b/Makefile
index bffb161..03ece68 100644
--- a/Makefile
+++ b/Makefile
@@ -9,6 +9,7 @@ GOGPT2_VERSION?=245a5bfe6708ab80dc5c733dcdbfbe3cfd2acdaa
 RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
 RWKV_VERSION?=af62fcc432be2847acb6e0688b2c2491d6588d58
 WHISPER_CPP_VERSION?=bf2449dfae35a46b2cd92ab22661ce81a48d4993
+BERT_VERSION?=ec771ec715576ac050263bb7bb74bfd616a5ba13
 
 
 GREEN  := $(shell tput -Txterm setaf 2)
@@ -17,8 +18,8 @@ WHITE  := $(shell tput -Txterm setaf 7)
 CYAN   := $(shell tput -Txterm setaf 6)
 RESET  := $(shell tput -Txterm sgr0)
 
-C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp
-LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp
+C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert
+LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert
 
 # Use this if you want to set the default behavior
 ifndef BUILD_TYPE
@@ -49,6 +50,14 @@ go-gpt4all-j:
 	@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/void replace/void json_gptj_replace/g' {} +
 	@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/::replace/::json_gptj_replace/g' {} +
 
+## BERT embeddings
+go-bert:
+	git clone --recurse-submodules https://github.com/go-skynet/go-bert.cpp go-bert
+	cd go-bert && git checkout -b build $(BERT_VERSION) && git submodule update --init --recursive --depth 1
+	@find ./go-bert -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_bert_/g' {} +
+	@find ./go-bert -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_bert_/g' {} +
+	@find ./go-bert -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_bert_/g' {} +
+
 ## RWKV
 go-rwkv:
 	git clone --recurse-submodules $(RWKV_REPO) go-rwkv
@@ -60,6 +69,9 @@ go-rwkv:
 
 go-rwkv/librwkv.a: go-rwkv
 	cd go-rwkv && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a .. && cp ggml/src/libggml.a ..
 
+go-bert/libgobert.a: go-bert
+	$(MAKE) -C go-bert libgobert.a
+
 go-gpt4all-j/libgptj.a: go-gpt4all-j
 	$(MAKE) -C go-gpt4all-j $(GENERIC_PREFIX)libgptj.a
 
@@ -98,8 +110,9 @@ replace:
 	$(GOCMD) mod edit -replace github.com/go-skynet/go-gpt2.cpp=$(shell pwd)/go-gpt2
 	$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(shell pwd)/go-rwkv
 	$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(shell pwd)/whisper.cpp
+	$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(shell pwd)/go-bert
 
-prepare-sources: go-llama go-gpt2 go-gpt4all-j go-rwkv whisper.cpp
+prepare-sources: go-llama go-gpt2 go-gpt4all-j go-rwkv whisper.cpp go-bert
 	$(GOCMD) mod download
 
 ## GENERIC
@@ -109,15 +122,17 @@ rebuild: ## Rebuilds the project
 	$(MAKE) -C go-gpt2 clean
 	$(MAKE) -C go-rwkv clean
 	$(MAKE) -C whisper.cpp clean
+	$(MAKE) -C go-bert clean
 	$(MAKE) build
 
-prepare: prepare-sources go-llama/libbinding.a go-gpt4all-j/libgptj.a go-gpt2/libgpt2.a go-rwkv/librwkv.a whisper.cpp/libwhisper.a replace ## Prepares for building
+prepare: prepare-sources go-llama/libbinding.a go-gpt4all-j/libgptj.a go-bert/libgobert.a go-gpt2/libgpt2.a go-rwkv/librwkv.a whisper.cpp/libwhisper.a replace ## Prepares for building
 
 clean: ## Remove build related file
 	rm -fr ./go-llama
 	rm -rf ./go-gpt4all-j
 	rm -rf ./go-gpt2
 	rm -rf ./go-rwkv
+	rm -rf ./go-bert
 	rm -rf $(BINARY_NAME)
 
 ## Build:
@@ -141,7 +156,7 @@ test-models/testmodel:
 
 test: prepare test-models/testmodel
 	cp tests/fixtures/* test-models
-	@C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo -v -r ./...
+	@C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo -v -r ./api
 
 ## Help:
 help: ## Show this help.
diff --git a/README.md b/README.md
index 87eb2f1..c77d789 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@
 
 [![](https://dcbadge.vercel.app/api/server/uJAeKSAGDy?style=flat-square&theme=default-inverted)](https://discord.gg/uJAeKSAGDy)
 
-**LocalAI** is a drop-in replacement REST API compatible with OpenAI for local CPU inferencing. It allows to run models locally or on-prem with consumer grade hardware. It is based on [llama.cpp](https://github.com/ggerganov/llama.cpp), [gpt4all](https://github.com/nomic-ai/gpt4all), [rwkv.cpp](https://github.com/saharNooby/rwkv.cpp) and [ggml](https://github.com/ggerganov/ggml), including support GPT4ALL-J which is licensed under Apache 2.0.
+**LocalAI** is a drop-in replacement REST API compatible with OpenAI for local CPU inferencing. It allows you to run models locally or on-prem with consumer-grade hardware, and supports multiple model families. It also supports GPT4ALL-J, which is licensed under Apache 2.0.
 
 - OpenAI compatible API
 - Supports multiple models
@@ -19,10 +19,14 @@
 
 LocalAI is a community-driven project, focused on making the AI accessible to anyone. Any contribution, feedback and PR is welcome! It was initially created by [mudler](https://github.com/mudler/) at the [SpectroCloud OSS Office](https://github.com/spectrocloud).
 
+LocalAI uses C++ bindings to optimize speed. It is based on [llama.cpp](https://github.com/ggerganov/llama.cpp), [gpt4all](https://github.com/nomic-ai/gpt4all), [rwkv.cpp](https://github.com/saharNooby/rwkv.cpp), [ggml](https://github.com/ggerganov/ggml), [whisper.cpp](https://github.com/ggerganov/whisper.cpp) for audio transcriptions, and [bert.cpp](https://github.com/skeskinen/bert.cpp) for embeddings.
+
 See [examples on how to integrate LocalAI](https://github.com/go-skynet/LocalAI/tree/master/examples/).
 
 ## News
 
+- 10-05-2023: Added support for fast and accurate embeddings with `bert.cpp` ( https://github.com/go-skynet/LocalAI/pull/222 )
+- 09-05-2023: Added experimental support for transcriptions endpoint ( https://github.com/go-skynet/LocalAI/pull/211 )
 - 08-05-2023: Support for embeddings with models using the `llama.cpp` backend ( https://github.com/go-skynet/LocalAI/pull/207 )
 - 02-05-2023: Support for `rwkv.cpp` models ( https://github.com/go-skynet/LocalAI/pull/158 ) and for `/edits` endpoint
 - 01-05-2023: Support for SSE stream of tokens in `llama.cpp` backends ( https://github.com/go-skynet/LocalAI/pull/152 )
@@ -534,18 +538,18 @@ curl http://localhost:8080/v1/models
-The embedding endpoint is experimental and enabled only if the model is configured with `emebddings: true` in its `yaml` file, for example:
+The embedding endpoint is experimental and enabled only if the model is configured with `embeddings: true` in its `yaml` file, for example:
 
 ```yaml
 name: text-embedding-ada-002
 parameters:
-  model: wizardLM-7B.ggml.q5_1.bin
+  model: bert
 embeddings: true
 ```
 
 There is an example available [here](https://github.com/go-skynet/LocalAI/tree/master/examples/query_data/).
 
-Note: embeddings is supported only with `llama.cpp` compatible models. (doesn't work with gpt4-all-j, yet).
+Note: embeddings are supported only with `llama.cpp`-compatible models and `bert` models. `bert` is more performant and available independently of the LLM model.
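[Editor's note: a quick way to smoke-test the configuration above is to POST to the OpenAI-compatible `/v1/embeddings` endpoint. The following is a minimal sketch, assuming LocalAI is reachable on `localhost:8080` and the `text-embedding-ada-002` model from the example `yaml` is in the models path; the request and response structs are trimmed to only the fields used here, not the full OpenAI schema.]

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

// Minimal request/response shapes for the OpenAI-compatible
// /v1/embeddings endpoint, reduced to the fields used below.
type embeddingRequest struct {
	Model string `json:"model"`
	Input string `json:"input"`
}

type embeddingResponse struct {
	Data []struct {
		Embedding []float32 `json:"embedding"`
	} `json:"data"`
}

func main() {
	payload, err := json.Marshal(embeddingRequest{
		Model: "text-embedding-ada-002",
		Input: "LocalAI computes embeddings on the CPU",
	})
	if err != nil {
		panic(err)
	}

	// POST the request to the local LocalAI instance.
	resp, err := http.Post("http://localhost:8080/v1/embeddings", "application/json", bytes.NewReader(payload))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var out embeddingResponse
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		panic(err)
	}
	fmt.Printf("got a %d-dimensional embedding\n", len(out.Data[0].Embedding))
}
```

[The same request works from any OpenAI client library by pointing its base URL at the LocalAI instance.]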
@@ -667,6 +671,7 @@ MIT
 - [go-skynet/go-llama.cpp](https://github.com/go-skynet/go-llama.cpp)
 - [go-skynet/go-gpt4all-j.cpp](https://github.com/go-skynet/go-gpt4all-j.cpp)
 - [go-skynet/go-gpt2.cpp](https://github.com/go-skynet/go-gpt2.cpp)
+- [go-skynet/go-bert.cpp](https://github.com/go-skynet/go-bert.cpp)
 - [donomii/go-rwkv.cpp](https://github.com/donomii/go-rwkv.cpp)
 
 ## Acknowledgements
diff --git a/api/api_test.go b/api/api_test.go
index 9682a21..0a58191 100644
--- a/api/api_test.go
+++ b/api/api_test.go
@@ -79,7 +79,7 @@ var _ = Describe("API test", func() {
 			It("returns errors", func() {
 				_, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: "abcdedfghikl"})
 				Expect(err).To(HaveOccurred())
-				Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: could not load model - all backends returned error: 5 errors occurred:"))
+				Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: could not load model - all backends returned error: 6 errors occurred:"))
 			})
 		})
diff --git a/api/prediction.go b/api/prediction.go
index 95d111f..f6c6192 100644
--- a/api/prediction.go
+++ b/api/prediction.go
@@ -8,6 +8,7 @@ import (
 
 	"github.com/donomii/go-rwkv.cpp"
 	model "github.com/go-skynet/LocalAI/pkg/model"
+	bert "github.com/go-skynet/go-bert.cpp"
 	gpt2 "github.com/go-skynet/go-gpt2.cpp"
 	gptj "github.com/go-skynet/go-gpt4all-j.cpp"
 	llama "github.com/go-skynet/go-llama.cpp"
@@ -62,6 +63,14 @@ func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c Config)
 			}
 			return model.Embeddings(s, predictOptions...)
 		}
+	// bert embeddings
+	case *bert.Bert:
+		fn = func() ([]float32, error) {
+			if len(tokens) > 0 {
+				return nil, fmt.Errorf("embeddings endpoint for this model supports only string")
+			}
+			return model.Embeddings(s, bert.SetThreads(c.Threads))
+		}
 	default:
 		fn = func() ([]float32, error) {
 			return nil, fmt.Errorf("embeddings not supported by the backend")
diff --git a/examples/query_data/README.md b/examples/query_data/README.md
index 9185709..f7a4e1f 100644
--- a/examples/query_data/README.md
+++ b/examples/query_data/README.md
@@ -12,11 +12,7 @@ Summary of the steps:
 
 ## Requirements
 
-For this in order to work, you will need LocalAI and a model compatible with the `llama.cpp` backend. This is will not work with gpt4all, however you can mix models (use a llama.cpp one to build the index database, and gpt4all to query it).
-
-The example uses `WizardLM` for both embeddings and Q&A. Edit the config files in `models/` accordingly to specify the model you use (change `HERE` in the configuration files).
-
-You will also need a training data set. Copy that over `data`.
+You will need a training data set. Copy it into the `data` directory.
 
 ## Setup
 
@@ -28,7 +24,8 @@ git clone https://github.com/go-skynet/LocalAI
 
 cd LocalAI/examples/query_data
 
-# Copy your models, edit config files accordingly
+wget https://huggingface.co/skeskinen/ggml/resolve/main/all-MiniLM-L6-v2/ggml-model-q4_0.bin -O models/bert
+wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
 
 # start with docker-compose
 docker-compose up -d --build
diff --git a/examples/query_data/models/embeddings.yaml b/examples/query_data/models/embeddings.yaml
index 2173975..b90ca75 100644
--- a/examples/query_data/models/embeddings.yaml
+++ b/examples/query_data/models/embeddings.yaml
@@ -1,18 +1,6 @@
 name: text-embedding-ada-002
 parameters:
-  model: HERE
-  top_k: 80
-  temperature: 0.2
-  top_p: 0.7
-context_size: 1024
+  model: bert
 threads: 14
-stopwords:
-- "HUMAN:"
-- "GPT:"
-roles:
-  user: " "
-  system: " "
+backend: bert-embeddings
 embeddings: true
-template:
-  completion: completion
-  chat: gpt4all
diff --git a/examples/query_data/models/gpt-3.5-turbo.yaml b/examples/query_data/models/gpt-3.5-turbo.yaml
index 9cdb4a2..6df1dbf 100644
--- a/examples/query_data/models/gpt-3.5-turbo.yaml
+++ b/examples/query_data/models/gpt-3.5-turbo.yaml
@@ -1,12 +1,11 @@
 name: gpt-3.5-turbo
 parameters:
-  model: HERE
+  model: ggml-gpt4all-j
   top_k: 80
   temperature: 0.2
   top_p: 0.7
 context_size: 1024
 threads: 14
-embeddings: true
 stopwords:
 - "HUMAN:"
 - "GPT:"
@@ -15,4 +14,4 @@ roles:
   system: " "
 template:
   completion: completion
-  chat: wizardlm
+  chat: gpt4all
\ No newline at end of file
diff --git a/examples/query_data/models/wizardlm.tmpl b/examples/query_data/models/wizardlm.tmpl
deleted file mode 100644
index e7b1985..0000000
--- a/examples/query_data/models/wizardlm.tmpl
+++ /dev/null
@@ -1,3 +0,0 @@
-{{.Input}}
-
-### Response:
\ No newline at end of file
diff --git a/examples/query_data/query.py b/examples/query_data/query.py
index 0f1408e..e3dcde2 100644
--- a/examples/query_data/query.py
+++ b/examples/query_data/query.py
@@ -13,7 +13,7 @@ base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
 llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_base=base_path))
 
 # Configure prompt parameters and initialise helper
-max_input_size = 1024
+max_input_size = 500
 num_output = 256
 max_chunk_overlap = 20
 
diff --git a/examples/query_data/store.py b/examples/query_data/store.py
index e029694..0d628c8 100644
--- a/examples/query_data/store.py
+++ b/examples/query_data/store.py
@@ -13,15 +13,15 @@ base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
 llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_base=base_path))
 
 # Configure prompt parameters and initialise helper
-max_input_size = 512
-num_output = 512
+max_input_size = 400
+num_output = 400
 max_chunk_overlap = 30
 
 prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)
 
 # Load documents from the 'data' directory
 documents = SimpleDirectoryReader('data').load_data()
-service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper, chunk_size_limit = 512)
+service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper, chunk_size_limit = 400)
 index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context)
 
 index.storage_context.persist(persist_dir="./storage")
diff --git a/go.mod b/go.mod
index c25c9e3..59c9297 100644
--- a/go.mod
+++ b/go.mod
@@ -6,6 +6,7 @@ require (
 	github.com/donomii/go-rwkv.cpp v0.0.0-20230503112711-af62fcc432be
 	github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230508180809-bf2449dfae35
 	github.com/go-audio/wav v1.1.0
+	github.com/go-skynet/go-bert.cpp v0.0.0-20230510101404-7bb183b147ea
 	github.com/go-skynet/go-gpt2.cpp v0.0.0-20230422085954-245a5bfe6708
 	github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c
 	github.com/go-skynet/go-llama.cpp v0.0.0-20230509080828-f4d26f43f1d3
diff --git a/go.sum b/go.sum
index 400d9ff..538fbeb 100644
--- a/go.sum
+++ b/go.sum
@@ -16,8 +16,6 @@ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ3
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
-github.com/donomii/go-rwkv.cpp v0.0.0-20230503112711-af62fcc432be h1:3Hic97PY6hcw/SY44RuR7kyONkxd744RFeRrqckzwNQ=
-github.com/donomii/go-rwkv.cpp v0.0.0-20230503112711-af62fcc432be/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM=
 github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230508180809-bf2449dfae35 h1:sMg/SgnMPS/HNUO/2kGm72vl8R9TmNIwgLFr2TNwR3g=
 github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230508180809-bf2449dfae35/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo=
 github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4=
@@ -38,14 +36,6 @@ github.com/go-openapi/spec v0.20.4/go.mod h1:faYFR1CvsJZ0mNsmsphTMSoRrNV3TEDoAM7
 github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk=
 github.com/go-openapi/swag v0.19.15 h1:D2NRCBzS9/pEY3gP9Nl8aDqGUcPFrwG2p+CNFrLyrCM=
 github.com/go-openapi/swag v0.19.15/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ=
-github.com/go-skynet/go-gpt2.cpp v0.0.0-20230422085954-245a5bfe6708 h1:cfOi4TWvQ6JsAm9Q1A8I8j9YfNy10bmIfwOiyGyU5wQ=
-github.com/go-skynet/go-gpt2.cpp v0.0.0-20230422085954-245a5bfe6708/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM=
-github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c h1:48I7jpLNGiQeBmF0SFVVbREh8vlG0zN13v9LH5ctXis=
-github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c/go.mod h1:5VZ9XbcINI0XcHhkcX8GPK8TplFGAzu1Hrg4tNiMCtI=
-github.com/go-skynet/go-llama.cpp v0.0.0-20230508165257-c03e8adbc45c h1:JoW2+LKrSemoV32QRwrEC5f53erym96NCsUSM3wSVbM=
-github.com/go-skynet/go-llama.cpp v0.0.0-20230508165257-c03e8adbc45c/go.mod h1:DLfsPD7tYYnpksERH83HSf7qVNW3FIwmz7/zfYO0/6I=
-github.com/go-skynet/go-llama.cpp v0.0.0-20230509080828-f4d26f43f1d3 h1:YNi1oetK5kGJoUgT3/r/Wj3XPOICWf3nwHsz5v89iSs=
-github.com/go-skynet/go-llama.cpp v0.0.0-20230509080828-f4d26f43f1d3/go.mod h1:DLfsPD7tYYnpksERH83HSf7qVNW3FIwmz7/zfYO0/6I=
 github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
 github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
 github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
@@ -197,8 +187,9 @@ golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8T
 google.golang.org/protobuf v1.28.0 h1:w43yiav+6bVFTBQFZX0r7ipe9JQ1QsbMgHwbBziscLw=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
-gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f h1:BLraFXnmrev5lT+xlilqcH8XK9/i0At2xKjWk4p6zsU=
 gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20200902074654-038fdea0a05b h1:QRR6H1YWRnHb4Y/HeNFCTJLFVxaq6wH4YuVdsUOr75U=
+gopkg.in/check.v1 v1.0.0-20200902074654-038fdea0a05b/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
 gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
 gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
diff --git a/pkg/model/loader.go b/pkg/model/loader.go
index 167d5d7..8df4ec9 100644
--- a/pkg/model/loader.go
+++ b/pkg/model/loader.go
@@ -14,6 +14,7 @@ import (
 	"github.com/rs/zerolog/log"
 
 	rwkv "github.com/donomii/go-rwkv.cpp"
+	bert "github.com/go-skynet/go-bert.cpp"
 	gpt2 "github.com/go-skynet/go-gpt2.cpp"
 	gptj "github.com/go-skynet/go-gpt4all-j.cpp"
 	llama "github.com/go-skynet/go-llama.cpp"
@@ -22,13 +23,15 @@ import (
 type ModelLoader struct {
 	ModelPath string
 	mu        sync.Mutex
-
+	// TODO: this needs generics
 	models            map[string]*llama.LLama
 	gptmodels         map[string]*gptj.GPTJ
 	gpt2models        map[string]*gpt2.GPT2
 	gptstablelmmodels map[string]*gpt2.StableLM
 	rwkv              map[string]*rwkv.RwkvState
-	promptsTemplates  map[string]*template.Template
+	bert              map[string]*bert.Bert
+
+	promptsTemplates map[string]*template.Template
 }
 
 func NewModelLoader(modelPath string) *ModelLoader {
@@ -39,6 +42,7 @@ func NewModelLoader(modelPath string) *ModelLoader {
 		gptstablelmmodels: make(map[string]*gpt2.StableLM),
 		models:            make(map[string]*llama.LLama),
 		rwkv:              make(map[string]*rwkv.RwkvState),
+		bert:              make(map[string]*bert.Bert),
 		promptsTemplates:  make(map[string]*template.Template),
 	}
 }
@@ -156,6 +160,38 @@ func (ml *ModelLoader) LoadStableLMModel(modelName string) (*gpt2.StableLM, erro
 	return model, err
 }
 
+func (ml *ModelLoader) LoadBERT(modelName string) (*bert.Bert, error) {
+	ml.mu.Lock()
+	defer ml.mu.Unlock()
+
+	// Check that the model file exists in the model path
+	if !ml.ExistsInModelPath(modelName) {
+		return nil, fmt.Errorf("model does not exist")
+	}
+
+	if m, ok := ml.bert[modelName]; ok {
+		log.Debug().Msgf("Model already loaded in memory: %s", modelName)
+		return m, nil
+	}
+
+	// Load the model and keep it in memory for later use
+	modelFile := filepath.Join(ml.ModelPath, modelName)
+	log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
+
+	model, err := bert.New(modelFile)
+	if err != nil {
+		return nil, err
+	}
+
+	// If there is a prompt template, load it
+	if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
+		return nil, err
+	}
+
+	ml.bert[modelName] = model
+	return model, err
+}
+
 func (ml *ModelLoader) LoadGPT2Model(modelName string) (*gpt2.GPT2, error) {
 	ml.mu.Lock()
 	defer ml.mu.Unlock()
@@ -299,6 +335,8 @@ func (ml *ModelLoader) BackendLoader(backendString string, modelFile string, lla
 		return ml.LoadGPT2Model(modelFile)
 	case "gptj":
 		return ml.LoadGPTJModel(modelFile)
+	case "bert-embeddings":
+		return ml.LoadBERT(modelFile)
 	case "rwkv":
 		return ml.LoadRWKV(modelFile, modelFile+tokenizerSuffix, threads)
 	default:
@@ -361,5 +399,13 @@ func (ml *ModelLoader) GreedyLoader(modelFile string, llamaOpts []llama.ModelOpt
 		err = multierror.Append(err, modelerr)
 	}
 
+	model, modelerr = ml.LoadBERT(modelFile)
+	if modelerr == nil {
+		updateModels(model)
+		return model, nil
+	} else {
+		err = multierror.Append(err, modelerr)
+	}
+
 	return nil, fmt.Errorf("could not load model - all backends returned error: %s", err.Error())
 }