From 857d13e8d6008de34e9ef61659609da843f3785e Mon Sep 17 00:00:00 2001 From: mudler Date: Fri, 5 May 2023 15:53:57 +0200 Subject: [PATCH 01/13] debug: wire up go-fiber debugger --- api/api.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/api/api.go b/api/api.go index 33d073b..45e5e45 100644 --- a/api/api.go +++ b/api/api.go @@ -6,6 +6,7 @@ import ( model "github.com/go-skynet/LocalAI/pkg/model" "github.com/gofiber/fiber/v2" "github.com/gofiber/fiber/v2/middleware/cors" + "github.com/gofiber/fiber/v2/middleware/logger" "github.com/gofiber/fiber/v2/middleware/recover" "github.com/rs/zerolog" "github.com/rs/zerolog/log" @@ -40,6 +41,12 @@ func App(configFile string, loader *model.ModelLoader, threads, ctxSize int, f16 }, }) + if debug { + app.Use(logger.New(logger.Config{ + Format: "[${ip}]:${port} ${status} - ${method} ${path}\n", + })) + } + cm := make(ConfigMerger) if err := cm.LoadConfigs(loader.ModelPath); err != nil { log.Error().Msgf("error loading config files: %s", err.Error()) From e73283121bece33d40d080edaa7bd9d3c88d7486 Mon Sep 17 00:00:00 2001 From: mudler Date: Fri, 5 May 2023 15:54:59 +0200 Subject: [PATCH 02/13] feat: support arrays for prompt and input Signed-off-by: mudler --- api/config.go | 2 + api/openai.go | 101 ++++++++++++++++++++++++++++------------------ api/prediction.go | 1 + 3 files changed, 64 insertions(+), 40 deletions(-) diff --git a/api/config.go b/api/config.go index 8e550e1..d5df3de 100644 --- a/api/config.go +++ b/api/config.go @@ -27,6 +27,8 @@ type Config struct { MirostatETA float64 `yaml:"mirostat_eta"` MirostatTAU float64 `yaml:"mirostat_tau"` Mirostat int `yaml:"mirostat"` + + PromptStrings, InputStrings []string } type TemplateConfig struct { diff --git a/api/openai.go b/api/openai.go index fc982f2..3a6b947 100644 --- a/api/openai.go +++ b/api/openai.go @@ -75,8 +75,8 @@ type OpenAIRequest struct { Prompt interface{} `json:"prompt" yaml:"prompt"` // Edit endpoint - Instruction string `json:"instruction" 
yaml:"instruction"` - Input string `json:"input" yaml:"input"` + Instruction string `json:"instruction" yaml:"instruction"` + Input interface{} `json:"input" yaml:"input"` Stop interface{} `json:"stop" yaml:"stop"` @@ -184,6 +184,30 @@ func updateConfig(config *Config, input *OpenAIRequest) { if input.MirostatTAU != 0 { config.MirostatTAU = input.MirostatTAU } + + switch inputs := input.Input.(type) { + case string: + if inputs != "" { + config.InputStrings = append(config.InputStrings, inputs) + } + case []interface{}: + for _, pp := range inputs { + if s, ok := pp.(string); ok { + config.InputStrings = append(config.InputStrings, s) + } + } + } + + switch p := input.Prompt.(type) { + case string: + config.PromptStrings = append(config.PromptStrings, p) + case []interface{}: + for _, pp := range p { + if s, ok := pp.(string); ok { + config.PromptStrings = append(config.PromptStrings, s) + } + } + } } func readConfig(cm ConfigMerger, c *fiber.Ctx, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*Config, *OpenAIRequest, error) { @@ -268,19 +292,6 @@ func completionEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, log.Debug().Msgf("Parameter Config: %+v", config) - predInput := []string{} - - switch p := input.Prompt.(type) { - case string: - predInput = append(predInput, p) - case []interface{}: - for _, pp := range p { - if s, ok := pp.(string); ok { - predInput = append(predInput, s) - } - } - } - templateFile := config.Model if config.TemplateConfig.Completion != "" { @@ -288,7 +299,7 @@ func completionEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, } var result []Choice - for _, i := range predInput { + for _, i := range config.PromptStrings { // A model can have a "file.bin.tmpl" file associated with a prompt template prefix templatedInput, err := loader.TemplatePrefix(templateFile, struct { Input string @@ -331,20 +342,26 @@ func embeddingsEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, } 
log.Debug().Msgf("Parameter Config: %+v", config) + items := []Item{} - // get the model function to call for the result - embedFn, err := ModelEmbedding(input.Input, loader, *config) - if err != nil { - return err - } + for i, s := range config.InputStrings { - embeddings, err := embedFn() - if err != nil { - return err + // get the model function to call for the result + embedFn, err := ModelEmbedding(s, loader, *config) + if err != nil { + return err + } + + embeddings, err := embedFn() + if err != nil { + return err + } + items = append(items, Item{Embedding: embeddings, Index: i, Object: "embedding"}) } + resp := &OpenAIResponse{ Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. - Data: []Item{{Embedding: embeddings, Index: 0, Object: "embedding"}}, + Data: items, Object: "list", } @@ -480,28 +497,32 @@ func editEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, thread log.Debug().Msgf("Parameter Config: %+v", config) - predInput := input.Input templateFile := config.Model if config.TemplateConfig.Edit != "" { templateFile = config.TemplateConfig.Edit } - // A model can have a "file.bin.tmpl" file associated with a prompt template prefix - templatedInput, err := loader.TemplatePrefix(templateFile, struct { - Input string - Instruction string - }{Input: predInput, Instruction: input.Instruction}) - if err == nil { - predInput = templatedInput - log.Debug().Msgf("Template found, input modified to: %s", predInput) - } + var result []Choice + for _, i := range config.InputStrings { + // A model can have a "file.bin.tmpl" file associated with a prompt template prefix + templatedInput, err := loader.TemplatePrefix(templateFile, struct { + Input string + Instruction string + }{Input: i}) + if err == nil { + i = templatedInput + log.Debug().Msgf("Template found, input modified to: %s", i) + } - result, err := ComputeChoices(predInput, input, config, loader, func(s string, c *[]Choice) { - *c = append(*c, Choice{Text: 
s}) - }, nil) - if err != nil { - return err + r, err := ComputeChoices(i, input, config, loader, func(s string, c *[]Choice) { + *c = append(*c, Choice{Text: s}) + }, nil) + if err != nil { + return err + } + + result = append(result, r...) } resp := &OpenAIResponse{ diff --git a/api/prediction.go b/api/prediction.go index 45db078..009641a 100644 --- a/api/prediction.go +++ b/api/prediction.go @@ -28,6 +28,7 @@ func defaultLLamaOpts(c Config) []llama.ModelOption { if c.Embeddings { llamaOpts = append(llamaOpts, llama.EnableEmbeddings) } + return llamaOpts } From e96eadab4083fd46adeb63444b2f42ba4459a271 Mon Sep 17 00:00:00 2001 From: mudler Date: Fri, 5 May 2023 15:55:19 +0200 Subject: [PATCH 03/13] feat: support deprecated embeddings API --- api/api.go | 4 ++++ api/openai.go | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/api/api.go b/api/api.go index 45e5e45..7994ff2 100644 --- a/api/api.go +++ b/api/api.go @@ -80,6 +80,10 @@ func App(configFile string, loader *model.ModelLoader, threads, ctxSize int, f16 app.Post("/v1/embeddings", embeddingsEndpoint(cm, debug, loader, threads, ctxSize, f16)) app.Post("/embeddings", embeddingsEndpoint(cm, debug, loader, threads, ctxSize, f16)) + // /v1/engines/{engine_id}/embeddings + + app.Post("/v1/engines/:model/embeddings", embeddingsEndpoint(cm, debug, loader, threads, ctxSize, f16)) + app.Get("/v1/models", listModels(loader, cm)) app.Get("/models", listModels(loader, cm)) diff --git a/api/openai.go b/api/openai.go index 3a6b947..39212e6 100644 --- a/api/openai.go +++ b/api/openai.go @@ -218,6 +218,11 @@ func readConfig(cm ConfigMerger, c *fiber.Ctx, loader *model.ModelLoader, debug } modelFile := input.Model + + if c.Params("model") != "" { + modelFile = c.Params("model") + } + received, _ := json.Marshal(input) log.Debug().Msgf("Request received: %s", string(received)) From 64c0a7967fa824d12887865148616594c5264e55 Mon Sep 17 00:00:00 2001 From: mudler Date: Fri, 5 May 2023 15:56:02 +0200 Subject: [PATCH 04/13] fix: 
pass prediction options when using the model --- api/prediction.go | 109 ++++++++++++++++++++++++---------------------- 1 file changed, 58 insertions(+), 51 deletions(-) diff --git a/api/prediction.go b/api/prediction.go index 009641a..47229d6 100644 --- a/api/prediction.go +++ b/api/prediction.go @@ -56,7 +56,8 @@ func ModelEmbedding(s string, loader *model.ModelLoader, c Config) (func() ([]fl switch model := inferenceModel.(type) { case *llama.LLama: fn = func() ([]float32, error) { - return model.Embeddings(s) + predictOptions := buildLLamaPredictOptions(c) + return model.Embeddings(s, predictOptions...) } default: fn = func() ([]float32, error) { @@ -81,6 +82,61 @@ func ModelEmbedding(s string, loader *model.ModelLoader, c Config) (func() ([]fl }, nil } +func buildLLamaPredictOptions(c Config) []llama.PredictOption { + // Generate the prediction using the language model + predictOptions := []llama.PredictOption{ + llama.SetTemperature(c.Temperature), + llama.SetTopP(c.TopP), + llama.SetTopK(c.TopK), + llama.SetTokens(c.Maxtokens), + llama.SetThreads(c.Threads), + } + + if c.Mirostat != 0 { + predictOptions = append(predictOptions, llama.SetMirostat(c.Mirostat)) + } + + if c.MirostatETA != 0 { + predictOptions = append(predictOptions, llama.SetMirostatETA(c.MirostatETA)) + } + + if c.MirostatTAU != 0 { + predictOptions = append(predictOptions, llama.SetMirostatTAU(c.MirostatTAU)) + } + + if c.Debug { + predictOptions = append(predictOptions, llama.Debug) + } + + predictOptions = append(predictOptions, llama.SetStopWords(c.StopWords...)) + + if c.RepeatPenalty != 0 { + predictOptions = append(predictOptions, llama.SetPenalty(c.RepeatPenalty)) + } + + if c.Keep != 0 { + predictOptions = append(predictOptions, llama.SetNKeep(c.Keep)) + } + + if c.Batch != 0 { + predictOptions = append(predictOptions, llama.SetBatch(c.Batch)) + } + + if c.F16 { + predictOptions = append(predictOptions, llama.EnableF16KV) + } + + if c.IgnoreEOS { + predictOptions = 
append(predictOptions, llama.IgnoreEOS) + } + + if c.Seed != 0 { + predictOptions = append(predictOptions, llama.SetSeed(c.Seed)) + } + + return predictOptions +} + func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback func(string) bool) (func() (string, error), error) { supportStreams := false modelFile := c.Model @@ -198,56 +254,7 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback model.SetTokenCallback(tokenCallback) } - // Generate the prediction using the language model - predictOptions := []llama.PredictOption{ - llama.SetTemperature(c.Temperature), - llama.SetTopP(c.TopP), - llama.SetTopK(c.TopK), - llama.SetTokens(c.Maxtokens), - llama.SetThreads(c.Threads), - } - - if c.Mirostat != 0 { - predictOptions = append(predictOptions, llama.SetMirostat(c.Mirostat)) - } - - if c.MirostatETA != 0 { - predictOptions = append(predictOptions, llama.SetMirostatETA(c.MirostatETA)) - } - - if c.MirostatTAU != 0 { - predictOptions = append(predictOptions, llama.SetMirostatTAU(c.MirostatTAU)) - } - - if c.Debug { - predictOptions = append(predictOptions, llama.Debug) - } - - predictOptions = append(predictOptions, llama.SetStopWords(c.StopWords...)) - - if c.RepeatPenalty != 0 { - predictOptions = append(predictOptions, llama.SetPenalty(c.RepeatPenalty)) - } - - if c.Keep != 0 { - predictOptions = append(predictOptions, llama.SetNKeep(c.Keep)) - } - - if c.Batch != 0 { - predictOptions = append(predictOptions, llama.SetBatch(c.Batch)) - } - - if c.F16 { - predictOptions = append(predictOptions, llama.EnableF16KV) - } - - if c.IgnoreEOS { - predictOptions = append(predictOptions, llama.IgnoreEOS) - } - - if c.Seed != 0 { - predictOptions = append(predictOptions, llama.SetSeed(c.Seed)) - } + predictOptions := buildLLamaPredictOptions(c) str, er := model.Predict( s, From b49721cdd1dc66a57919564843cd32cdb4be1d3f Mon Sep 17 00:00:00 2001 From: mudler Date: Fri, 5 May 2023 18:05:10 +0200 Subject: [PATCH 05/13] fix: 
respect config from file for backends settings --- api/openai.go | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/api/openai.go b/api/openai.go index 39212e6..6061e35 100644 --- a/api/openai.go +++ b/api/openai.go @@ -262,6 +262,10 @@ func readConfig(cm ConfigMerger, c *fiber.Ctx, loader *model.ModelLoader, debug if !exists { config = &Config{ OpenAIRequest: defaultRequest(modelFile), + ContextSize: ctx, + Threads: threads, + F16: f16, + Debug: debug, } } else { config = &cfg @@ -270,20 +274,6 @@ func readConfig(cm ConfigMerger, c *fiber.Ctx, loader *model.ModelLoader, debug // Set the parameters for the language model prediction updateConfig(config, input) - if threads != 0 { - config.Threads = threads - } - if ctx != 0 { - config.ContextSize = ctx - } - if f16 { - config.F16 = true - } - - if debug { - config.Debug = true - } - return config, input, nil } From e62ee2bc0648b92393ec5c73670dbbbbdacc7b8b Mon Sep 17 00:00:00 2001 From: mudler Date: Fri, 5 May 2023 18:05:29 +0200 Subject: [PATCH 06/13] fix: remove trailing 0s from embeddings This happens when no max_tokens are set, so by default go-llama allocates more space for the slice and padding happens. 
--- api/prediction.go | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/api/prediction.go b/api/prediction.go index 47229d6..4bfb687 100644 --- a/api/prediction.go +++ b/api/prediction.go @@ -78,7 +78,19 @@ func ModelEmbedding(s string, loader *model.ModelLoader, c Config) (func() ([]fl l.Lock() defer l.Unlock() - return fn() + embeds, err := fn() + if err != nil { + return embeds, err + } + // Remove trailing 0s + for i := len(embeds) - 1; i >= 0; i-- { + if embeds[i] == 0.0 { + embeds = embeds[:i] + } else { + break + } + } + return embeds, nil }, nil } From 3ff9bbd2175a24925b26d7f87376a9b7c02e7776 Mon Sep 17 00:00:00 2001 From: mudler Date: Fri, 5 May 2023 19:04:52 +0200 Subject: [PATCH 07/13] examples: add rwkv script folder --- examples/rwkv/scripts/build.sh | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100755 examples/rwkv/scripts/build.sh diff --git a/examples/rwkv/scripts/build.sh b/examples/rwkv/scripts/build.sh new file mode 100755 index 0000000..2ecedbe --- /dev/null +++ b/examples/rwkv/scripts/build.sh @@ -0,0 +1,11 @@ +#!/bin/bash +set -ex + +URL=$1 +OUT=$2 +FILENAME=$(basename $URL) + +wget -nc $URL -O /build/$FILENAME + +python3 /build/rwkv.cpp/rwkv/convert_pytorch_to_ggml.py /build/$FILENAME /build/float-model float16 +python3 /build/rwkv.cpp/rwkv/quantize.py /build/float-model $OUT Q4_2 From d094381e5dfd58168179f4d113ff5a5d61a4fd3b Mon Sep 17 00:00:00 2001 From: mudler Date: Fri, 5 May 2023 19:50:05 +0200 Subject: [PATCH 08/13] ci: lower fixtures spec --- README.md | 2 ++ tests/fixtures/config.yaml | 6 ++---- tests/fixtures/gpt4.yaml | 3 +-- tests/fixtures/gpt4_2.yaml | 3 +-- 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index b51fc79..e8850e2 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,8 @@ LocalAI is a community-driven project, focused on making the AI accessible to anyone. Any contribution, feedback and PR is welcome! 
It was initially created by [mudler](https://github.com/mudler/) at the [SpectroCloud OSS Office](https://github.com/spectrocloud). +See [examples on how to integrate LocalAI](https://github.com/go-skynet/LocalAI/tree/master/examples/). + ### News - 02-05-2023: Support for `rwkv.cpp` models ( https://github.com/go-skynet/LocalAI/pull/158 ) and for `/edits` endpoint diff --git a/tests/fixtures/config.yaml b/tests/fixtures/config.yaml index 866b74b..9910ffa 100644 --- a/tests/fixtures/config.yaml +++ b/tests/fixtures/config.yaml @@ -1,8 +1,7 @@ - name: list1 parameters: model: testmodel - context_size: 512 - threads: 10 + context_size: 128 stopwords: - "HUMAN:" - "### Response:" @@ -15,8 +14,7 @@ - name: list2 parameters: model: testmodel - context_size: 512 - threads: 10 + context_size: 128 stopwords: - "HUMAN:" - "### Response:" diff --git a/tests/fixtures/gpt4.yaml b/tests/fixtures/gpt4.yaml index c2f9bec..54743bd 100644 --- a/tests/fixtures/gpt4.yaml +++ b/tests/fixtures/gpt4.yaml @@ -1,8 +1,7 @@ name: gpt4all parameters: model: testmodel -context_size: 512 -threads: 10 +context_size: 128 stopwords: - "HUMAN:" - "### Response:" diff --git a/tests/fixtures/gpt4_2.yaml b/tests/fixtures/gpt4_2.yaml index 60722f4..43ef5a1 100644 --- a/tests/fixtures/gpt4_2.yaml +++ b/tests/fixtures/gpt4_2.yaml @@ -1,8 +1,7 @@ name: gpt4all-2 parameters: model: testmodel -context_size: 1024 -threads: 5 +context_size: 128 stopwords: - "HUMAN:" - "### Response:" From ad301e6ed724123fb5c4b1c86d9499f66f5a690f Mon Sep 17 00:00:00 2001 From: mudler Date: Fri, 5 May 2023 21:56:31 +0200 Subject: [PATCH 09/13] example(add): document query example --- examples/query_data/.gitignore | 1 + examples/query_data/README.md | 49 +++++++++++++++++++ examples/query_data/data/.keep | 0 examples/query_data/docker-compose.yml | 15 ++++++ examples/query_data/models/completion.tmpl | 1 + examples/query_data/models/embeddings.yaml | 18 +++++++ examples/query_data/models/gpt-3.5-turbo.yaml | 18 +++++++ 
examples/query_data/models/wizardlm.tmpl | 3 ++ examples/query_data/query.py | 32 ++++++++++++ examples/query_data/store.py | 25 ++++++++++ 10 files changed, 162 insertions(+) create mode 100644 examples/query_data/.gitignore create mode 100644 examples/query_data/README.md create mode 100644 examples/query_data/data/.keep create mode 100644 examples/query_data/docker-compose.yml create mode 100644 examples/query_data/models/completion.tmpl create mode 100644 examples/query_data/models/embeddings.yaml create mode 100644 examples/query_data/models/gpt-3.5-turbo.yaml create mode 100644 examples/query_data/models/wizardlm.tmpl create mode 100644 examples/query_data/query.py create mode 100644 examples/query_data/store.py diff --git a/examples/query_data/.gitignore b/examples/query_data/.gitignore new file mode 100644 index 0000000..29ea9d5 --- /dev/null +++ b/examples/query_data/.gitignore @@ -0,0 +1 @@ +storage/ \ No newline at end of file diff --git a/examples/query_data/README.md b/examples/query_data/README.md new file mode 100644 index 0000000..fb32442 --- /dev/null +++ b/examples/query_data/README.md @@ -0,0 +1,49 @@ +# Data query example + +This example makes use of [Llama-Index](https://gpt-index.readthedocs.io/en/stable/getting_started/installation.html) to enable question answering on a set of documents. + +It loosely follows [the quickstart](https://gpt-index.readthedocs.io/en/stable/guides/primer/usage_pattern.html). + +## Requirements + +For this to work, you will need a model compatible with the `llama.cpp` backend. This will not work with gpt4all. + +The example uses `WizardLM`. Edit the config files in `models/` accordingly to specify the model you use (change `HERE`). + +You will also need a training data set. Copy it into the `data` directory. 
+ +## Setup + +Start the API: + +```bash +# Clone LocalAI +git clone https://github.com/go-skynet/LocalAI + +cd LocalAI/examples/query_data + +# Copy your models, edit config files accordingly + +# start with docker-compose +docker-compose up -d --build +``` + +### Create a storage: + +```bash +export OPENAI_API_BASE=http://localhost:8080/v1 +export OPENAI_API_KEY=sk- + +python store.py +``` + +After it finishes, a directory "storage" will be created with the vector index database. + +## Query + +```bash +export OPENAI_API_BASE=http://localhost:8080/v1 +export OPENAI_API_KEY=sk- + +python query.py +``` \ No newline at end of file diff --git a/examples/query_data/data/.keep b/examples/query_data/data/.keep new file mode 100644 index 0000000..e69de29 diff --git a/examples/query_data/docker-compose.yml b/examples/query_data/docker-compose.yml new file mode 100644 index 0000000..a59edfc --- /dev/null +++ b/examples/query_data/docker-compose.yml @@ -0,0 +1,15 @@ +version: '3.6' + +services: + api: + image: quay.io/go-skynet/local-ai:latest + build: + context: . 
+ dockerfile: Dockerfile + ports: + - 8080:8080 + env_file: + - .env + volumes: + - ./models:/models:cached + command: ["/usr/bin/local-ai"] diff --git a/examples/query_data/models/completion.tmpl b/examples/query_data/models/completion.tmpl new file mode 100644 index 0000000..9867cfc --- /dev/null +++ b/examples/query_data/models/completion.tmpl @@ -0,0 +1 @@ +{{.Input}} \ No newline at end of file diff --git a/examples/query_data/models/embeddings.yaml b/examples/query_data/models/embeddings.yaml new file mode 100644 index 0000000..2173975 --- /dev/null +++ b/examples/query_data/models/embeddings.yaml @@ -0,0 +1,18 @@ +name: text-embedding-ada-002 +parameters: + model: HERE + top_k: 80 + temperature: 0.2 + top_p: 0.7 +context_size: 1024 +threads: 14 +stopwords: +- "HUMAN:" +- "GPT:" +roles: + user: " " + system: " " +embeddings: true +template: + completion: completion + chat: gpt4all diff --git a/examples/query_data/models/gpt-3.5-turbo.yaml b/examples/query_data/models/gpt-3.5-turbo.yaml new file mode 100644 index 0000000..9cdb4a2 --- /dev/null +++ b/examples/query_data/models/gpt-3.5-turbo.yaml @@ -0,0 +1,18 @@ +name: gpt-3.5-turbo +parameters: + model: HERE + top_k: 80 + temperature: 0.2 + top_p: 0.7 +context_size: 1024 +threads: 14 +embeddings: true +stopwords: +- "HUMAN:" +- "GPT:" +roles: + user: " " + system: " " +template: + completion: completion + chat: wizardlm diff --git a/examples/query_data/models/wizardlm.tmpl b/examples/query_data/models/wizardlm.tmpl new file mode 100644 index 0000000..e7b1985 --- /dev/null +++ b/examples/query_data/models/wizardlm.tmpl @@ -0,0 +1,3 @@ +{{.Input}} + +### Response: \ No newline at end of file diff --git a/examples/query_data/query.py b/examples/query_data/query.py new file mode 100644 index 0000000..05a288f --- /dev/null +++ b/examples/query_data/query.py @@ -0,0 +1,32 @@ +import os + +# Uncomment to specify your OpenAI API key here (local testing only, not in production!), or add corresponding environment 
variable (recommended) +# os.environ['OPENAI_API_KEY']= "" + +from llama_index import LLMPredictor, PromptHelper, ServiceContext +from langchain.llms.openai import OpenAI +from llama_index import StorageContext, load_index_from_storage + + +# This example requests the "gpt-3.5-turbo" model name; feel free to change if desired +llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo",openai_api_base="http://localhost:8080/v1")) + +# Configure prompt parameters and initialise helper +max_input_size = 1024 +num_output = 256 +max_chunk_overlap = 20 + +prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap) + +# Build the service context from the predictor and prompt helper +service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper) + +# rebuild storage context +storage_context = StorageContext.from_defaults(persist_dir='./storage') + +# load index +index = load_index_from_storage(storage_context, service_context=service_context, ) + +query_engine = index.as_query_engine() +response = query_engine.query("XXXXXX your question here XXXXX") +print(response) \ No newline at end of file diff --git a/examples/query_data/store.py b/examples/query_data/store.py new file mode 100644 index 0000000..56d83bb --- /dev/null +++ b/examples/query_data/store.py @@ -0,0 +1,25 @@ +import os + +# Uncomment to specify your OpenAI API key here (local testing only, not in production!), or add corresponding environment variable (recommended) +# os.environ['OPENAI_API_KEY']= "" + +from llama_index import GPTVectorStoreIndex, SimpleDirectoryReader, LLMPredictor, PromptHelper, ServiceContext +from langchain.llms.openai import OpenAI +from llama_index import StorageContext, load_index_from_storage + +# This example requests the "gpt-3.5-turbo" model name; feel free to change if desired +llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo",openai_api_base="http://localhost:8080/v1")) + +# Configure prompt parameters 
and initialise helper +max_input_size = 256 +num_output = 256 +max_chunk_overlap = 10 + +prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap) + +# Load documents from the 'data' directory +documents = SimpleDirectoryReader('data').load_data() +service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper, chunk_size_limit = 257) +index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context) +index.storage_context.persist(persist_dir="./storage") + From ec2adc2c031e106a23bc1438bb3f8d6517f22ff8 Mon Sep 17 00:00:00 2001 From: mudler Date: Fri, 5 May 2023 22:07:01 +0200 Subject: [PATCH 10/13] tests: use 3 cores --- api/api_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/api_test.go b/api/api_test.go index 9682a21..6f21f21 100644 --- a/api/api_test.go +++ b/api/api_test.go @@ -23,7 +23,7 @@ var _ = Describe("API test", func() { Context("API query", func() { BeforeEach(func() { modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH")) - app = App("", modelLoader, 1, 512, false, true, true) + app = App("", modelLoader, 3, 512, false, true, true) go app.Listen("127.0.0.1:9090") defaultConfig := openai.DefaultConfig("") @@ -87,7 +87,7 @@ var _ = Describe("API test", func() { Context("Config file", func() { BeforeEach(func() { modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH")) - app = App(os.Getenv("CONFIG_FILE"), modelLoader, 1, 512, false, true, true) + app = App(os.Getenv("CONFIG_FILE"), modelLoader, 3, 512, false, true, true) go app.Listen("127.0.0.1:9090") defaultConfig := openai.DefaultConfig("") From 009ee47fe2eb4347b08d01e6e741e13305708acc Mon Sep 17 00:00:00 2001 From: mudler Date: Fri, 5 May 2023 22:51:20 +0200 Subject: [PATCH 11/13] Don't allow 0 as thread count --- api/config.go | 173 +++++++++++++++++++++++++++++++++++++ api/openai.go | 162 ---------------------------------- tests/fixtures/config.yaml | 10 ++- 
tests/fixtures/gpt4.yaml | 5 +- tests/fixtures/gpt4_2.yaml | 5 +- 5 files changed, 189 insertions(+), 166 deletions(-) diff --git a/api/config.go b/api/config.go index d5df3de..b032d15 100644 --- a/api/config.go +++ b/api/config.go @@ -1,12 +1,16 @@ package api import ( + "encoding/json" "fmt" "io/ioutil" "os" "path/filepath" "strings" + model "github.com/go-skynet/LocalAI/pkg/model" + "github.com/gofiber/fiber/v2" + "github.com/rs/zerolog/log" "gopkg.in/yaml.v3" ) @@ -106,3 +110,172 @@ func (cm ConfigMerger) LoadConfigs(path string) error { return nil } + +func updateConfig(config *Config, input *OpenAIRequest) { + if input.Echo { + config.Echo = input.Echo + } + if input.TopK != 0 { + config.TopK = input.TopK + } + if input.TopP != 0 { + config.TopP = input.TopP + } + + if input.Temperature != 0 { + config.Temperature = input.Temperature + } + + if input.Maxtokens != 0 { + config.Maxtokens = input.Maxtokens + } + + switch stop := input.Stop.(type) { + case string: + if stop != "" { + config.StopWords = append(config.StopWords, stop) + } + case []interface{}: + for _, pp := range stop { + if s, ok := pp.(string); ok { + config.StopWords = append(config.StopWords, s) + } + } + } + + if input.RepeatPenalty != 0 { + config.RepeatPenalty = input.RepeatPenalty + } + + if input.Keep != 0 { + config.Keep = input.Keep + } + + if input.Batch != 0 { + config.Batch = input.Batch + } + + if input.F16 { + config.F16 = input.F16 + } + + if input.IgnoreEOS { + config.IgnoreEOS = input.IgnoreEOS + } + + if input.Seed != 0 { + config.Seed = input.Seed + } + + if input.Mirostat != 0 { + config.Mirostat = input.Mirostat + } + + if input.MirostatETA != 0 { + config.MirostatETA = input.MirostatETA + } + + if input.MirostatTAU != 0 { + config.MirostatTAU = input.MirostatTAU + } + + switch inputs := input.Input.(type) { + case string: + if inputs != "" { + config.InputStrings = append(config.InputStrings, inputs) + } + case []interface{}: + for _, pp := range inputs { + if s, ok := 
pp.(string); ok { + config.InputStrings = append(config.InputStrings, s) + } + } + } + + switch p := input.Prompt.(type) { + case string: + config.PromptStrings = append(config.PromptStrings, p) + case []interface{}: + for _, pp := range p { + if s, ok := pp.(string); ok { + config.PromptStrings = append(config.PromptStrings, s) + } + } + } +} + +func readConfig(cm ConfigMerger, c *fiber.Ctx, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*Config, *OpenAIRequest, error) { + input := new(OpenAIRequest) + // Get input data from the request body + if err := c.BodyParser(input); err != nil { + return nil, nil, err + } + + modelFile := input.Model + + if c.Params("model") != "" { + modelFile = c.Params("model") + } + + received, _ := json.Marshal(input) + + log.Debug().Msgf("Request received: %s", string(received)) + + // Set model from bearer token, if available + bearer := strings.TrimLeft(c.Get("authorization"), "Bearer ") + bearerExists := bearer != "" && loader.ExistsInModelPath(bearer) + + // If no model was specified, take the first available + if modelFile == "" && !bearerExists { + models, _ := loader.ListModels() + if len(models) > 0 { + modelFile = models[0] + log.Debug().Msgf("No model specified, using: %s", modelFile) + } else { + log.Debug().Msgf("No model specified, returning error") + return nil, nil, fmt.Errorf("no model specified") + } + } + + // If a model is found in bearer token takes precedence + if bearerExists { + log.Debug().Msgf("Using model from bearer token: %s", bearer) + modelFile = bearer + } + + // Load a config file if present after the model name + modelConfig := filepath.Join(loader.ModelPath, modelFile+".yaml") + if _, err := os.Stat(modelConfig); err == nil { + if err := cm.LoadConfig(modelConfig); err != nil { + return nil, nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error()) + } + } + + var config *Config + cfg, exists := cm[modelFile] + if !exists { + config = &Config{ + 
OpenAIRequest: defaultRequest(modelFile), + ContextSize: ctx, + Threads: threads, + F16: f16, + Debug: debug, + } + } else { + config = &cfg + } + + // Set the parameters for the language model prediction + updateConfig(config, input) + + // Don't allow 0 as setting + if config.Threads == 0 { + if threads != 0 { + config.Threads = threads + } else { + config.Threads = 4 + } + } + + return config, input, nil +} diff --git a/api/openai.go b/api/openai.go index 6061e35..d98dc56 100644 --- a/api/openai.go +++ b/api/openai.go @@ -5,8 +5,6 @@ import ( "bytes" "encoding/json" "fmt" - "os" - "path/filepath" "strings" model "github.com/go-skynet/LocalAI/pkg/model" @@ -117,166 +115,6 @@ func defaultRequest(modelFile string) OpenAIRequest { } } -func updateConfig(config *Config, input *OpenAIRequest) { - if input.Echo { - config.Echo = input.Echo - } - if input.TopK != 0 { - config.TopK = input.TopK - } - if input.TopP != 0 { - config.TopP = input.TopP - } - - if input.Temperature != 0 { - config.Temperature = input.Temperature - } - - if input.Maxtokens != 0 { - config.Maxtokens = input.Maxtokens - } - - switch stop := input.Stop.(type) { - case string: - if stop != "" { - config.StopWords = append(config.StopWords, stop) - } - case []interface{}: - for _, pp := range stop { - if s, ok := pp.(string); ok { - config.StopWords = append(config.StopWords, s) - } - } - } - - if input.RepeatPenalty != 0 { - config.RepeatPenalty = input.RepeatPenalty - } - - if input.Keep != 0 { - config.Keep = input.Keep - } - - if input.Batch != 0 { - config.Batch = input.Batch - } - - if input.F16 { - config.F16 = input.F16 - } - - if input.IgnoreEOS { - config.IgnoreEOS = input.IgnoreEOS - } - - if input.Seed != 0 { - config.Seed = input.Seed - } - - if input.Mirostat != 0 { - config.Mirostat = input.Mirostat - } - - if input.MirostatETA != 0 { - config.MirostatETA = input.MirostatETA - } - - if input.MirostatTAU != 0 { - config.MirostatTAU = input.MirostatTAU - } - - switch inputs := 
input.Input.(type) { - case string: - if inputs != "" { - config.InputStrings = append(config.InputStrings, inputs) - } - case []interface{}: - for _, pp := range inputs { - if s, ok := pp.(string); ok { - config.InputStrings = append(config.InputStrings, s) - } - } - } - - switch p := input.Prompt.(type) { - case string: - config.PromptStrings = append(config.PromptStrings, p) - case []interface{}: - for _, pp := range p { - if s, ok := pp.(string); ok { - config.PromptStrings = append(config.PromptStrings, s) - } - } - } -} - -func readConfig(cm ConfigMerger, c *fiber.Ctx, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*Config, *OpenAIRequest, error) { - input := new(OpenAIRequest) - // Get input data from the request body - if err := c.BodyParser(input); err != nil { - return nil, nil, err - } - - modelFile := input.Model - - if c.Params("model") != "" { - modelFile = c.Params("model") - } - - received, _ := json.Marshal(input) - - log.Debug().Msgf("Request received: %s", string(received)) - - // Set model from bearer token, if available - bearer := strings.TrimLeft(c.Get("authorization"), "Bearer ") - bearerExists := bearer != "" && loader.ExistsInModelPath(bearer) - - // If no model was specified, take the first available - if modelFile == "" && !bearerExists { - models, _ := loader.ListModels() - if len(models) > 0 { - modelFile = models[0] - log.Debug().Msgf("No model specified, using: %s", modelFile) - } else { - log.Debug().Msgf("No model specified, returning error") - return nil, nil, fmt.Errorf("no model specified") - } - } - - // If a model is found in bearer token takes precedence - if bearerExists { - log.Debug().Msgf("Using model from bearer token: %s", bearer) - modelFile = bearer - } - - // Load a config file if present after the model name - modelConfig := filepath.Join(loader.ModelPath, modelFile+".yaml") - if _, err := os.Stat(modelConfig); err == nil { - if err := cm.LoadConfig(modelConfig); err != nil { - return nil, nil, 
fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error()) - } - } - - var config *Config - cfg, exists := cm[modelFile] - if !exists { - config = &Config{ - OpenAIRequest: defaultRequest(modelFile), - ContextSize: ctx, - Threads: threads, - F16: f16, - Debug: debug, - } - } else { - config = &cfg - } - - // Set the parameters for the language model prediction - updateConfig(config, input) - - return config, input, nil -} - // https://platform.openai.com/docs/api-reference/completions func completionEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { diff --git a/tests/fixtures/config.yaml b/tests/fixtures/config.yaml index 9910ffa..3deabf9 100644 --- a/tests/fixtures/config.yaml +++ b/tests/fixtures/config.yaml @@ -1,7 +1,10 @@ - name: list1 parameters: model: testmodel - context_size: 128 + top_p: 80 + top_k: 0.9 + temperature: 0.1 + context_size: 10 stopwords: - "HUMAN:" - "### Response:" @@ -13,8 +16,11 @@ chat: ggml-gpt4all-j - name: list2 parameters: + top_p: 80 + top_k: 0.9 + temperature: 0.1 model: testmodel - context_size: 128 + context_size: 10 stopwords: - "HUMAN:" - "### Response:" diff --git a/tests/fixtures/gpt4.yaml b/tests/fixtures/gpt4.yaml index 54743bd..77b72b3 100644 --- a/tests/fixtures/gpt4.yaml +++ b/tests/fixtures/gpt4.yaml @@ -1,7 +1,10 @@ name: gpt4all parameters: model: testmodel -context_size: 128 + top_p: 80 + top_k: 0.9 + temperature: 0.1 +context_size: 10 stopwords: - "HUMAN:" - "### Response:" diff --git a/tests/fixtures/gpt4_2.yaml b/tests/fixtures/gpt4_2.yaml index 43ef5a1..62d9fdb 100644 --- a/tests/fixtures/gpt4_2.yaml +++ b/tests/fixtures/gpt4_2.yaml @@ -1,7 +1,10 @@ name: gpt4all-2 parameters: model: testmodel -context_size: 128 + top_p: 80 + top_k: 0.9 + temperature: 0.1 +context_size: 10 stopwords: - "HUMAN:" - "### Response:" From 75b25297fde42c1ec1c3d72dfd616720aaf944eb Mon Sep 17 00:00:00 2001 From: mudler 
Date: Fri, 5 May 2023 22:51:30 +0200 Subject: [PATCH 12/13] tests: run with ginkgo --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 8bc7660..615e7a8 100644 --- a/Makefile +++ b/Makefile @@ -130,7 +130,7 @@ test-models/testmodel: test: prepare test-models/testmodel cp tests/fixtures/* test-models - @C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models $(GOCMD) test -v -timeout 30m ./... + @C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo -v -r ./... ## Help: help: ## Show this help. From 8c8cf38d4d4347ea1f7b712dd14122308ddd2711 Mon Sep 17 00:00:00 2001 From: mudler Date: Fri, 5 May 2023 23:24:34 +0200 Subject: [PATCH 13/13] tests: use 1 core --- api/api_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/api_test.go b/api/api_test.go index 6f21f21..9682a21 100644 --- a/api/api_test.go +++ b/api/api_test.go @@ -23,7 +23,7 @@ var _ = Describe("API test", func() { Context("API query", func() { BeforeEach(func() { modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH")) - app = App("", modelLoader, 3, 512, false, true, true) + app = App("", modelLoader, 1, 512, false, true, true) go app.Listen("127.0.0.1:9090") defaultConfig := openai.DefaultConfig("") @@ -87,7 +87,7 @@ var _ = Describe("API test", func() { Context("Config file", func() { BeforeEach(func() { modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH")) - app = App(os.Getenv("CONFIG_FILE"), modelLoader, 3, 512, false, true, true) + app = App(os.Getenv("CONFIG_FILE"), modelLoader, 1, 512, false, true, true) go app.Listen("127.0.0.1:9090") defaultConfig := openai.DefaultConfig("")