From 857d13e8d6008de34e9ef61659609da843f3785e Mon Sep 17 00:00:00 2001
From: mudler <mudler@mocaccino.org>
Date: Fri, 5 May 2023 15:53:57 +0200
Subject: [PATCH 01/13] debug: wire up go-fiber debugger

---
 api/api.go | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/api/api.go b/api/api.go
index 33d073b..45e5e45 100644
--- a/api/api.go
+++ b/api/api.go
@@ -6,6 +6,7 @@ import (
 	model "github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/gofiber/fiber/v2"
 	"github.com/gofiber/fiber/v2/middleware/cors"
+	"github.com/gofiber/fiber/v2/middleware/logger"
 	"github.com/gofiber/fiber/v2/middleware/recover"
 	"github.com/rs/zerolog"
 	"github.com/rs/zerolog/log"
@@ -40,6 +41,12 @@ func App(configFile string, loader *model.ModelLoader, threads, ctxSize int, f16
 		},
 	})
 
+	if debug {
+		app.Use(logger.New(logger.Config{
+			Format: "[${ip}]:${port} ${status} - ${method} ${path}\n",
+		}))
+	}
+
 	cm := make(ConfigMerger)
 	if err := cm.LoadConfigs(loader.ModelPath); err != nil {
 		log.Error().Msgf("error loading config files: %s", err.Error())

From e73283121bece33d40d080edaa7bd9d3c88d7486 Mon Sep 17 00:00:00 2001
From: mudler <mudler@mocaccino.org>
Date: Fri, 5 May 2023 15:54:59 +0200
Subject: [PATCH 02/13] feat: support arrays for prompt and input

Signed-off-by: mudler <mudler@mocaccino.org>
---
 api/config.go     |   2 +
 api/openai.go     | 101 ++++++++++++++++++++++++++++------------------
 api/prediction.go |   1 +
 3 files changed, 64 insertions(+), 40 deletions(-)

diff --git a/api/config.go b/api/config.go
index 8e550e1..d5df3de 100644
--- a/api/config.go
+++ b/api/config.go
@@ -27,6 +27,8 @@ type Config struct {
 	MirostatETA    float64           `yaml:"mirostat_eta"`
 	MirostatTAU    float64           `yaml:"mirostat_tau"`
 	Mirostat       int               `yaml:"mirostat"`
+
+	PromptStrings, InputStrings []string
 }
 
 type TemplateConfig struct {
diff --git a/api/openai.go b/api/openai.go
index fc982f2..3a6b947 100644
--- a/api/openai.go
+++ b/api/openai.go
@@ -75,8 +75,8 @@ type OpenAIRequest struct {
 	Prompt interface{} `json:"prompt" yaml:"prompt"`
 
 	// Edit endpoint
-	Instruction string `json:"instruction" yaml:"instruction"`
-	Input       string `json:"input" yaml:"input"`
+	Instruction string      `json:"instruction" yaml:"instruction"`
+	Input       interface{} `json:"input" yaml:"input"`
 
 	Stop interface{} `json:"stop" yaml:"stop"`
 
@@ -184,6 +184,30 @@ func updateConfig(config *Config, input *OpenAIRequest) {
 	if input.MirostatTAU != 0 {
 		config.MirostatTAU = input.MirostatTAU
 	}
+
+	switch inputs := input.Input.(type) {
+	case string:
+		if inputs != "" {
+			config.InputStrings = append(config.InputStrings, inputs)
+		}
+	case []interface{}:
+		for _, pp := range inputs {
+			if s, ok := pp.(string); ok {
+				config.InputStrings = append(config.InputStrings, s)
+			}
+		}
+	}
+
+	switch p := input.Prompt.(type) {
+	case string:
+		config.PromptStrings = append(config.PromptStrings, p)
+	case []interface{}:
+		for _, pp := range p {
+			if s, ok := pp.(string); ok {
+				config.PromptStrings = append(config.PromptStrings, s)
+			}
+		}
+	}
 }
 
 func readConfig(cm ConfigMerger, c *fiber.Ctx, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*Config, *OpenAIRequest, error) {
@@ -268,19 +292,6 @@ func completionEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader,
 
 		log.Debug().Msgf("Parameter Config: %+v", config)
 
-		predInput := []string{}
-
-		switch p := input.Prompt.(type) {
-		case string:
-			predInput = append(predInput, p)
-		case []interface{}:
-			for _, pp := range p {
-				if s, ok := pp.(string); ok {
-					predInput = append(predInput, s)
-				}
-			}
-		}
-
 		templateFile := config.Model
 
 		if config.TemplateConfig.Completion != "" {
@@ -288,7 +299,7 @@ func completionEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader,
 		}
 
 		var result []Choice
-		for _, i := range predInput {
+		for _, i := range config.PromptStrings {
 			// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
 			templatedInput, err := loader.TemplatePrefix(templateFile, struct {
 				Input string
@@ -331,20 +342,26 @@ func embeddingsEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader,
 		}
 
 		log.Debug().Msgf("Parameter Config: %+v", config)
+		items := []Item{}
 
-		// get the model function to call for the result
-		embedFn, err := ModelEmbedding(input.Input, loader, *config)
-		if err != nil {
-			return err
-		}
+		for i, s := range config.InputStrings {
 
-		embeddings, err := embedFn()
-		if err != nil {
-			return err
+			// get the model function to call for the result
+			embedFn, err := ModelEmbedding(s, loader, *config)
+			if err != nil {
+				return err
+			}
+
+			embeddings, err := embedFn()
+			if err != nil {
+				return err
+			}
+			items = append(items, Item{Embedding: embeddings, Index: i, Object: "embedding"})
 		}
+
 		resp := &OpenAIResponse{
 			Model:  input.Model, // we have to return what the user sent here, due to OpenAI spec.
-			Data:   []Item{{Embedding: embeddings, Index: 0, Object: "embedding"}},
+			Data:   items,
 			Object: "list",
 		}
 
@@ -480,28 +497,32 @@ func editEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, thread
 
 		log.Debug().Msgf("Parameter Config: %+v", config)
 
-		predInput := input.Input
 		templateFile := config.Model
 
 		if config.TemplateConfig.Edit != "" {
 			templateFile = config.TemplateConfig.Edit
 		}
 
-		// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
-		templatedInput, err := loader.TemplatePrefix(templateFile, struct {
-			Input       string
-			Instruction string
-		}{Input: predInput, Instruction: input.Instruction})
-		if err == nil {
-			predInput = templatedInput
-			log.Debug().Msgf("Template found, input modified to: %s", predInput)
-		}
+		var result []Choice
+		for _, i := range config.InputStrings {
+			// A model can have a "file.bin.tmpl" file associated with a prompt template prefix; the edit template needs both the input and the instruction
+			templatedInput, err := loader.TemplatePrefix(templateFile, struct {
+				Input       string
+				Instruction string
+			}{Input: i, Instruction: input.Instruction})
+			if err == nil {
+				i = templatedInput
+				log.Debug().Msgf("Template found, input modified to: %s", i)
+			}
 
-		result, err := ComputeChoices(predInput, input, config, loader, func(s string, c *[]Choice) {
-			*c = append(*c, Choice{Text: s})
-		}, nil)
-		if err != nil {
-			return err
+			r, err := ComputeChoices(i, input, config, loader, func(s string, c *[]Choice) {
+				*c = append(*c, Choice{Text: s})
+			}, nil)
+			if err != nil {
+				return err
+			}
+
+			result = append(result, r...)
 		}
 
 		resp := &OpenAIResponse{
diff --git a/api/prediction.go b/api/prediction.go
index 45db078..009641a 100644
--- a/api/prediction.go
+++ b/api/prediction.go
@@ -28,6 +28,7 @@ func defaultLLamaOpts(c Config) []llama.ModelOption {
 	if c.Embeddings {
 		llamaOpts = append(llamaOpts, llama.EnableEmbeddings)
 	}
+
 	return llamaOpts
 }
 

From e96eadab4083fd46adeb63444b2f42ba4459a271 Mon Sep 17 00:00:00 2001
From: mudler <mudler@mocaccino.org>
Date: Fri, 5 May 2023 15:55:19 +0200
Subject: [PATCH 03/13] feat: support deprecated embeddings API

---
 api/api.go    | 4 ++++
 api/openai.go | 5 +++++
 2 files changed, 9 insertions(+)

diff --git a/api/api.go b/api/api.go
index 45e5e45..7994ff2 100644
--- a/api/api.go
+++ b/api/api.go
@@ -80,6 +80,10 @@ func App(configFile string, loader *model.ModelLoader, threads, ctxSize int, f16
 	app.Post("/v1/embeddings", embeddingsEndpoint(cm, debug, loader, threads, ctxSize, f16))
 	app.Post("/embeddings", embeddingsEndpoint(cm, debug, loader, threads, ctxSize, f16))
 
+	// /v1/engines/{engine_id}/embeddings
+
+	app.Post("/v1/engines/:model/embeddings", embeddingsEndpoint(cm, debug, loader, threads, ctxSize, f16))
+
 	app.Get("/v1/models", listModels(loader, cm))
 	app.Get("/models", listModels(loader, cm))
 
diff --git a/api/openai.go b/api/openai.go
index 3a6b947..39212e6 100644
--- a/api/openai.go
+++ b/api/openai.go
@@ -218,6 +218,11 @@ func readConfig(cm ConfigMerger, c *fiber.Ctx, loader *model.ModelLoader, debug
 	}
 
 	modelFile := input.Model
+
+	if c.Params("model") != "" {
+		modelFile = c.Params("model")
+	}
+
 	received, _ := json.Marshal(input)
 
 	log.Debug().Msgf("Request received: %s", string(received))

From 64c0a7967fa824d12887865148616594c5264e55 Mon Sep 17 00:00:00 2001
From: mudler <mudler@mocaccino.org>
Date: Fri, 5 May 2023 15:56:02 +0200
Subject: [PATCH 04/13] fix: pass prediction options when using the model

---
 api/prediction.go | 109 ++++++++++++++++++++++++----------------------
 1 file changed, 58 insertions(+), 51 deletions(-)

diff --git a/api/prediction.go b/api/prediction.go
index 009641a..47229d6 100644
--- a/api/prediction.go
+++ b/api/prediction.go
@@ -56,7 +56,8 @@ func ModelEmbedding(s string, loader *model.ModelLoader, c Config) (func() ([]fl
 	switch model := inferenceModel.(type) {
 	case *llama.LLama:
 		fn = func() ([]float32, error) {
-			return model.Embeddings(s)
+			predictOptions := buildLLamaPredictOptions(c)
+			return model.Embeddings(s, predictOptions...)
 		}
 	default:
 		fn = func() ([]float32, error) {
@@ -81,6 +82,61 @@ func ModelEmbedding(s string, loader *model.ModelLoader, c Config) (func() ([]fl
 	}, nil
 }
 
+func buildLLamaPredictOptions(c Config) []llama.PredictOption {
+	// Build the go-llama predict options from c: sampling/token/thread settings are always set, the optional ones below only when non-zero or enabled
+	predictOptions := []llama.PredictOption{
+		llama.SetTemperature(c.Temperature),
+		llama.SetTopP(c.TopP),
+		llama.SetTopK(c.TopK),
+		llama.SetTokens(c.Maxtokens),
+		llama.SetThreads(c.Threads),
+	}
+
+	if c.Mirostat != 0 {
+		predictOptions = append(predictOptions, llama.SetMirostat(c.Mirostat))
+	}
+
+	if c.MirostatETA != 0 {
+		predictOptions = append(predictOptions, llama.SetMirostatETA(c.MirostatETA))
+	}
+
+	if c.MirostatTAU != 0 {
+		predictOptions = append(predictOptions, llama.SetMirostatTAU(c.MirostatTAU))
+	}
+
+	if c.Debug {
+		predictOptions = append(predictOptions, llama.Debug)
+	}
+
+	predictOptions = append(predictOptions, llama.SetStopWords(c.StopWords...))
+
+	if c.RepeatPenalty != 0 {
+		predictOptions = append(predictOptions, llama.SetPenalty(c.RepeatPenalty))
+	}
+
+	if c.Keep != 0 {
+		predictOptions = append(predictOptions, llama.SetNKeep(c.Keep))
+	}
+
+	if c.Batch != 0 {
+		predictOptions = append(predictOptions, llama.SetBatch(c.Batch))
+	}
+
+	if c.F16 {
+		predictOptions = append(predictOptions, llama.EnableF16KV)
+	}
+
+	if c.IgnoreEOS {
+		predictOptions = append(predictOptions, llama.IgnoreEOS)
+	}
+
+	if c.Seed != 0 {
+		predictOptions = append(predictOptions, llama.SetSeed(c.Seed))
+	}
+
+	return predictOptions
+}
+
 func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback func(string) bool) (func() (string, error), error) {
 	supportStreams := false
 	modelFile := c.Model
@@ -198,56 +254,7 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback
 				model.SetTokenCallback(tokenCallback)
 			}
 
-			// Generate the prediction using the language model
-			predictOptions := []llama.PredictOption{
-				llama.SetTemperature(c.Temperature),
-				llama.SetTopP(c.TopP),
-				llama.SetTopK(c.TopK),
-				llama.SetTokens(c.Maxtokens),
-				llama.SetThreads(c.Threads),
-			}
-
-			if c.Mirostat != 0 {
-				predictOptions = append(predictOptions, llama.SetMirostat(c.Mirostat))
-			}
-
-			if c.MirostatETA != 0 {
-				predictOptions = append(predictOptions, llama.SetMirostatETA(c.MirostatETA))
-			}
-
-			if c.MirostatTAU != 0 {
-				predictOptions = append(predictOptions, llama.SetMirostatTAU(c.MirostatTAU))
-			}
-
-			if c.Debug {
-				predictOptions = append(predictOptions, llama.Debug)
-			}
-
-			predictOptions = append(predictOptions, llama.SetStopWords(c.StopWords...))
-
-			if c.RepeatPenalty != 0 {
-				predictOptions = append(predictOptions, llama.SetPenalty(c.RepeatPenalty))
-			}
-
-			if c.Keep != 0 {
-				predictOptions = append(predictOptions, llama.SetNKeep(c.Keep))
-			}
-
-			if c.Batch != 0 {
-				predictOptions = append(predictOptions, llama.SetBatch(c.Batch))
-			}
-
-			if c.F16 {
-				predictOptions = append(predictOptions, llama.EnableF16KV)
-			}
-
-			if c.IgnoreEOS {
-				predictOptions = append(predictOptions, llama.IgnoreEOS)
-			}
-
-			if c.Seed != 0 {
-				predictOptions = append(predictOptions, llama.SetSeed(c.Seed))
-			}
+			predictOptions := buildLLamaPredictOptions(c)
 
 			str, er := model.Predict(
 				s,

From b49721cdd1dc66a57919564843cd32cdb4be1d3f Mon Sep 17 00:00:00 2001
From: mudler <mudler@mocaccino.org>
Date: Fri, 5 May 2023 18:05:10 +0200
Subject: [PATCH 05/13] fix: respect config from file for backends settings

---
 api/openai.go | 18 ++++--------------
 1 file changed, 4 insertions(+), 14 deletions(-)

diff --git a/api/openai.go b/api/openai.go
index 39212e6..6061e35 100644
--- a/api/openai.go
+++ b/api/openai.go
@@ -262,6 +262,10 @@ func readConfig(cm ConfigMerger, c *fiber.Ctx, loader *model.ModelLoader, debug
 	if !exists {
 		config = &Config{
 			OpenAIRequest: defaultRequest(modelFile),
+			ContextSize:   ctx,
+			Threads:       threads,
+			F16:           f16,
+			Debug:         debug,
 		}
 	} else {
 		config = &cfg
@@ -270,20 +274,6 @@ func readConfig(cm ConfigMerger, c *fiber.Ctx, loader *model.ModelLoader, debug
 	// Set the parameters for the language model prediction
 	updateConfig(config, input)
 
-	if threads != 0 {
-		config.Threads = threads
-	}
-	if ctx != 0 {
-		config.ContextSize = ctx
-	}
-	if f16 {
-		config.F16 = true
-	}
-
-	if debug {
-		config.Debug = true
-	}
-
 	return config, input, nil
 }
 

From e62ee2bc0648b92393ec5c73670dbbbbdacc7b8b Mon Sep 17 00:00:00 2001
From: mudler <mudler@mocaccino.org>
Date: Fri, 5 May 2023 18:05:29 +0200
Subject: [PATCH 06/13] fix: remove trailing 0s from embeddings

This happens when no max_tokens are set, so by default go-llama
allocates more space for the slice and padding happens.
---
 api/prediction.go | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/api/prediction.go b/api/prediction.go
index 47229d6..4bfb687 100644
--- a/api/prediction.go
+++ b/api/prediction.go
@@ -78,7 +78,19 @@ func ModelEmbedding(s string, loader *model.ModelLoader, c Config) (func() ([]fl
 		l.Lock()
 		defer l.Unlock()
 
-		return fn()
+		embeds, err := fn()
+		if err != nil {
+			return embeds, err
+		}
+		// Remove trailing 0s
+		for i := len(embeds) - 1; i >= 0; i-- {
+			if embeds[i] == 0.0 {
+				embeds = embeds[:i]
+			} else {
+				break
+			}
+		}
+		return embeds, nil
 	}, nil
 }
 

From 3ff9bbd2175a24925b26d7f87376a9b7c02e7776 Mon Sep 17 00:00:00 2001
From: mudler <mudler@mocaccino.org>
Date: Fri, 5 May 2023 19:04:52 +0200
Subject: [PATCH 07/13] examples: add rwkv script folder

---
 examples/rwkv/scripts/build.sh | 11 +++++++++++
 1 file changed, 11 insertions(+)
 create mode 100755 examples/rwkv/scripts/build.sh

diff --git a/examples/rwkv/scripts/build.sh b/examples/rwkv/scripts/build.sh
new file mode 100755
index 0000000..2ecedbe
--- /dev/null
+++ b/examples/rwkv/scripts/build.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+set -ex
+# Usage: build.sh <model-url> <output-file>
+URL=$1
+OUT=$2
+FILENAME=$(basename "$URL")
+
+wget -nc "$URL" -O "/build/$FILENAME"
+# Convert the downloaded checkpoint to ggml float16, then quantize it to Q4_2.
+python3 /build/rwkv.cpp/rwkv/convert_pytorch_to_ggml.py "/build/$FILENAME" /build/float-model float16
+python3 /build/rwkv.cpp/rwkv/quantize.py /build/float-model "$OUT" Q4_2

From d094381e5dfd58168179f4d113ff5a5d61a4fd3b Mon Sep 17 00:00:00 2001
From: mudler <mudler@mocaccino.org>
Date: Fri, 5 May 2023 19:50:05 +0200
Subject: [PATCH 08/13] ci: lower fixtures spec

---
 README.md                  | 2 ++
 tests/fixtures/config.yaml | 6 ++----
 tests/fixtures/gpt4.yaml   | 3 +--
 tests/fixtures/gpt4_2.yaml | 3 +--
 4 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/README.md b/README.md
index b51fc79..e8850e2 100644
--- a/README.md
+++ b/README.md
@@ -19,6 +19,8 @@
 
 LocalAI is a community-driven project, focused on making the AI accessible to anyone. Any contribution, feedback and PR is welcome! It was initially created by [mudler](https://github.com/mudler/) at the [SpectroCloud OSS Office](https://github.com/spectrocloud).
 
+See [examples on how to integrate LocalAI](https://github.com/go-skynet/LocalAI/tree/master/examples/).
+
 ### News
 
 - 02-05-2023: Support for `rwkv.cpp` models ( https://github.com/go-skynet/LocalAI/pull/158 ) and for `/edits` endpoint
diff --git a/tests/fixtures/config.yaml b/tests/fixtures/config.yaml
index 866b74b..9910ffa 100644
--- a/tests/fixtures/config.yaml
+++ b/tests/fixtures/config.yaml
@@ -1,8 +1,7 @@
 - name: list1
   parameters:
     model: testmodel
-  context_size: 512
-  threads: 10
+  context_size: 128
   stopwords:
   - "HUMAN:"
   - "### Response:"
@@ -15,8 +14,7 @@
 - name: list2
   parameters:
     model: testmodel
-  context_size: 512
-  threads: 10
+  context_size: 128
   stopwords:
   - "HUMAN:"
   - "### Response:"
diff --git a/tests/fixtures/gpt4.yaml b/tests/fixtures/gpt4.yaml
index c2f9bec..54743bd 100644
--- a/tests/fixtures/gpt4.yaml
+++ b/tests/fixtures/gpt4.yaml
@@ -1,8 +1,7 @@
 name: gpt4all
 parameters:
   model: testmodel
-context_size: 512
-threads: 10
+context_size: 128
 stopwords:
 - "HUMAN:"
 - "### Response:"
diff --git a/tests/fixtures/gpt4_2.yaml b/tests/fixtures/gpt4_2.yaml
index 60722f4..43ef5a1 100644
--- a/tests/fixtures/gpt4_2.yaml
+++ b/tests/fixtures/gpt4_2.yaml
@@ -1,8 +1,7 @@
 name: gpt4all-2
 parameters:
   model: testmodel
-context_size: 1024
-threads: 5
+context_size: 128
 stopwords:
 - "HUMAN:"
 - "### Response:"

From ad301e6ed724123fb5c4b1c86d9499f66f5a690f Mon Sep 17 00:00:00 2001
From: mudler <mudler@mocaccino.org>
Date: Fri, 5 May 2023 21:56:31 +0200
Subject: [PATCH 09/13] example(add): document query example

---
 examples/query_data/.gitignore                |  1 +
 examples/query_data/README.md                 | 49 +++++++++++++++++++
 examples/query_data/data/.keep                |  0
 examples/query_data/docker-compose.yml        | 15 ++++++
 examples/query_data/models/completion.tmpl    |  1 +
 examples/query_data/models/embeddings.yaml    | 18 +++++++
 examples/query_data/models/gpt-3.5-turbo.yaml | 18 +++++++
 examples/query_data/models/wizardlm.tmpl      |  3 ++
 examples/query_data/query.py                  | 32 ++++++++++++
 examples/query_data/store.py                  | 25 ++++++++++
 10 files changed, 162 insertions(+)
 create mode 100644 examples/query_data/.gitignore
 create mode 100644 examples/query_data/README.md
 create mode 100644 examples/query_data/data/.keep
 create mode 100644 examples/query_data/docker-compose.yml
 create mode 100644 examples/query_data/models/completion.tmpl
 create mode 100644 examples/query_data/models/embeddings.yaml
 create mode 100644 examples/query_data/models/gpt-3.5-turbo.yaml
 create mode 100644 examples/query_data/models/wizardlm.tmpl
 create mode 100644 examples/query_data/query.py
 create mode 100644 examples/query_data/store.py

diff --git a/examples/query_data/.gitignore b/examples/query_data/.gitignore
new file mode 100644
index 0000000..29ea9d5
--- /dev/null
+++ b/examples/query_data/.gitignore
@@ -0,0 +1 @@
+storage/
\ No newline at end of file
diff --git a/examples/query_data/README.md b/examples/query_data/README.md
new file mode 100644
index 0000000..fb32442
--- /dev/null
+++ b/examples/query_data/README.md
@@ -0,0 +1,49 @@
+# Data query example
+
+This example makes use of [Llama-Index](https://gpt-index.readthedocs.io/en/stable/getting_started/installation.html) to enable question answering on a set of documents.
+
+It loosely follows [the quickstart](https://gpt-index.readthedocs.io/en/stable/guides/primer/usage_pattern.html).
+
+## Requirements
+
+For this to work, you will need a model compatible with the `llama.cpp` backend. This will not work with gpt4all.
+
+The example uses `WizardLM`. Edit the config files in `models/` accordingly to specify the model you use (change `HERE`).
+
+You will also need a training data set. Copy it into the `data` directory.
+
+## Setup
+
+Start the API:
+
+```bash
+# Clone LocalAI
+git clone https://github.com/go-skynet/LocalAI
+
+cd LocalAI/examples/query_data
+
+# Copy your models, edit config files accordingly
+
+# start with docker-compose
+docker-compose up -d --build
+```
+
+### Create a storage:
+
+```bash
+export OPENAI_API_BASE=http://localhost:8080/v1
+export OPENAI_API_KEY=sk-
+
+python store.py
+```
+
+After it finishes, a directory "storage" will be created with the vector index database.
+
+## Query
+
+```bash
+export OPENAI_API_BASE=http://localhost:8080/v1
+export OPENAI_API_KEY=sk-
+
+python query.py
+```
\ No newline at end of file
diff --git a/examples/query_data/data/.keep b/examples/query_data/data/.keep
new file mode 100644
index 0000000..e69de29
diff --git a/examples/query_data/docker-compose.yml b/examples/query_data/docker-compose.yml
new file mode 100644
index 0000000..a59edfc
--- /dev/null
+++ b/examples/query_data/docker-compose.yml
@@ -0,0 +1,15 @@
+version: '3.6'
+
+services:
+  api:
+    image: quay.io/go-skynet/local-ai:latest
+    build:
+      context: .
+      dockerfile: Dockerfile
+    ports:
+      - 8080:8080
+    env_file:
+      - .env
+    volumes:
+      - ./models:/models:cached
+    command: ["/usr/bin/local-ai"]
diff --git a/examples/query_data/models/completion.tmpl b/examples/query_data/models/completion.tmpl
new file mode 100644
index 0000000..9867cfc
--- /dev/null
+++ b/examples/query_data/models/completion.tmpl
@@ -0,0 +1 @@
+{{.Input}}
\ No newline at end of file
diff --git a/examples/query_data/models/embeddings.yaml b/examples/query_data/models/embeddings.yaml
new file mode 100644
index 0000000..2173975
--- /dev/null
+++ b/examples/query_data/models/embeddings.yaml
@@ -0,0 +1,18 @@
+name: text-embedding-ada-002
+parameters:
+  model: HERE
+  top_k: 80
+  temperature: 0.2
+  top_p: 0.7
+context_size: 1024
+threads: 14
+stopwords:
+- "HUMAN:"
+- "GPT:"
+roles:
+  user: " "
+  system: " "
+embeddings: true
+template:
+  completion: completion
+  chat: gpt4all
diff --git a/examples/query_data/models/gpt-3.5-turbo.yaml b/examples/query_data/models/gpt-3.5-turbo.yaml
new file mode 100644
index 0000000..9cdb4a2
--- /dev/null
+++ b/examples/query_data/models/gpt-3.5-turbo.yaml
@@ -0,0 +1,18 @@
+name: gpt-3.5-turbo
+parameters:
+  model: HERE
+  top_k: 80
+  temperature: 0.2
+  top_p: 0.7
+context_size: 1024
+threads: 14
+embeddings: true
+stopwords:
+- "HUMAN:"
+- "GPT:"
+roles:
+  user: " "
+  system: " "
+template:
+  completion: completion
+  chat: wizardlm
diff --git a/examples/query_data/models/wizardlm.tmpl b/examples/query_data/models/wizardlm.tmpl
new file mode 100644
index 0000000..e7b1985
--- /dev/null
+++ b/examples/query_data/models/wizardlm.tmpl
@@ -0,0 +1,3 @@
+{{.Input}}
+
+### Response:
\ No newline at end of file
diff --git a/examples/query_data/query.py b/examples/query_data/query.py
new file mode 100644
index 0000000..05a288f
--- /dev/null
+++ b/examples/query_data/query.py
@@ -0,0 +1,32 @@
+import os
+
+# Uncomment to specify your OpenAI API key here (local testing only, not in production!), or add corresponding environment variable (recommended)
+# os.environ['OPENAI_API_KEY']= ""
+
+from llama_index import   LLMPredictor, PromptHelper, ServiceContext
+from langchain.llms.openai import OpenAI
+from llama_index import StorageContext, load_index_from_storage
+
+
+# This example uses the gpt-3.5-turbo model name against the local API; feel free to change if desired
+llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo",openai_api_base="http://localhost:8080/v1"))
+
+# Configure prompt parameters and initialise helper
+max_input_size = 1024
+num_output = 256
+max_chunk_overlap = 20
+
+prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)
+
+# Build the service context from the LLM predictor and prompt helper
+service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper)
+
+# rebuild storage context
+storage_context = StorageContext.from_defaults(persist_dir='./storage')
+
+# load index
+index = load_index_from_storage(storage_context,     service_context=service_context,    )
+
+query_engine = index.as_query_engine()
+response = query_engine.query("XXXXXX your question here XXXXX")
+print(response)
\ No newline at end of file
diff --git a/examples/query_data/store.py b/examples/query_data/store.py
new file mode 100644
index 0000000..56d83bb
--- /dev/null
+++ b/examples/query_data/store.py
@@ -0,0 +1,25 @@
+import os
+
+# Uncomment to specify your OpenAI API key here (local testing only, not in production!), or add corresponding environment variable (recommended)
+# os.environ['OPENAI_API_KEY']= ""
+
+from llama_index import GPTVectorStoreIndex, SimpleDirectoryReader, LLMPredictor, PromptHelper, ServiceContext
+from langchain.llms.openai import OpenAI
+from llama_index import StorageContext, load_index_from_storage
+
+# This example uses the gpt-3.5-turbo model name against the local API; feel free to change if desired
+llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo",openai_api_base="http://localhost:8080/v1"))
+
+# Configure prompt parameters and initialise helper
+max_input_size = 256
+num_output = 256
+max_chunk_overlap = 10
+
+prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)
+
+# Load documents from the 'data' directory
+documents = SimpleDirectoryReader('data').load_data()
+service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper, chunk_size_limit = 257)
+index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context)
+index.storage_context.persist(persist_dir="./storage")
+

From ec2adc2c031e106a23bc1438bb3f8d6517f22ff8 Mon Sep 17 00:00:00 2001
From: mudler <mudler@c3os.io>
Date: Fri, 5 May 2023 22:07:01 +0200
Subject: [PATCH 10/13] tests: use 3 cores

---
 api/api_test.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/api/api_test.go b/api/api_test.go
index 9682a21..6f21f21 100644
--- a/api/api_test.go
+++ b/api/api_test.go
@@ -23,7 +23,7 @@ var _ = Describe("API test", func() {
 	Context("API query", func() {
 		BeforeEach(func() {
 			modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
-			app = App("", modelLoader, 1, 512, false, true, true)
+			app = App("", modelLoader, 3, 512, false, true, true)
 			go app.Listen("127.0.0.1:9090")
 
 			defaultConfig := openai.DefaultConfig("")
@@ -87,7 +87,7 @@ var _ = Describe("API test", func() {
 	Context("Config file", func() {
 		BeforeEach(func() {
 			modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
-			app = App(os.Getenv("CONFIG_FILE"), modelLoader, 1, 512, false, true, true)
+			app = App(os.Getenv("CONFIG_FILE"), modelLoader, 3, 512, false, true, true)
 			go app.Listen("127.0.0.1:9090")
 
 			defaultConfig := openai.DefaultConfig("")

From 009ee47fe2eb4347b08d01e6e741e13305708acc Mon Sep 17 00:00:00 2001
From: mudler <mudler@c3os.io>
Date: Fri, 5 May 2023 22:51:20 +0200
Subject: [PATCH 11/13] Don't allow 0 as thread count

---
 api/config.go              | 173 +++++++++++++++++++++++++++++++++++++
 api/openai.go              | 162 ----------------------------------
 tests/fixtures/config.yaml |  10 ++-
 tests/fixtures/gpt4.yaml   |   5 +-
 tests/fixtures/gpt4_2.yaml |   5 +-
 5 files changed, 189 insertions(+), 166 deletions(-)

diff --git a/api/config.go b/api/config.go
index d5df3de..b032d15 100644
--- a/api/config.go
+++ b/api/config.go
@@ -1,12 +1,16 @@
 package api
 
 import (
+	"encoding/json"
 	"fmt"
 	"io/ioutil"
 	"os"
 	"path/filepath"
 	"strings"
 
+	model "github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/gofiber/fiber/v2"
+	"github.com/rs/zerolog/log"
 	"gopkg.in/yaml.v3"
 )
 
@@ -106,3 +110,172 @@ func (cm ConfigMerger) LoadConfigs(path string) error {
 
 	return nil
 }
+
+func updateConfig(config *Config, input *OpenAIRequest) {
+	if input.Echo {
+		config.Echo = input.Echo
+	}
+	if input.TopK != 0 {
+		config.TopK = input.TopK
+	}
+	if input.TopP != 0 {
+		config.TopP = input.TopP
+	}
+
+	if input.Temperature != 0 {
+		config.Temperature = input.Temperature
+	}
+
+	if input.Maxtokens != 0 {
+		config.Maxtokens = input.Maxtokens
+	}
+
+	switch stop := input.Stop.(type) {
+	case string:
+		if stop != "" {
+			config.StopWords = append(config.StopWords, stop)
+		}
+	case []interface{}:
+		for _, pp := range stop {
+			if s, ok := pp.(string); ok {
+				config.StopWords = append(config.StopWords, s)
+			}
+		}
+	}
+
+	if input.RepeatPenalty != 0 {
+		config.RepeatPenalty = input.RepeatPenalty
+	}
+
+	if input.Keep != 0 {
+		config.Keep = input.Keep
+	}
+
+	if input.Batch != 0 {
+		config.Batch = input.Batch
+	}
+
+	if input.F16 {
+		config.F16 = input.F16
+	}
+
+	if input.IgnoreEOS {
+		config.IgnoreEOS = input.IgnoreEOS
+	}
+
+	if input.Seed != 0 {
+		config.Seed = input.Seed
+	}
+
+	if input.Mirostat != 0 {
+		config.Mirostat = input.Mirostat
+	}
+
+	if input.MirostatETA != 0 {
+		config.MirostatETA = input.MirostatETA
+	}
+
+	if input.MirostatTAU != 0 {
+		config.MirostatTAU = input.MirostatTAU
+	}
+
+	switch inputs := input.Input.(type) {
+	case string:
+		if inputs != "" {
+			config.InputStrings = append(config.InputStrings, inputs)
+		}
+	case []interface{}:
+		for _, pp := range inputs {
+			if s, ok := pp.(string); ok {
+				config.InputStrings = append(config.InputStrings, s)
+			}
+		}
+	}
+
+	switch p := input.Prompt.(type) {
+	case string:
+		config.PromptStrings = append(config.PromptStrings, p)
+	case []interface{}:
+		for _, pp := range p {
+			if s, ok := pp.(string); ok {
+				config.PromptStrings = append(config.PromptStrings, s)
+			}
+		}
+	}
+}
+
+func readConfig(cm ConfigMerger, c *fiber.Ctx, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*Config, *OpenAIRequest, error) {
+	input := new(OpenAIRequest)
+	// Parse the request body into input; the model name may be overridden below by the ":model" route parameter or the bearer token
+	if err := c.BodyParser(input); err != nil {
+		return nil, nil, err
+	}
+
+	modelFile := input.Model
+
+	if c.Params("model") != "" {
+		modelFile = c.Params("model")
+	}
+
+	received, _ := json.Marshal(input)
+
+	log.Debug().Msgf("Request received: %s", string(received))
+
+	// Set model from bearer token, if available. TrimPrefix strips only the literal "Bearer " scheme; TrimLeft would treat it as a character set and also eat leading B/e/a/r/space characters of the token
+	bearer := strings.TrimPrefix(c.Get("authorization"), "Bearer ")
+	bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)
+
+	// If no model was specified, take the first available
+	if modelFile == "" && !bearerExists {
+		models, _ := loader.ListModels()
+		if len(models) > 0 {
+			modelFile = models[0]
+			log.Debug().Msgf("No model specified, using: %s", modelFile)
+		} else {
+			log.Debug().Msgf("No model specified, returning error")
+			return nil, nil, fmt.Errorf("no model specified")
+		}
+	}
+
+	// If a model is found in bearer token takes precedence
+	if bearerExists {
+		log.Debug().Msgf("Using model from bearer token: %s", bearer)
+		modelFile = bearer
+	}
+
+	// Load a config file if present after the model name
+	modelConfig := filepath.Join(loader.ModelPath, modelFile+".yaml")
+	if _, err := os.Stat(modelConfig); err == nil {
+		if err := cm.LoadConfig(modelConfig); err != nil {
+			return nil, nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error())
+		}
+	}
+
+	var config *Config
+	cfg, exists := cm[modelFile]
+	if !exists {
+		config = &Config{
+			OpenAIRequest: defaultRequest(modelFile),
+			ContextSize:   ctx,
+			Threads:       threads,
+			F16:           f16,
+			Debug:         debug,
+		}
+	} else {
+		config = &cfg
+	}
+
+	// Set the parameters for the language model prediction
+	updateConfig(config, input)
+
+	// Don't allow 0 as setting: fall back to the threads argument, then to 4
+	if config.Threads == 0 {
+		if threads != 0 {
+			config.Threads = threads
+		} else {
+			config.Threads = 4
+		}
+	}
+
+	return config, input, nil
+}
diff --git a/api/openai.go b/api/openai.go
index 6061e35..d98dc56 100644
--- a/api/openai.go
+++ b/api/openai.go
@@ -5,8 +5,6 @@ import (
 	"bytes"
 	"encoding/json"
 	"fmt"
-	"os"
-	"path/filepath"
 	"strings"
 
 	model "github.com/go-skynet/LocalAI/pkg/model"
@@ -117,166 +115,6 @@ func defaultRequest(modelFile string) OpenAIRequest {
 	}
 }
 
-func updateConfig(config *Config, input *OpenAIRequest) {
-	if input.Echo {
-		config.Echo = input.Echo
-	}
-	if input.TopK != 0 {
-		config.TopK = input.TopK
-	}
-	if input.TopP != 0 {
-		config.TopP = input.TopP
-	}
-
-	if input.Temperature != 0 {
-		config.Temperature = input.Temperature
-	}
-
-	if input.Maxtokens != 0 {
-		config.Maxtokens = input.Maxtokens
-	}
-
-	switch stop := input.Stop.(type) {
-	case string:
-		if stop != "" {
-			config.StopWords = append(config.StopWords, stop)
-		}
-	case []interface{}:
-		for _, pp := range stop {
-			if s, ok := pp.(string); ok {
-				config.StopWords = append(config.StopWords, s)
-			}
-		}
-	}
-
-	if input.RepeatPenalty != 0 {
-		config.RepeatPenalty = input.RepeatPenalty
-	}
-
-	if input.Keep != 0 {
-		config.Keep = input.Keep
-	}
-
-	if input.Batch != 0 {
-		config.Batch = input.Batch
-	}
-
-	if input.F16 {
-		config.F16 = input.F16
-	}
-
-	if input.IgnoreEOS {
-		config.IgnoreEOS = input.IgnoreEOS
-	}
-
-	if input.Seed != 0 {
-		config.Seed = input.Seed
-	}
-
-	if input.Mirostat != 0 {
-		config.Mirostat = input.Mirostat
-	}
-
-	if input.MirostatETA != 0 {
-		config.MirostatETA = input.MirostatETA
-	}
-
-	if input.MirostatTAU != 0 {
-		config.MirostatTAU = input.MirostatTAU
-	}
-
-	switch inputs := input.Input.(type) {
-	case string:
-		if inputs != "" {
-			config.InputStrings = append(config.InputStrings, inputs)
-		}
-	case []interface{}:
-		for _, pp := range inputs {
-			if s, ok := pp.(string); ok {
-				config.InputStrings = append(config.InputStrings, s)
-			}
-		}
-	}
-
-	switch p := input.Prompt.(type) {
-	case string:
-		config.PromptStrings = append(config.PromptStrings, p)
-	case []interface{}:
-		for _, pp := range p {
-			if s, ok := pp.(string); ok {
-				config.PromptStrings = append(config.PromptStrings, s)
-			}
-		}
-	}
-}
-
-func readConfig(cm ConfigMerger, c *fiber.Ctx, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*Config, *OpenAIRequest, error) {
-	input := new(OpenAIRequest)
-	// Get input data from the request body
-	if err := c.BodyParser(input); err != nil {
-		return nil, nil, err
-	}
-
-	modelFile := input.Model
-
-	if c.Params("model") != "" {
-		modelFile = c.Params("model")
-	}
-
-	received, _ := json.Marshal(input)
-
-	log.Debug().Msgf("Request received: %s", string(received))
-
-	// Set model from bearer token, if available
-	bearer := strings.TrimLeft(c.Get("authorization"), "Bearer ")
-	bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)
-
-	// If no model was specified, take the first available
-	if modelFile == "" && !bearerExists {
-		models, _ := loader.ListModels()
-		if len(models) > 0 {
-			modelFile = models[0]
-			log.Debug().Msgf("No model specified, using: %s", modelFile)
-		} else {
-			log.Debug().Msgf("No model specified, returning error")
-			return nil, nil, fmt.Errorf("no model specified")
-		}
-	}
-
-	// If a model is found in bearer token takes precedence
-	if bearerExists {
-		log.Debug().Msgf("Using model from bearer token: %s", bearer)
-		modelFile = bearer
-	}
-
-	// Load a config file if present after the model name
-	modelConfig := filepath.Join(loader.ModelPath, modelFile+".yaml")
-	if _, err := os.Stat(modelConfig); err == nil {
-		if err := cm.LoadConfig(modelConfig); err != nil {
-			return nil, nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error())
-		}
-	}
-
-	var config *Config
-	cfg, exists := cm[modelFile]
-	if !exists {
-		config = &Config{
-			OpenAIRequest: defaultRequest(modelFile),
-			ContextSize:   ctx,
-			Threads:       threads,
-			F16:           f16,
-			Debug:         debug,
-		}
-	} else {
-		config = &cfg
-	}
-
-	// Set the parameters for the language model prediction
-	updateConfig(config, input)
-
-	return config, input, nil
-}
-
 // https://platform.openai.com/docs/api-reference/completions
 func completionEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
diff --git a/tests/fixtures/config.yaml b/tests/fixtures/config.yaml
index 9910ffa..3deabf9 100644
--- a/tests/fixtures/config.yaml
+++ b/tests/fixtures/config.yaml
@@ -1,7 +1,10 @@
 - name: list1
   parameters:
     model: testmodel
-  context_size: 128
+    top_p: 0.9 # nucleus-sampling cutoff, a fraction in (0, 1]
+    top_k: 80 # number of candidate tokens kept, an integer
+    temperature: 0.1
+  context_size: 10
   stopwords:
   - "HUMAN:"
   - "### Response:"
@@ -13,8 +16,11 @@
     chat: ggml-gpt4all-j
 - name: list2
   parameters:
+    top_p: 0.9 # nucleus-sampling cutoff, a fraction in (0, 1]
+    top_k: 80 # number of candidate tokens kept, an integer
+    temperature: 0.1
     model: testmodel
-  context_size: 128
+  context_size: 10
   stopwords:
   - "HUMAN:"
   - "### Response:"
diff --git a/tests/fixtures/gpt4.yaml b/tests/fixtures/gpt4.yaml
index 54743bd..77b72b3 100644
--- a/tests/fixtures/gpt4.yaml
+++ b/tests/fixtures/gpt4.yaml
@@ -1,7 +1,10 @@
 name: gpt4all
 parameters:
   model: testmodel
-context_size: 128
+  top_p: 0.9 # nucleus-sampling cutoff, a fraction in (0, 1]
+  top_k: 80 # number of candidate tokens kept, an integer
+  temperature: 0.1
+context_size: 10
 stopwords:
 - "HUMAN:"
 - "### Response:"
diff --git a/tests/fixtures/gpt4_2.yaml b/tests/fixtures/gpt4_2.yaml
index 43ef5a1..62d9fdb 100644
--- a/tests/fixtures/gpt4_2.yaml
+++ b/tests/fixtures/gpt4_2.yaml
@@ -1,7 +1,10 @@
 name: gpt4all-2
 parameters:
   model: testmodel
-context_size: 128
+  top_p: 0.9 # nucleus-sampling cutoff, a fraction in (0, 1]
+  top_k: 80 # number of candidate tokens kept, an integer
+  temperature: 0.1
+context_size: 10
 stopwords:
 - "HUMAN:"
 - "### Response:"

From 75b25297fde42c1ec1c3d72dfd616720aaf944eb Mon Sep 17 00:00:00 2001
From: mudler <mudler@c3os.io>
Date: Fri, 5 May 2023 22:51:30 +0200
Subject: [PATCH 12/13] tests: run with ginkgo

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 8bc7660..615e7a8 100644
--- a/Makefile
+++ b/Makefile
@@ -130,7 +130,7 @@ test-models/testmodel:
 
 test: prepare test-models/testmodel
 	cp tests/fixtures/* test-models
-	@C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models $(GOCMD) test -v -timeout 30m ./...
+	@C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo -v -r ./...
 
 ## Help:
 help: ## Show this help.

From 8c8cf38d4d4347ea1f7b712dd14122308ddd2711 Mon Sep 17 00:00:00 2001
From: mudler <mudler@mocaccino.org>
Date: Fri, 5 May 2023 23:24:34 +0200
Subject: [PATCH 13/13] tests: use 1 core

---
 api/api_test.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/api/api_test.go b/api/api_test.go
index 6f21f21..9682a21 100644
--- a/api/api_test.go
+++ b/api/api_test.go
@@ -23,7 +23,7 @@ var _ = Describe("API test", func() {
 	Context("API query", func() {
 		BeforeEach(func() {
 			modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
-			app = App("", modelLoader, 3, 512, false, true, true)
+			app = App("", modelLoader, 1, 512, false, true, true)
 			go app.Listen("127.0.0.1:9090")
 
 			defaultConfig := openai.DefaultConfig("")
@@ -87,7 +87,7 @@ var _ = Describe("API test", func() {
 	Context("Config file", func() {
 		BeforeEach(func() {
 			modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
-			app = App(os.Getenv("CONFIG_FILE"), modelLoader, 3, 512, false, true, true)
+			app = App(os.Getenv("CONFIG_FILE"), modelLoader, 1, 512, false, true, true)
 			go app.Listen("127.0.0.1:9090")
 
 			defaultConfig := openai.DefaultConfig("")