Mixed enhancements (#196)

agent v1.7.0
Authored by Ettore Di Giacinto, committed via GitHub 2 years ago · commit 7e5fe35ae4
20 files changed (changed line counts in parentheses):

- Makefile (2)
- README.md (2)
- api/api.go (11)
- api/config.go (175)
- api/openai.go (220)
- api/prediction.go (124)
- examples/query_data/.gitignore (1)
- examples/query_data/README.md (49)
- examples/query_data/data/.keep (0)
- examples/query_data/docker-compose.yml (15)
- examples/query_data/models/completion.tmpl (1)
- examples/query_data/models/embeddings.yaml (18)
- examples/query_data/models/gpt-3.5-turbo.yaml (18)
- examples/query_data/models/wizardlm.tmpl (3)
- examples/query_data/query.py (32)
- examples/query_data/store.py (25)
- examples/rwkv/scripts/build.sh (11)
- tests/fixtures/config.yaml (12)
- tests/fixtures/gpt4.yaml (6)
- tests/fixtures/gpt4_2.yaml (6)

Makefile:
@@ -130,7 +130,7 @@ test-models/testmodel:
 
 test: prepare test-models/testmodel
 	cp tests/fixtures/* test-models
-	@C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models $(GOCMD) test -v -timeout 30m ./...
+	@C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo -v -r ./...
 
 ## Help:
 help: ## Show this help.
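The `test` target now drives the suite through the Ginkgo runner instead of plain `go test`. A minimal way to exercise it locally (the `prepare` and `test-models/testmodel` prerequisites above take care of the backends and the test model):

```bash
# Run the fixture-backed test suite via the Makefile target shown above.
make test
```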

README.md:
@@ -19,6 +19,8 @@
 LocalAI is a community-driven project, focused on making the AI accessible to anyone. Any contribution, feedback and PR is welcome! It was initially created by [mudler](https://github.com/mudler/) at the [SpectroCloud OSS Office](https://github.com/spectrocloud).
 
+See [examples on how to integrate LocalAI](https://github.com/go-skynet/LocalAI/tree/master/examples/).
+
 ### News
 
 - 02-05-2023: Support for `rwkv.cpp` models ( https://github.com/go-skynet/LocalAI/pull/158 ) and for `/edits` endpoint

api/api.go:
@@ -6,6 +6,7 @@ import (
 	model "github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/gofiber/fiber/v2"
 	"github.com/gofiber/fiber/v2/middleware/cors"
+	"github.com/gofiber/fiber/v2/middleware/logger"
 	"github.com/gofiber/fiber/v2/middleware/recover"
 	"github.com/rs/zerolog"
 	"github.com/rs/zerolog/log"
@@ -40,6 +41,12 @@ func App(configFile string, loader *model.ModelLoader, threads, ctxSize int, f16
 		},
 	})
 
+	if debug {
+		app.Use(logger.New(logger.Config{
+			Format: "[${ip}]:${port} ${status} - ${method} ${path}\n",
+		}))
+	}
+
 	cm := make(ConfigMerger)
 	if err := cm.LoadConfigs(loader.ModelPath); err != nil {
 		log.Error().Msgf("error loading config files: %s", err.Error())
@@ -73,6 +80,10 @@ func App(configFile string, loader *model.ModelLoader, threads, ctxSize int, f16
 	app.Post("/v1/embeddings", embeddingsEndpoint(cm, debug, loader, threads, ctxSize, f16))
 	app.Post("/embeddings", embeddingsEndpoint(cm, debug, loader, threads, ctxSize, f16))
 
+	// /v1/engines/{engine_id}/embeddings
+	app.Post("/v1/engines/:model/embeddings", embeddingsEndpoint(cm, debug, loader, threads, ctxSize, f16))
+
 	app.Get("/v1/models", listModels(loader, cm))
 	app.Get("/models", listModels(loader, cm))
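The new route exposes the OpenAI-style `/v1/engines/{engine_id}/embeddings` path, so the model can be picked from the URL instead of the request body. A hedged sketch of calling it (port and model name follow the `query_data` example further down; adjust to your deployment):

```bash
# `text-embedding-ada-002` is assumed to match a model config name,
# as in examples/query_data/models/embeddings.yaml below.
curl http://localhost:8080/v1/engines/text-embedding-ada-002/embeddings \
  -H "Content-Type: application/json" \
  -d '{"input": "A sentence to embed"}'
```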

api/config.go:
@@ -1,12 +1,16 @@
 package api
 
 import (
+	"encoding/json"
 	"fmt"
 	"io/ioutil"
 	"os"
 	"path/filepath"
 	"strings"
 
+	model "github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/gofiber/fiber/v2"
+	"github.com/rs/zerolog/log"
 	"gopkg.in/yaml.v3"
 )
@@ -27,6 +31,8 @@ type Config struct {
 	MirostatETA float64 `yaml:"mirostat_eta"`
 	MirostatTAU float64 `yaml:"mirostat_tau"`
 	Mirostat    int     `yaml:"mirostat"`
+
+	PromptStrings, InputStrings []string
 }
 
 type TemplateConfig struct {
@@ -104,3 +110,172 @@ func (cm ConfigMerger) LoadConfigs(path string) error {
 	return nil
 }
 
+func updateConfig(config *Config, input *OpenAIRequest) {
+	if input.Echo {
+		config.Echo = input.Echo
+	}
+	if input.TopK != 0 {
+		config.TopK = input.TopK
+	}
+	if input.TopP != 0 {
+		config.TopP = input.TopP
+	}
+	if input.Temperature != 0 {
+		config.Temperature = input.Temperature
+	}
+	if input.Maxtokens != 0 {
+		config.Maxtokens = input.Maxtokens
+	}
+	switch stop := input.Stop.(type) {
+	case string:
+		if stop != "" {
+			config.StopWords = append(config.StopWords, stop)
+		}
+	case []interface{}:
+		for _, pp := range stop {
+			if s, ok := pp.(string); ok {
+				config.StopWords = append(config.StopWords, s)
+			}
+		}
+	}
+	if input.RepeatPenalty != 0 {
+		config.RepeatPenalty = input.RepeatPenalty
+	}
+	if input.Keep != 0 {
+		config.Keep = input.Keep
+	}
+	if input.Batch != 0 {
+		config.Batch = input.Batch
+	}
+	if input.F16 {
+		config.F16 = input.F16
+	}
+	if input.IgnoreEOS {
+		config.IgnoreEOS = input.IgnoreEOS
+	}
+	if input.Seed != 0 {
+		config.Seed = input.Seed
+	}
+	if input.Mirostat != 0 {
+		config.Mirostat = input.Mirostat
+	}
+	if input.MirostatETA != 0 {
+		config.MirostatETA = input.MirostatETA
+	}
+	if input.MirostatTAU != 0 {
+		config.MirostatTAU = input.MirostatTAU
+	}
+	switch inputs := input.Input.(type) {
+	case string:
+		if inputs != "" {
+			config.InputStrings = append(config.InputStrings, inputs)
+		}
+	case []interface{}:
+		for _, pp := range inputs {
+			if s, ok := pp.(string); ok {
+				config.InputStrings = append(config.InputStrings, s)
+			}
+		}
+	}
+	switch p := input.Prompt.(type) {
+	case string:
+		config.PromptStrings = append(config.PromptStrings, p)
+	case []interface{}:
+		for _, pp := range p {
+			if s, ok := pp.(string); ok {
+				config.PromptStrings = append(config.PromptStrings, s)
+			}
+		}
+	}
+}
+
+func readConfig(cm ConfigMerger, c *fiber.Ctx, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*Config, *OpenAIRequest, error) {
+	input := new(OpenAIRequest)
+	// Get input data from the request body
+	if err := c.BodyParser(input); err != nil {
+		return nil, nil, err
+	}
+
+	modelFile := input.Model
+	if c.Params("model") != "" {
+		modelFile = c.Params("model")
+	}
+
+	received, _ := json.Marshal(input)
+	log.Debug().Msgf("Request received: %s", string(received))
+
+	// Set model from bearer token, if available
+	bearer := strings.TrimLeft(c.Get("authorization"), "Bearer ")
+	bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)
+
+	// If no model was specified, take the first available
+	if modelFile == "" && !bearerExists {
+		models, _ := loader.ListModels()
+		if len(models) > 0 {
+			modelFile = models[0]
+			log.Debug().Msgf("No model specified, using: %s", modelFile)
+		} else {
+			log.Debug().Msgf("No model specified, returning error")
+			return nil, nil, fmt.Errorf("no model specified")
+		}
+	}
+
+	// If a model is found in bearer token takes precedence
+	if bearerExists {
+		log.Debug().Msgf("Using model from bearer token: %s", bearer)
+		modelFile = bearer
+	}
+
+	// Load a config file if present after the model name
+	modelConfig := filepath.Join(loader.ModelPath, modelFile+".yaml")
+	if _, err := os.Stat(modelConfig); err == nil {
+		if err := cm.LoadConfig(modelConfig); err != nil {
+			return nil, nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error())
+		}
+	}
+
+	var config *Config
+	cfg, exists := cm[modelFile]
+	if !exists {
+		config = &Config{
+			OpenAIRequest: defaultRequest(modelFile),
+			ContextSize:   ctx,
+			Threads:       threads,
+			F16:           f16,
+			Debug:         debug,
+		}
+	} else {
+		config = &cfg
+	}
+
+	// Set the parameters for the language model prediction
+	updateConfig(config, input)
+
+	// Don't allow 0 as setting
+	if config.Threads == 0 {
+		if threads != 0 {
+			config.Threads = threads
+		} else {
+			config.Threads = 4
+		}
+	}
+
+	return config, input, nil
+}
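As the relocated `readConfig` shows, the model is resolved in this order: a bearer token naming a file in the models path wins, then the `:model` path parameter, then the `model` field of the request body, and finally the first model found on disk; per-request fields are then merged over the YAML config by `updateConfig`. A sketch of selecting a model via the Authorization header (the file name is a placeholder and must exist in your models directory; the `/v1/completions` route is assumed to be registered elsewhere in `api.go`):

```bash
# readConfig only honours the bearer value when a file with that name
# exists in the models path; otherwise it falls back to the body/path model.
curl http://localhost:8080/v1/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer your-model.bin" \
  -d '{"prompt": "What is LocalAI?", "temperature": 0.2}'
```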

api/openai.go:
@@ -5,8 +5,6 @@ import (
 	"bytes"
 	"encoding/json"
 	"fmt"
-	"os"
-	"path/filepath"
 	"strings"
 
 	model "github.com/go-skynet/LocalAI/pkg/model"
@@ -75,8 +73,8 @@ type OpenAIRequest struct {
 	Prompt interface{} `json:"prompt" yaml:"prompt"`
 
 	// Edit endpoint
 	Instruction string `json:"instruction" yaml:"instruction"`
-	Input string `json:"input" yaml:"input"`
+	Input interface{} `json:"input" yaml:"input"`
 
 	Stop interface{} `json:"stop" yaml:"stop"`
@@ -117,147 +115,6 @@ func defaultRequest(modelFile string) OpenAIRequest {
 	}
 }
 
-func updateConfig(config *Config, input *OpenAIRequest) {
-	if input.Echo {
-		config.Echo = input.Echo
-	}
-	if input.TopK != 0 {
-		config.TopK = input.TopK
-	}
-	if input.TopP != 0 {
-		config.TopP = input.TopP
-	}
-	if input.Temperature != 0 {
-		config.Temperature = input.Temperature
-	}
-	if input.Maxtokens != 0 {
-		config.Maxtokens = input.Maxtokens
-	}
-	switch stop := input.Stop.(type) {
-	case string:
-		if stop != "" {
-			config.StopWords = append(config.StopWords, stop)
-		}
-	case []interface{}:
-		for _, pp := range stop {
-			if s, ok := pp.(string); ok {
-				config.StopWords = append(config.StopWords, s)
-			}
-		}
-	}
-	if input.RepeatPenalty != 0 {
-		config.RepeatPenalty = input.RepeatPenalty
-	}
-	if input.Keep != 0 {
-		config.Keep = input.Keep
-	}
-	if input.Batch != 0 {
-		config.Batch = input.Batch
-	}
-	if input.F16 {
-		config.F16 = input.F16
-	}
-	if input.IgnoreEOS {
-		config.IgnoreEOS = input.IgnoreEOS
-	}
-	if input.Seed != 0 {
-		config.Seed = input.Seed
-	}
-	if input.Mirostat != 0 {
-		config.Mirostat = input.Mirostat
-	}
-	if input.MirostatETA != 0 {
-		config.MirostatETA = input.MirostatETA
-	}
-	if input.MirostatTAU != 0 {
-		config.MirostatTAU = input.MirostatTAU
-	}
-}
-
-func readConfig(cm ConfigMerger, c *fiber.Ctx, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*Config, *OpenAIRequest, error) {
-	input := new(OpenAIRequest)
-	// Get input data from the request body
-	if err := c.BodyParser(input); err != nil {
-		return nil, nil, err
-	}
-
-	modelFile := input.Model
-	received, _ := json.Marshal(input)
-	log.Debug().Msgf("Request received: %s", string(received))
-
-	// Set model from bearer token, if available
-	bearer := strings.TrimLeft(c.Get("authorization"), "Bearer ")
-	bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)
-
-	// If no model was specified, take the first available
-	if modelFile == "" && !bearerExists {
-		models, _ := loader.ListModels()
-		if len(models) > 0 {
-			modelFile = models[0]
-			log.Debug().Msgf("No model specified, using: %s", modelFile)
-		} else {
-			log.Debug().Msgf("No model specified, returning error")
-			return nil, nil, fmt.Errorf("no model specified")
-		}
-	}
-
-	// If a model is found in bearer token takes precedence
-	if bearerExists {
-		log.Debug().Msgf("Using model from bearer token: %s", bearer)
-		modelFile = bearer
-	}
-
-	// Load a config file if present after the model name
-	modelConfig := filepath.Join(loader.ModelPath, modelFile+".yaml")
-	if _, err := os.Stat(modelConfig); err == nil {
-		if err := cm.LoadConfig(modelConfig); err != nil {
-			return nil, nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error())
-		}
-	}
-
-	var config *Config
-	cfg, exists := cm[modelFile]
-	if !exists {
-		config = &Config{
-			OpenAIRequest: defaultRequest(modelFile),
-		}
-	} else {
-		config = &cfg
-	}
-
-	// Set the parameters for the language model prediction
-	updateConfig(config, input)
-
-	if threads != 0 {
-		config.Threads = threads
-	}
-	if ctx != 0 {
-		config.ContextSize = ctx
-	}
-	if f16 {
-		config.F16 = true
-	}
-	if debug {
-		config.Debug = true
-	}
-
-	return config, input, nil
-}
-
 // https://platform.openai.com/docs/api-reference/completions
 func completionEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
@@ -268,19 +125,6 @@ func completionEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader,
 
 		log.Debug().Msgf("Parameter Config: %+v", config)
 
-		predInput := []string{}
-
-		switch p := input.Prompt.(type) {
-		case string:
-			predInput = append(predInput, p)
-		case []interface{}:
-			for _, pp := range p {
-				if s, ok := pp.(string); ok {
-					predInput = append(predInput, s)
-				}
-			}
-		}
-
 		templateFile := config.Model
 
 		if config.TemplateConfig.Completion != "" {
@@ -288,7 +132,7 @@ func completionEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader,
 		}
 
 		var result []Choice
-		for _, i := range predInput {
+		for _, i := range config.PromptStrings {
 			// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
 			templatedInput, err := loader.TemplatePrefix(templateFile, struct {
 				Input string
@@ -331,20 +175,26 @@ func embeddingsEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader,
 		}
 
 		log.Debug().Msgf("Parameter Config: %+v", config)
+		items := []Item{}
 
-		// get the model function to call for the result
-		embedFn, err := ModelEmbedding(input.Input, loader, *config)
-		if err != nil {
-			return err
-		}
+		for i, s := range config.InputStrings {
+			// get the model function to call for the result
+			embedFn, err := ModelEmbedding(s, loader, *config)
+			if err != nil {
+				return err
+			}
 
-		embeddings, err := embedFn()
-		if err != nil {
-			return err
+			embeddings, err := embedFn()
+			if err != nil {
+				return err
+			}
+			items = append(items, Item{Embedding: embeddings, Index: i, Object: "embedding"})
 		}
 
 		resp := &OpenAIResponse{
 			Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
-			Data: []Item{{Embedding: embeddings, Index: 0, Object: "embedding"}},
+			Data: items,
 			Object: "list",
 		}
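Since the handler now iterates over `config.InputStrings`, one request may carry several inputs and gets back one embedding item per input, each carrying its index, in line with the OpenAI response shape. A hedged example (model name as in the `query_data` example below):

```bash
# The response `data` array holds one item per input, with index 0 and 1.
curl http://localhost:8080/v1/embeddings \
  -H "Content-Type: application/json" \
  -d '{"model": "text-embedding-ada-002", "input": ["first sentence", "second sentence"]}'
```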
@@ -480,28 +330,32 @@ func editEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, thread
 		log.Debug().Msgf("Parameter Config: %+v", config)
 
-		predInput := input.Input
-
 		templateFile := config.Model
 
 		if config.TemplateConfig.Edit != "" {
 			templateFile = config.TemplateConfig.Edit
 		}
 
-		// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
-		templatedInput, err := loader.TemplatePrefix(templateFile, struct {
-			Input       string
-			Instruction string
-		}{Input: predInput, Instruction: input.Instruction})
-		if err == nil {
-			predInput = templatedInput
-			log.Debug().Msgf("Template found, input modified to: %s", predInput)
-		}
+		var result []Choice
+		for _, i := range config.InputStrings {
+			// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
+			templatedInput, err := loader.TemplatePrefix(templateFile, struct {
+				Input       string
+				Instruction string
+			}{Input: i})
+			if err == nil {
+				i = templatedInput
+				log.Debug().Msgf("Template found, input modified to: %s", i)
+			}
 
-		result, err := ComputeChoices(predInput, input, config, loader, func(s string, c *[]Choice) {
-			*c = append(*c, Choice{Text: s})
-		}, nil)
-		if err != nil {
-			return err
+			r, err := ComputeChoices(i, input, config, loader, func(s string, c *[]Choice) {
+				*c = append(*c, Choice{Text: s})
+			}, nil)
+			if err != nil {
+				return err
+			}
+			result = append(result, r...)
 		}
 
 		resp := &OpenAIResponse{
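The `/edits` handler loops over `config.InputStrings` the same way, so `input` can be a string or an array and each entry is templated and edited independently, producing one choice per input. A sketch, assuming the `/v1/edits` route registered in `api.go` and a model config named `gpt-3.5-turbo`:

```bash
curl http://localhost:8080/v1/edits \
  -H "Content-Type: application/json" \
  -d '{"model": "gpt-3.5-turbo", "instruction": "Fix the spelling", "input": ["ths is a tst", "anothr exmple"]}'
```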

api/prediction.go:
@@ -28,6 +28,7 @@ func defaultLLamaOpts(c Config) []llama.ModelOption {
 	if c.Embeddings {
 		llamaOpts = append(llamaOpts, llama.EnableEmbeddings)
 	}
+
 	return llamaOpts
 }
@@ -55,7 +56,8 @@ func ModelEmbedding(s string, loader *model.ModelLoader, c Config) (func() ([]fl
 	switch model := inferenceModel.(type) {
 	case *llama.LLama:
 		fn = func() ([]float32, error) {
-			return model.Embeddings(s)
+			predictOptions := buildLLamaPredictOptions(c)
+			return model.Embeddings(s, predictOptions...)
 		}
 	default:
 		fn = func() ([]float32, error) {
@@ -76,10 +78,77 @@ func ModelEmbedding(s string, loader *model.ModelLoader, c Config) (func() ([]fl
 		l.Lock()
 		defer l.Unlock()
 
-		return fn()
+		embeds, err := fn()
+		if err != nil {
+			return embeds, err
+		}
+		// Remove trailing 0s
+		for i := len(embeds) - 1; i >= 0; i-- {
+			if embeds[i] == 0.0 {
+				embeds = embeds[:i]
+			} else {
+				break
+			}
+		}
+		return embeds, nil
 	}, nil
 }
 
+func buildLLamaPredictOptions(c Config) []llama.PredictOption {
+	// Generate the prediction using the language model
+	predictOptions := []llama.PredictOption{
+		llama.SetTemperature(c.Temperature),
+		llama.SetTopP(c.TopP),
+		llama.SetTopK(c.TopK),
+		llama.SetTokens(c.Maxtokens),
+		llama.SetThreads(c.Threads),
+	}
+
+	if c.Mirostat != 0 {
+		predictOptions = append(predictOptions, llama.SetMirostat(c.Mirostat))
+	}
+
+	if c.MirostatETA != 0 {
+		predictOptions = append(predictOptions, llama.SetMirostatETA(c.MirostatETA))
+	}
+
+	if c.MirostatTAU != 0 {
+		predictOptions = append(predictOptions, llama.SetMirostatTAU(c.MirostatTAU))
+	}
+
+	if c.Debug {
+		predictOptions = append(predictOptions, llama.Debug)
+	}
+
+	predictOptions = append(predictOptions, llama.SetStopWords(c.StopWords...))
+
+	if c.RepeatPenalty != 0 {
+		predictOptions = append(predictOptions, llama.SetPenalty(c.RepeatPenalty))
+	}
+
+	if c.Keep != 0 {
+		predictOptions = append(predictOptions, llama.SetNKeep(c.Keep))
+	}
+
+	if c.Batch != 0 {
+		predictOptions = append(predictOptions, llama.SetBatch(c.Batch))
+	}
+
+	if c.F16 {
+		predictOptions = append(predictOptions, llama.EnableF16KV)
+	}
+
+	if c.IgnoreEOS {
+		predictOptions = append(predictOptions, llama.IgnoreEOS)
+	}
+
+	if c.Seed != 0 {
+		predictOptions = append(predictOptions, llama.SetSeed(c.Seed))
+	}
+
+	return predictOptions
+}
+
 func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback func(string) bool) (func() (string, error), error) {
 	supportStreams := false
 	modelFile := c.Model
@@ -197,56 +266,7 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback
 			model.SetTokenCallback(tokenCallback)
 		}
 
-		// Generate the prediction using the language model
-		predictOptions := []llama.PredictOption{
-			llama.SetTemperature(c.Temperature),
-			llama.SetTopP(c.TopP),
-			llama.SetTopK(c.TopK),
-			llama.SetTokens(c.Maxtokens),
-			llama.SetThreads(c.Threads),
-		}
-
-		if c.Mirostat != 0 {
-			predictOptions = append(predictOptions, llama.SetMirostat(c.Mirostat))
-		}
-
-		if c.MirostatETA != 0 {
-			predictOptions = append(predictOptions, llama.SetMirostatETA(c.MirostatETA))
-		}
-
-		if c.MirostatTAU != 0 {
-			predictOptions = append(predictOptions, llama.SetMirostatTAU(c.MirostatTAU))
-		}
-
-		if c.Debug {
-			predictOptions = append(predictOptions, llama.Debug)
-		}
-
-		predictOptions = append(predictOptions, llama.SetStopWords(c.StopWords...))
-
-		if c.RepeatPenalty != 0 {
-			predictOptions = append(predictOptions, llama.SetPenalty(c.RepeatPenalty))
-		}
-
-		if c.Keep != 0 {
-			predictOptions = append(predictOptions, llama.SetNKeep(c.Keep))
-		}
-
-		if c.Batch != 0 {
-			predictOptions = append(predictOptions, llama.SetBatch(c.Batch))
-		}
-
-		if c.F16 {
-			predictOptions = append(predictOptions, llama.EnableF16KV)
-		}
-
-		if c.IgnoreEOS {
-			predictOptions = append(predictOptions, llama.IgnoreEOS)
-		}
-
-		if c.Seed != 0 {
-			predictOptions = append(predictOptions, llama.SetSeed(c.Seed))
-		}
-
+		predictOptions := buildLLamaPredictOptions(c)
 
 		str, er := model.Predict(
 			s,
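With inference and embeddings now sharing `buildLLamaPredictOptions`, the per-request fields merged by `updateConfig` map directly onto llama.cpp predict options. A hedged completion request exercising a few of them (the OpenAI-style field names are an assumption based on the spec; values mirror the example configs below):

```bash
# Per-request fields override the model's YAML defaults before prediction.
curl http://localhost:8080/v1/completions \
  -H "Content-Type: application/json" \
  -d '{
        "model": "gpt-3.5-turbo",
        "prompt": "Briefly describe LocalAI.",
        "temperature": 0.2,
        "top_p": 0.7,
        "top_k": 80,
        "max_tokens": 128,
        "stop": ["HUMAN:"]
      }'
```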

examples/query_data/.gitignore (new file):
storage/

examples/query_data/README.md (new file):
# Data query example
This example makes use of [Llama-Index](https://gpt-index.readthedocs.io/en/stable/getting_started/installation.html) to enable question answering on a set of documents.
It loosely follows [the quickstart](https://gpt-index.readthedocs.io/en/stable/guides/primer/usage_pattern.html).
## Requirements
For this example to work, you will need a model compatible with the `llama.cpp` backend. It will not work with gpt4all models.
The example uses `WizardLM`. Edit the config files in `models/` to point to the model you use (replace `HERE` with your model file name).
You will also need a data set to index; copy your documents into the `data` directory.
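For example, a minimal way to put the pieces in place before starting the API (paths and the model file name are placeholders, adjust to your setup):

```bash
# Copy a llama.cpp-compatible model and point the configs at it.
cp /path/to/your-model.bin models/
sed -i 's/HERE/your-model.bin/g' models/embeddings.yaml models/gpt-3.5-turbo.yaml
# Copy the documents you want to index.
cp /path/to/your-documents/* data/
```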
## Setup
Start the API:
```bash
# Clone LocalAI
git clone https://github.com/go-skynet/LocalAI
cd LocalAI/examples/query_data
# Copy your models, edit config files accordingly
# start with docker-compose
docker-compose up -d --build
```
### Create the storage
```bash
export OPENAI_API_BASE=http://localhost:8080/v1
export OPENAI_API_KEY=sk-
python store.py
```
When it finishes, a `storage` directory will be created containing the vector index database.
## Query
```bash
export OPENAI_API_BASE=http://localhost:8080/v1
export OPENAI_API_KEY=sk-
python query.py
```

examples/query_data/docker-compose.yml (new file):

version: '3.6'

services:
  api:
    image: quay.io/go-skynet/local-ai:latest
    build:
      context: .
      dockerfile: Dockerfile
    ports:
      - 8080:8080
    env_file:
      - .env
    volumes:
      - ./models:/models:cached
    command: ["/usr/bin/local-ai"]

examples/query_data/models/embeddings.yaml (new file):

name: text-embedding-ada-002
parameters:
  model: HERE
  top_k: 80
  temperature: 0.2
  top_p: 0.7
context_size: 1024
threads: 14
stopwords:
- "HUMAN:"
- "GPT:"
roles:
  user: " "
  system: " "
embeddings: true
template:
  completion: completion
  chat: gpt4all

examples/query_data/models/gpt-3.5-turbo.yaml (new file):

name: gpt-3.5-turbo
parameters:
  model: HERE
  top_k: 80
  temperature: 0.2
  top_p: 0.7
context_size: 1024
threads: 14
embeddings: true
stopwords:
- "HUMAN:"
- "GPT:"
roles:
  user: " "
  system: " "
template:
  completion: completion
  chat: wizardlm

examples/query_data/models/wizardlm.tmpl (new file):
{{.Input}}
### Response:

examples/query_data/query.py (new file):
import os
# Uncomment to specify your OpenAI API key here (local testing only, not in production!), or add corresponding environment variable (recommended)
# os.environ['OPENAI_API_KEY']= ""
from llama_index import LLMPredictor, PromptHelper, ServiceContext
from langchain.llms.openai import OpenAI
from llama_index import StorageContext, load_index_from_storage
# This example uses text-davinci-003 by default; feel free to change if desired
llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo",openai_api_base="http://localhost:8080/v1"))
# Configure prompt parameters and initialise helper
max_input_size = 1024
num_output = 256
max_chunk_overlap = 20
prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)
# Load documents from the 'data' directory
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper)
# rebuild storage context
storage_context = StorageContext.from_defaults(persist_dir='./storage')
# load index
index = load_index_from_storage(storage_context, service_context=service_context, )
query_engine = index.as_query_engine()
response = query_engine.query("XXXXXX your question here XXXXX")
print(response)

examples/query_data/store.py (new file):
import os
# Uncomment to specify your OpenAI API key here (local testing only, not in production!), or add corresponding environment variable (recommended)
# os.environ['OPENAI_API_KEY']= ""
from llama_index import GPTVectorStoreIndex, SimpleDirectoryReader, LLMPredictor, PromptHelper, ServiceContext
from langchain.llms.openai import OpenAI
from llama_index import StorageContext, load_index_from_storage
# This example uses text-davinci-003 by default; feel free to change if desired
llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo",openai_api_base="http://localhost:8080/v1"))
# Configure prompt parameters and initialise helper
max_input_size = 256
num_output = 256
max_chunk_overlap = 10
prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)
# Load documents from the 'data' directory
documents = SimpleDirectoryReader('data').load_data()
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper, chunk_size_limit = 257)
index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context)
index.storage_context.persist(persist_dir="./storage")

examples/rwkv/scripts/build.sh (new file):
#!/bin/bash
set -ex
URL=$1
OUT=$2
FILENAME=$(basename $URL)
wget -nc $URL -O /build/$FILENAME
python3 /build/rwkv.cpp/rwkv/convert_pytorch_to_ggml.py /build/$FILENAME /build/float-model float16
python3 /build/rwkv.cpp/rwkv/quantize.py /build/float-model $OUT Q4_2

tests/fixtures/config.yaml:
@@ -1,8 +1,10 @@
 - name: list1
   parameters:
     model: testmodel
-  context_size: 512
-  threads: 10
+    top_p: 80
+    top_k: 0.9
+    temperature: 0.1
+  context_size: 10
   stopwords:
   - "HUMAN:"
   - "### Response:"
@@ -14,9 +16,11 @@
     chat: ggml-gpt4all-j
 - name: list2
   parameters:
+    top_p: 80
+    top_k: 0.9
+    temperature: 0.1
     model: testmodel
-  context_size: 512
-  threads: 10
+  context_size: 10
   stopwords:
   - "HUMAN:"
   - "### Response:"

tests/fixtures/gpt4.yaml:
@@ -1,8 +1,10 @@
 name: gpt4all
 parameters:
   model: testmodel
-context_size: 512
-threads: 10
+  top_p: 80
+  top_k: 0.9
+  temperature: 0.1
+context_size: 10
 stopwords:
 - "HUMAN:"
 - "### Response:"

tests/fixtures/gpt4_2.yaml:
@@ -1,8 +1,10 @@
 name: gpt4all-2
 parameters:
   model: testmodel
-context_size: 1024
-threads: 5
+  top_p: 80
+  top_k: 0.9
+  temperature: 0.1
+context_size: 10
 stopwords:
 - "HUMAN:"
 - "### Response:"
