feat: config files and SSE (#83)
Signed-off-by: mudler <mudler@mocaccino.org>
Signed-off-by: Tyler Gillson <tyler.gillson@gmail.com>
Co-authored-by: Tyler Gillson <tyler.gillson@gmail.com>
parent 4e2061636e
commit c806eae0de
@ -1 +1,2 @@
models
examples/chatbot-ui/models
@ -0,0 +1,100 @@
package api

import (
    "fmt"
    "io/ioutil"
    "os"
    "path/filepath"
    "strings"

    "gopkg.in/yaml.v3"
)

type Config struct {
    OpenAIRequest  `yaml:"parameters"`
    Name           string            `yaml:"name"`
    StopWords      []string          `yaml:"stopwords"`
    Cutstrings     []string          `yaml:"cutstrings"`
    TrimSpace      []string          `yaml:"trimspace"`
    ContextSize    int               `yaml:"context_size"`
    F16            bool              `yaml:"f16"`
    Threads        int               `yaml:"threads"`
    Debug          bool              `yaml:"debug"`
    Roles          map[string]string `yaml:"roles"`
    TemplateConfig TemplateConfig    `yaml:"template"`
}

type TemplateConfig struct {
    Completion string `yaml:"completion"`
    Chat       string `yaml:"chat"`
}

type ConfigMerger map[string]Config

func ReadConfigFile(file string) ([]*Config, error) {
    c := &[]*Config{}
    f, err := os.ReadFile(file)
    if err != nil {
        return nil, fmt.Errorf("cannot read config file: %w", err)
    }
    if err := yaml.Unmarshal(f, c); err != nil {
        return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
    }

    return *c, nil
}

func ReadConfig(file string) (*Config, error) {
    c := &Config{}
    f, err := os.ReadFile(file)
    if err != nil {
        return nil, fmt.Errorf("cannot read config file: %w", err)
    }
    if err := yaml.Unmarshal(f, c); err != nil {
        return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
    }

    return c, nil
}

func (cm ConfigMerger) LoadConfigFile(file string) error {
    c, err := ReadConfigFile(file)
    if err != nil {
        return fmt.Errorf("cannot load config file: %w", err)
    }

    for _, cc := range c {
        cm[cc.Name] = *cc
    }
    return nil
}

func (cm ConfigMerger) LoadConfig(file string) error {
    c, err := ReadConfig(file)
    if err != nil {
        return fmt.Errorf("cannot read config file: %w", err)
    }

    cm[c.Name] = *c
    return nil
}

func (cm ConfigMerger) LoadConfigs(path string) error {
    files, err := ioutil.ReadDir(path)
    if err != nil {
        return err
    }

    for _, file := range files {
        // Skip anything that is not a YAML config (e.g. prompt templates and .keep files)
        if !strings.Contains(file.Name(), ".yaml") {
            continue
        }
        c, err := ReadConfig(filepath.Join(path, file.Name()))
        if err == nil {
            cm[c.Name] = *c
        }
    }

    return nil
}
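
For context, a minimal sketch of how the new `ConfigMerger` is meant to be driven: it preloads every `*.yaml` file found in a directory and indexes each config by its `name:` field. The import path and the `./models` directory below are assumptions for illustration, not part of this diff.

```go
package main

import (
    "fmt"
    "log"

    api "github.com/go-skynet/LocalAI/api" // assumed import path for the package above
)

func main() {
    // Preload every *.yaml config found in the models directory into the merger.
    cm := api.ConfigMerger{}
    if err := cm.LoadConfigs("./models"); err != nil {
        log.Fatalf("cannot load configs: %v", err)
    }

    // Each config is indexed by its `name:` field, so a request for
    // "gpt-3.5-turbo" resolves to the parameters declared in its YAML file.
    if cfg, ok := cm["gpt-3.5-turbo"]; ok {
        fmt.Printf("model=%s threads=%d context=%d\n", cfg.Model, cfg.Threads, cfg.ContextSize)
    }
}
```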
@ -0,0 +1,396 @@
package api

import (
    "bufio"
    "encoding/json"
    "fmt"
    "os"
    "path/filepath"
    "regexp"
    "strings"
    "sync"

    model "github.com/go-skynet/LocalAI/pkg/model"
    "github.com/gofiber/fiber/v2"
    "github.com/rs/zerolog/log"
    "github.com/valyala/fasthttp"
)

// APIError provides error information returned by the OpenAI API.
type APIError struct {
    Code    any     `json:"code,omitempty"`
    Message string  `json:"message"`
    Param   *string `json:"param,omitempty"`
    Type    string  `json:"type"`
}

type ErrorResponse struct {
    Error *APIError `json:"error,omitempty"`
}

type OpenAIResponse struct {
    Created int      `json:"created,omitempty"`
    Object  string   `json:"object,omitempty"`
    ID      string   `json:"id,omitempty"`
    Model   string   `json:"model,omitempty"`
    Choices []Choice `json:"choices,omitempty"`
}

type Choice struct {
    Index        int      `json:"index,omitempty"`
    FinishReason string   `json:"finish_reason,omitempty"`
    Message      *Message `json:"message,omitempty"`
    Delta        *Message `json:"delta,omitempty"`
    Text         string   `json:"text,omitempty"`
}

type Message struct {
    Role    string `json:"role,omitempty" yaml:"role"`
    Content string `json:"content,omitempty" yaml:"content"`
}

type OpenAIModel struct {
    ID     string `json:"id"`
    Object string `json:"object"`
}

type OpenAIRequest struct {
    Model string `json:"model" yaml:"model"`

    // Prompt is only read by completion API calls
    Prompt string `json:"prompt" yaml:"prompt"`

    Stop string `json:"stop" yaml:"stop"`

    // Messages is only read by chat completion API calls
    Messages []Message `json:"messages" yaml:"messages"`

    Stream bool `json:"stream"`
    Echo   bool `json:"echo"`
    // Common options between all the API calls
    TopP        float64 `json:"top_p" yaml:"top_p"`
    TopK        int     `json:"top_k" yaml:"top_k"`
    Temperature float64 `json:"temperature" yaml:"temperature"`
    Maxtokens   int     `json:"max_tokens" yaml:"max_tokens"`

    N int `json:"n"`

    // Custom parameters - not present in the OpenAI API
    Batch         int     `json:"batch" yaml:"batch"`
    F16           bool    `json:"f16" yaml:"f16"`
    IgnoreEOS     bool    `json:"ignore_eos" yaml:"ignore_eos"`
    RepeatPenalty float64 `json:"repeat_penalty" yaml:"repeat_penalty"`
    Keep          int     `json:"n_keep" yaml:"n_keep"`

    Seed int `json:"seed" yaml:"seed"`
}

func defaultRequest(modelFile string) OpenAIRequest {
    return OpenAIRequest{
        TopP:        0.7,
        TopK:        80,
        Maxtokens:   512,
        Temperature: 0.9,
        Model:       modelFile,
    }
}

func updateConfig(config *Config, input *OpenAIRequest) {
    if input.Echo {
        config.Echo = input.Echo
    }
    if input.TopK != 0 {
        config.TopK = input.TopK
    }
    if input.TopP != 0 {
        config.TopP = input.TopP
    }

    if input.Temperature != 0 {
        config.Temperature = input.Temperature
    }

    if input.Maxtokens != 0 {
        config.Maxtokens = input.Maxtokens
    }

    if input.Stop != "" {
        config.StopWords = append(config.StopWords, input.Stop)
    }

    if input.RepeatPenalty != 0 {
        config.RepeatPenalty = input.RepeatPenalty
    }

    if input.Keep != 0 {
        config.Keep = input.Keep
    }

    if input.Batch != 0 {
        config.Batch = input.Batch
    }

    if input.F16 {
        config.F16 = input.F16
    }

    if input.IgnoreEOS {
        config.IgnoreEOS = input.IgnoreEOS
    }

    if input.Seed != 0 {
        config.Seed = input.Seed
    }
}

var cutstrings map[string]*regexp.Regexp = make(map[string]*regexp.Regexp)
var mu sync.Mutex = sync.Mutex{}

// https://platform.openai.com/docs/api-reference/completions
func openAIEndpoint(cm ConfigMerger, chat, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
    return func(c *fiber.Ctx) error {

        input := new(OpenAIRequest)
        // Get input data from the request body
        if err := c.BodyParser(input); err != nil {
            return err
        }

        if input.Stream {
            log.Debug().Msgf("Stream request received")
            //c.Response().Header.SetContentType(fiber.MIMETextHTMLCharsetUTF8)
            c.Set("Content-Type", "text/event-stream; charset=utf-8")
            c.Set("Cache-Control", "no-cache")
            c.Set("Connection", "keep-alive")
            c.Set("Transfer-Encoding", "chunked")
        }

        modelFile := input.Model
        received, _ := json.Marshal(input)

        log.Debug().Msgf("Request received: %s", string(received))

        // Set model from bearer token, if available
        bearer := strings.TrimLeft(c.Get("authorization"), "Bearer ")
        bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)

        // If no model was specified, take the first available
        if modelFile == "" && !bearerExists {
            models, _ := loader.ListModels()
            if len(models) > 0 {
                modelFile = models[0]
                log.Debug().Msgf("No model specified, using: %s", modelFile)
            } else {
                log.Debug().Msgf("No model specified, returning error")
                return fmt.Errorf("no model specified")
            }
        }

        // If a model was found in the bearer token, it takes precedence
        if bearerExists {
            log.Debug().Msgf("Using model from bearer token: %s", bearer)
            modelFile = bearer
        }

        // Load a config file named after the model, if present
        modelConfig := filepath.Join(loader.ModelPath, modelFile+".yaml")
        if _, err := os.Stat(modelConfig); err == nil {
            if err := cm.LoadConfig(modelConfig); err != nil {
                return fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error())
            }
        }

        var config *Config
        cfg, exists := cm[modelFile]
        if !exists {
            config = &Config{
                OpenAIRequest: defaultRequest(modelFile),
            }
        } else {
            config = &cfg
        }

        // Set the parameters for the language model prediction
        updateConfig(config, input)

        if threads != 0 {
            config.Threads = threads
        }
        if ctx != 0 {
            config.ContextSize = ctx
        }
        if f16 {
            config.F16 = true
        }

        if debug {
            config.Debug = true
        }

        log.Debug().Msgf("Parameter Config: %+v", config)

        predInput := input.Prompt
        if chat {
            mess := []string{}
            for _, i := range input.Messages {
                r := config.Roles[i.Role]
                if r == "" {
                    r = i.Role
                }

                content := fmt.Sprint(r, " ", i.Content)
                mess = append(mess, content)
            }

            predInput = strings.Join(mess, "\n")
        }

        templateFile := config.Model
        if config.TemplateConfig.Chat != "" && chat {
            templateFile = config.TemplateConfig.Chat
        }

        if config.TemplateConfig.Completion != "" && !chat {
            templateFile = config.TemplateConfig.Completion
        }

        // A model can have a "file.bin.tmpl" file associated with a prompt template prefix
        templatedInput, err := loader.TemplatePrefix(templateFile, struct {
            Input string
        }{Input: predInput})
        if err == nil {
            predInput = templatedInput
            log.Debug().Msgf("Template found, input modified to: %s", predInput)
        }

        result := []Choice{}

        n := input.N

        if input.N == 0 {
            n = 1
        }

        // Get the model function to call for the result
        predFunc, err := ModelInference(predInput, loader, *config)
        if err != nil {
            return err
        }

        finetunePrediction := func(prediction string) string {
            if config.Echo {
                prediction = predInput + prediction
            }

            for _, c := range config.Cutstrings {
                mu.Lock()
                reg, ok := cutstrings[c]
                if !ok {
                    cutstrings[c] = regexp.MustCompile(c)
                    reg = cutstrings[c]
                }
                mu.Unlock()
                prediction = reg.ReplaceAllString(prediction, "")
            }

            for _, c := range config.TrimSpace {
                prediction = strings.TrimSpace(strings.TrimPrefix(prediction, c))
            }
            return prediction
        }

        for i := 0; i < n; i++ {
            prediction, err := predFunc()
            if err != nil {
                return err
            }

            prediction = finetunePrediction(prediction)

            if chat {
                if input.Stream {
                    result = append(result, Choice{Delta: &Message{Role: "assistant", Content: prediction}})
                } else {
                    result = append(result, Choice{Message: &Message{Role: "assistant", Content: prediction}})
                }
            } else {
                result = append(result, Choice{Text: prediction})
            }
        }

        resp := &OpenAIResponse{
            Model:   input.Model, // we have to return what the user sent here, due to OpenAI spec.
            Choices: result,
        }
        if input.Stream && chat {
            resp.Object = "chat.completion.chunk"
        } else if chat {
            resp.Object = "chat.completion"
        } else {
            resp.Object = "text_completion"
        }

        jsonResult, _ := json.Marshal(resp)
        log.Debug().Msgf("Response: %s", jsonResult)

        if input.Stream {
            log.Debug().Msgf("Handling stream request")
            c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) {
                fmt.Fprintf(w, "event: data\n")
                w.Flush()

                fmt.Fprintf(w, "data: %s\n\n", jsonResult)
                w.Flush()

                fmt.Fprintf(w, "event: data\n")
                w.Flush()

                resp := &OpenAIResponse{
                    Model:   input.Model, // we have to return what the user sent here, due to OpenAI spec.
                    Choices: []Choice{{FinishReason: "stop"}},
                }
                respData, _ := json.Marshal(resp)

                fmt.Fprintf(w, "data: %s\n\n", respData)
                w.Flush()

                // fmt.Fprintf(w, "data: [DONE]\n\n")
                // w.Flush()
            }))
            return nil
        } else {
            // Return the prediction in the response body
            return c.JSON(resp)
        }
    }
}

func listModels(loader *model.ModelLoader, cm ConfigMerger) func(ctx *fiber.Ctx) error {
    return func(c *fiber.Ctx) error {
        models, err := loader.ListModels()
        if err != nil {
            return err
        }
        var mm map[string]interface{} = map[string]interface{}{}

        dataModels := []OpenAIModel{}
        for _, m := range models {
            mm[m] = nil
            dataModels = append(dataModels, OpenAIModel{ID: m, Object: "model"})
        }

        for k := range cm {
            if _, exists := mm[k]; !exists {
                dataModels = append(dataModels, OpenAIModel{ID: k, Object: "model"})
            }
        }

        return c.JSON(struct {
            Object string        `json:"object"`
            Data   []OpenAIModel `json:"data"`
        }{
            Object: "list",
            Data:   dataModels,
        })
    }
}
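
The streaming branch above writes plain SSE frames (`event: data` followed by `data: <json>`). A minimal client sketch, assuming a locally running instance that registers this handler under the OpenAI-style chat completions route on port 8080 (host and path are assumptions, not part of this diff):

```go
package main

import (
    "bufio"
    "bytes"
    "fmt"
    "log"
    "net/http"
    "strings"
)

func main() {
    // Request body matching OpenAIRequest above; `stream: true` selects the SSE branch.
    body := `{"model": "gpt-3.5-turbo", "stream": true,
              "messages": [{"role": "user", "content": "Hello!"}]}`

    // Endpoint URL is an assumption for a local instance exposing this handler.
    resp, err := http.Post("http://localhost:8080/v1/chat/completions",
        "application/json", bytes.NewBufferString(body))
    if err != nil {
        log.Fatal(err)
    }
    defer resp.Body.Close()

    // The handler emits "event: data" / "data: <json>" lines; print the JSON payloads.
    scanner := bufio.NewScanner(resp.Body)
    for scanner.Scan() {
        line := scanner.Text()
        if strings.HasPrefix(line, "data: ") {
            fmt.Println(strings.TrimPrefix(line, "data: "))
        }
    }
}
```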
@ -0,0 +1,188 @@
package api

import (
    "fmt"
    "sync"

    model "github.com/go-skynet/LocalAI/pkg/model"
    gpt2 "github.com/go-skynet/go-gpt2.cpp"
    gptj "github.com/go-skynet/go-gpt4all-j.cpp"
    llama "github.com/go-skynet/go-llama.cpp"
)

// mutex still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
var mutexMap sync.Mutex
var mutexes map[string]*sync.Mutex = make(map[string]*sync.Mutex)

func ModelInference(s string, loader *model.ModelLoader, c Config) (func() (string, error), error) {
    var model *llama.LLama
    var gptModel *gptj.GPTJ
    var gpt2Model *gpt2.GPT2
    var stableLMModel *gpt2.StableLM

    modelFile := c.Model

    // Try to load the model
    var llamaerr, gpt2err, gptjerr, stableerr error
    llamaOpts := []llama.ModelOption{}
    if c.ContextSize != 0 {
        llamaOpts = append(llamaOpts, llama.SetContext(c.ContextSize))
    }
    if c.F16 {
        llamaOpts = append(llamaOpts, llama.EnableF16Memory)
    }

    // TODO: this is ugly, better identifying the model somehow! however, it is a good stab for a first implementation..
    model, llamaerr = loader.LoadLLaMAModel(modelFile, llamaOpts...)
    if llamaerr != nil {
        gptModel, gptjerr = loader.LoadGPTJModel(modelFile)
        if gptjerr != nil {
            gpt2Model, gpt2err = loader.LoadGPT2Model(modelFile)
            if gpt2err != nil {
                stableLMModel, stableerr = loader.LoadStableLMModel(modelFile)
                if stableerr != nil {
                    return nil, fmt.Errorf("llama: %s gpt: %s gpt2: %s stableLM: %s", llamaerr.Error(), gptjerr.Error(), gpt2err.Error(), stableerr.Error()) // llama failed first, so we want to catch both errors
                }
            }
        }
    }

    var fn func() (string, error)

    switch {
    case stableLMModel != nil:
        fn = func() (string, error) {
            // Generate the prediction using the language model
            predictOptions := []gpt2.PredictOption{
                gpt2.SetTemperature(c.Temperature),
                gpt2.SetTopP(c.TopP),
                gpt2.SetTopK(c.TopK),
                gpt2.SetTokens(c.Maxtokens),
                gpt2.SetThreads(c.Threads),
            }

            if c.Batch != 0 {
                predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
            }

            if c.Seed != 0 {
                predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
            }

            return stableLMModel.Predict(
                s,
                predictOptions...,
            )
        }
    case gpt2Model != nil:
        fn = func() (string, error) {
            // Generate the prediction using the language model
            predictOptions := []gpt2.PredictOption{
                gpt2.SetTemperature(c.Temperature),
                gpt2.SetTopP(c.TopP),
                gpt2.SetTopK(c.TopK),
                gpt2.SetTokens(c.Maxtokens),
                gpt2.SetThreads(c.Threads),
            }

            if c.Batch != 0 {
                predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
            }

            if c.Seed != 0 {
                predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
            }

            return gpt2Model.Predict(
                s,
                predictOptions...,
            )
        }
    case gptModel != nil:
        fn = func() (string, error) {
            // Generate the prediction using the language model
            predictOptions := []gptj.PredictOption{
                gptj.SetTemperature(c.Temperature),
                gptj.SetTopP(c.TopP),
                gptj.SetTopK(c.TopK),
                gptj.SetTokens(c.Maxtokens),
                gptj.SetThreads(c.Threads),
            }

            if c.Batch != 0 {
                predictOptions = append(predictOptions, gptj.SetBatch(c.Batch))
            }

            if c.Seed != 0 {
                predictOptions = append(predictOptions, gptj.SetSeed(c.Seed))
            }

            return gptModel.Predict(
                s,
                predictOptions...,
            )
        }
    case model != nil:
        fn = func() (string, error) {
            // Generate the prediction using the language model
            predictOptions := []llama.PredictOption{
                llama.SetTemperature(c.Temperature),
                llama.SetTopP(c.TopP),
                llama.SetTopK(c.TopK),
                llama.SetTokens(c.Maxtokens),
                llama.SetThreads(c.Threads),
            }

            if c.Debug {
                predictOptions = append(predictOptions, llama.Debug)
            }

            predictOptions = append(predictOptions, llama.SetStopWords(c.StopWords...))

            if c.RepeatPenalty != 0 {
                predictOptions = append(predictOptions, llama.SetPenalty(c.RepeatPenalty))
            }

            if c.Keep != 0 {
                predictOptions = append(predictOptions, llama.SetNKeep(c.Keep))
            }

            if c.Batch != 0 {
                predictOptions = append(predictOptions, llama.SetBatch(c.Batch))
            }

            if c.F16 {
                predictOptions = append(predictOptions, llama.EnableF16KV)
            }

            if c.IgnoreEOS {
                predictOptions = append(predictOptions, llama.IgnoreEOS)
            }

            if c.Seed != 0 {
                predictOptions = append(predictOptions, llama.SetSeed(c.Seed))
            }

            return model.Predict(
                s,
                predictOptions...,
            )
        }
    }

    return func() (string, error) {
        // This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
        mutexMap.Lock()
        l, ok := mutexes[modelFile]
        if !ok {
            m := &sync.Mutex{}
            mutexes[modelFile] = m
            l = m
        }
        mutexMap.Unlock()
        l.Lock()
        defer l.Unlock()

        return fn()
    }, nil
}
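
A minimal sketch of how the endpoint code drives `ModelInference`: it returns a closure that can be called once per requested choice, and the per-model mutex above serializes calls that hit the same model file. `model.NewModelLoader` and its signature are assumptions for illustration (the loader itself is not part of this diff):

```go
package api

import (
    "log"

    model "github.com/go-skynet/LocalAI/pkg/model"
)

// exampleInference sketches the call pattern used by the API handlers above.
func exampleInference() {
    // Assumed constructor: a loader rooted at the models directory.
    loader := model.NewModelLoader("./models")

    cfg := Config{OpenAIRequest: defaultRequest("ggml-gpt4all-j")}
    cfg.Threads = 4

    predict, err := ModelInference("Q: What is LocalAI?\nA:", loader, cfg)
    if err != nil {
        log.Fatal(err)
    }

    // The returned closure may be invoked repeatedly (e.g. when n > 1);
    // the per-model mutex serializes concurrent calls against the same file.
    out, err := predict()
    if err != nil {
        log.Fatal(err)
    }
    log.Println(out)
}
```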
@ -0,0 +1,11 @@
# Examples

Here is a list of projects that can easily be integrated with the LocalAI backend.

## Projects

- [chatbot-ui](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui/) (by [@mudler](https://github.com/mudler))

## Want to contribute?

Create an issue, and put `Example: <description>` in the title! We will post your examples here.
@ -0,0 +1,26 @@
# chatbot-ui

Example of integration with [mckaywrigley/chatbot-ui](https://github.com/mckaywrigley/chatbot-ui).

![Screenshot from 2023-04-26 23-59-55](https://user-images.githubusercontent.com/2420543/234715439-98d12e03-d3ce-4f94-ab54-2b256808e05e.png)

## Setup

```bash
# Clone LocalAI
git clone https://github.com/go-skynet/LocalAI

cd LocalAI/examples/chatbot-ui

# (optional) Checkout a specific LocalAI tag
# git checkout -b build <TAG>

# Download gpt4all-j to models/
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j

# Start with docker compose
docker compose up -d --build
```

Open http://localhost:3000 for the Web UI.
@ -0,0 +1,24 @@
version: '3.6'

services:
  api:
    image: quay.io/go-skynet/local-ai:latest
    build:
      context: ../../
      dockerfile: Dockerfile
    ports:
      - 8080:8080
    environment:
      - DEBUG=true
      - MODELS_PATH=/models
    volumes:
      - ./models:/models:cached
    command: ["/usr/bin/local-ai"]

  chatgpt:
    image: ghcr.io/mckaywrigley/chatbot-ui:main
    ports:
      - 3000:3000
    environment:
      - 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX'
      - 'OPENAI_API_HOST=http://api:8080'
@ -0,0 +1 @@
{{.Input}}
@ -0,0 +1,17 @@
name: gpt-3.5-turbo
parameters:
  model: ggml-gpt4all-j
  top_k: 80
  temperature: 0.2
  top_p: 0.7
context_size: 1024
threads: 14
stopwords:
- "HUMAN:"
- "GPT:"
roles:
  user: " "
  system: " "
template:
  completion: completion
  chat: gpt4all
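
A hedged request sketch against this config: the API is addressed with the config's `name:` (`gpt-3.5-turbo`), not the ggml file name, and per-request fields such as `temperature` are merged over the file's defaults by `updateConfig`. The host and route below are assumptions for a locally running instance:

```go
package main

import (
    "bytes"
    "fmt"
    "io"
    "log"
    "net/http"
)

func main() {
    // "model" is the `name:` declared in the YAML above; the temperature here
    // overrides the 0.2 default taken from the config file.
    body := `{"model": "gpt-3.5-turbo", "prompt": "What is an alpaca?", "temperature": 0.1}`

    resp, err := http.Post("http://localhost:8080/v1/completions", "application/json",
        bytes.NewBufferString(body))
    if err != nil {
        log.Fatal(err)
    }
    defer resp.Body.Close()

    out, _ := io.ReadAll(resp.Body)
    fmt.Println(string(out))
}
```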
@ -0,0 +1,4 @@
The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
### Prompt:
{{.Input}}
### Response:
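
`TemplatePrefix` itself is not part of this diff; assuming it executes the file as a standard Go `text/template` with the joined chat messages bound to `.Input`, the template above renders as in this sketch:

```go
package main

import (
    "os"
    "text/template"
)

func main() {
    // The chat template above, verbatim.
    const tmpl = `The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
### Prompt:
{{.Input}}
### Response:`

    // Render it with the joined chat messages as .Input and print the prompt.
    t := template.Must(template.New("gpt4all").Parse(tmpl))
    _ = t.Execute(os.Stdout, struct{ Input string }{Input: "HUMAN: What is LocalAI?"})
}
```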
@ -0,0 +1 @@
{{.Input}}
@ -0,0 +1,28 @@
- name: list1
  parameters:
    model: testmodel
  context_size: 512
  threads: 10
  stopwords:
  - "HUMAN:"
  - "### Response:"
  roles:
    user: "HUMAN:"
    system: "GPT:"
  template:
    completion: completion
    chat: ggml-gpt4all-j
- name: list2
  parameters:
    model: testmodel
  context_size: 512
  threads: 10
  stopwords:
  - "HUMAN:"
  - "### Response:"
  roles:
    user: "HUMAN:"
    system: "GPT:"
  template:
    completion: completion
    chat: ggml-gpt4all-j
@ -0,0 +1,4 @@
The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
### Prompt:
{{.Input}}
### Response:
@ -0,0 +1,14 @@
name: gpt4all
parameters:
  model: testmodel
context_size: 512
threads: 10
stopwords:
- "HUMAN:"
- "### Response:"
roles:
  user: "HUMAN:"
  system: "GPT:"
template:
  completion: completion
  chat: ggml-gpt4all-j
@ -0,0 +1,14 @@
name: gpt4all-2
parameters:
  model: testmodel
context_size: 1024
threads: 5
stopwords:
- "HUMAN:"
- "### Response:"
roles:
  user: "HUMAN:"
  system: "GPT:"
template:
  completion: completion
  chat: ggml-gpt4all-j