From b710147b9518185625df61982c57c66420c00958 Mon Sep 17 00:00:00 2001
From: mudler
Date: Sat, 8 Apr 2023 11:45:36 +0200
Subject: [PATCH] Add mutex on same models (parallel isn't supported yet)

---
 api.go | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/api.go b/api.go
index 5ed04ea..79e4e0f 100644
--- a/api.go
+++ b/api.go
@@ -49,10 +49,14 @@ func api(defaultModel *llama.LLama, loader *ModelLoader, listenAddr string, thre
 		NotFoundFile: "index.html",
 	}))
 
+	// This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
 	var mutex = &sync.Mutex{}
+	mu := map[string]*sync.Mutex{}
+	var mumutex = &sync.Mutex{}
 
 	// openAI compatible API endpoint
 	app.Post("/v1/chat/completions", func(c *fiber.Ctx) error {
+
 		var err error
 		var model *llama.LLama
 
@@ -77,6 +81,23 @@ func api(defaultModel *llama.LLama, loader *ModelLoader, listenAddr string, thre
 			}
 		}
 
+		// This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
+		if input.Model != "" {
+			mumutex.Lock()
+			l, ok := mu[input.Model]
+			if !ok {
+				m := &sync.Mutex{}
+				mu[input.Model] = m
+				l = m
+			}
+			mumutex.Unlock()
+			l.Lock()
+			defer l.Unlock()
+		} else {
+			mutex.Lock()
+			defer mutex.Unlock()
+		}
+
 		// Set the parameters for the language model prediction
 		topP, err := strconv.ParseFloat(c.Query("topP", "0.9"), 64) // Default value of topP is 0.9
 		if err != nil {
@@ -105,6 +126,7 @@ func api(defaultModel *llama.LLama, loader *ModelLoader, listenAddr string, thre
 
 		predInput := strings.Join(mess, "\n")
 
+		// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
 		templatedInput, err := loader.TemplatePrefix(input.Model, struct {
 			Input string
 		}{Input: predInput})
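
For readers skimming the patch: the added block is a get-or-create lock table. A second mutex (mumutex) guards the map of per-model mutexes, so concurrent requests for the same model serialize while requests for different models proceed independently; requests with no explicit model fall back to the single global mutex. Below is a minimal standalone sketch of the same pattern under those assumptions — the modelLocks type and its method names are illustrative, not part of the patch.

package main

import (
	"fmt"
	"sync"
)

// modelLocks hands out one mutex per model name, so calls for the same
// model run one at a time while different models stay independent.
type modelLocks struct {
	mu    sync.Mutex             // guards the locks map itself
	locks map[string]*sync.Mutex // one lock per model name
}

func newModelLocks() *modelLocks {
	return &modelLocks{locks: map[string]*sync.Mutex{}}
}

// get returns the mutex for name, creating it on first use.
func (m *modelLocks) get(name string) *sync.Mutex {
	m.mu.Lock()
	defer m.mu.Unlock()
	l, ok := m.locks[name]
	if !ok {
		l = &sync.Mutex{}
		m.locks[name] = l
	}
	return l
}

func main() {
	ml := newModelLocks()
	var wg sync.WaitGroup
	for _, model := range []string{"a.bin", "a.bin", "b.bin"} {
		wg.Add(1)
		go func(model string) {
			defer wg.Done()
			l := ml.get(model)
			l.Lock() // same-model requests serialize here
			defer l.Unlock()
			fmt.Println("predicting with", model)
		}(model)
	}
	wg.Wait()
}

Note that, as in the patch, per-model mutexes are never removed from the map. For a small, fixed set of model files that is fine, and it sidesteps the race of deleting a lock that another goroutine still holds.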