From b710147b9518185625df61982c57c66420c00958 Mon Sep 17 00:00:00 2001
From: mudler
Date: Sat, 8 Apr 2023 11:45:36 +0200
Subject: [PATCH] Add mutex on same models (parallel isn't supported yet)

---
 api.go | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/api.go b/api.go
index 5ed04ea..79e4e0f 100644
--- a/api.go
+++ b/api.go
@@ -49,10 +49,14 @@ func api(defaultModel *llama.LLama, loader *ModelLoader, listenAddr string, thre
 		NotFoundFile: "index.html",
 	}))
 
+	// This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
 	var mutex = &sync.Mutex{}
+	mu := map[string]*sync.Mutex{}
+	var mumutex = &sync.Mutex{}
 
 	// openAI compatible API endpoint
 	app.Post("/v1/chat/completions", func(c *fiber.Ctx) error {
+
 		var err error
 		var model *llama.LLama
 
@@ -77,6 +81,23 @@ func api(defaultModel *llama.LLama, loader *ModelLoader, listenAddr string, thre
 			}
 		}
 
+		// This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
+		if input.Model != "" {
+			mumutex.Lock()
+			l, ok := mu[input.Model]
+			if !ok {
+				m := &sync.Mutex{}
+				mu[input.Model] = m
+				l = m
+			}
+			mumutex.Unlock()
+			l.Lock()
+			defer l.Unlock()
+		} else {
+			mutex.Lock()
+			defer mutex.Unlock()
+		}
+
 		// Set the parameters for the language model prediction
 		topP, err := strconv.ParseFloat(c.Query("topP", "0.9"), 64) // Default value of topP is 0.9
 		if err != nil {
@@ -105,6 +126,7 @@ func api(defaultModel *llama.LLama, loader *ModelLoader, listenAddr string, thre
 
 		predInput := strings.Join(mess, "\n")
 
+		// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
 		templatedInput, err := loader.TemplatePrefix(input.Model, struct {
 			Input string
 		}{Input: predInput})
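
For readers skimming the patch: the added block is a get-or-create lock table. A second mutex (mumutex) guards the map of per-model mutexes, so concurrent requests for the same model serialize while requests for different models proceed independently; requests with no explicit model fall back to the single global mutex. Below is a minimal standalone sketch of the same pattern under those assumptions — the modelLocks type and its method names are illustrative, not part of the patch.

package main

import (
	"fmt"
	"sync"
)

// modelLocks hands out one mutex per model name, so calls for the same
// model run one at a time while different models stay independent.
type modelLocks struct {
	mu    sync.Mutex             // guards the locks map itself
	locks map[string]*sync.Mutex // one lock per model name
}

func newModelLocks() *modelLocks {
	return &modelLocks{locks: map[string]*sync.Mutex{}}
}

// get returns the mutex for name, creating it on first use.
func (m *modelLocks) get(name string) *sync.Mutex {
	m.mu.Lock()
	defer m.mu.Unlock()
	l, ok := m.locks[name]
	if !ok {
		l = &sync.Mutex{}
		m.locks[name] = l
	}
	return l
}

func main() {
	ml := newModelLocks()
	var wg sync.WaitGroup
	for _, model := range []string{"a.bin", "a.bin", "b.bin"} {
		wg.Add(1)
		go func(model string) {
			defer wg.Done()
			l := ml.get(model)
			l.Lock() // same-model requests serialize here
			defer l.Unlock()
			fmt.Println("predicting with", model)
		}(model)
	}
	wg.Wait()
}

Note that, as in the patch, per-model mutexes are never removed from the map. For a small, fixed set of model files that is fine, and it sidesteps the race of deleting a lock that another goroutine still holds.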