diff --git a/Makefile b/Makefile
index 12447b0..1aea365 100644
--- a/Makefile
+++ b/Makefile
@@ -3,7 +3,7 @@ GOTEST=$(GOCMD) test
 GOVET=$(GOCMD) vet
 BINARY_NAME=local-ai
 
-GOLLAMA_VERSION?=8bd97d532e90cf34e755b3ea2d8aa17000443cf2
+GOLLAMA_VERSION?=fbec625895ba0c458f783b62c8569135c5e80d79
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
 GPT4ALL_VERSION?=73db20ba85fbbdc66a56e2619394c0eea40dc72b
 GOGGMLTRANSFORMERS_VERSION?=c4c581f1853cf1b66276501c7c0dbea1e3e564b7
diff --git a/api/config.go b/api/config.go
index 7e0d826..42aecbe 100644
--- a/api/config.go
+++ b/api/config.go
@@ -16,24 +16,28 @@ import (
 )
 
 type Config struct {
-	OpenAIRequest               `yaml:"parameters"`
-	Name                        string            `yaml:"name"`
-	StopWords                   []string          `yaml:"stopwords"`
-	Cutstrings                  []string          `yaml:"cutstrings"`
-	TrimSpace                   []string          `yaml:"trimspace"`
-	ContextSize                 int               `yaml:"context_size"`
-	F16                         bool              `yaml:"f16"`
-	Threads                     int               `yaml:"threads"`
-	Debug                       bool              `yaml:"debug"`
-	Roles                       map[string]string `yaml:"roles"`
-	Embeddings                  bool              `yaml:"embeddings"`
-	Backend                     string            `yaml:"backend"`
-	TemplateConfig              TemplateConfig    `yaml:"template"`
-	MirostatETA                 float64           `yaml:"mirostat_eta"`
-	MirostatTAU                 float64           `yaml:"mirostat_tau"`
-	Mirostat                    int               `yaml:"mirostat"`
-	NGPULayers                  int               `yaml:"gpu_layers"`
-	ImageGenerationAssets       string            `yaml:"asset_dir"`
+	OpenAIRequest         `yaml:"parameters"`
+	Name                  string            `yaml:"name"`
+	StopWords             []string          `yaml:"stopwords"`
+	Cutstrings            []string          `yaml:"cutstrings"`
+	TrimSpace             []string          `yaml:"trimspace"`
+	ContextSize           int               `yaml:"context_size"`
+	F16                   bool              `yaml:"f16"`
+	Threads               int               `yaml:"threads"`
+	Debug                 bool              `yaml:"debug"`
+	Roles                 map[string]string `yaml:"roles"`
+	Embeddings            bool              `yaml:"embeddings"`
+	Backend               string            `yaml:"backend"`
+	TemplateConfig        TemplateConfig    `yaml:"template"`
+	MirostatETA           float64           `yaml:"mirostat_eta"`
+	MirostatTAU           float64           `yaml:"mirostat_tau"`
+	Mirostat              int               `yaml:"mirostat"`
+	NGPULayers            int               `yaml:"gpu_layers"`
+	ImageGenerationAssets string            `yaml:"asset_dir"`
+
+	PromptCachePath string `yaml:"prompt_cache_path"`
+	PromptCacheAll  bool   `yaml:"prompt_cache_all"`
+
 	PromptStrings, InputStrings []string
 	InputToken                  [][]int
 }
diff --git a/api/prediction.go b/api/prediction.go
index 08a01e0..4ae1b69 100644
--- a/api/prediction.go
+++ b/api/prediction.go
@@ -2,6 +2,8 @@ package api
 
 import (
 	"fmt"
+	"os"
+	"path/filepath"
 	"regexp"
 	"strings"
 	"sync"
@@ -102,7 +104,7 @@ func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c Config)
 	switch model := inferenceModel.(type) {
 	case *llama.LLama:
 		fn = func() ([]float32, error) {
-			predictOptions := buildLLamaPredictOptions(c)
+			predictOptions := buildLLamaPredictOptions(c, loader.ModelPath)
 			if len(tokens) > 0 {
 				return model.TokenEmbeddings(tokens, predictOptions...)
 			}
@@ -151,7 +153,7 @@ func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c Config)
 	}, nil
 }
 
-func buildLLamaPredictOptions(c Config) []llama.PredictOption {
+func buildLLamaPredictOptions(c Config, modelPath string) []llama.PredictOption {
 	// Generate the prediction using the language model
 	predictOptions := []llama.PredictOption{
 		llama.SetTemperature(c.Temperature),
@@ -161,6 +163,17 @@ func buildLLamaPredictOptions(c Config, modelPath string) []llama.PredictOption {
 		llama.SetThreads(c.Threads),
 	}
 
+	if c.PromptCacheAll {
+		predictOptions = append(predictOptions, llama.EnablePromptCacheAll)
+	}
+
+	if c.PromptCachePath != "" {
+		// Create parent directory
+		p := filepath.Join(modelPath, c.PromptCachePath)
+		os.MkdirAll(filepath.Dir(p), 0755)
+		predictOptions = append(predictOptions, llama.SetPathPromptCache(p))
+	}
+
 	if c.Mirostat != 0 {
 		predictOptions = append(predictOptions, llama.SetMirostat(c.Mirostat))
 	}
@@ -469,7 +482,7 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback
 			model.SetTokenCallback(tokenCallback)
 		}
 
-		predictOptions := buildLLamaPredictOptions(c)
+		predictOptions := buildLLamaPredictOptions(c, loader.ModelPath)
 
 		str, er := model.Predict(
 			s,
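
In short, the diff threads the model directory into buildLLamaPredictOptions and gates go-llama's prompt-cache options behind the two new Config fields. A model YAML consuming them might look like the sketch below; the name, backend file, and cache location are illustrative placeholders, and the surrounding keys follow the yaml tags visible in Config and its embedded OpenAIRequest, only prompt_cache_path and prompt_cache_all are introduced by this change:

    name: wizardlm
    backend: llama
    context_size: 1024
    threads: 4
    parameters:
      model: wizardlm.bin
      temperature: 0.2
    prompt_cache_path: "cache/wizardlm-prompt-cache"
    prompt_cache_all: true

Because the configured path is joined onto loader.ModelPath and only its parent directory is pre-created, the cache file ends up alongside the models the loader manages rather than at an arbitrary absolute location.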