diff --git a/Makefile b/Makefile
index 92b5edc..e4d39ad 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
 
 GOLLAMA_VERSION?=5f1620443a59c5531b5a15a16cd68f600a8437e9
 GPT4ALL_REPO?=https://github.com/go-skynet/gpt4all
-GPT4ALL_VERSION?=f7498c9
+GPT4ALL_VERSION?=d34c513e01174fe83c6042403a0d183e56478d56
 GOGGMLTRANSFORMERS_VERSION?=01b8436f44294d0e1267430f9eda4460458cec54
 RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
 RWKV_VERSION?=930a774fa0152426ed2279cb1005b3490bb0eba6
@@ -70,6 +70,7 @@ gpt4all:
 	# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
 	@find ./gpt4all -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gpt4all_/g' {} +
 	@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gpt4all_/g' {} +
+	@find ./gpt4all -type f -name "*.m" -exec sed -i'' -e 's/ggml_/ggml_gpt4all_/g' {} +
 	@find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gpt4all_/g' {} +
 	@find ./gpt4all -type f -name "*.c" -exec sed -i'' -e 's/llama_/llama_gpt4all_/g' {} +
 	@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/llama_/llama_gpt4all_/g' {} +
diff --git a/api/api.go b/api/api.go
index 701ef40..9d34392 100644
--- a/api/api.go
+++ b/api/api.go
@@ -3,6 +3,7 @@ package api
 import (
 	"errors"
 
+	"github.com/go-skynet/LocalAI/pkg/assets"
 	"github.com/gofiber/fiber/v2"
 	"github.com/gofiber/fiber/v2/middleware/cors"
 	"github.com/gofiber/fiber/v2/middleware/logger"
@@ -68,7 +69,9 @@ func App(opts ...AppOption) (*fiber.App, error) {
 	}
 
 	if options.assetsDestination != "" {
-		if err := PrepareBackendAssets(options.backendAssets, options.assetsDestination); err != nil {
+		// Extract files from the embedded FS
+		err := assets.ExtractFiles(options.backendAssets, options.assetsDestination)
+		if err != nil {
 			log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly, like gpt4all)", err)
 		}
 	}
diff --git a/api/backend_assets.go b/api/backend_assets.go
deleted file mode 100644
index 5a3ed76..0000000
--- a/api/backend_assets.go
+++ /dev/null
@@ -1,27 +0,0 @@
-package api
-
-import (
-	"embed"
-	"os"
-	"path/filepath"
-
-	"github.com/go-skynet/LocalAI/pkg/assets"
-	"github.com/rs/zerolog/log"
-)
-
-func PrepareBackendAssets(backendAssets embed.FS, dst string) error {
-
-	// Extract files from the embedded FS
-	err := assets.ExtractFiles(backendAssets, dst)
-	if err != nil {
-		return err
-	}
-
-	// Set GPT4ALL libs where we extracted the files
-	// https://github.com/nomic-ai/gpt4all/commit/27e80e1d10985490c9fd4214e4bf458cfcf70896
-	gpt4alldir := filepath.Join(dst, "backend-assets", "gpt4all")
-	os.Setenv("GPT4ALL_IMPLEMENTATIONS_PATH", gpt4alldir)
-	log.Debug().Msgf("GPT4ALL_IMPLEMENTATIONS_PATH: %s", gpt4alldir)
-
-	return nil
-}
diff --git a/api/openai.go b/api/openai.go
index 0e1c0f6..8017201 100644
--- a/api/openai.go
+++ b/api/openai.go
@@ -148,7 +148,7 @@ func defaultRequest(modelFile string) OpenAIRequest {
 // https://platform.openai.com/docs/api-reference/completions
 func completionEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error {
 	process := func(s string, req *OpenAIRequest, config *Config, loader *model.ModelLoader, responses chan OpenAIResponse) {
-		ComputeChoices(s, req, config, loader, func(s string, c *[]Choice) {}, func(s string) bool {
+		ComputeChoices(s, req, config, o, loader, func(s string, c *[]Choice) {}, func(s string) bool {
 			resp := OpenAIResponse{
 				Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
 				Choices: []Choice{{Text: s}},
@@ -249,7 +249,7 @@ func completionEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error {
 			log.Debug().Msgf("Template found, input modified to: %s", i)
 		}
 
-		r, err := ComputeChoices(i, input, config, o.loader, func(s string, c *[]Choice) {
+		r, err := ComputeChoices(i, input, config, o, o.loader, func(s string, c *[]Choice) {
 			*c = append(*c, Choice{Text: s})
 		}, nil)
 		if err != nil {
@@ -291,7 +291,7 @@ func embeddingsEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error {
 
 		for i, s := range config.InputToken {
 			// get the model function to call for the result
-			embedFn, err := ModelEmbedding("", s, o.loader, *config)
+			embedFn, err := ModelEmbedding("", s, o.loader, *config, o)
 			if err != nil {
 				return err
 			}
@@ -305,7 +305,7 @@ func embeddingsEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error {
 
 		for i, s := range config.InputStrings {
 			// get the model function to call for the result
-			embedFn, err := ModelEmbedding(s, []int{}, o.loader, *config)
+			embedFn, err := ModelEmbedding(s, []int{}, o.loader, *config, o)
 			if err != nil {
 				return err
 			}
@@ -341,7 +341,7 @@ func chatEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error {
 		}
 		responses <- initialMessage
 
-		ComputeChoices(s, req, config, loader, func(s string, c *[]Choice) {}, func(s string) bool {
+		ComputeChoices(s, req, config, o, loader, func(s string, c *[]Choice) {}, func(s string) bool {
 			resp := OpenAIResponse{
 				Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
 				Choices: []Choice{{Delta: &Message{Content: s}}},
@@ -439,7 +439,7 @@ func chatEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error {
 			return nil
 		}
 
-		result, err := ComputeChoices(predInput, input, config, o.loader, func(s string, c *[]Choice) {
+		result, err := ComputeChoices(predInput, input, config, o, o.loader, func(s string, c *[]Choice) {
 			*c = append(*c, Choice{Message: &Message{Role: "assistant", Content: s}})
 		}, nil)
 		if err != nil {
@@ -491,7 +491,7 @@ func editEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error {
 			log.Debug().Msgf("Template found, input modified to: %s", i)
 		}
 
-		r, err := ComputeChoices(i, input, config, o.loader, func(s string, c *[]Choice) {
+		r, err := ComputeChoices(i, input, config, o, o.loader, func(s string, c *[]Choice) {
 			*c = append(*c, Choice{Text: s})
 		}, nil)
 		if err != nil {
@@ -616,7 +616,7 @@ func imageEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error {
 
 		baseURL := c.BaseURL()
 
-		fn, err := ImageGeneration(height, width, mode, step, input.Seed, positive_prompt, negative_prompt, output, o.loader, *config)
+		fn, err := ImageGeneration(height, width, mode, step, input.Seed, positive_prompt, negative_prompt, output, o.loader, *config, o)
 		if err != nil {
 			return err
 		}
@@ -697,7 +697,7 @@ func transcriptEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error {
 
 		log.Debug().Msgf("Audio file copied to: %+v", dst)
 
-		whisperModel, err := o.loader.BackendLoader(model.WhisperBackend, config.Model, []llama.ModelOption{}, uint32(config.Threads))
+		whisperModel, err := o.loader.BackendLoader(model.WhisperBackend, config.Model, []llama.ModelOption{}, uint32(config.Threads), o.assetsDestination)
 		if err != nil {
 			return err
 		}
diff --git a/api/prediction.go b/api/prediction.go
index 2406e42..ff4866f 100644
--- a/api/prediction.go
+++ b/api/prediction.go
@@ -49,11 +49,11 @@ func defaultLLamaOpts(c Config) []llama.ModelOption {
 	return llamaOpts
 }
 
-func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, dst string, loader *model.ModelLoader, c Config) (func() error, error) {
+func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, dst string, loader *model.ModelLoader, c Config, o *Option) (func() error, error) {
 	if c.Backend != model.StableDiffusionBackend {
 		return nil, fmt.Errorf("endpoint only working with stablediffusion models")
 	}
-	inferenceModel, err := loader.BackendLoader(c.Backend, c.ImageGenerationAssets, []llama.ModelOption{}, uint32(c.Threads))
+	inferenceModel, err := loader.BackendLoader(c.Backend, c.ImageGenerationAssets, []llama.ModelOption{}, uint32(c.Threads), o.assetsDestination)
 	if err != nil {
 		return nil, err
 	}
@@ -88,7 +88,7 @@ func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negat
 	}, nil
 }
 
-func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c Config) (func() ([]float32, error), error) {
+func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c Config, o *Option) (func() ([]float32, error), error) {
 	if !c.Embeddings {
 		return nil, fmt.Errorf("endpoint disabled for this model by API configuration")
 	}
@@ -100,9 +100,9 @@ func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c Config)
 	var inferenceModel interface{}
 	var err error
 	if c.Backend == "" {
-		inferenceModel, err = loader.GreedyLoader(modelFile, llamaOpts, uint32(c.Threads))
+		inferenceModel, err = loader.GreedyLoader(modelFile, llamaOpts, uint32(c.Threads), o.assetsDestination)
 	} else {
-		inferenceModel, err = loader.BackendLoader(c.Backend, modelFile, llamaOpts, uint32(c.Threads))
+		inferenceModel, err = loader.BackendLoader(c.Backend, modelFile, llamaOpts, uint32(c.Threads), o.assetsDestination)
 	}
 	if err != nil {
 		return nil, err
 	}
@@ -240,7 +240,7 @@ func buildLLamaPredictOptions(c Config, modelPath string) []llama.PredictOption
 	return predictOptions
 }
 
-func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback func(string) bool) (func() (string, error), error) {
+func ModelInference(s string, loader *model.ModelLoader, c Config, o *Option, tokenCallback func(string) bool) (func() (string, error), error) {
 	supportStreams := false
 	modelFile := c.Model
 
@@ -249,9 +249,9 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback
 	var inferenceModel interface{}
 	var err error
 	if c.Backend == "" {
-		inferenceModel, err = loader.GreedyLoader(modelFile, llamaOpts, uint32(c.Threads))
+		inferenceModel, err = loader.GreedyLoader(modelFile, llamaOpts, uint32(c.Threads), o.assetsDestination)
 	} else {
-		inferenceModel, err = loader.BackendLoader(c.Backend, modelFile, llamaOpts, uint32(c.Threads))
+		inferenceModel, err = loader.BackendLoader(c.Backend, modelFile, llamaOpts, uint32(c.Threads), o.assetsDestination)
 	}
 	if err != nil {
 		return nil, err
 	}
@@ -579,7 +579,7 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback
 	}, nil
 }
 
-func ComputeChoices(predInput string, input *OpenAIRequest, config *Config, loader *model.ModelLoader, cb func(string, *[]Choice), tokenCallback func(string) bool) ([]Choice, error) {
+func ComputeChoices(predInput string, input *OpenAIRequest, config *Config, o *Option, loader *model.ModelLoader, cb func(string, *[]Choice), tokenCallback func(string) bool) ([]Choice, error) {
 	result := []Choice{}
 	n := input.N
 
@@ -589,7 +589,7 @@ func ComputeChoices(predInput string, input *OpenAIRequest, config *Config, load
 	}
 
 	// get the model function to call for the result
-	predFunc, err := ModelInference(predInput, loader, *config, tokenCallback)
+	predFunc, err := ModelInference(predInput, loader, *config, o, tokenCallback)
 	if err != nil {
 		return result, err
 	}
diff --git a/examples/flowise/README.md b/examples/flowise/README.md
index d8bb13d..9dbac91 100644
--- a/examples/flowise/README.md
+++ b/examples/flowise/README.md
@@ -24,3 +24,7 @@ docker-compose up --pull always
 
 Open http://localhost:3000.
 
+## Using LocalAI
+
+Search for LocalAI in the integrations list, and use `http://api:8080/` as the URL.
+
diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go
index 7de487d..0091ee4 100644
--- a/pkg/model/initializers.go
+++ b/pkg/model/initializers.go
@@ -135,7 +135,7 @@ func rwkvLM(tokenFile string, threads uint32) func(string) (interface{}, error)
 	}
 }
 
-func (ml *ModelLoader) BackendLoader(backendString string, modelFile string, llamaOpts []llama.ModelOption, threads uint32) (model interface{}, err error) {
+func (ml *ModelLoader) BackendLoader(backendString string, modelFile string, llamaOpts []llama.ModelOption, threads uint32, assetDir string) (model interface{}, err error) {
 	log.Debug().Msgf("Loading model %s from %s", backendString, modelFile)
 	switch strings.ToLower(backendString) {
 	case LlamaBackend:
@@ -161,7 +161,7 @@ func (ml *ModelLoader) BackendLoader(backendString string, modelFile string, lla
 	case StarcoderBackend:
 		return ml.LoadModel(modelFile, starCoder)
 	case Gpt4AllLlamaBackend, Gpt4AllMptBackend, Gpt4AllJBackend, Gpt4All:
-		return ml.LoadModel(modelFile, gpt4allLM(gpt4all.SetThreads(int(threads))))
+		return ml.LoadModel(modelFile, gpt4allLM(gpt4all.SetThreads(int(threads)), gpt4all.SetLibrarySearchPath(filepath.Join(assetDir, "backend-assets", "gpt4all"))))
 	case BertEmbeddingsBackend:
 		return ml.LoadModel(modelFile, bertEmbeddings)
 	case RwkvBackend:
@@ -175,7 +175,7 @@ func (ml *ModelLoader) BackendLoader(backendString string, modelFile string, lla
 	}
 }
 
-func (ml *ModelLoader) GreedyLoader(modelFile string, llamaOpts []llama.ModelOption, threads uint32) (interface{}, error) {
+func (ml *ModelLoader) GreedyLoader(modelFile string, llamaOpts []llama.ModelOption, threads uint32, assetDir string) (interface{}, error) {
 	log.Debug().Msgf("Loading model '%s' greedly", modelFile)
 
 	ml.mu.Lock()
@@ -193,7 +193,7 @@ func (ml *ModelLoader) GreedyLoader(modelFile string, llamaOpts []llama.ModelOpt
 			continue
 		}
 		log.Debug().Msgf("[%s] Attempting to load", b)
-		model, modelerr := ml.BackendLoader(b, modelFile, llamaOpts, threads)
+		model, modelerr := ml.BackendLoader(b, modelFile, llamaOpts, threads, assetDir)
 		if modelerr == nil && model != nil {
 			log.Debug().Msgf("[%s] Loads OK", b)
 			return model, nil
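
Note for reviewers: the net effect of this patch is that the gpt4all implementations directory is no longer advertised through the `GPT4ALL_IMPLEMENTATIONS_PATH` environment variable (the deleted `PrepareBackendAssets`), but is instead threaded through the loader calls as `o.assetsDestination` / `assetDir` and handed to the bindings via `gpt4all.SetLibrarySearchPath`. The sketch below only illustrates that path plumbing; `loadGpt4all`, the model file name, and the `/tmp/localai` destination are hypothetical stand-ins for the example and are not part of the patch.

```go
package main

import (
	"fmt"
	"path/filepath"
)

// loadGpt4all is a stand-in for the patched BackendLoader case: in LocalAI the
// computed directory is forwarded via gpt4all.SetLibrarySearchPath(...) rather
// than exported as GPT4ALL_IMPLEMENTATIONS_PATH, as the removed
// PrepareBackendAssets helper used to do.
func loadGpt4all(modelFile, librarySearchPath string, threads int) error {
	fmt.Printf("loading %s with %d threads, gpt4all implementations in %s\n",
		modelFile, threads, librarySearchPath)
	return nil
}

func main() {
	// assetsDestination corresponds to o.assetsDestination in the patch: the
	// directory where the embedded backend assets were extracted.
	assetsDestination := "/tmp/localai"

	// Same path derivation used by both the old env-var code and the new option:
	// <assets destination>/backend-assets/gpt4all
	gpt4allDir := filepath.Join(assetsDestination, "backend-assets", "gpt4all")

	if err := loadGpt4all("ggml-gpt4all-j.bin", gpt4allDir, 4); err != nil {
		fmt.Println("load failed:", err)
	}
}
```

Passing the directory as an explicit argument instead of mutating process-wide environment state also means, in principle, that different loader calls can point at different asset directories.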