From b816009db0e43d3bd979c598f56e9431b76a9157 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 15 Jul 2023 01:19:43 +0200 Subject: [PATCH] feat: add falcon ggllm via grpc client Signed-off-by: Ettore Di Giacinto --- .gitignore | 9 +- Makefile | 36 +- api/api.go | 1 + api/localai.go | 6 +- api/openai.go | 18 +- api/prediction.go | 379 +++++++++--- cmd/grpc/falcon/main.go | 25 + go.mod | 23 +- go.sum | 162 +++--- pkg/grpc/client.go | 98 ++++ pkg/grpc/interface.go | 11 + pkg/grpc/llm/falcon/falcon.go | 136 +++++ pkg/grpc/llm/ggml/starcoder.go | 0 pkg/grpc/proto/llmserver.pb.go | 870 ++++++++++++++++++++++++++++ pkg/grpc/proto/llmserver.proto | 82 +++ pkg/grpc/proto/llmserver_grpc.pb.go | 241 ++++++++ pkg/grpc/server.go | 76 +++ pkg/model/initializers.go | 182 +++++- pkg/model/loader.go | 3 + pkg/model/options.go | 62 ++ 20 files changed, 2194 insertions(+), 226 deletions(-) create mode 100644 cmd/grpc/falcon/main.go create mode 100644 pkg/grpc/client.go create mode 100644 pkg/grpc/interface.go create mode 100644 pkg/grpc/llm/falcon/falcon.go create mode 100644 pkg/grpc/llm/ggml/starcoder.go create mode 100644 pkg/grpc/proto/llmserver.pb.go create mode 100644 pkg/grpc/proto/llmserver.proto create mode 100644 pkg/grpc/proto/llmserver_grpc.pb.go create mode 100644 pkg/grpc/server.go create mode 100644 pkg/model/options.go diff --git a/.gitignore b/.gitignore index 8ad9f22..8819ad7 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,13 @@ go-llama gpt4all go-stable-diffusion +go-piper +go-ggllm +piper + +*.a +get-sources + go-ggml-transformers go-gpt2 go-rwkv @@ -29,4 +36,4 @@ release/ # Generated during build backend-assets/ -/ggml-metal.metal \ No newline at end of file +/ggml-metal.metal diff --git a/Makefile b/Makefile index d885b94..abac2b4 100644 --- a/Makefile +++ b/Makefile @@ -41,6 +41,9 @@ BLOOMZ_VERSION?=1834e77b83faafe912ad4092ccf7f77937349e2f # stablediffusion version STABLEDIFFUSION_VERSION?=d89260f598afb809279bc72aa0107b4292587632 +# Go-ggllm 
+GOGGLLM_VERSION?=862477d16eefb0805261c19c9b0d053e3b2b684b + export BUILD_TYPE?= CGO_LDFLAGS?= CUDA_LIBPATH?=/usr/local/cuda/lib64/ @@ -126,6 +129,14 @@ gpt4all: @find ./gpt4all/gpt4all-bindings/golang -type f -name "*.c" -exec sed -i'' -e 's/clear_numa_thread_affinity/gpt4all__clear_numa_thread_affinity/g' {} + @find ./gpt4all/gpt4all-bindings/golang -type f -name "*.h" -exec sed -i'' -e 's/clear_numa_thread_affinity/gpt4all__clear_numa_thread_affinity/g' {} + +## go-ggllm +go-ggllm: + git clone --recurse-submodules https://github.com/mudler/go-ggllm.cpp go-ggllm + cd go-ggllm && git checkout -b build $(GOGGLLM_VERSION) && git submodule update --init --recursive --depth 1 + +go-ggllm/libggllm.a: go-ggllm + $(MAKE) -C go-ggllm BUILD_TYPE=$(BUILD_TYPE) libggllm.a + ## go-piper go-piper: git clone --recurse-submodules https://github.com/mudler/go-piper go-piper @@ -238,7 +249,7 @@ go-llama/libbinding.a: go-llama go-piper/libpiper_binding.a: $(MAKE) -C go-piper libpiper_binding.a example/main -get-sources: go-llama go-ggml-transformers gpt4all go-piper go-rwkv whisper.cpp go-bert bloomz go-stable-diffusion +get-sources: go-llama go-ggllm go-ggml-transformers gpt4all go-piper go-rwkv whisper.cpp go-bert bloomz go-stable-diffusion touch $@ replace: @@ -251,6 +262,7 @@ replace: $(GOCMD) mod edit -replace github.com/go-skynet/bloomz.cpp=$(shell pwd)/bloomz $(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(shell pwd)/go-stable-diffusion $(GOCMD) mod edit -replace github.com/mudler/go-piper=$(shell pwd)/go-piper + $(GOCMD) mod edit -replace github.com/mudler/go-ggllm.cpp=$(shell pwd)/go-ggllm prepare-sources: get-sources replace $(GOCMD) mod download @@ -267,9 +279,10 @@ rebuild: ## Rebuilds the project $(MAKE) -C go-bert clean $(MAKE) -C bloomz clean $(MAKE) -C go-piper clean + $(MAKE) -C go-ggllm clean $(MAKE) build -prepare: prepare-sources backend-assets/gpt4all $(OPTIONAL_TARGETS) go-llama/libbinding.a go-bert/libgobert.a 
go-ggml-transformers/libtransformers.a go-rwkv/librwkv.a whisper.cpp/libwhisper.a bloomz/libbloomz.a ## Prepares for building +prepare: prepare-sources backend-assets/gpt4all grpcs $(OPTIONAL_TARGETS) go-ggllm/libggllm.a go-llama/libbinding.a go-bert/libgobert.a go-ggml-transformers/libtransformers.a go-rwkv/librwkv.a whisper.cpp/libwhisper.a bloomz/libbloomz.a ## Prepares for building touch $@ clean: ## Remove build related file @@ -285,6 +298,7 @@ clean: ## Remove build related file rm -rf ./bloomz rm -rf ./whisper.cpp rm -rf ./go-piper + rm -rf ./go-ggllm rm -rf $(BINARY_NAME) rm -rf release/ @@ -296,7 +310,7 @@ build: prepare ## Build the project $(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET}) $(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET}) - CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./ + CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) build -x -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./ ifeq ($(BUILD_TYPE),metal) cp go-llama/build/bin/ggml-metal.metal . endif @@ -341,3 +355,19 @@ help: ## Show this help. if (/^[a-zA-Z_-]+:.*?##.*$$/) {printf " ${YELLOW}%-20s${GREEN}%s${RESET}\n", $$1, $$2} \ else if (/^## .*$$/) {printf " ${CYAN}%s${RESET}\n", substr($$1,4)} \ }' $(MAKEFILE_LIST) + +protogen: + protoc --go_out=. --go_opt=paths=source_relative --go-grpc_out=. 
--go-grpc_opt=paths=source_relative \ + pkg/grpc/proto/llmserver.proto + +## GRPC + +backend-assets/grpc: + mkdir -p backend-assets/grpc + +falcon-grpc: backend-assets/grpc + CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggllm LIBRARY_PATH=$(shell pwd)/go-ggllm \ + $(GOCMD) build -x -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/falcon ./cmd/grpc/falcon/ + + +grpcs: falcon-grpc \ No newline at end of file diff --git a/api/api.go b/api/api.go index 543e756..1438f1f 100644 --- a/api/api.go +++ b/api/api.go @@ -75,6 +75,7 @@ func App(opts ...AppOption) (*fiber.App, error) { if options.assetsDestination != "" { // Extract files from the embedded FS err := assets.ExtractFiles(options.backendAssets, options.assetsDestination) + log.Debug().Msgf("Extracting backend assets files to %s", options.assetsDestination) if err != nil { log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly, like gpt4all)", err) } diff --git a/api/localai.go b/api/localai.go index b719689..66eda5a 100644 --- a/api/localai.go +++ b/api/localai.go @@ -8,7 +8,6 @@ import ( model "github.com/go-skynet/LocalAI/pkg/model" "github.com/go-skynet/LocalAI/pkg/tts" "github.com/go-skynet/LocalAI/pkg/utils" - llama "github.com/go-skynet/go-llama.cpp" "github.com/gofiber/fiber/v2" ) @@ -42,7 +41,10 @@ func ttsEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error { return err } - piperModel, err := o.loader.BackendLoader(model.PiperBackend, input.Model, []llama.ModelOption{}, uint32(0), o.assetsDestination) + piperModel, err := o.loader.BackendLoader( + model.WithBackendString(model.PiperBackend), + model.WithModelFile(input.Model), + model.WithAssetDir(o.assetsDestination)) if err != nil { return err } diff --git a/api/openai.go b/api/openai.go index 77d2c8e..c39b1cc 100644 --- a/api/openai.go +++ b/api/openai.go @@ -20,7 +20,6 @@ import ( "github.com/go-skynet/LocalAI/pkg/grammar" model 
"github.com/go-skynet/LocalAI/pkg/model" whisperutil "github.com/go-skynet/LocalAI/pkg/whisper" - llama "github.com/go-skynet/go-llama.cpp" "github.com/gofiber/fiber/v2" "github.com/rs/zerolog/log" "github.com/valyala/fasthttp" @@ -362,6 +361,13 @@ func embeddingsEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error { } } +func isEOS(s string) bool { + if s == "<|endoftext|>" { + return true + } + + return false +} func chatEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error { process := func(s string, req *OpenAIRequest, config *Config, loader *model.ModelLoader, responses chan OpenAIResponse) { @@ -380,7 +386,9 @@ func chatEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error { } log.Debug().Msgf("Sending goroutine: %s", s) - responses <- resp + if s != "" && !isEOS(s) { + responses <- resp + } return true }) close(responses) @@ -905,7 +913,11 @@ func transcriptEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error { log.Debug().Msgf("Audio file copied to: %+v", dst) - whisperModel, err := o.loader.BackendLoader(model.WhisperBackend, config.Model, []llama.ModelOption{}, uint32(config.Threads), o.assetsDestination) + whisperModel, err := o.loader.BackendLoader( + model.WithBackendString(model.WhisperBackend), + model.WithModelFile(config.Model), + model.WithThreads(uint32(config.Threads)), + model.WithAssetDir(o.assetsDestination)) if err != nil { return err } diff --git a/api/prediction.go b/api/prediction.go index 7daa730..b9b5710 100644 --- a/api/prediction.go +++ b/api/prediction.go @@ -1,6 +1,7 @@ package api import ( + "context" "fmt" "os" "path/filepath" @@ -9,6 +10,8 @@ import ( "sync" "github.com/donomii/go-rwkv.cpp" + "github.com/go-skynet/LocalAI/pkg/grpc" + pb "github.com/go-skynet/LocalAI/pkg/grpc/proto" "github.com/go-skynet/LocalAI/pkg/langchain" model "github.com/go-skynet/LocalAI/pkg/model" "github.com/go-skynet/LocalAI/pkg/stablediffusion" @@ -16,6 +19,7 @@ import ( bert "github.com/go-skynet/go-bert.cpp" 
transformers "github.com/go-skynet/go-ggml-transformers.cpp" llama "github.com/go-skynet/go-llama.cpp" + gpt4all "github.com/nomic-ai/gpt4all/gpt4all-bindings/golang" ) @@ -23,6 +27,160 @@ import ( var mutexMap sync.Mutex var mutexes map[string]*sync.Mutex = make(map[string]*sync.Mutex) +func gRPCModelOpts(c Config) *pb.ModelOptions { + b := 512 + if c.Batch != 0 { + b = c.Batch + } + return &pb.ModelOptions{ + ContextSize: int32(c.ContextSize), + Seed: int32(c.Seed), + NBatch: int32(b), + NGPULayers: int32(c.NGPULayers), + MMap: c.MMap, + MainGPU: c.MainGPU, + TensorSplit: c.TensorSplit, + } +} + +// func defaultGGLLMOpts(c Config) []ggllm.ModelOption { +// ggllmOpts := []ggllm.ModelOption{} +// if c.ContextSize != 0 { +// ggllmOpts = append(ggllmOpts, ggllm.SetContext(c.ContextSize)) +// } +// // F16 doesn't seem to produce good output at all! +// //if c.F16 { +// // llamaOpts = append(llamaOpts, llama.EnableF16Memory) +// //} + +// if c.NGPULayers != 0 { +// ggllmOpts = append(ggllmOpts, ggllm.SetGPULayers(c.NGPULayers)) +// } + +// ggllmOpts = append(ggllmOpts, ggllm.SetMMap(c.MMap)) +// ggllmOpts = append(ggllmOpts, ggllm.SetMainGPU(c.MainGPU)) +// ggllmOpts = append(ggllmOpts, ggllm.SetTensorSplit(c.TensorSplit)) +// if c.Batch != 0 { +// ggllmOpts = append(ggllmOpts, ggllm.SetNBatch(c.Batch)) +// } else { +// ggllmOpts = append(ggllmOpts, ggllm.SetNBatch(512)) +// } + +// return ggllmOpts +// } + +func gRPCPredictOpts(c Config, modelPath string) *pb.PredictOptions { + promptCachePath := "" + if c.PromptCachePath != "" { + p := filepath.Join(modelPath, c.PromptCachePath) + os.MkdirAll(filepath.Dir(p), 0755) + promptCachePath = p + } + return &pb.PredictOptions{ + Temperature: float32(c.Temperature), + TopP: float32(c.TopP), + TopK: int32(c.TopK), + Tokens: int32(c.Maxtokens), + Threads: int32(c.Threads), + PromptCacheAll: c.PromptCacheAll, + PromptCacheRO: c.PromptCacheRO, + PromptCachePath: promptCachePath, + Mirostat: int32(c.Mirostat), + MirostatETA: 
float32(c.MirostatETA), + MirostatTAU: float32(c.MirostatTAU), + Debug: c.Debug, + StopPrompts: c.StopWords, + Repeat: int32(c.RepeatPenalty), + NKeep: int32(c.Keep), + Batch: int32(c.Batch), + IgnoreEOS: c.IgnoreEOS, + Seed: int32(c.Seed), + FrequencyPenalty: float32(c.FrequencyPenalty), + MLock: c.MMlock, + MMap: c.MMap, + MainGPU: c.MainGPU, + TensorSplit: c.TensorSplit, + TailFreeSamplingZ: float32(c.TFZ), + TypicalP: float32(c.TypicalP), + } +} + +// func buildGGLLMPredictOptions(c Config, modelPath string) []ggllm.PredictOption { +// // Generate the prediction using the language model +// predictOptions := []ggllm.PredictOption{ +// ggllm.SetTemperature(c.Temperature), +// ggllm.SetTopP(c.TopP), +// ggllm.SetTopK(c.TopK), +// ggllm.SetTokens(c.Maxtokens), +// ggllm.SetThreads(c.Threads), +// } + +// if c.PromptCacheAll { +// predictOptions = append(predictOptions, ggllm.EnablePromptCacheAll) +// } + +// if c.PromptCacheRO { +// predictOptions = append(predictOptions, ggllm.EnablePromptCacheRO) +// } + +// if c.PromptCachePath != "" { +// // Create parent directory +// p := filepath.Join(modelPath, c.PromptCachePath) +// os.MkdirAll(filepath.Dir(p), 0755) +// predictOptions = append(predictOptions, ggllm.SetPathPromptCache(p)) +// } + +// if c.Mirostat != 0 { +// predictOptions = append(predictOptions, ggllm.SetMirostat(c.Mirostat)) +// } + +// if c.MirostatETA != 0 { +// predictOptions = append(predictOptions, ggllm.SetMirostatETA(c.MirostatETA)) +// } + +// if c.MirostatTAU != 0 { +// predictOptions = append(predictOptions, ggllm.SetMirostatTAU(c.MirostatTAU)) +// } + +// if c.Debug { +// predictOptions = append(predictOptions, ggllm.Debug) +// } + +// predictOptions = append(predictOptions, ggllm.SetStopWords(c.StopWords...)) + +// if c.RepeatPenalty != 0 { +// predictOptions = append(predictOptions, ggllm.SetPenalty(c.RepeatPenalty)) +// } + +// if c.Keep != 0 { +// predictOptions = append(predictOptions, ggllm.SetNKeep(c.Keep)) +// } + +// if c.Batch != 0 
{ +// predictOptions = append(predictOptions, ggllm.SetBatch(c.Batch)) +// } + +// if c.IgnoreEOS { +// predictOptions = append(predictOptions, ggllm.IgnoreEOS) +// } + +// if c.Seed != 0 { +// predictOptions = append(predictOptions, ggllm.SetSeed(c.Seed)) +// } + +// //predictOptions = append(predictOptions, llama.SetLogitBias(c.Seed)) + +// predictOptions = append(predictOptions, ggllm.SetFrequencyPenalty(c.FrequencyPenalty)) +// predictOptions = append(predictOptions, ggllm.SetMlock(c.MMlock)) +// predictOptions = append(predictOptions, ggllm.SetMemoryMap(c.MMap)) +// predictOptions = append(predictOptions, ggllm.SetPredictionMainGPU(c.MainGPU)) +// predictOptions = append(predictOptions, ggllm.SetPredictionTensorSplit(c.TensorSplit)) +// predictOptions = append(predictOptions, ggllm.SetTailFreeSamplingZ(c.TFZ)) +// predictOptions = append(predictOptions, ggllm.SetTypicalP(c.TypicalP)) + +// return predictOptions +// } + func defaultLLamaOpts(c Config) []llama.ModelOption { llamaOpts := []llama.ModelOption{} if c.ContextSize != 0 { @@ -59,11 +217,99 @@ func defaultLLamaOpts(c Config) []llama.ModelOption { return llamaOpts } +func buildLLamaPredictOptions(c Config, modelPath string) []llama.PredictOption { + // Generate the prediction using the language model + predictOptions := []llama.PredictOption{ + llama.SetTemperature(c.Temperature), + llama.SetTopP(c.TopP), + llama.SetTopK(c.TopK), + llama.SetTokens(c.Maxtokens), + llama.SetThreads(c.Threads), + } + + if c.PromptCacheAll { + predictOptions = append(predictOptions, llama.EnablePromptCacheAll) + } + + if c.PromptCacheRO { + predictOptions = append(predictOptions, llama.EnablePromptCacheRO) + } + + predictOptions = append(predictOptions, llama.WithGrammar(c.Grammar)) + + if c.PromptCachePath != "" { + // Create parent directory + p := filepath.Join(modelPath, c.PromptCachePath) + os.MkdirAll(filepath.Dir(p), 0755) + predictOptions = append(predictOptions, llama.SetPathPromptCache(p)) + } + + if c.Mirostat != 
0 { + predictOptions = append(predictOptions, llama.SetMirostat(c.Mirostat)) + } + + if c.MirostatETA != 0 { + predictOptions = append(predictOptions, llama.SetMirostatETA(c.MirostatETA)) + } + + if c.MirostatTAU != 0 { + predictOptions = append(predictOptions, llama.SetMirostatTAU(c.MirostatTAU)) + } + + if c.Debug { + predictOptions = append(predictOptions, llama.Debug) + } + + predictOptions = append(predictOptions, llama.SetStopWords(c.StopWords...)) + + if c.RepeatPenalty != 0 { + predictOptions = append(predictOptions, llama.SetPenalty(c.RepeatPenalty)) + } + + if c.Keep != 0 { + predictOptions = append(predictOptions, llama.SetNKeep(c.Keep)) + } + + if c.Batch != 0 { + predictOptions = append(predictOptions, llama.SetBatch(c.Batch)) + } + + if c.F16 { + predictOptions = append(predictOptions, llama.EnableF16KV) + } + + if c.IgnoreEOS { + predictOptions = append(predictOptions, llama.IgnoreEOS) + } + + if c.Seed != 0 { + predictOptions = append(predictOptions, llama.SetSeed(c.Seed)) + } + + //predictOptions = append(predictOptions, llama.SetLogitBias(c.Seed)) + + predictOptions = append(predictOptions, llama.SetFrequencyPenalty(c.FrequencyPenalty)) + predictOptions = append(predictOptions, llama.SetMlock(c.MMlock)) + predictOptions = append(predictOptions, llama.SetMemoryMap(c.MMap)) + predictOptions = append(predictOptions, llama.SetPredictionMainGPU(c.MainGPU)) + predictOptions = append(predictOptions, llama.SetPredictionTensorSplit(c.TensorSplit)) + predictOptions = append(predictOptions, llama.SetTailFreeSamplingZ(c.TFZ)) + predictOptions = append(predictOptions, llama.SetTypicalP(c.TypicalP)) + + return predictOptions +} + func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, dst string, loader *model.ModelLoader, c Config, o *Option) (func() error, error) { if c.Backend != model.StableDiffusionBackend { return nil, fmt.Errorf("endpoint only working with stablediffusion models") } - inferenceModel, err := 
loader.BackendLoader(c.Backend, c.ImageGenerationAssets, []llama.ModelOption{}, uint32(c.Threads), o.assetsDestination) + + inferenceModel, err := loader.BackendLoader( + model.WithBackendString(c.Backend), + model.WithAssetDir(o.assetsDestination), + model.WithThreads(uint32(c.Threads)), + model.WithModelFile(c.ImageGenerationAssets), + ) if err != nil { return nil, err } @@ -106,13 +352,24 @@ func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c Config, modelFile := c.Model llamaOpts := defaultLLamaOpts(c) + grpcOpts := gRPCModelOpts(c) var inferenceModel interface{} var err error + + opts := []model.Option{ + model.WithLlamaOpts(llamaOpts...), + model.WithLoadGRPCOpts(grpcOpts), + model.WithThreads(uint32(c.Threads)), + model.WithAssetDir(o.assetsDestination), + model.WithModelFile(modelFile), + } + if c.Backend == "" { - inferenceModel, err = loader.GreedyLoader(modelFile, llamaOpts, uint32(c.Threads), o.assetsDestination) + inferenceModel, err = loader.GreedyLoader(opts...) } else { - inferenceModel, err = loader.BackendLoader(c.Backend, modelFile, llamaOpts, uint32(c.Threads), o.assetsDestination) + opts = append(opts, model.WithBackendString(c.Backend)) + inferenceModel, err = loader.BackendLoader(opts...) 
} if err != nil { return nil, err @@ -171,100 +428,29 @@ func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c Config, }, nil } -func buildLLamaPredictOptions(c Config, modelPath string) []llama.PredictOption { - // Generate the prediction using the language model - predictOptions := []llama.PredictOption{ - llama.SetTemperature(c.Temperature), - llama.SetTopP(c.TopP), - llama.SetTopK(c.TopK), - llama.SetTokens(c.Maxtokens), - llama.SetThreads(c.Threads), - } - - if c.PromptCacheAll { - predictOptions = append(predictOptions, llama.EnablePromptCacheAll) - } - - if c.PromptCacheRO { - predictOptions = append(predictOptions, llama.EnablePromptCacheRO) - } - - predictOptions = append(predictOptions, llama.WithGrammar(c.Grammar)) - - if c.PromptCachePath != "" { - // Create parent directory - p := filepath.Join(modelPath, c.PromptCachePath) - os.MkdirAll(filepath.Dir(p), 0755) - predictOptions = append(predictOptions, llama.SetPathPromptCache(p)) - } - - if c.Mirostat != 0 { - predictOptions = append(predictOptions, llama.SetMirostat(c.Mirostat)) - } - - if c.MirostatETA != 0 { - predictOptions = append(predictOptions, llama.SetMirostatETA(c.MirostatETA)) - } - - if c.MirostatTAU != 0 { - predictOptions = append(predictOptions, llama.SetMirostatTAU(c.MirostatTAU)) - } - - if c.Debug { - predictOptions = append(predictOptions, llama.Debug) - } - - predictOptions = append(predictOptions, llama.SetStopWords(c.StopWords...)) - - if c.RepeatPenalty != 0 { - predictOptions = append(predictOptions, llama.SetPenalty(c.RepeatPenalty)) - } - - if c.Keep != 0 { - predictOptions = append(predictOptions, llama.SetNKeep(c.Keep)) - } - - if c.Batch != 0 { - predictOptions = append(predictOptions, llama.SetBatch(c.Batch)) - } - - if c.F16 { - predictOptions = append(predictOptions, llama.EnableF16KV) - } - - if c.IgnoreEOS { - predictOptions = append(predictOptions, llama.IgnoreEOS) - } - - if c.Seed != 0 { - predictOptions = append(predictOptions, 
llama.SetSeed(c.Seed)) - } - - //predictOptions = append(predictOptions, llama.SetLogitBias(c.Seed)) - - predictOptions = append(predictOptions, llama.SetFrequencyPenalty(c.FrequencyPenalty)) - predictOptions = append(predictOptions, llama.SetMlock(c.MMlock)) - predictOptions = append(predictOptions, llama.SetMemoryMap(c.MMap)) - predictOptions = append(predictOptions, llama.SetPredictionMainGPU(c.MainGPU)) - predictOptions = append(predictOptions, llama.SetPredictionTensorSplit(c.TensorSplit)) - predictOptions = append(predictOptions, llama.SetTailFreeSamplingZ(c.TFZ)) - predictOptions = append(predictOptions, llama.SetTypicalP(c.TypicalP)) - - return predictOptions -} - func ModelInference(s string, loader *model.ModelLoader, c Config, o *Option, tokenCallback func(string) bool) (func() (string, error), error) { supportStreams := false modelFile := c.Model llamaOpts := defaultLLamaOpts(c) + grpcOpts := gRPCModelOpts(c) var inferenceModel interface{} var err error + + opts := []model.Option{ + model.WithLlamaOpts(llamaOpts...), + model.WithLoadGRPCOpts(grpcOpts), + model.WithThreads(uint32(c.Threads)), + model.WithAssetDir(o.assetsDestination), + model.WithModelFile(modelFile), + } + if c.Backend == "" { - inferenceModel, err = loader.GreedyLoader(modelFile, llamaOpts, uint32(c.Threads), o.assetsDestination) + inferenceModel, err = loader.GreedyLoader(opts...) } else { - inferenceModel, err = loader.BackendLoader(c.Backend, modelFile, llamaOpts, uint32(c.Threads), o.assetsDestination) + opts = append(opts, model.WithBackendString(c.Backend)) + inferenceModel, err = loader.BackendLoader(opts...) 
} if err != nil { return nil, err @@ -552,6 +738,25 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, o *Option, to model.SetTokenCallback(nil) return str, er } + case *grpc.Client: + // in GRPC, the backend is supposed to answer to 1 single token if stream is not supported + supportStreams = true + fn = func() (string, error) { + + opts := gRPCPredictOpts(c, loader.ModelPath) + opts.Prompt = s + if tokenCallback != nil { + ss := "" + err := model.PredictStream(context.TODO(), opts, func(s string) { + tokenCallback(s) + ss += s + }) + return ss, err + } else { + reply, err := model.Predict(context.TODO(), opts) + return reply.Message, err + } + } case *langchain.HuggingFace: fn = func() (string, error) { diff --git a/cmd/grpc/falcon/main.go b/cmd/grpc/falcon/main.go new file mode 100644 index 0000000..9ccead4 --- /dev/null +++ b/cmd/grpc/falcon/main.go @@ -0,0 +1,25 @@ +package main + +// GRPC Falcon server + +// Note: this is started internally by LocalAI and a server is allocated for each model + +import ( + "flag" + + falcon "github.com/go-skynet/LocalAI/pkg/grpc/llm/falcon" + + grpc "github.com/go-skynet/LocalAI/pkg/grpc" +) + +var ( + addr = flag.String("addr", "localhost:50051", "the address to connect to") +) + +func main() { + flag.Parse() + + if err := grpc.StartServer(*addr, &falcon.LLM{}); err != nil { + panic(err) + } +} diff --git a/go.mod b/go.mod index 0f65978..1d6268c 100644 --- a/go.mod +++ b/go.mod @@ -13,20 +13,25 @@ require ( github.com/gofiber/fiber/v2 v2.47.0 github.com/google/uuid v1.3.0 github.com/hashicorp/go-multierror v1.1.1 + github.com/hpcloud/tail v1.0.0 github.com/imdario/mergo v0.3.16 github.com/json-iterator/go v1.1.12 github.com/mholt/archiver/v3 v3.5.1 + github.com/mudler/go-ggllm.cpp v0.0.0-20230708215552-a6504d5bc137 + github.com/mudler/go-processmanager v0.0.0-20220724164624-c45b5c61312d github.com/mudler/go-stable-diffusion v0.0.0-20230605122230-d89260f598af 
github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230708212935-d611d107479f github.com/onsi/ginkgo/v2 v2.11.0 github.com/onsi/gomega v1.27.8 github.com/otiai10/openaigo v1.5.2 + github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5 github.com/rs/zerolog v1.29.1 github.com/sashabaranov/go-openai v1.13.0 - github.com/swaggo/swag v1.16.1 github.com/tmc/langchaingo v0.0.0-20230709010448-a875e6bc0c54 github.com/urfave/cli/v2 v2.25.7 github.com/valyala/fasthttp v1.48.0 + google.golang.org/grpc v1.56.2 + google.golang.org/protobuf v1.30.0 gopkg.in/yaml.v2 v2.4.0 gopkg.in/yaml.v3 v3.0.1 ) @@ -34,8 +39,10 @@ require ( require ( github.com/dlclark/regexp2 v1.8.1 // indirect github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 // indirect + github.com/golang/protobuf v1.5.3 // indirect github.com/golang/snappy v0.0.2 // indirect github.com/klauspost/pgzip v1.2.5 // indirect + github.com/kr/text v0.2.0 // indirect github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 // indirect github.com/modern-go/reflect2 v1.0.2 // indirect github.com/nwaples/rardecode v1.1.0 // indirect @@ -43,33 +50,27 @@ require ( github.com/pkoukk/tiktoken-go v0.1.2 // indirect github.com/ulikunitz/xz v0.5.9 // indirect github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 // indirect + google.golang.org/genproto v0.0.0-20230410155749-daa745c078e1 // indirect + gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect + gopkg.in/fsnotify.v1 v1.4.7 // indirect + gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 // indirect ) require ( - github.com/KyleBanks/depth v1.2.1 // indirect - github.com/PuerkitoBio/purell v1.1.1 // indirect - github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect github.com/andybalholm/brotli v1.0.5 // indirect github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect github.com/go-audio/audio v1.0.0 // indirect github.com/go-audio/riff v1.0.0 // indirect github.com/go-logr/logr v1.2.4 // indirect - 
github.com/go-openapi/jsonpointer v0.19.5 // indirect - github.com/go-openapi/jsonreference v0.19.6 // indirect - github.com/go-openapi/spec v0.20.4 // indirect - github.com/go-openapi/swag v0.22.3 // indirect github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect github.com/google/go-cmp v0.5.9 // indirect github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 // indirect github.com/hashicorp/errwrap v1.0.0 // indirect - github.com/josharian/intern v1.0.0 // indirect github.com/klauspost/compress v1.16.3 // indirect - github.com/mailru/easyjson v0.7.7 // indirect github.com/mattn/go-colorable v0.1.13 // indirect github.com/mattn/go-isatty v0.0.19 // indirect github.com/mattn/go-runewidth v0.0.14 // indirect github.com/mudler/go-piper v0.0.0-20230621222733-56b8a81b4760 - github.com/otiai10/mint v1.6.1 // indirect github.com/philhofer/fwd v1.1.2 // indirect github.com/rivo/uniseg v0.2.0 // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect diff --git a/go.sum b/go.sum index 81f81e7..2906f50 100644 --- a/go.sum +++ b/go.sum @@ -1,9 +1,3 @@ -github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc= -github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE= -github.com/PuerkitoBio/purell v1.1.1 h1:WEQqlqaGbrPkxLJWfBwQmfEAE1Z7ONdDLqrN38tNFfI= -github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0= -github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV/sSk/8dngufqelfh6jnri85riMAaF/M= -github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE= github.com/andybalholm/brotli v1.0.1/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y= github.com/andybalholm/brotli v1.0.5 h1:8uQZIdzKmjc/iuPu7O2ioW48L81FgatrcpfFmiq/cCs= github.com/andybalholm/brotli v1.0.5/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= @@ -19,13 +13,12 @@ github.com/davecgh/go-spew 
v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dlclark/regexp2 v1.8.1 h1:6Lcdwya6GjPUNsBct8Lg/yRPwMhABj269AAzdGSiR+0= github.com/dlclark/regexp2 v1.8.1/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= -github.com/donomii/go-rwkv.cpp v0.0.0-20230619005719-f5a8c4539674 h1:G70Yf/QOCEL1v24idWnGd6rJsbqiGkJAJnMaWaolzEg= -github.com/donomii/go-rwkv.cpp v0.0.0-20230619005719-f5a8c4539674/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM= github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 h1:iFaUwBSo5Svw6L7HYpRu/0lE3e0BaElwnNO1qkNQxBY= github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5/go.mod h1:qssHWj60/X5sZFNxpG4HBPDHVqxNm4DfnCKgrbZOT+s= github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY= -github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230606002726-57543c169e27 h1:boeMTUUBtnLU8JElZJHXrsUzROJar9/t6vGOFjkrhhI= -github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230606002726-57543c169e27/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo= +github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= +github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4= +github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230628193450-85ed71aaec8e h1:KtbU2JR3lJuXFASHG2+sVLucfMPBjWKUUKByX6C81mQ= github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230628193450-85ed71aaec8e/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo= github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4= @@ -36,47 +29,28 @@ github.com/go-audio/wav v1.1.0 h1:jQgLtbqBzY7G+BM8fXF7AHUk1uHUviWS4X39d5rsL2g= github.com/go-audio/wav v1.1.0/go.mod h1:mpe9qfwbScEbkd8uybLuIpTgHyrISw/OTuvjUW2iGtE= github.com/go-logr/logr v1.2.4 
h1:g01GSCwiDw2xSZfjJ2/T9M+S6pFdcNtFYsp+Y43HYDQ= github.com/go-logr/logr v1.2.4/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= -github.com/go-openapi/jsonpointer v0.19.3/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg= -github.com/go-openapi/jsonpointer v0.19.5 h1:gZr+CIYByUqjcgeLXnQu2gHYQC9o73G2XUeOFYEICuY= -github.com/go-openapi/jsonpointer v0.19.5/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg= -github.com/go-openapi/jsonreference v0.19.6 h1:UBIxjkht+AWIgYzCDSv2GN+E/togfwXUJFRTWhl2Jjs= -github.com/go-openapi/jsonreference v0.19.6/go.mod h1:diGHMEHg2IqXZGKxqyvWdfWU/aim5Dprw5bqpKkTvns= -github.com/go-openapi/spec v0.20.4 h1:O8hJrt0UMnhHcluhIdUgCLRWyM2x7QkBXRvOs7m+O1M= -github.com/go-openapi/spec v0.20.4/go.mod h1:faYFR1CvsJZ0mNsmsphTMSoRrNV3TEDoAM7FOEWeq8I= -github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk= -github.com/go-openapi/swag v0.19.15 h1:D2NRCBzS9/pEY3gP9Nl8aDqGUcPFrwG2p+CNFrLyrCM= -github.com/go-openapi/swag v0.19.15/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ= -github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= -github.com/go-skynet/bloomz.cpp v0.0.0-20230529155654-1834e77b83fa h1:gxr68r/6EWroay4iI81jxqGCDbKotY4+CiwdUkBz2NQ= -github.com/go-skynet/bloomz.cpp v0.0.0-20230529155654-1834e77b83fa/go.mod h1:wc0fJ9V04yiYTfgKvE5RUUSRQ5Kzi0Bo4I+U3nNOUuA= -github.com/go-skynet/go-bert.cpp v0.0.0-20230607105116-6069103f54b9 h1:wRGbDwNwPmSzoXVw/HLzXY4blpRvPWg7QW2OA0WKezA= -github.com/go-skynet/go-bert.cpp v0.0.0-20230607105116-6069103f54b9/go.mod h1:pXKCpYYXujMeAvgJHU6WoMfvYbr84563+J8+Ebkyr5U= -github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230617123349-32b9223ccdb1 h1:jVGgzDSfpjD/0jl/ChpGI+O4EHSAeeU6DK7IyhH8PK8= -github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230617123349-32b9223ccdb1/go.mod h1:31j1odgFXP8hDSUVfH0zErKI5aYVP18ddYnPkwCso2A= -github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230620192816-a459d2726792 
h1:rozZ9gWGzq0ZhBsNCWqfLTRCebaxwTsxLMnflwe6rDU= -github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230620192816-a459d2726792/go.mod h1:31j1odgFXP8hDSUVfH0zErKI5aYVP18ddYnPkwCso2A= -github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230626202628-8e31841dcddc h1:SrNxH4U8W6cqurbxpXxm9rzifeDsCgecRT73kT0BRq0= -github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230626202628-8e31841dcddc/go.mod h1:31j1odgFXP8hDSUVfH0zErKI5aYVP18ddYnPkwCso2A= -github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230630204211-3fec197a1dc4 h1:LScGc8yWTS9wbS2RTOq6s+waeHElLIQDJg2SUCwrO3E= -github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230630204211-3fec197a1dc4/go.mod h1:31j1odgFXP8hDSUVfH0zErKI5aYVP18ddYnPkwCso2A= -github.com/go-skynet/go-llama.cpp v0.0.0-20230616223721-7ad833b67070 h1:T771FjB1yQw8j4P5x4ayFrUPNTglzxRIqDjaNkMVIME= -github.com/go-skynet/go-llama.cpp v0.0.0-20230616223721-7ad833b67070/go.mod h1:tzi97YvT1bVQ+iTG39LvpDkKG1WbizgtljC+orSoM40= -github.com/go-skynet/go-llama.cpp v0.0.0-20230626215901-f104111358e8 h1:Knh5QUvI/68erb/yWtrVa/3hvoQdENF2dH0hL2HNPrI= -github.com/go-skynet/go-llama.cpp v0.0.0-20230626215901-f104111358e8/go.mod h1:tzi97YvT1bVQ+iTG39LvpDkKG1WbizgtljC+orSoM40= -github.com/go-skynet/go-llama.cpp v0.0.0-20230627195533-582753605210 h1:9bm+vsiR3UI7xlU0G0cMU2Swq78RysoFVkSONvrujF8= -github.com/go-skynet/go-llama.cpp v0.0.0-20230627195533-582753605210/go.mod h1:tzi97YvT1bVQ+iTG39LvpDkKG1WbizgtljC+orSoM40= -github.com/go-skynet/go-llama.cpp v0.0.0-20230628194133-42ba44838369 h1:lSX1NWzRvRS2MlACvyvVVUnqXhKiuMAoN3DO5TbCe8M= -github.com/go-skynet/go-llama.cpp v0.0.0-20230628194133-42ba44838369/go.mod h1:tzi97YvT1bVQ+iTG39LvpDkKG1WbizgtljC+orSoM40= -github.com/go-skynet/go-llama.cpp v0.0.0-20230703203849-ffa57fbc3a12 h1:cfGZiZana0gPD0i8nmyOGTUQGb4N8PYqaBqhhukREPc= -github.com/go-skynet/go-llama.cpp v0.0.0-20230703203849-ffa57fbc3a12/go.mod h1:tzi97YvT1bVQ+iTG39LvpDkKG1WbizgtljC+orSoM40= +github.com/go-task/slim-sprig 
v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg7847qk6SyHyPtNmDHnmrv/HOrqktSC+C9fM+CJOE= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/gofiber/fiber/v2 v2.47.0 h1:EN5lHVCc+Pyqh5OEsk8fzRiifgwpbrP0rulQ4iNf3fs= github.com/gofiber/fiber/v2 v2.47.0/go.mod h1:mbFMVN1lQuzziTkkakgtKKdjfsXSw9BKR5lmcNksUoU= +github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= +github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= +github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= +github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= +github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= +github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= +github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= +github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/golang/snappy v0.0.2 h1:aeE13tS0IiQgFjYdoL8qN3K1N2bXXtI6Vi51/y7BpMw= github.com/golang/snappy v0.0.2/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= 
+github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= @@ -89,11 +63,11 @@ github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/U github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo= github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM= +github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI= +github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= -github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= -github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= @@ -103,16 +77,12 @@ github.com/klauspost/compress v1.16.3/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQs github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= github.com/klauspost/pgzip v1.2.5 h1:qnWYvvKqedOF2ulHpMG72XQol4ILEJ8k2wwRl/Km8oE= github.com/klauspost/pgzip v1.2.5/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs= 
-github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI= +github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= -github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= -github.com/mailru/easyjson v0.7.6 h1:8yTIVnZgCoiM1TgqoeTl+LfU5Jg6/xL3QhGQnimLYnA= -github.com/mailru/easyjson v0.7.6/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= -github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= github.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4= github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= @@ -128,33 +98,29 @@ github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 h1:ZqeYNhU3OH github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= -github.com/mudler/go-piper v0.0.0-20230621222733-56b8a81b4760 h1:OFVkSxR7CRSRSNm5dvpMRZwmSwWa8EMMnHbc84fW5tU= -github.com/mudler/go-piper v0.0.0-20230621222733-56b8a81b4760/go.mod h1:O7SwdSWMilAWhBZMK9N9Y/oBDyMMzshE3ju8Xkexwig= 
-github.com/mudler/go-stable-diffusion v0.0.0-20230605122230-d89260f598af h1:XFq6OUqsWQam0OrEr05okXsJK/TQur3zoZTHbiZD3Ks= -github.com/mudler/go-stable-diffusion v0.0.0-20230605122230-d89260f598af/go.mod h1:8ufRkpz/S/9ahkaxzZ5i4WMgO9w4InEhuRoT7vK5Rnw= -github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs= -github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= -github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230620230702-09ae04cee90c h1:axNtjd5k6Xs4Ck7B7VRRQu6q5lQzTsjdWmaJkDADopU= -github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230620230702-09ae04cee90c/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= -github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230628182915-a67f8132e165 h1:zcnIdoSeLueTDxUD2A1qnyaSp8uh0Ay7OgHeBwpxSeg= -github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230628182915-a67f8132e165/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= -github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230708212935-d611d107479f h1:FtXRIjsBvoBQ5xmA26QbzyG4RjV2U5lOpUgP4npITOM= -github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230708212935-d611d107479f/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= +github.com/mudler/go-processmanager v0.0.0-20220724164624-c45b5c61312d h1:/lAg9vPAAU+s35cDMCx1IyeMn+4OYfCBPqi08Q8vXDg= +github.com/mudler/go-processmanager v0.0.0-20220724164624-c45b5c61312d/go.mod h1:HGGAOJhipApckwNV8ZTliRJqxctUv3xRY+zbQEwuytc= github.com/nwaples/rardecode v1.1.0 h1:vSxaY8vQhOcVr4mm5e8XllHWTiM4JF507A0Katqw7MQ= github.com/nwaples/rardecode v1.1.0/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0= +github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= +github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE= +github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= 
+github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= +github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk= +github.com/onsi/ginkgo v1.16.4 h1:29JGrr5oVBm5ulCWet69zQkzWipVXIol6ygQUe/EzNc= +github.com/onsi/ginkgo v1.16.4/go.mod h1:dX+/inL/fNMqNlz0e9LfyB9TswhZpCVdJM/Z6Vvnwo0= github.com/onsi/ginkgo/v2 v2.11.0 h1:WgqUCUt/lT6yXoQ8Wef0fsNn5cAuMK7+KT9UFRz2tcU= github.com/onsi/ginkgo/v2 v2.11.0/go.mod h1:ZhrRA5XmEE3x3rhlzamx/JJvujdZoJ2uvgI7kR0iZvM= +github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY= +github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo= +github.com/onsi/gomega v1.16.0/go.mod h1:HnhC7FXeEQY45zxNK3PPoIUhzk/80Xly9PcubAlGdZY= github.com/onsi/gomega v1.27.8 h1:gegWiwZjBsf2DgiSbf5hpokZ98JVDMcWkUiigk6/KXc= github.com/onsi/gomega v1.27.8/go.mod h1:2J8vzI/s+2shY9XHRApDkdgPo1TKT7P2u6fXeJKFnNQ= -github.com/otiai10/mint v1.5.1 h1:XaPLeE+9vGbuyEHem1JNk3bYc7KKqyI/na0/mLd/Kks= -github.com/otiai10/mint v1.5.1/go.mod h1:MJm72SBthJjz8qhefc4z1PYEieWmy8Bku7CjcAqyUSM= -github.com/otiai10/mint v1.6.1/go.mod h1:MJm72SBthJjz8qhefc4z1PYEieWmy8Bku7CjcAqyUSM= -github.com/otiai10/openaigo v1.2.0 h1:Whq+uvgqw8NdIsVdixtBKCAI6OdfCJiGPlhUnYJQ6Ag= -github.com/otiai10/openaigo v1.2.0/go.mod h1:792bx6AWTS61weDi2EzKpHHnTF4eDMAlJ5GvAk/mgPg= -github.com/otiai10/openaigo v1.4.0 h1:BeacKb2Q5bVejjOKHFJxL2WFYal3QxwkrKtKuoU5LNU= -github.com/otiai10/openaigo v1.4.0/go.mod h1:kIaXc3V+Xy5JLplcBxehVyGYDtufHp3PFPy04jOwOAI= +github.com/otiai10/mint v1.6.1 h1:kgbTJmOpp/0ce7hk3H8jiSuR0MXmpwWRfqUdKww17qg= github.com/otiai10/openaigo v1.5.2 h1:YnNDisZmA4syArF3IxMCIrfgZOq30PLV219gPY7n2z8= github.com/otiai10/openaigo v1.5.2/go.mod h1:kIaXc3V+Xy5JLplcBxehVyGYDtufHp3PFPy04jOwOAI= +github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5 h1:Ii+DKncOVM8Cu1Hc+ETb5K+23HdAMvESYE3ZJ5b5cMI= +github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5/go.mod 
h1:iIss55rKnNBTvrwdmkUpLnDpZoAHvWaiq5+iMmen4AE= github.com/philhofer/fwd v1.1.1/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU= github.com/philhofer/fwd v1.1.2 h1:bnDivRJ1EWPjUIRXV5KfORO897HTbpFAQddBdE8t7Gw= github.com/philhofer/fwd v1.1.2/go.mod h1:qkPdfjR2SIEbspLqpe1tO4n5yICnr2DY7mqEx2tUTP0= @@ -172,8 +138,6 @@ github.com/rs/zerolog v1.29.1 h1:cO+d60CHkknCbvzEWxP0S9K6KqyTjrCNUy1LdQLCGPc= github.com/rs/zerolog v1.29.1/go.mod h1:Le6ESbR7hc+DP6Lt1THiV8CQSdkkNrd3R0XbEgp3ZBU= github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= -github.com/sashabaranov/go-openai v1.11.3 h1:bvwWF8hj4UhPlswBdL9/IfOpaHXfzGCJO8WY8ml9sGc= -github.com/sashabaranov/go-openai v1.11.3/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg= github.com/sashabaranov/go-openai v1.13.0 h1:EAusFfnhaMaaUspUZ2+MbB/ZcVeD4epJmTOlZ+8AcAE= github.com/sashabaranov/go-openai v1.13.0/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg= github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94 h1:rmMl4fXJhKMNWl+K+r/fq4FbbKI+Ia2m9hYBLm2h4G4= @@ -181,26 +145,14 @@ github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94/go.mod h1:90zrgN3 github.com/savsgio/gotils v0.0.0-20220530130905-52f3993e8d6d/go.mod h1:Gy+0tqhJvgGlqnTF8CVGP0AaGRjwBtXs/a5PA0Y3+A4= github.com/savsgio/gotils v0.0.0-20230208104028-c358bd845dee h1:8Iv5m6xEo1NR1AvpV+7XmhI4r39LGNzwUL4YpMuL5vk= github.com/savsgio/gotils v0.0.0-20230208104028-c358bd845dee/go.mod h1:qwtSXrKuJh/zsFQ12yEE89xfCrGKK63Rr7ctU/uCo4g= -github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= 
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.2 h1:+h33VjcLVPDHtOdpUCuF+7gSuG3yGIftsP1YvFihtJ8= -github.com/swaggo/swag v1.16.1 h1:fTNRhKstPKxcnoKsytm4sahr8FaYzUcT7i1/3nd/fBg= -github.com/swaggo/swag v1.16.1/go.mod h1:9/LMvHycG3NFHfR6LwvikHv5iFvmPADQ359cKikGxto= github.com/tinylib/msgp v1.1.6/go.mod h1:75BAfg2hauQhs3qedfdDZmWAPcFMAvJE5b9rGOMufyw= github.com/tinylib/msgp v1.1.8 h1:FCXC1xanKO4I8plpHGH2P7koL/RzZs12l/+r7vakfm0= github.com/tinylib/msgp v1.1.8/go.mod h1:qkpG+2ldGg4xRFmx+jfTvZPxfGFhi64BcnL9vkCm/Tw= -github.com/tmc/langchaingo v0.0.0-20230616220619-1b3da4433944 h1:EE9fvNENTdRc/yI/1zAs7VFbmDk6JZ7EbBIFl+TsCm0= -github.com/tmc/langchaingo v0.0.0-20230616220619-1b3da4433944/go.mod h1:6l1WoyqVDwkv7cFlY3gfcTv8yVowVyuutKv8PGlQCWI= -github.com/tmc/langchaingo v0.0.0-20230625081011-4d9d55dbcaba h1:NpAI9C0y9T4jwP7XFShwYJKGf/ggyCgZEtL/7lLRPwE= -github.com/tmc/langchaingo v0.0.0-20230625081011-4d9d55dbcaba/go.mod h1:tz9cjA9BW8/lWx/T5njr3ZLHK/dfPyr/0ICSMThmY2g= -github.com/tmc/langchaingo v0.0.0-20230625234550-7ea734523e39 h1:SpOEFXx5xXLypFnwNRQj7yOC3rMvSylGA5BQW/FAwYc= -github.com/tmc/langchaingo v0.0.0-20230625234550-7ea734523e39/go.mod h1:tz9cjA9BW8/lWx/T5njr3ZLHK/dfPyr/0ICSMThmY2g= -github.com/tmc/langchaingo v0.0.0-20230627220614-633853b5ac3b h1:xUxtya/3KRDn1rcCVZucp2KhjdqSZat9j0hOshSVh2Q= -github.com/tmc/langchaingo v0.0.0-20230627220614-633853b5ac3b/go.mod h1:F1k7uRBLM8jMMEPV3dVtWVNc+W91nxOBRKbJWM/LwpM= -github.com/tmc/langchaingo v0.0.0-20230628165432-e510561c17f9 h1:BooyHg3f058lrPcTLdfC7HTfjO5OGZAgwciQJ5e85l0= -github.com/tmc/langchaingo v0.0.0-20230628165432-e510561c17f9/go.mod h1:F1k7uRBLM8jMMEPV3dVtWVNc+W91nxOBRKbJWM/LwpM= github.com/tmc/langchaingo v0.0.0-20230709010448-a875e6bc0c54 h1:MZSC3/pdBzkoPG49uTRvtEepOQKdbdgaT1aLtaEwxx4= github.com/tmc/langchaingo v0.0.0-20230709010448-a875e6bc0c54/go.mod h1:RsMJqgUynOtr2jWNhUF41R3j6SDkKq9c8UfE0nJYBb4= github.com/ulikunitz/xz 
v0.5.8/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= @@ -228,25 +180,34 @@ golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/mod v0.7.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.10.0 h1:lFO9qtOdlre5W1jxS3r/4szv2/6iXxScdzjoBMXNhYk= +golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= -golang.org/x/net v0.0.0-20210421230115-4e50805a0758/go.mod h1:72T/g9IO56b78aLF+1Kcs5dz7/ng1VjMUvfKvpfy+jM= +golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.3.0/go.mod h1:MBQ8lrhLObU/6UmLb4fmbmk5OcyYmqtbGd/9yIeKjEE= golang.org/x/net v0.10.0 h1:X2//UzNDwYmtCLn7To6G58Wr6f5ahEAQgKNzv9Y951M= golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= +golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync 
v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210420072515-93ed5bcd2bfe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210112080510-489259a85091/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -270,6 +231,7 @@ golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= 
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20201022035929-9cf592e881e9/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/tools v0.4.0/go.mod h1:UE5sM2OK9E/d67R0ANs2xJizIymRP5gJU295PvKXxjQ= golang.org/x/tools v0.9.3 h1:Gn1I8+64MsuTb/HpH+LmQtNas23LhUVr3rYZ0eKuaMM= @@ -278,15 +240,33 @@ golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8T golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/genproto v0.0.0-20230410155749-daa745c078e1 h1:KpwkzHKEF7B9Zxg18WzOa7djJ+Ha5DzthMyZYQfEn2A= +google.golang.org/genproto v0.0.0-20230410155749-daa745c078e1/go.mod h1:nKE/iIaLqn2bQwXBg8f1g2Ylh6r5MN5CmZvuzZCgsCU= +google.golang.org/grpc v1.56.2 h1:fVRFRnXvU+x6C4IlHZewvJOVHoOv1TUuQyoRsYnB4bI= +google.golang.org/grpc v1.56.2/go.mod h1:I9bI3vqKfayGqPUAwGdOSu7kt6oIJLixfffKrpXqQ9s= +google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= +google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= +google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= +google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= 
+google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= +google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= +google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng= +google.golang.org/protobuf v1.30.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f h1:BLraFXnmrev5lT+xlilqcH8XK9/i0At2xKjWk4p6zsU= -gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4= +gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= +gopkg.in/op/go-logging.v1 v1.0.0-20160211212156-b2cb9fa56473/go.mod h1:N1eN2tsCx0Ydtgjl4cqmbRCsY4/+z4cYDeqwZTk6zog= +gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= +gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 
v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/pkg/grpc/client.go b/pkg/grpc/client.go new file mode 100644 index 0000000..f63a89a --- /dev/null +++ b/pkg/grpc/client.go @@ -0,0 +1,98 @@ +package grpc + +import ( + "context" + "fmt" + "io" + "time" + + pb "github.com/go-skynet/LocalAI/pkg/grpc/proto" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" +) + +type Client struct { + address string +} + +func NewClient(address string) *Client { + return &Client{ + address: address, + } +} + +func (c *Client) HealthCheck(ctx context.Context) bool { + conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials())) + if err != nil { + fmt.Println(err) + return false + } + defer conn.Close() + client := pb.NewLLMClient(conn) + + // The healthcheck call shouldn't take long time + ctx, cancel := context.WithTimeout(ctx, 10*time.Second) + defer cancel() + + res, err := client.Health(ctx, &pb.HealthMessage{}) + if err != nil { + fmt.Println(err) + + return false + } + + if res.Message == "OK" { + return true + } + return false +} + +func (c *Client) Predict(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.Reply, error) { + conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials())) + if err != nil { + return nil, err + } + defer conn.Close() + client := pb.NewLLMClient(conn) + + return client.Predict(ctx, in, opts...) 
+} + +func (c *Client) LoadModel(ctx context.Context, in *pb.ModelOptions, opts ...grpc.CallOption) (*pb.Result, error) { + conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials())) + if err != nil { + return nil, err + } + defer conn.Close() + client := pb.NewLLMClient(conn) + return client.LoadModel(ctx, in, opts...) +} + +func (c *Client) PredictStream(ctx context.Context, in *pb.PredictOptions, f func(s string), opts ...grpc.CallOption) error { + conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials())) + if err != nil { + return err + } + defer conn.Close() + client := pb.NewLLMClient(conn) + + stream, err := client.PredictStream(ctx, in, opts...) + if err != nil { + return err + } + + for { + feature, err := stream.Recv() + if err == io.EOF { + break + } + if err != nil { + fmt.Println("Error", err) + + return err + } + f(feature.GetMessage()) + } + + return nil +} diff --git a/pkg/grpc/interface.go b/pkg/grpc/interface.go new file mode 100644 index 0000000..8ac851a --- /dev/null +++ b/pkg/grpc/interface.go @@ -0,0 +1,11 @@ +package grpc + +import ( + pb "github.com/go-skynet/LocalAI/pkg/grpc/proto" +) + +type LLM interface { + Predict(*pb.PredictOptions) (string, error) + PredictStream(*pb.PredictOptions, chan string) + Load(*pb.ModelOptions) error +} diff --git a/pkg/grpc/llm/falcon/falcon.go b/pkg/grpc/llm/falcon/falcon.go new file mode 100644 index 0000000..a0a53be --- /dev/null +++ b/pkg/grpc/llm/falcon/falcon.go @@ -0,0 +1,136 @@ +package falcon + +// This is a wrapper to satisfy the GRPC service interface +// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc) +import ( + "fmt" + + pb "github.com/go-skynet/LocalAI/pkg/grpc/proto" + + ggllm "github.com/mudler/go-ggllm.cpp" +) + +type LLM struct { + falcon *ggllm.Falcon +} + +func (llm *LLM) Load(opts *pb.ModelOptions) error { + ggllmOpts := []ggllm.ModelOption{} + if
opts.ContextSize != 0 { + ggllmOpts = append(ggllmOpts, ggllm.SetContext(int(opts.ContextSize))) + } + // F16 doesn't seem to produce good output at all! + //if c.F16 { + // llamaOpts = append(llamaOpts, llama.EnableF16Memory) + //} + + if opts.NGPULayers != 0 { + ggllmOpts = append(ggllmOpts, ggllm.SetGPULayers(int(opts.NGPULayers))) + } + + ggllmOpts = append(ggllmOpts, ggllm.SetMMap(opts.MMap)) + ggllmOpts = append(ggllmOpts, ggllm.SetMainGPU(opts.MainGPU)) + ggllmOpts = append(ggllmOpts, ggllm.SetTensorSplit(opts.TensorSplit)) + if opts.NBatch != 0 { + ggllmOpts = append(ggllmOpts, ggllm.SetNBatch(int(opts.NBatch))) + } else { + ggllmOpts = append(ggllmOpts, ggllm.SetNBatch(512)) + } + + model, err := ggllm.New(opts.Model, ggllmOpts...) + llm.falcon = model + return err +} + +func buildPredictOptions(opts *pb.PredictOptions) []ggllm.PredictOption { + predictOptions := []ggllm.PredictOption{ + ggllm.SetTemperature(float64(opts.Temperature)), + ggllm.SetTopP(float64(opts.TopP)), + ggllm.SetTopK(int(opts.TopK)), + ggllm.SetTokens(int(opts.Tokens)), + ggllm.SetThreads(int(opts.Threads)), + } + + if opts.PromptCacheAll { + predictOptions = append(predictOptions, ggllm.EnablePromptCacheAll) + } + + if opts.PromptCacheRO { + predictOptions = append(predictOptions, ggllm.EnablePromptCacheRO) + } + + // Expected absolute path + if opts.PromptCachePath != "" { + predictOptions = append(predictOptions, ggllm.SetPathPromptCache(opts.PromptCachePath)) + } + + if opts.Mirostat != 0 { + predictOptions = append(predictOptions, ggllm.SetMirostat(int(opts.Mirostat))) + } + + if opts.MirostatETA != 0 { + predictOptions = append(predictOptions, ggllm.SetMirostatETA(float64(opts.MirostatETA))) + } + + if opts.MirostatTAU != 0 { + predictOptions = append(predictOptions, ggllm.SetMirostatTAU(float64(opts.MirostatTAU))) + } + + if opts.Debug { + predictOptions = append(predictOptions, ggllm.Debug) + } + + predictOptions = append(predictOptions, ggllm.SetStopWords(opts.StopPrompts...)) 
+ + if opts.PresencePenalty != 0 { + predictOptions = append(predictOptions, ggllm.SetPenalty(float64(opts.PresencePenalty))) + } + + if opts.NKeep != 0 { + predictOptions = append(predictOptions, ggllm.SetNKeep(int(opts.NKeep))) + } + + if opts.Batch != 0 { + predictOptions = append(predictOptions, ggllm.SetBatch(int(opts.Batch))) + } + + if opts.IgnoreEOS { + predictOptions = append(predictOptions, ggllm.IgnoreEOS) + } + + if opts.Seed != 0 { + predictOptions = append(predictOptions, ggllm.SetSeed(int(opts.Seed))) + } + + //predictOptions = append(predictOptions, llama.SetLogitBias(c.Seed)) + + predictOptions = append(predictOptions, ggllm.SetFrequencyPenalty(float64(opts.FrequencyPenalty))) + predictOptions = append(predictOptions, ggllm.SetMlock(opts.MLock)) + predictOptions = append(predictOptions, ggllm.SetMemoryMap(opts.MMap)) + predictOptions = append(predictOptions, ggllm.SetPredictionMainGPU(opts.MainGPU)) + predictOptions = append(predictOptions, ggllm.SetPredictionTensorSplit(opts.TensorSplit)) + predictOptions = append(predictOptions, ggllm.SetTailFreeSamplingZ(float64(opts.TailFreeSamplingZ))) + predictOptions = append(predictOptions, ggllm.SetTypicalP(float64(opts.TypicalP))) + return predictOptions +} + +func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) { + return llm.falcon.Predict(opts.Prompt, buildPredictOptions(opts)...) +} + +func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) { + predictOptions := buildPredictOptions(opts) + + predictOptions = append(predictOptions, ggllm.SetTokenCallback(func(token string) bool { + results <- token + return true + })) + + go func() { + _, err := llm.falcon.Predict(opts.Prompt, predictOptions...) 
+ if err != nil { + fmt.Println("err: ", err) + } + close(results) + }() +} diff --git a/pkg/grpc/llm/ggml/starcoder.go b/pkg/grpc/llm/ggml/starcoder.go new file mode 100644 index 0000000..e69de29 diff --git a/pkg/grpc/proto/llmserver.pb.go b/pkg/grpc/proto/llmserver.pb.go new file mode 100644 index 0000000..067c3a1 --- /dev/null +++ b/pkg/grpc/proto/llmserver.pb.go @@ -0,0 +1,870 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.26.0 +// protoc v3.15.8 +// source: pkg/grpc/proto/llmserver.proto + +package proto + +import ( + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" + sync "sync" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +type HealthMessage struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields +} + +func (x *HealthMessage) Reset() { + *x = HealthMessage{} + if protoimpl.UnsafeEnabled { + mi := &file_pkg_grpc_proto_llmserver_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *HealthMessage) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*HealthMessage) ProtoMessage() {} + +func (x *HealthMessage) ProtoReflect() protoreflect.Message { + mi := &file_pkg_grpc_proto_llmserver_proto_msgTypes[0] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use HealthMessage.ProtoReflect.Descriptor instead. 
+func (*HealthMessage) Descriptor() ([]byte, []int) { + return file_pkg_grpc_proto_llmserver_proto_rawDescGZIP(), []int{0} +} + +// The request message containing the user's name. +type PredictOptions struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Prompt string `protobuf:"bytes,1,opt,name=Prompt,proto3" json:"Prompt,omitempty"` + Seed int32 `protobuf:"varint,2,opt,name=Seed,proto3" json:"Seed,omitempty"` + Threads int32 `protobuf:"varint,3,opt,name=Threads,proto3" json:"Threads,omitempty"` + Tokens int32 `protobuf:"varint,4,opt,name=Tokens,proto3" json:"Tokens,omitempty"` + TopK int32 `protobuf:"varint,5,opt,name=TopK,proto3" json:"TopK,omitempty"` + Repeat int32 `protobuf:"varint,6,opt,name=Repeat,proto3" json:"Repeat,omitempty"` + Batch int32 `protobuf:"varint,7,opt,name=Batch,proto3" json:"Batch,omitempty"` + NKeep int32 `protobuf:"varint,8,opt,name=NKeep,proto3" json:"NKeep,omitempty"` + Temperature float32 `protobuf:"fixed32,9,opt,name=Temperature,proto3" json:"Temperature,omitempty"` + Penalty float32 `protobuf:"fixed32,10,opt,name=Penalty,proto3" json:"Penalty,omitempty"` + F16KV bool `protobuf:"varint,11,opt,name=F16KV,proto3" json:"F16KV,omitempty"` + DebugMode bool `protobuf:"varint,12,opt,name=DebugMode,proto3" json:"DebugMode,omitempty"` + StopPrompts []string `protobuf:"bytes,13,rep,name=StopPrompts,proto3" json:"StopPrompts,omitempty"` + IgnoreEOS bool `protobuf:"varint,14,opt,name=IgnoreEOS,proto3" json:"IgnoreEOS,omitempty"` + TailFreeSamplingZ float32 `protobuf:"fixed32,15,opt,name=TailFreeSamplingZ,proto3" json:"TailFreeSamplingZ,omitempty"` + TypicalP float32 `protobuf:"fixed32,16,opt,name=TypicalP,proto3" json:"TypicalP,omitempty"` + FrequencyPenalty float32 `protobuf:"fixed32,17,opt,name=FrequencyPenalty,proto3" json:"FrequencyPenalty,omitempty"` + PresencePenalty float32 `protobuf:"fixed32,18,opt,name=PresencePenalty,proto3" json:"PresencePenalty,omitempty"` + Mirostat int32 
`protobuf:"varint,19,opt,name=Mirostat,proto3" json:"Mirostat,omitempty"` + MirostatETA float32 `protobuf:"fixed32,20,opt,name=MirostatETA,proto3" json:"MirostatETA,omitempty"` + MirostatTAU float32 `protobuf:"fixed32,21,opt,name=MirostatTAU,proto3" json:"MirostatTAU,omitempty"` + PenalizeNL bool `protobuf:"varint,22,opt,name=PenalizeNL,proto3" json:"PenalizeNL,omitempty"` + LogitBias string `protobuf:"bytes,23,opt,name=LogitBias,proto3" json:"LogitBias,omitempty"` + PathPromptCache string `protobuf:"bytes,24,opt,name=PathPromptCache,proto3" json:"PathPromptCache,omitempty"` + MLock bool `protobuf:"varint,25,opt,name=MLock,proto3" json:"MLock,omitempty"` + MMap bool `protobuf:"varint,26,opt,name=MMap,proto3" json:"MMap,omitempty"` + PromptCacheAll bool `protobuf:"varint,27,opt,name=PromptCacheAll,proto3" json:"PromptCacheAll,omitempty"` + PromptCacheRO bool `protobuf:"varint,28,opt,name=PromptCacheRO,proto3" json:"PromptCacheRO,omitempty"` + Grammar string `protobuf:"bytes,29,opt,name=Grammar,proto3" json:"Grammar,omitempty"` + MainGPU string `protobuf:"bytes,30,opt,name=MainGPU,proto3" json:"MainGPU,omitempty"` + TensorSplit string `protobuf:"bytes,31,opt,name=TensorSplit,proto3" json:"TensorSplit,omitempty"` + TopP float32 `protobuf:"fixed32,32,opt,name=TopP,proto3" json:"TopP,omitempty"` + PromptCachePath string `protobuf:"bytes,33,opt,name=PromptCachePath,proto3" json:"PromptCachePath,omitempty"` + Debug bool `protobuf:"varint,34,opt,name=Debug,proto3" json:"Debug,omitempty"` +} + +func (x *PredictOptions) Reset() { + *x = PredictOptions{} + if protoimpl.UnsafeEnabled { + mi := &file_pkg_grpc_proto_llmserver_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *PredictOptions) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*PredictOptions) ProtoMessage() {} + +func (x *PredictOptions) ProtoReflect() protoreflect.Message { + mi := 
&file_pkg_grpc_proto_llmserver_proto_msgTypes[1] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use PredictOptions.ProtoReflect.Descriptor instead. +func (*PredictOptions) Descriptor() ([]byte, []int) { + return file_pkg_grpc_proto_llmserver_proto_rawDescGZIP(), []int{1} +} + +func (x *PredictOptions) GetPrompt() string { + if x != nil { + return x.Prompt + } + return "" +} + +func (x *PredictOptions) GetSeed() int32 { + if x != nil { + return x.Seed + } + return 0 +} + +func (x *PredictOptions) GetThreads() int32 { + if x != nil { + return x.Threads + } + return 0 +} + +func (x *PredictOptions) GetTokens() int32 { + if x != nil { + return x.Tokens + } + return 0 +} + +func (x *PredictOptions) GetTopK() int32 { + if x != nil { + return x.TopK + } + return 0 +} + +func (x *PredictOptions) GetRepeat() int32 { + if x != nil { + return x.Repeat + } + return 0 +} + +func (x *PredictOptions) GetBatch() int32 { + if x != nil { + return x.Batch + } + return 0 +} + +func (x *PredictOptions) GetNKeep() int32 { + if x != nil { + return x.NKeep + } + return 0 +} + +func (x *PredictOptions) GetTemperature() float32 { + if x != nil { + return x.Temperature + } + return 0 +} + +func (x *PredictOptions) GetPenalty() float32 { + if x != nil { + return x.Penalty + } + return 0 +} + +func (x *PredictOptions) GetF16KV() bool { + if x != nil { + return x.F16KV + } + return false +} + +func (x *PredictOptions) GetDebugMode() bool { + if x != nil { + return x.DebugMode + } + return false +} + +func (x *PredictOptions) GetStopPrompts() []string { + if x != nil { + return x.StopPrompts + } + return nil +} + +func (x *PredictOptions) GetIgnoreEOS() bool { + if x != nil { + return x.IgnoreEOS + } + return false +} + +func (x *PredictOptions) GetTailFreeSamplingZ() float32 { + if x != nil { + return 
x.TailFreeSamplingZ + } + return 0 +} + +func (x *PredictOptions) GetTypicalP() float32 { + if x != nil { + return x.TypicalP + } + return 0 +} + +func (x *PredictOptions) GetFrequencyPenalty() float32 { + if x != nil { + return x.FrequencyPenalty + } + return 0 +} + +func (x *PredictOptions) GetPresencePenalty() float32 { + if x != nil { + return x.PresencePenalty + } + return 0 +} + +func (x *PredictOptions) GetMirostat() int32 { + if x != nil { + return x.Mirostat + } + return 0 +} + +func (x *PredictOptions) GetMirostatETA() float32 { + if x != nil { + return x.MirostatETA + } + return 0 +} + +func (x *PredictOptions) GetMirostatTAU() float32 { + if x != nil { + return x.MirostatTAU + } + return 0 +} + +func (x *PredictOptions) GetPenalizeNL() bool { + if x != nil { + return x.PenalizeNL + } + return false +} + +func (x *PredictOptions) GetLogitBias() string { + if x != nil { + return x.LogitBias + } + return "" +} + +func (x *PredictOptions) GetPathPromptCache() string { + if x != nil { + return x.PathPromptCache + } + return "" +} + +func (x *PredictOptions) GetMLock() bool { + if x != nil { + return x.MLock + } + return false +} + +func (x *PredictOptions) GetMMap() bool { + if x != nil { + return x.MMap + } + return false +} + +func (x *PredictOptions) GetPromptCacheAll() bool { + if x != nil { + return x.PromptCacheAll + } + return false +} + +func (x *PredictOptions) GetPromptCacheRO() bool { + if x != nil { + return x.PromptCacheRO + } + return false +} + +func (x *PredictOptions) GetGrammar() string { + if x != nil { + return x.Grammar + } + return "" +} + +func (x *PredictOptions) GetMainGPU() string { + if x != nil { + return x.MainGPU + } + return "" +} + +func (x *PredictOptions) GetTensorSplit() string { + if x != nil { + return x.TensorSplit + } + return "" +} + +func (x *PredictOptions) GetTopP() float32 { + if x != nil { + return x.TopP + } + return 0 +} + +func (x *PredictOptions) GetPromptCachePath() string { + if x != nil { + return 
x.PromptCachePath + } + return "" +} + +func (x *PredictOptions) GetDebug() bool { + if x != nil { + return x.Debug + } + return false +} + +// The response message containing the result +type Reply struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Message string `protobuf:"bytes,1,opt,name=message,proto3" json:"message,omitempty"` +} + +func (x *Reply) Reset() { + *x = Reply{} + if protoimpl.UnsafeEnabled { + mi := &file_pkg_grpc_proto_llmserver_proto_msgTypes[2] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *Reply) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Reply) ProtoMessage() {} + +func (x *Reply) ProtoReflect() protoreflect.Message { + mi := &file_pkg_grpc_proto_llmserver_proto_msgTypes[2] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Reply.ProtoReflect.Descriptor instead. 
+func (*Reply) Descriptor() ([]byte, []int) { + return file_pkg_grpc_proto_llmserver_proto_rawDescGZIP(), []int{2} +} + +func (x *Reply) GetMessage() string { + if x != nil { + return x.Message + } + return "" +} + +type ModelOptions struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Model string `protobuf:"bytes,1,opt,name=Model,proto3" json:"Model,omitempty"` + ContextSize int32 `protobuf:"varint,2,opt,name=ContextSize,proto3" json:"ContextSize,omitempty"` + Seed int32 `protobuf:"varint,3,opt,name=Seed,proto3" json:"Seed,omitempty"` + NBatch int32 `protobuf:"varint,4,opt,name=NBatch,proto3" json:"NBatch,omitempty"` + F16Memory bool `protobuf:"varint,5,opt,name=F16Memory,proto3" json:"F16Memory,omitempty"` + MLock bool `protobuf:"varint,6,opt,name=MLock,proto3" json:"MLock,omitempty"` + MMap bool `protobuf:"varint,7,opt,name=MMap,proto3" json:"MMap,omitempty"` + VocabOnly bool `protobuf:"varint,8,opt,name=VocabOnly,proto3" json:"VocabOnly,omitempty"` + LowVRAM bool `protobuf:"varint,9,opt,name=LowVRAM,proto3" json:"LowVRAM,omitempty"` + Embeddings bool `protobuf:"varint,10,opt,name=Embeddings,proto3" json:"Embeddings,omitempty"` + NUMA bool `protobuf:"varint,11,opt,name=NUMA,proto3" json:"NUMA,omitempty"` + NGPULayers int32 `protobuf:"varint,12,opt,name=NGPULayers,proto3" json:"NGPULayers,omitempty"` + MainGPU string `protobuf:"bytes,13,opt,name=MainGPU,proto3" json:"MainGPU,omitempty"` + TensorSplit string `protobuf:"bytes,14,opt,name=TensorSplit,proto3" json:"TensorSplit,omitempty"` +} + +func (x *ModelOptions) Reset() { + *x = ModelOptions{} + if protoimpl.UnsafeEnabled { + mi := &file_pkg_grpc_proto_llmserver_proto_msgTypes[3] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *ModelOptions) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*ModelOptions) ProtoMessage() {} + +func (x *ModelOptions) ProtoReflect() 
protoreflect.Message { + mi := &file_pkg_grpc_proto_llmserver_proto_msgTypes[3] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use ModelOptions.ProtoReflect.Descriptor instead. +func (*ModelOptions) Descriptor() ([]byte, []int) { + return file_pkg_grpc_proto_llmserver_proto_rawDescGZIP(), []int{3} +} + +func (x *ModelOptions) GetModel() string { + if x != nil { + return x.Model + } + return "" +} + +func (x *ModelOptions) GetContextSize() int32 { + if x != nil { + return x.ContextSize + } + return 0 +} + +func (x *ModelOptions) GetSeed() int32 { + if x != nil { + return x.Seed + } + return 0 +} + +func (x *ModelOptions) GetNBatch() int32 { + if x != nil { + return x.NBatch + } + return 0 +} + +func (x *ModelOptions) GetF16Memory() bool { + if x != nil { + return x.F16Memory + } + return false +} + +func (x *ModelOptions) GetMLock() bool { + if x != nil { + return x.MLock + } + return false +} + +func (x *ModelOptions) GetMMap() bool { + if x != nil { + return x.MMap + } + return false +} + +func (x *ModelOptions) GetVocabOnly() bool { + if x != nil { + return x.VocabOnly + } + return false +} + +func (x *ModelOptions) GetLowVRAM() bool { + if x != nil { + return x.LowVRAM + } + return false +} + +func (x *ModelOptions) GetEmbeddings() bool { + if x != nil { + return x.Embeddings + } + return false +} + +func (x *ModelOptions) GetNUMA() bool { + if x != nil { + return x.NUMA + } + return false +} + +func (x *ModelOptions) GetNGPULayers() int32 { + if x != nil { + return x.NGPULayers + } + return 0 +} + +func (x *ModelOptions) GetMainGPU() string { + if x != nil { + return x.MainGPU + } + return "" +} + +func (x *ModelOptions) GetTensorSplit() string { + if x != nil { + return x.TensorSplit + } + return "" +} + +type Result struct { + state protoimpl.MessageState + sizeCache 
protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Message string `protobuf:"bytes,1,opt,name=message,proto3" json:"message,omitempty"` + Success bool `protobuf:"varint,2,opt,name=success,proto3" json:"success,omitempty"` +} + +func (x *Result) Reset() { + *x = Result{} + if protoimpl.UnsafeEnabled { + mi := &file_pkg_grpc_proto_llmserver_proto_msgTypes[4] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *Result) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Result) ProtoMessage() {} + +func (x *Result) ProtoReflect() protoreflect.Message { + mi := &file_pkg_grpc_proto_llmserver_proto_msgTypes[4] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Result.ProtoReflect.Descriptor instead. +func (*Result) Descriptor() ([]byte, []int) { + return file_pkg_grpc_proto_llmserver_proto_rawDescGZIP(), []int{4} +} + +func (x *Result) GetMessage() string { + if x != nil { + return x.Message + } + return "" +} + +func (x *Result) GetSuccess() bool { + if x != nil { + return x.Success + } + return false +} + +var File_pkg_grpc_proto_llmserver_proto protoreflect.FileDescriptor + +var file_pkg_grpc_proto_llmserver_proto_rawDesc = []byte{ + 0x0a, 0x1e, 0x70, 0x6b, 0x67, 0x2f, 0x67, 0x72, 0x70, 0x63, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x2f, 0x6c, 0x6c, 0x6d, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x12, 0x03, 0x6c, 0x6c, 0x6d, 0x22, 0x0f, 0x0a, 0x0d, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x4d, + 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, 0x80, 0x08, 0x0a, 0x0e, 0x50, 0x72, 0x65, 0x64, 0x69, + 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x16, 0x0a, 0x06, 0x50, 0x72, 0x6f, + 0x6d, 0x70, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x50, 0x72, 0x6f, 0x6d, 0x70, + 
0x74, 0x12, 0x12, 0x0a, 0x04, 0x53, 0x65, 0x65, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, + 0x04, 0x53, 0x65, 0x65, 0x64, 0x12, 0x18, 0x0a, 0x07, 0x54, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, + 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, 0x07, 0x54, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x12, + 0x16, 0x0a, 0x06, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, + 0x06, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x54, 0x6f, 0x70, 0x4b, 0x18, + 0x05, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x54, 0x6f, 0x70, 0x4b, 0x12, 0x16, 0x0a, 0x06, 0x52, + 0x65, 0x70, 0x65, 0x61, 0x74, 0x18, 0x06, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x52, 0x65, 0x70, + 0x65, 0x61, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x42, 0x61, 0x74, 0x63, 0x68, 0x18, 0x07, 0x20, 0x01, + 0x28, 0x05, 0x52, 0x05, 0x42, 0x61, 0x74, 0x63, 0x68, 0x12, 0x14, 0x0a, 0x05, 0x4e, 0x4b, 0x65, + 0x65, 0x70, 0x18, 0x08, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x4e, 0x4b, 0x65, 0x65, 0x70, 0x12, + 0x20, 0x0a, 0x0b, 0x54, 0x65, 0x6d, 0x70, 0x65, 0x72, 0x61, 0x74, 0x75, 0x72, 0x65, 0x18, 0x09, + 0x20, 0x01, 0x28, 0x02, 0x52, 0x0b, 0x54, 0x65, 0x6d, 0x70, 0x65, 0x72, 0x61, 0x74, 0x75, 0x72, + 0x65, 0x12, 0x18, 0x0a, 0x07, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x18, 0x0a, 0x20, 0x01, + 0x28, 0x02, 0x52, 0x07, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x46, + 0x31, 0x36, 0x4b, 0x56, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x08, 0x52, 0x05, 0x46, 0x31, 0x36, 0x4b, + 0x56, 0x12, 0x1c, 0x0a, 0x09, 0x44, 0x65, 0x62, 0x75, 0x67, 0x4d, 0x6f, 0x64, 0x65, 0x18, 0x0c, + 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x44, 0x65, 0x62, 0x75, 0x67, 0x4d, 0x6f, 0x64, 0x65, 0x12, + 0x20, 0x0a, 0x0b, 0x53, 0x74, 0x6f, 0x70, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x73, 0x18, 0x0d, + 0x20, 0x03, 0x28, 0x09, 0x52, 0x0b, 0x53, 0x74, 0x6f, 0x70, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, + 0x73, 0x12, 0x1c, 0x0a, 0x09, 0x49, 0x67, 0x6e, 0x6f, 0x72, 0x65, 0x45, 0x4f, 0x53, 0x18, 0x0e, + 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 
0x49, 0x67, 0x6e, 0x6f, 0x72, 0x65, 0x45, 0x4f, 0x53, 0x12, + 0x2c, 0x0a, 0x11, 0x54, 0x61, 0x69, 0x6c, 0x46, 0x72, 0x65, 0x65, 0x53, 0x61, 0x6d, 0x70, 0x6c, + 0x69, 0x6e, 0x67, 0x5a, 0x18, 0x0f, 0x20, 0x01, 0x28, 0x02, 0x52, 0x11, 0x54, 0x61, 0x69, 0x6c, + 0x46, 0x72, 0x65, 0x65, 0x53, 0x61, 0x6d, 0x70, 0x6c, 0x69, 0x6e, 0x67, 0x5a, 0x12, 0x1a, 0x0a, + 0x08, 0x54, 0x79, 0x70, 0x69, 0x63, 0x61, 0x6c, 0x50, 0x18, 0x10, 0x20, 0x01, 0x28, 0x02, 0x52, + 0x08, 0x54, 0x79, 0x70, 0x69, 0x63, 0x61, 0x6c, 0x50, 0x12, 0x2a, 0x0a, 0x10, 0x46, 0x72, 0x65, + 0x71, 0x75, 0x65, 0x6e, 0x63, 0x79, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x18, 0x11, 0x20, + 0x01, 0x28, 0x02, 0x52, 0x10, 0x46, 0x72, 0x65, 0x71, 0x75, 0x65, 0x6e, 0x63, 0x79, 0x50, 0x65, + 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x12, 0x28, 0x0a, 0x0f, 0x50, 0x72, 0x65, 0x73, 0x65, 0x6e, 0x63, + 0x65, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x18, 0x12, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0f, + 0x50, 0x72, 0x65, 0x73, 0x65, 0x6e, 0x63, 0x65, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x12, + 0x1a, 0x0a, 0x08, 0x4d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x18, 0x13, 0x20, 0x01, 0x28, + 0x05, 0x52, 0x08, 0x4d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x12, 0x20, 0x0a, 0x0b, 0x4d, + 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x45, 0x54, 0x41, 0x18, 0x14, 0x20, 0x01, 0x28, 0x02, + 0x52, 0x0b, 0x4d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x45, 0x54, 0x41, 0x12, 0x20, 0x0a, + 0x0b, 0x4d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x54, 0x41, 0x55, 0x18, 0x15, 0x20, 0x01, + 0x28, 0x02, 0x52, 0x0b, 0x4d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x54, 0x41, 0x55, 0x12, + 0x1e, 0x0a, 0x0a, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x4e, 0x4c, 0x18, 0x16, 0x20, + 0x01, 0x28, 0x08, 0x52, 0x0a, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x4e, 0x4c, 0x12, + 0x1c, 0x0a, 0x09, 0x4c, 0x6f, 0x67, 0x69, 0x74, 0x42, 0x69, 0x61, 0x73, 0x18, 0x17, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x09, 0x4c, 0x6f, 0x67, 0x69, 0x74, 0x42, 0x69, 0x61, 
0x73, 0x12, 0x28, 0x0a, + 0x0f, 0x50, 0x61, 0x74, 0x68, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, + 0x18, 0x18, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0f, 0x50, 0x61, 0x74, 0x68, 0x50, 0x72, 0x6f, 0x6d, + 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x4d, 0x4c, 0x6f, 0x63, 0x6b, + 0x18, 0x19, 0x20, 0x01, 0x28, 0x08, 0x52, 0x05, 0x4d, 0x4c, 0x6f, 0x63, 0x6b, 0x12, 0x12, 0x0a, + 0x04, 0x4d, 0x4d, 0x61, 0x70, 0x18, 0x1a, 0x20, 0x01, 0x28, 0x08, 0x52, 0x04, 0x4d, 0x4d, 0x61, + 0x70, 0x12, 0x26, 0x0a, 0x0e, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, + 0x41, 0x6c, 0x6c, 0x18, 0x1b, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0e, 0x50, 0x72, 0x6f, 0x6d, 0x70, + 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x41, 0x6c, 0x6c, 0x12, 0x24, 0x0a, 0x0d, 0x50, 0x72, 0x6f, + 0x6d, 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x52, 0x4f, 0x18, 0x1c, 0x20, 0x01, 0x28, 0x08, + 0x52, 0x0d, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x52, 0x4f, 0x12, + 0x18, 0x0a, 0x07, 0x47, 0x72, 0x61, 0x6d, 0x6d, 0x61, 0x72, 0x18, 0x1d, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x07, 0x47, 0x72, 0x61, 0x6d, 0x6d, 0x61, 0x72, 0x12, 0x18, 0x0a, 0x07, 0x4d, 0x61, 0x69, + 0x6e, 0x47, 0x50, 0x55, 0x18, 0x1e, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x4d, 0x61, 0x69, 0x6e, + 0x47, 0x50, 0x55, 0x12, 0x20, 0x0a, 0x0b, 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53, 0x70, 0x6c, + 0x69, 0x74, 0x18, 0x1f, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, + 0x53, 0x70, 0x6c, 0x69, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x54, 0x6f, 0x70, 0x50, 0x18, 0x20, 0x20, + 0x01, 0x28, 0x02, 0x52, 0x04, 0x54, 0x6f, 0x70, 0x50, 0x12, 0x28, 0x0a, 0x0f, 0x50, 0x72, 0x6f, + 0x6d, 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x50, 0x61, 0x74, 0x68, 0x18, 0x21, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x0f, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x50, + 0x61, 0x74, 0x68, 0x12, 0x14, 0x0a, 0x05, 0x44, 0x65, 0x62, 0x75, 0x67, 0x18, 0x22, 0x20, 0x01, + 0x28, 0x08, 
0x52, 0x05, 0x44, 0x65, 0x62, 0x75, 0x67, 0x22, 0x21, 0x0a, 0x05, 0x52, 0x65, 0x70, + 0x6c, 0x79, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, 0x82, 0x03, 0x0a, + 0x0c, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x14, 0x0a, + 0x05, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x4d, 0x6f, + 0x64, 0x65, 0x6c, 0x12, 0x20, 0x0a, 0x0b, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x53, 0x69, + 0x7a, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0b, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, + 0x74, 0x53, 0x69, 0x7a, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x53, 0x65, 0x65, 0x64, 0x18, 0x03, 0x20, + 0x01, 0x28, 0x05, 0x52, 0x04, 0x53, 0x65, 0x65, 0x64, 0x12, 0x16, 0x0a, 0x06, 0x4e, 0x42, 0x61, + 0x74, 0x63, 0x68, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x4e, 0x42, 0x61, 0x74, 0x63, + 0x68, 0x12, 0x1c, 0x0a, 0x09, 0x46, 0x31, 0x36, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x18, 0x05, + 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x46, 0x31, 0x36, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x12, + 0x14, 0x0a, 0x05, 0x4d, 0x4c, 0x6f, 0x63, 0x6b, 0x18, 0x06, 0x20, 0x01, 0x28, 0x08, 0x52, 0x05, + 0x4d, 0x4c, 0x6f, 0x63, 0x6b, 0x12, 0x12, 0x0a, 0x04, 0x4d, 0x4d, 0x61, 0x70, 0x18, 0x07, 0x20, + 0x01, 0x28, 0x08, 0x52, 0x04, 0x4d, 0x4d, 0x61, 0x70, 0x12, 0x1c, 0x0a, 0x09, 0x56, 0x6f, 0x63, + 0x61, 0x62, 0x4f, 0x6e, 0x6c, 0x79, 0x18, 0x08, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x56, 0x6f, + 0x63, 0x61, 0x62, 0x4f, 0x6e, 0x6c, 0x79, 0x12, 0x18, 0x0a, 0x07, 0x4c, 0x6f, 0x77, 0x56, 0x52, + 0x41, 0x4d, 0x18, 0x09, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x4c, 0x6f, 0x77, 0x56, 0x52, 0x41, + 0x4d, 0x12, 0x1e, 0x0a, 0x0a, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x18, + 0x0a, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0a, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, + 0x73, 0x12, 0x12, 0x0a, 0x04, 0x4e, 0x55, 0x4d, 
0x41, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x08, 0x52, + 0x04, 0x4e, 0x55, 0x4d, 0x41, 0x12, 0x1e, 0x0a, 0x0a, 0x4e, 0x47, 0x50, 0x55, 0x4c, 0x61, 0x79, + 0x65, 0x72, 0x73, 0x18, 0x0c, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0a, 0x4e, 0x47, 0x50, 0x55, 0x4c, + 0x61, 0x79, 0x65, 0x72, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x4d, 0x61, 0x69, 0x6e, 0x47, 0x50, 0x55, + 0x18, 0x0d, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x4d, 0x61, 0x69, 0x6e, 0x47, 0x50, 0x55, 0x12, + 0x20, 0x0a, 0x0b, 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53, 0x70, 0x6c, 0x69, 0x74, 0x18, 0x0e, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53, 0x70, 0x6c, 0x69, + 0x74, 0x22, 0x3c, 0x0a, 0x06, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x18, 0x0a, 0x07, 0x6d, + 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, + 0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, + 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x32, + 0xc4, 0x01, 0x0a, 0x03, 0x4c, 0x4c, 0x4d, 0x12, 0x2a, 0x0a, 0x06, 0x48, 0x65, 0x61, 0x6c, 0x74, + 0x68, 0x12, 0x12, 0x2e, 0x6c, 0x6c, 0x6d, 0x2e, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x4d, 0x65, + 0x73, 0x73, 0x61, 0x67, 0x65, 0x1a, 0x0a, 0x2e, 0x6c, 0x6c, 0x6d, 0x2e, 0x52, 0x65, 0x70, 0x6c, + 0x79, 0x22, 0x00, 0x12, 0x2c, 0x0a, 0x07, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x12, 0x13, + 0x2e, 0x6c, 0x6c, 0x6d, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, + 0x6f, 0x6e, 0x73, 0x1a, 0x0a, 0x2e, 0x6c, 0x6c, 0x6d, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, + 0x00, 0x12, 0x2d, 0x0a, 0x09, 0x4c, 0x6f, 0x61, 0x64, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x11, + 0x2e, 0x6c, 0x6c, 0x6d, 0x2e, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, + 0x73, 0x1a, 0x0b, 0x2e, 0x6c, 0x6c, 0x6d, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, + 0x12, 0x34, 0x0a, 0x0d, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x53, 0x74, 0x72, 
0x65, 0x61, + 0x6d, 0x12, 0x13, 0x2e, 0x6c, 0x6c, 0x6d, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, + 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0a, 0x2e, 0x6c, 0x6c, 0x6d, 0x2e, 0x52, 0x65, 0x70, + 0x6c, 0x79, 0x22, 0x00, 0x30, 0x01, 0x42, 0x57, 0x0a, 0x1b, 0x69, 0x6f, 0x2e, 0x73, 0x6b, 0x79, + 0x6e, 0x65, 0x74, 0x2e, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x61, 0x69, 0x2e, 0x6c, 0x6c, 0x6d, 0x73, + 0x65, 0x72, 0x76, 0x65, 0x72, 0x42, 0x09, 0x4c, 0x4c, 0x4d, 0x53, 0x65, 0x72, 0x76, 0x65, 0x72, + 0x50, 0x01, 0x5a, 0x2b, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x67, + 0x6f, 0x2d, 0x73, 0x6b, 0x79, 0x6e, 0x65, 0x74, 0x2f, 0x4c, 0x6f, 0x63, 0x61, 0x6c, 0x41, 0x49, + 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x67, 0x72, 0x70, 0x63, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, + 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, +} + +var ( + file_pkg_grpc_proto_llmserver_proto_rawDescOnce sync.Once + file_pkg_grpc_proto_llmserver_proto_rawDescData = file_pkg_grpc_proto_llmserver_proto_rawDesc +) + +func file_pkg_grpc_proto_llmserver_proto_rawDescGZIP() []byte { + file_pkg_grpc_proto_llmserver_proto_rawDescOnce.Do(func() { + file_pkg_grpc_proto_llmserver_proto_rawDescData = protoimpl.X.CompressGZIP(file_pkg_grpc_proto_llmserver_proto_rawDescData) + }) + return file_pkg_grpc_proto_llmserver_proto_rawDescData +} + +var file_pkg_grpc_proto_llmserver_proto_msgTypes = make([]protoimpl.MessageInfo, 5) +var file_pkg_grpc_proto_llmserver_proto_goTypes = []interface{}{ + (*HealthMessage)(nil), // 0: llm.HealthMessage + (*PredictOptions)(nil), // 1: llm.PredictOptions + (*Reply)(nil), // 2: llm.Reply + (*ModelOptions)(nil), // 3: llm.ModelOptions + (*Result)(nil), // 4: llm.Result +} +var file_pkg_grpc_proto_llmserver_proto_depIdxs = []int32{ + 0, // 0: llm.LLM.Health:input_type -> llm.HealthMessage + 1, // 1: llm.LLM.Predict:input_type -> llm.PredictOptions + 3, // 2: llm.LLM.LoadModel:input_type -> llm.ModelOptions + 1, // 3: llm.LLM.PredictStream:input_type -> 
llm.PredictOptions + 2, // 4: llm.LLM.Health:output_type -> llm.Reply + 2, // 5: llm.LLM.Predict:output_type -> llm.Reply + 4, // 6: llm.LLM.LoadModel:output_type -> llm.Result + 2, // 7: llm.LLM.PredictStream:output_type -> llm.Reply + 4, // [4:8] is the sub-list for method output_type + 0, // [0:4] is the sub-list for method input_type + 0, // [0:0] is the sub-list for extension type_name + 0, // [0:0] is the sub-list for extension extendee + 0, // [0:0] is the sub-list for field type_name +} + +func init() { file_pkg_grpc_proto_llmserver_proto_init() } +func file_pkg_grpc_proto_llmserver_proto_init() { + if File_pkg_grpc_proto_llmserver_proto != nil { + return + } + if !protoimpl.UnsafeEnabled { + file_pkg_grpc_proto_llmserver_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*HealthMessage); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_pkg_grpc_proto_llmserver_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*PredictOptions); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_pkg_grpc_proto_llmserver_proto_msgTypes[2].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*Reply); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_pkg_grpc_proto_llmserver_proto_msgTypes[3].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*ModelOptions); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_pkg_grpc_proto_llmserver_proto_msgTypes[4].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*Result); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return 
&v.unknownFields + default: + return nil + } + } + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: file_pkg_grpc_proto_llmserver_proto_rawDesc, + NumEnums: 0, + NumMessages: 5, + NumExtensions: 0, + NumServices: 1, + }, + GoTypes: file_pkg_grpc_proto_llmserver_proto_goTypes, + DependencyIndexes: file_pkg_grpc_proto_llmserver_proto_depIdxs, + MessageInfos: file_pkg_grpc_proto_llmserver_proto_msgTypes, + }.Build() + File_pkg_grpc_proto_llmserver_proto = out.File + file_pkg_grpc_proto_llmserver_proto_rawDesc = nil + file_pkg_grpc_proto_llmserver_proto_goTypes = nil + file_pkg_grpc_proto_llmserver_proto_depIdxs = nil +} diff --git a/pkg/grpc/proto/llmserver.proto b/pkg/grpc/proto/llmserver.proto new file mode 100644 index 0000000..ba20806 --- /dev/null +++ b/pkg/grpc/proto/llmserver.proto @@ -0,0 +1,82 @@ +syntax = "proto3"; + +option go_package = "github.com/go-skynet/LocalAI/pkg/grpc/proto"; +option java_multiple_files = true; +option java_package = "io.skynet.localai.llmserver"; +option java_outer_classname = "LLMServer"; + +package llm; + +service LLM { + rpc Health(HealthMessage) returns (Reply) {} + rpc Predict(PredictOptions) returns (Reply) {} + rpc LoadModel(ModelOptions) returns (Result) {} + rpc PredictStream(PredictOptions) returns (stream Reply) {} +} + +message HealthMessage {} + +// The request message containing the user's name. 
+message PredictOptions { + string Prompt = 1; + int32 Seed = 2; + int32 Threads = 3; + int32 Tokens = 4; + int32 TopK = 5; + int32 Repeat = 6; + int32 Batch = 7; + int32 NKeep = 8; + float Temperature = 9; + float Penalty = 10; + bool F16KV = 11; + bool DebugMode = 12; + repeated string StopPrompts = 13; + bool IgnoreEOS = 14; + float TailFreeSamplingZ = 15; + float TypicalP = 16; + float FrequencyPenalty = 17; + float PresencePenalty = 18; + int32 Mirostat = 19; + float MirostatETA = 20; + float MirostatTAU = 21; + bool PenalizeNL = 22; + string LogitBias = 23; + string PathPromptCache = 24; + bool MLock = 25; + bool MMap = 26; + bool PromptCacheAll = 27; + bool PromptCacheRO = 28; + string Grammar = 29; + string MainGPU = 30; + string TensorSplit = 31; + float TopP = 32; + string PromptCachePath = 33; + bool Debug = 34; +} + +// The response message containing the result +message Reply { + string message = 1; +} + +message ModelOptions { + string Model = 1; + int32 ContextSize = 2; + int32 Seed = 3; + int32 NBatch = 4; + bool F16Memory = 5; + bool MLock = 6; + bool MMap = 7; + bool VocabOnly = 8; + bool LowVRAM = 9; + bool Embeddings = 10; + bool NUMA = 11; + int32 NGPULayers = 12; + string MainGPU = 13; + string TensorSplit = 14; +} + +message Result { + string message = 1; + bool success = 2; +} \ No newline at end of file diff --git a/pkg/grpc/proto/llmserver_grpc.pb.go b/pkg/grpc/proto/llmserver_grpc.pb.go new file mode 100644 index 0000000..6cfd981 --- /dev/null +++ b/pkg/grpc/proto/llmserver_grpc.pb.go @@ -0,0 +1,241 @@ +// Code generated by protoc-gen-go-grpc. DO NOT EDIT. 
+// versions: +// - protoc-gen-go-grpc v1.2.0 +// - protoc v3.15.8 +// source: pkg/grpc/proto/llmserver.proto + +package proto + +import ( + context "context" + grpc "google.golang.org/grpc" + codes "google.golang.org/grpc/codes" + status "google.golang.org/grpc/status" +) + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the grpc package it is being compiled against. +// Requires gRPC-Go v1.32.0 or later. +const _ = grpc.SupportPackageIsVersion7 + +// LLMClient is the client API for LLM service. +// +// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream. +type LLMClient interface { + Health(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*Reply, error) + Predict(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*Reply, error) + LoadModel(ctx context.Context, in *ModelOptions, opts ...grpc.CallOption) (*Result, error) + PredictStream(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (LLM_PredictStreamClient, error) +} + +type lLMClient struct { + cc grpc.ClientConnInterface +} + +func NewLLMClient(cc grpc.ClientConnInterface) LLMClient { + return &lLMClient{cc} +} + +func (c *lLMClient) Health(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*Reply, error) { + out := new(Reply) + err := c.cc.Invoke(ctx, "/llm.LLM/Health", in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *lLMClient) Predict(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*Reply, error) { + out := new(Reply) + err := c.cc.Invoke(ctx, "/llm.LLM/Predict", in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *lLMClient) LoadModel(ctx context.Context, in *ModelOptions, opts ...grpc.CallOption) (*Result, error) { + out := new(Result) + err := c.cc.Invoke(ctx, "/llm.LLM/LoadModel", in, out, opts...) 
+ if err != nil { + return nil, err + } + return out, nil +} + +func (c *lLMClient) PredictStream(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (LLM_PredictStreamClient, error) { + stream, err := c.cc.NewStream(ctx, &LLM_ServiceDesc.Streams[0], "/llm.LLM/PredictStream", opts...) + if err != nil { + return nil, err + } + x := &lLMPredictStreamClient{stream} + if err := x.ClientStream.SendMsg(in); err != nil { + return nil, err + } + if err := x.ClientStream.CloseSend(); err != nil { + return nil, err + } + return x, nil +} + +type LLM_PredictStreamClient interface { + Recv() (*Reply, error) + grpc.ClientStream +} + +type lLMPredictStreamClient struct { + grpc.ClientStream +} + +func (x *lLMPredictStreamClient) Recv() (*Reply, error) { + m := new(Reply) + if err := x.ClientStream.RecvMsg(m); err != nil { + return nil, err + } + return m, nil +} + +// LLMServer is the server API for LLM service. +// All implementations must embed UnimplementedLLMServer +// for forward compatibility +type LLMServer interface { + Health(context.Context, *HealthMessage) (*Reply, error) + Predict(context.Context, *PredictOptions) (*Reply, error) + LoadModel(context.Context, *ModelOptions) (*Result, error) + PredictStream(*PredictOptions, LLM_PredictStreamServer) error + mustEmbedUnimplementedLLMServer() +} + +// UnimplementedLLMServer must be embedded to have forward compatible implementations. 
+type UnimplementedLLMServer struct { +} + +func (UnimplementedLLMServer) Health(context.Context, *HealthMessage) (*Reply, error) { + return nil, status.Errorf(codes.Unimplemented, "method Health not implemented") +} +func (UnimplementedLLMServer) Predict(context.Context, *PredictOptions) (*Reply, error) { + return nil, status.Errorf(codes.Unimplemented, "method Predict not implemented") +} +func (UnimplementedLLMServer) LoadModel(context.Context, *ModelOptions) (*Result, error) { + return nil, status.Errorf(codes.Unimplemented, "method LoadModel not implemented") +} +func (UnimplementedLLMServer) PredictStream(*PredictOptions, LLM_PredictStreamServer) error { + return status.Errorf(codes.Unimplemented, "method PredictStream not implemented") +} +func (UnimplementedLLMServer) mustEmbedUnimplementedLLMServer() {} + +// UnsafeLLMServer may be embedded to opt out of forward compatibility for this service. +// Use of this interface is not recommended, as added methods to LLMServer will +// result in compilation errors. 
+type UnsafeLLMServer interface { + mustEmbedUnimplementedLLMServer() +} + +func RegisterLLMServer(s grpc.ServiceRegistrar, srv LLMServer) { + s.RegisterService(&LLM_ServiceDesc, srv) +} + +func _LLM_Health_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(HealthMessage) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(LLMServer).Health(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/llm.LLM/Health", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(LLMServer).Health(ctx, req.(*HealthMessage)) + } + return interceptor(ctx, in, info, handler) +} + +func _LLM_Predict_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(PredictOptions) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(LLMServer).Predict(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/llm.LLM/Predict", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(LLMServer).Predict(ctx, req.(*PredictOptions)) + } + return interceptor(ctx, in, info, handler) +} + +func _LLM_LoadModel_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(ModelOptions) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(LLMServer).LoadModel(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/llm.LLM/LoadModel", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(LLMServer).LoadModel(ctx, req.(*ModelOptions)) + } + return interceptor(ctx, in, info, handler) +} + +func 
_LLM_PredictStream_Handler(srv interface{}, stream grpc.ServerStream) error { + m := new(PredictOptions) + if err := stream.RecvMsg(m); err != nil { + return err + } + return srv.(LLMServer).PredictStream(m, &lLMPredictStreamServer{stream}) +} + +type LLM_PredictStreamServer interface { + Send(*Reply) error + grpc.ServerStream +} + +type lLMPredictStreamServer struct { + grpc.ServerStream +} + +func (x *lLMPredictStreamServer) Send(m *Reply) error { + return x.ServerStream.SendMsg(m) +} + +// LLM_ServiceDesc is the grpc.ServiceDesc for LLM service. +// It's only intended for direct use with grpc.RegisterService, +// and not to be introspected or modified (even as a copy) +var LLM_ServiceDesc = grpc.ServiceDesc{ + ServiceName: "llm.LLM", + HandlerType: (*LLMServer)(nil), + Methods: []grpc.MethodDesc{ + { + MethodName: "Health", + Handler: _LLM_Health_Handler, + }, + { + MethodName: "Predict", + Handler: _LLM_Predict_Handler, + }, + { + MethodName: "LoadModel", + Handler: _LLM_LoadModel_Handler, + }, + }, + Streams: []grpc.StreamDesc{ + { + StreamName: "PredictStream", + Handler: _LLM_PredictStream_Handler, + ServerStreams: true, + }, + }, + Metadata: "pkg/grpc/proto/llmserver.proto", +} diff --git a/pkg/grpc/server.go b/pkg/grpc/server.go new file mode 100644 index 0000000..d449593 --- /dev/null +++ b/pkg/grpc/server.go @@ -0,0 +1,76 @@ +package grpc + +import ( + "context" + "fmt" + "log" + "net" + + pb "github.com/go-skynet/LocalAI/pkg/grpc/proto" + "google.golang.org/grpc" +) + +// A GRPC Server that allows to run LLM inference. +// It is used by the LLMServices to expose the LLM functionalities that are called by the client. +// The GRPC Service is general, trying to encompass all the possible LLM options models. +// It depends on the real implementer then what can be done or not. 
+// +// The server is implemented as a GRPC service, with the following methods: +// - Predict: to run the inference with options +// - PredictStream: to run the inference with options and stream the results + +// server is used to implement helloworld.GreeterServer. +type server struct { + pb.UnimplementedLLMServer + llm LLM +} + +func (s *server) Health(ctx context.Context, in *pb.HealthMessage) (*pb.Reply, error) { + return &pb.Reply{Message: "OK"}, nil +} + +func (s *server) LoadModel(ctx context.Context, in *pb.ModelOptions) (*pb.Result, error) { + err := s.llm.Load(in) + if err != nil { + return &pb.Result{Message: fmt.Sprintf("Error loading model: %s", err.Error()), Success: false}, err + } + return &pb.Result{Message: "Loading succeeded", Success: true}, nil +} + +func (s *server) Predict(ctx context.Context, in *pb.PredictOptions) (*pb.Reply, error) { + result, err := s.llm.Predict(in) + return &pb.Reply{Message: result}, err +} + +func (s *server) PredictStream(in *pb.PredictOptions, stream pb.LLM_PredictStreamServer) error { + + resultChan := make(chan string) + + done := make(chan bool) + go func() { + for result := range resultChan { + stream.Send(&pb.Reply{Message: result}) + } + done <- true + }() + + s.llm.PredictStream(in, resultChan) + <-done + + return nil +} + +func StartServer(address string, model LLM) error { + lis, err := net.Listen("tcp", address) + if err != nil { + return err + } + s := grpc.NewServer() + pb.RegisterLLMServer(s, &server{llm: model}) + log.Printf("gRPC Server listening at %v", lis.Addr()) + if err := s.Serve(lis); err != nil { + return err + } + + return nil +} diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go index 3849f85..5dba7ce 100644 --- a/pkg/model/initializers.go +++ b/pkg/model/initializers.go @@ -1,12 +1,16 @@ package model import ( + "context" "fmt" + "os" "path/filepath" "strings" + "time" rwkv "github.com/donomii/go-rwkv.cpp" whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper" + 
grpc "github.com/go-skynet/LocalAI/pkg/grpc" "github.com/go-skynet/LocalAI/pkg/langchain" "github.com/go-skynet/LocalAI/pkg/stablediffusion" "github.com/go-skynet/LocalAI/pkg/tts" @@ -15,8 +19,12 @@ import ( transformers "github.com/go-skynet/go-ggml-transformers.cpp" llama "github.com/go-skynet/go-llama.cpp" "github.com/hashicorp/go-multierror" + "github.com/hpcloud/tail" gpt4all "github.com/nomic-ai/gpt4all/gpt4all-bindings/golang" + "github.com/phayes/freeport" "github.com/rs/zerolog/log" + + process "github.com/mudler/go-processmanager" ) const tokenizerSuffix = ".tokenizer.json" @@ -42,22 +50,24 @@ const ( StableDiffusionBackend = "stablediffusion" PiperBackend = "piper" LCHuggingFaceBackend = "langchain-huggingface" + //GGLLMFalconBackend = "falcon" ) var autoLoadBackends []string = []string{ LlamaBackend, Gpt4All, RwkvBackend, - GPTNeoXBackend, + //GGLLMFalconBackend, WhisperBackend, BertEmbeddingsBackend, + GPTNeoXBackend, GPTJBackend, Gpt2Backend, DollyBackend, - FalconBackend, MPTBackend, ReplitBackend, StarcoderBackend, + FalconBackend, BloomzBackend, } @@ -73,6 +83,12 @@ var dolly = func(modelFile string) (interface{}, error) { return transformers.NewDolly(modelFile) } +// func ggllmFalcon(opts ...ggllm.ModelOption) func(string) (interface{}, error) { +// return func(s string) (interface{}, error) { +// return ggllm.New(s, opts...) 
+// } +// } + var gptNeoX = func(modelFile string) (interface{}, error) { return transformers.NewGPTNeoX(modelFile) } @@ -143,55 +159,157 @@ func rwkvLM(tokenFile string, threads uint32) func(string) (interface{}, error) } } -func (ml *ModelLoader) BackendLoader(backendString string, modelFile string, llamaOpts []llama.ModelOption, threads uint32, assetDir string) (model interface{}, err error) { - log.Debug().Msgf("Loading model %s from %s", backendString, modelFile) - switch strings.ToLower(backendString) {
+// grpcModel returns a loader that starts the gRPC backend binary on a free
+// local port, waits until it answers health checks, asks it to load the model
+// and returns the connected client. The process is stopped again when it does
+// not become ready or cannot load the model.
+func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string) (interface{}, error) {
+	return func(s string) (interface{}, error) {
+		log.Debug().Msgf("Loading GRPC Model %s: %+v", backend, *o)
+		if o.gRPCOptions == nil {
+			return nil, fmt.Errorf("grpc backend %q: no model options provided", backend)
+		}
+
+		grpcProcess := filepath.Join(o.assetDir, "backend-assets", "grpc", backend)
+
+		// Make sure the process is executable
+		if err := os.Chmod(grpcProcess, 0755); err != nil {
+			return nil, err
+		}
+
+		log.Debug().Msgf("Loading GRPC Process %s", grpcProcess)
+		port, err := freeport.GetFreePort()
+		if err != nil {
+			return nil, err
+		}
+		serverAddress := fmt.Sprintf("localhost:%d", port)
+		log.Debug().Msgf("GRPC Service for '%s' (%s) will be running at: '%s'", backend, o.modelFile, serverAddress)
+
+		grpcControlProcess := process.New(
+			process.WithTemporaryStateDir(),
+			process.WithName(grpcProcess),
+			process.WithArgs("--addr", serverAddress))
+		if err := grpcControlProcess.Run(); err != nil {
+			return nil, err
+		}
+		ml.grpcProcesses[o.modelFile] = grpcControlProcess
+
+		// stop tears the backend down again so a failed load leaves no orphan.
+		stop := func() {
+			delete(ml.grpcProcesses, o.modelFile)
+			if err := grpcControlProcess.Stop(); err != nil {
+				log.Debug().Msgf("Could not stop GRPC process %s: %s", grpcProcess, err.Error())
+			}
+		}
+
+		// Mirror the backend's stderr and stdout into the debug log.
+		id := strings.Join([]string{backend, o.modelFile, serverAddress}, "-")
+		tailLog := func(name, path string) {
+			t, err := tail.TailFile(path, tail.Config{Follow: true})
+			if err != nil {
+				log.Debug().Msgf("Could not tail %s", name)
+				return // t is not usable after an error; ranging over t.Lines would panic
+			}
+			for line := range t.Lines {
+				log.Debug().Msgf("GRPC(%s): %s %s", id, name, line.Text)
+			}
+		}
+		go tailLog("stderr", grpcControlProcess.StderrPath())
+		go tailLog("stdout", grpcControlProcess.StdoutPath())
+		log.Debug().Msgf("GRPC Service Started")
+
+		// Wait for the service to start up
+		client := grpc.NewClient(serverAddress)
+		ready := false
+		for i := 0; i < 10; i++ {
+			if client.HealthCheck(context.Background()) {
+				log.Debug().Msgf("GRPC Service Ready")
+				ready = true
+				break
+			}
+			time.Sleep(1 * time.Second)
+		}
+		if !ready {
+			exitCode, exitErr := grpcControlProcess.ExitCode()
+			log.Debug().Msgf("GRPC Service NOT ready (alive: %v, exit code: %q, error: %v)", grpcControlProcess.IsAlive(), exitCode, exitErr)
+			stop()
+			return nil, fmt.Errorf("grpc service not ready")
+		}
+
+		options := *o.gRPCOptions
+		options.Model = s
+		log.Debug().Msgf("GRPC: Loading model with options: %+v", options)
+
+		res, err := client.LoadModel(context.TODO(), &options)
+		if err != nil {
+			stop()
+			return nil, err
+		}
+		if !res.Success {
+			stop()
+			return nil, fmt.Errorf("could not load model: %s", res.Message)
+		}
+
+		return client, nil
+	}
+}
+
+func (ml *ModelLoader) BackendLoader(opts ...Option) (model interface{}, err error) { + + //backendString string, modelFile string, llamaOpts []llama.ModelOption, threads uint32, assetDir string) (model interface{}, err error) { + + o := NewOptions(opts...)
+ + log.Debug().Msgf("Loading model %s from %s", o.backendString, o.modelFile) + switch strings.ToLower(o.backendString) { case LlamaBackend: - return ml.LoadModel(modelFile, llamaLM(llamaOpts...)) + return ml.LoadModel(o.modelFile, llamaLM(o.llamaOpts...)) case BloomzBackend: - return ml.LoadModel(modelFile, bloomzLM) + return ml.LoadModel(o.modelFile, bloomzLM) case GPTJBackend: - return ml.LoadModel(modelFile, gptJ) + return ml.LoadModel(o.modelFile, gptJ) case DollyBackend: - return ml.LoadModel(modelFile, dolly) + return ml.LoadModel(o.modelFile, dolly) case MPTBackend: - return ml.LoadModel(modelFile, mpt) + return ml.LoadModel(o.modelFile, mpt) case Gpt2Backend: - return ml.LoadModel(modelFile, transformersLM) + return ml.LoadModel(o.modelFile, transformersLM) case FalconBackend: - return ml.LoadModel(modelFile, falcon) + return ml.LoadModel(o.modelFile, ml.grpcModel(FalconBackend, o)) case GPTNeoXBackend: - return ml.LoadModel(modelFile, gptNeoX) + return ml.LoadModel(o.modelFile, gptNeoX) case ReplitBackend: - return ml.LoadModel(modelFile, replit) + return ml.LoadModel(o.modelFile, replit) case StableDiffusionBackend: - return ml.LoadModel(modelFile, stableDiffusion) + return ml.LoadModel(o.modelFile, stableDiffusion) case PiperBackend: - return ml.LoadModel(modelFile, piperTTS(filepath.Join(assetDir, "backend-assets", "espeak-ng-data"))) + return ml.LoadModel(o.modelFile, piperTTS(filepath.Join(o.assetDir, "backend-assets", "espeak-ng-data"))) case StarcoderBackend: - return ml.LoadModel(modelFile, starCoder) + return ml.LoadModel(o.modelFile, starCoder) case Gpt4AllLlamaBackend, Gpt4AllMptBackend, Gpt4AllJBackend, Gpt4All: - return ml.LoadModel(modelFile, gpt4allLM(gpt4all.SetThreads(int(threads)), gpt4all.SetLibrarySearchPath(filepath.Join(assetDir, "backend-assets", "gpt4all")))) + return ml.LoadModel(o.modelFile, gpt4allLM(gpt4all.SetThreads(int(o.threads)), gpt4all.SetLibrarySearchPath(filepath.Join(o.assetDir, "backend-assets", "gpt4all")))) case 
BertEmbeddingsBackend: - return ml.LoadModel(modelFile, bertEmbeddings) + return ml.LoadModel(o.modelFile, bertEmbeddings) case RwkvBackend: - return ml.LoadModel(modelFile, rwkvLM(filepath.Join(ml.ModelPath, modelFile+tokenizerSuffix), threads)) + return ml.LoadModel(o.modelFile, rwkvLM(filepath.Join(ml.ModelPath, o.modelFile+tokenizerSuffix), o.threads)) case WhisperBackend: - return ml.LoadModel(modelFile, whisperModel) + return ml.LoadModel(o.modelFile, whisperModel) case LCHuggingFaceBackend: - return ml.LoadModel(modelFile, lcHuggingFace) + return ml.LoadModel(o.modelFile, lcHuggingFace) default: - return nil, fmt.Errorf("backend unsupported: %s", backendString) + return nil, fmt.Errorf("backend unsupported: %s", o.backendString) } } -func (ml *ModelLoader) GreedyLoader(modelFile string, llamaOpts []llama.ModelOption, threads uint32, assetDir string) (interface{}, error) { - log.Debug().Msgf("Loading model '%s' greedly", modelFile) +func (ml *ModelLoader) GreedyLoader(opts ...Option) (interface{}, error) { + o := NewOptions(opts...) 
+ + log.Debug().Msgf("Loading model '%s' greedly", o.modelFile) ml.mu.Lock() - m, exists := ml.models[modelFile] + m, exists := ml.models[o.modelFile] if exists { - log.Debug().Msgf("Model '%s' already loaded", modelFile) + log.Debug().Msgf("Model '%s' already loaded", o.modelFile) ml.mu.Unlock() return m, nil } @@ -203,7 +321,15 @@ func (ml *ModelLoader) GreedyLoader(modelFile string, llamaOpts []llama.ModelOpt continue } log.Debug().Msgf("[%s] Attempting to load", b) - model, modelerr := ml.BackendLoader(b, modelFile, llamaOpts, threads, assetDir) + + model, modelerr := ml.BackendLoader( + WithBackendString(b), + WithModelFile(o.modelFile), + WithLlamaOpts(o.llamaOpts...), + WithLoadGRPCOpts(o.gRPCOptions), + WithThreads(o.threads), + WithAssetDir(o.assetDir), + ) if modelerr == nil && model != nil { log.Debug().Msgf("[%s] Loads OK", b) return model, nil diff --git a/pkg/model/loader.go b/pkg/model/loader.go index ddc7b6e..35f3cef 100644 --- a/pkg/model/loader.go +++ b/pkg/model/loader.go @@ -10,6 +10,7 @@ import ( "sync" "text/template" + process "github.com/mudler/go-processmanager" "github.com/rs/zerolog/log" ) @@ -18,6 +19,7 @@ type ModelLoader struct { mu sync.Mutex // TODO: this needs generics models map[string]interface{} + grpcProcesses map[string]*process.Process promptsTemplates map[string]*template.Template } @@ -26,6 +28,7 @@ func NewModelLoader(modelPath string) *ModelLoader { ModelPath: modelPath, models: make(map[string]interface{}), promptsTemplates: make(map[string]*template.Template), + grpcProcesses: make(map[string]*process.Process), } } diff --git a/pkg/model/options.go b/pkg/model/options.go new file mode 100644 index 0000000..3716330 --- /dev/null +++ b/pkg/model/options.go @@ -0,0 +1,62 @@ +package model + +import ( + pb "github.com/go-skynet/LocalAI/pkg/grpc/proto" + llama "github.com/go-skynet/go-llama.cpp" +) + +type Options struct { + backendString string + modelFile string + llamaOpts []llama.ModelOption + threads uint32 + assetDir 
string + + gRPCOptions *pb.ModelOptions +} + +type Option func(*Options) + +func WithBackendString(backend string) Option { + return func(o *Options) { + o.backendString = backend + } +} + +func WithModelFile(modelFile string) Option { + return func(o *Options) { + o.modelFile = modelFile + } +} + +func WithLoadGRPCOpts(opts *pb.ModelOptions) Option { + return func(o *Options) { + o.gRPCOptions = opts + } +} + +func WithLlamaOpts(opts ...llama.ModelOption) Option { + return func(o *Options) { + o.llamaOpts = append(o.llamaOpts, opts...) + } +} + +func WithThreads(threads uint32) Option { + return func(o *Options) { + o.threads = threads + } +} + +func WithAssetDir(assetDir string) Option { + return func(o *Options) { + o.assetDir = assetDir + } +} + +func NewOptions(opts ...Option) *Options { + o := &Options{} + for _, opt := range opts { + opt(o) + } + return o +}