Signed-off-by: Ettore Di Giacinto <mudler@localai.io>renovate/github.com-imdario-mergo-1.x
parent
a84dee1be1
commit
b816009db0
@ -0,0 +1,25 @@ |
||||
package main

// GRPC Falcon server.
//
// Note: this is started internally by LocalAI and a server is allocated for each model.

import (
	"flag"

	falcon "github.com/go-skynet/LocalAI/pkg/grpc/llm/falcon"

	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
)

// addr is the address this backend's gRPC server binds to.
var addr = flag.String("addr", "localhost:50051", "the address to connect to")

func main() {
	flag.Parse()

	// Serve the falcon backend over gRPC; a startup failure is fatal
	// since this process exists only to host the server.
	err := grpc.StartServer(*addr, &falcon.LLM{})
	if err != nil {
		panic(err)
	}
}
@ -0,0 +1,98 @@ |
||||
package grpc |
||||
|
||||
import ( |
||||
"context" |
||||
"fmt" |
||||
"io" |
||||
"time" |
||||
|
||||
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto" |
||||
"google.golang.org/grpc" |
||||
"google.golang.org/grpc/credentials/insecure" |
||||
) |
||||
|
||||
// Client is a thin gRPC client for a single LLM backend endpoint.
// It stores only the target address; each RPC dials a fresh connection.
type Client struct {
	address string
}

// NewClient returns a Client that will talk to the backend at address.
func NewClient(address string) *Client {
	return &Client{address: address}
}
||||
|
||||
func (c *Client) HealthCheck(ctx context.Context) bool { |
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials())) |
||||
if err != nil { |
||||
fmt.Println(err) |
||||
return false |
||||
} |
||||
defer conn.Close() |
||||
client := pb.NewLLMClient(conn) |
||||
|
||||
// The healthcheck call shouldn't take long time
|
||||
ctx, cancel := context.WithTimeout(ctx, 10*time.Second) |
||||
defer cancel() |
||||
|
||||
res, err := client.Health(ctx, &pb.HealthMessage{}) |
||||
if err != nil { |
||||
fmt.Println(err) |
||||
|
||||
return false |
||||
} |
||||
|
||||
if res.Message == "OK" { |
||||
return true |
||||
} |
||||
return false |
||||
} |
||||
|
||||
func (c *Client) Predict(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.Reply, error) { |
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials())) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
defer conn.Close() |
||||
client := pb.NewLLMClient(conn) |
||||
|
||||
return client.Predict(ctx, in, opts...) |
||||
} |
||||
|
||||
func (c *Client) LoadModel(ctx context.Context, in *pb.ModelOptions, opts ...grpc.CallOption) (*pb.Result, error) { |
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials())) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
defer conn.Close() |
||||
client := pb.NewLLMClient(conn) |
||||
return client.LoadModel(ctx, in, opts...) |
||||
} |
||||
|
||||
func (c *Client) PredictStream(ctx context.Context, in *pb.PredictOptions, f func(s string), opts ...grpc.CallOption) error { |
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials())) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
defer conn.Close() |
||||
client := pb.NewLLMClient(conn) |
||||
|
||||
stream, err := client.PredictStream(ctx, in, opts...) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
|
||||
for { |
||||
feature, err := stream.Recv() |
||||
if err == io.EOF { |
||||
break |
||||
} |
||||
if err != nil { |
||||
fmt.Println("Error", err) |
||||
|
||||
return err |
||||
} |
||||
f(feature.GetMessage()) |
||||
} |
||||
|
||||
return nil |
||||
} |
@ -0,0 +1,11 @@ |
||||
package grpc |
||||
|
||||
import ( |
||||
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto" |
||||
) |
||||
|
||||
// LLM is the contract a model backend must satisfy to be served over
// the gRPC LLM service.
type LLM interface {
	// Predict runs a blocking prediction and returns the full response text.
	Predict(*pb.PredictOptions) (string, error)
	// PredictStream runs a prediction, sending generated tokens on the
	// channel; the implementation is expected to close the channel when
	// the prediction completes (see the falcon backend).
	PredictStream(*pb.PredictOptions, chan string)
	// Load initializes the underlying model from the given options.
	Load(*pb.ModelOptions) error
}
@ -0,0 +1,136 @@ |
||||
package falcon |
||||
|
||||
// This is a wrapper to statisfy the GRPC service interface
|
||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||
import ( |
||||
"fmt" |
||||
|
||||
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto" |
||||
|
||||
ggllm "github.com/mudler/go-ggllm.cpp" |
||||
) |
||||
|
||||
// LLM wraps a go-ggllm Falcon model so it satisfies the gRPC LLM
// service interface.
type LLM struct {
	falcon *ggllm.Falcon // set by Load; nil until a model has been loaded
}
||||
|
||||
func (llm *LLM) Load(opts *pb.ModelOptions) error { |
||||
ggllmOpts := []ggllm.ModelOption{} |
||||
if opts.ContextSize != 0 { |
||||
ggllmOpts = append(ggllmOpts, ggllm.SetContext(int(opts.ContextSize))) |
||||
} |
||||
// F16 doesn't seem to produce good output at all!
|
||||
//if c.F16 {
|
||||
// llamaOpts = append(llamaOpts, llama.EnableF16Memory)
|
||||
//}
|
||||
|
||||
if opts.NGPULayers != 0 { |
||||
ggllmOpts = append(ggllmOpts, ggllm.SetGPULayers(int(opts.NGPULayers))) |
||||
} |
||||
|
||||
ggllmOpts = append(ggllmOpts, ggllm.SetMMap(opts.MMap)) |
||||
ggllmOpts = append(ggllmOpts, ggllm.SetMainGPU(opts.MainGPU)) |
||||
ggllmOpts = append(ggllmOpts, ggllm.SetTensorSplit(opts.TensorSplit)) |
||||
if opts.NBatch != 0 { |
||||
ggllmOpts = append(ggllmOpts, ggllm.SetNBatch(int(opts.NBatch))) |
||||
} else { |
||||
ggllmOpts = append(ggllmOpts, ggllm.SetNBatch(512)) |
||||
} |
||||
|
||||
model, err := ggllm.New(opts.Model, ggllmOpts...) |
||||
llm.falcon = model |
||||
return err |
||||
} |
||||
|
||||
func buildPredictOptions(opts *pb.PredictOptions) []ggllm.PredictOption { |
||||
predictOptions := []ggllm.PredictOption{ |
||||
ggllm.SetTemperature(float64(opts.Temperature)), |
||||
ggllm.SetTopP(float64(opts.TopP)), |
||||
ggllm.SetTopK(int(opts.TopK)), |
||||
ggllm.SetTokens(int(opts.Tokens)), |
||||
ggllm.SetThreads(int(opts.Threads)), |
||||
} |
||||
|
||||
if opts.PromptCacheAll { |
||||
predictOptions = append(predictOptions, ggllm.EnablePromptCacheAll) |
||||
} |
||||
|
||||
if opts.PromptCacheRO { |
||||
predictOptions = append(predictOptions, ggllm.EnablePromptCacheRO) |
||||
} |
||||
|
||||
// Expected absolute path
|
||||
if opts.PromptCachePath != "" { |
||||
predictOptions = append(predictOptions, ggllm.SetPathPromptCache(opts.PromptCachePath)) |
||||
} |
||||
|
||||
if opts.Mirostat != 0 { |
||||
predictOptions = append(predictOptions, ggllm.SetMirostat(int(opts.Mirostat))) |
||||
} |
||||
|
||||
if opts.MirostatETA != 0 { |
||||
predictOptions = append(predictOptions, ggllm.SetMirostatETA(float64(opts.MirostatETA))) |
||||
} |
||||
|
||||
if opts.MirostatTAU != 0 { |
||||
predictOptions = append(predictOptions, ggllm.SetMirostatTAU(float64(opts.MirostatTAU))) |
||||
} |
||||
|
||||
if opts.Debug { |
||||
predictOptions = append(predictOptions, ggllm.Debug) |
||||
} |
||||
|
||||
predictOptions = append(predictOptions, ggllm.SetStopWords(opts.StopPrompts...)) |
||||
|
||||
if opts.PresencePenalty != 0 { |
||||
predictOptions = append(predictOptions, ggllm.SetPenalty(float64(opts.PresencePenalty))) |
||||
} |
||||
|
||||
if opts.NKeep != 0 { |
||||
predictOptions = append(predictOptions, ggllm.SetNKeep(int(opts.NKeep))) |
||||
} |
||||
|
||||
if opts.Batch != 0 { |
||||
predictOptions = append(predictOptions, ggllm.SetBatch(int(opts.Batch))) |
||||
} |
||||
|
||||
if opts.IgnoreEOS { |
||||
predictOptions = append(predictOptions, ggllm.IgnoreEOS) |
||||
} |
||||
|
||||
if opts.Seed != 0 { |
||||
predictOptions = append(predictOptions, ggllm.SetSeed(int(opts.Seed))) |
||||
} |
||||
|
||||
//predictOptions = append(predictOptions, llama.SetLogitBias(c.Seed))
|
||||
|
||||
predictOptions = append(predictOptions, ggllm.SetFrequencyPenalty(float64(opts.FrequencyPenalty))) |
||||
predictOptions = append(predictOptions, ggllm.SetMlock(opts.MLock)) |
||||
predictOptions = append(predictOptions, ggllm.SetMemoryMap(opts.MMap)) |
||||
predictOptions = append(predictOptions, ggllm.SetPredictionMainGPU(opts.MainGPU)) |
||||
predictOptions = append(predictOptions, ggllm.SetPredictionTensorSplit(opts.TensorSplit)) |
||||
predictOptions = append(predictOptions, ggllm.SetTailFreeSamplingZ(float64(opts.TailFreeSamplingZ))) |
||||
predictOptions = append(predictOptions, ggllm.SetTypicalP(float64(opts.TypicalP))) |
||||
return predictOptions |
||||
} |
||||
|
||||
func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) { |
||||
return llm.falcon.Predict(opts.Prompt, buildPredictOptions(opts)...) |
||||
} |
||||
|
||||
func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) { |
||||
predictOptions := buildPredictOptions(opts) |
||||
|
||||
predictOptions = append(predictOptions, ggllm.SetTokenCallback(func(token string) bool { |
||||
results <- token |
||||
return true |
||||
})) |
||||
|
||||
go func() { |
||||
_, err := llm.falcon.Predict(opts.Prompt, predictOptions...) |
||||
if err != nil { |
||||
fmt.Println("err: ", err) |
||||
} |
||||
close(results) |
||||
}() |
||||
} |
@ -0,0 +1,870 @@ |
||||
// Code generated by protoc-gen-go. DO NOT EDIT.
|
||||
// versions:
|
||||
// protoc-gen-go v1.26.0
|
||||
// protoc v3.15.8
|
||||
// source: pkg/grpc/proto/llmserver.proto
|
||||
|
||||
package proto |
||||
|
||||
import ( |
||||
protoreflect "google.golang.org/protobuf/reflect/protoreflect" |
||||
protoimpl "google.golang.org/protobuf/runtime/protoimpl" |
||||
reflect "reflect" |
||||
sync "sync" |
||||
) |
||||
|
||||
const ( |
||||
// Verify that this generated code is sufficiently up-to-date.
|
||||
_ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) |
||||
// Verify that runtime/protoimpl is sufficiently up-to-date.
|
||||
_ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) |
||||
) |
||||
|
||||
type HealthMessage struct { |
||||
state protoimpl.MessageState |
||||
sizeCache protoimpl.SizeCache |
||||
unknownFields protoimpl.UnknownFields |
||||
} |
||||
|
||||
func (x *HealthMessage) Reset() { |
||||
*x = HealthMessage{} |
||||
if protoimpl.UnsafeEnabled { |
||||
mi := &file_pkg_grpc_proto_llmserver_proto_msgTypes[0] |
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) |
||||
ms.StoreMessageInfo(mi) |
||||
} |
||||
} |
||||
|
||||
func (x *HealthMessage) String() string { |
||||
return protoimpl.X.MessageStringOf(x) |
||||
} |
||||
|
||||
func (*HealthMessage) ProtoMessage() {} |
||||
|
||||
func (x *HealthMessage) ProtoReflect() protoreflect.Message { |
||||
mi := &file_pkg_grpc_proto_llmserver_proto_msgTypes[0] |
||||
if protoimpl.UnsafeEnabled && x != nil { |
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) |
||||
if ms.LoadMessageInfo() == nil { |
||||
ms.StoreMessageInfo(mi) |
||||
} |
||||
return ms |
||||
} |
||||
return mi.MessageOf(x) |
||||
} |
||||
|
||||
// Deprecated: Use HealthMessage.ProtoReflect.Descriptor instead.
|
||||
func (*HealthMessage) Descriptor() ([]byte, []int) { |
||||
return file_pkg_grpc_proto_llmserver_proto_rawDescGZIP(), []int{0} |
||||
} |
||||
|
||||
// The request message containing the user's name.
|
||||
type PredictOptions struct { |
||||
state protoimpl.MessageState |
||||
sizeCache protoimpl.SizeCache |
||||
unknownFields protoimpl.UnknownFields |
||||
|
||||
Prompt string `protobuf:"bytes,1,opt,name=Prompt,proto3" json:"Prompt,omitempty"` |
||||
Seed int32 `protobuf:"varint,2,opt,name=Seed,proto3" json:"Seed,omitempty"` |
||||
Threads int32 `protobuf:"varint,3,opt,name=Threads,proto3" json:"Threads,omitempty"` |
||||
Tokens int32 `protobuf:"varint,4,opt,name=Tokens,proto3" json:"Tokens,omitempty"` |
||||
TopK int32 `protobuf:"varint,5,opt,name=TopK,proto3" json:"TopK,omitempty"` |
||||
Repeat int32 `protobuf:"varint,6,opt,name=Repeat,proto3" json:"Repeat,omitempty"` |
||||
Batch int32 `protobuf:"varint,7,opt,name=Batch,proto3" json:"Batch,omitempty"` |
||||
NKeep int32 `protobuf:"varint,8,opt,name=NKeep,proto3" json:"NKeep,omitempty"` |
||||
Temperature float32 `protobuf:"fixed32,9,opt,name=Temperature,proto3" json:"Temperature,omitempty"` |
||||
Penalty float32 `protobuf:"fixed32,10,opt,name=Penalty,proto3" json:"Penalty,omitempty"` |
||||
F16KV bool `protobuf:"varint,11,opt,name=F16KV,proto3" json:"F16KV,omitempty"` |
||||
DebugMode bool `protobuf:"varint,12,opt,name=DebugMode,proto3" json:"DebugMode,omitempty"` |
||||
StopPrompts []string `protobuf:"bytes,13,rep,name=StopPrompts,proto3" json:"StopPrompts,omitempty"` |
||||
IgnoreEOS bool `protobuf:"varint,14,opt,name=IgnoreEOS,proto3" json:"IgnoreEOS,omitempty"` |
||||
TailFreeSamplingZ float32 `protobuf:"fixed32,15,opt,name=TailFreeSamplingZ,proto3" json:"TailFreeSamplingZ,omitempty"` |
||||
TypicalP float32 `protobuf:"fixed32,16,opt,name=TypicalP,proto3" json:"TypicalP,omitempty"` |
||||
FrequencyPenalty float32 `protobuf:"fixed32,17,opt,name=FrequencyPenalty,proto3" json:"FrequencyPenalty,omitempty"` |
||||
PresencePenalty float32 `protobuf:"fixed32,18,opt,name=PresencePenalty,proto3" json:"PresencePenalty,omitempty"` |
||||
Mirostat int32 `protobuf:"varint,19,opt,name=Mirostat,proto3" json:"Mirostat,omitempty"` |
||||
MirostatETA float32 `protobuf:"fixed32,20,opt,name=MirostatETA,proto3" json:"MirostatETA,omitempty"` |
||||
MirostatTAU float32 `protobuf:"fixed32,21,opt,name=MirostatTAU,proto3" json:"MirostatTAU,omitempty"` |
||||
PenalizeNL bool `protobuf:"varint,22,opt,name=PenalizeNL,proto3" json:"PenalizeNL,omitempty"` |
||||
LogitBias string `protobuf:"bytes,23,opt,name=LogitBias,proto3" json:"LogitBias,omitempty"` |
||||
PathPromptCache string `protobuf:"bytes,24,opt,name=PathPromptCache,proto3" json:"PathPromptCache,omitempty"` |
||||
MLock bool `protobuf:"varint,25,opt,name=MLock,proto3" json:"MLock,omitempty"` |
||||
MMap bool `protobuf:"varint,26,opt,name=MMap,proto3" json:"MMap,omitempty"` |
||||
PromptCacheAll bool `protobuf:"varint,27,opt,name=PromptCacheAll,proto3" json:"PromptCacheAll,omitempty"` |
||||
PromptCacheRO bool `protobuf:"varint,28,opt,name=PromptCacheRO,proto3" json:"PromptCacheRO,omitempty"` |
||||
Grammar string `protobuf:"bytes,29,opt,name=Grammar,proto3" json:"Grammar,omitempty"` |
||||
MainGPU string `protobuf:"bytes,30,opt,name=MainGPU,proto3" json:"MainGPU,omitempty"` |
||||
TensorSplit string `protobuf:"bytes,31,opt,name=TensorSplit,proto3" json:"TensorSplit,omitempty"` |
||||
TopP float32 `protobuf:"fixed32,32,opt,name=TopP,proto3" json:"TopP,omitempty"` |
||||
PromptCachePath string `protobuf:"bytes,33,opt,name=PromptCachePath,proto3" json:"PromptCachePath,omitempty"` |
||||
Debug bool `protobuf:"varint,34,opt,name=Debug,proto3" json:"Debug,omitempty"` |
||||
} |
||||
|
||||
func (x *PredictOptions) Reset() { |
||||
*x = PredictOptions{} |
||||
if protoimpl.UnsafeEnabled { |
||||
mi := &file_pkg_grpc_proto_llmserver_proto_msgTypes[1] |
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) |
||||
ms.StoreMessageInfo(mi) |
||||
} |
||||
} |
||||
|
||||
func (x *PredictOptions) String() string { |
||||
return protoimpl.X.MessageStringOf(x) |
||||
} |
||||
|
||||
func (*PredictOptions) ProtoMessage() {} |
||||
|
||||
func (x *PredictOptions) ProtoReflect() protoreflect.Message { |
||||
mi := &file_pkg_grpc_proto_llmserver_proto_msgTypes[1] |
||||
if protoimpl.UnsafeEnabled && x != nil { |
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) |
||||
if ms.LoadMessageInfo() == nil { |
||||
ms.StoreMessageInfo(mi) |
||||
} |
||||
return ms |
||||
} |
||||
return mi.MessageOf(x) |
||||
} |
||||
|
||||
// Deprecated: Use PredictOptions.ProtoReflect.Descriptor instead.
|
||||
func (*PredictOptions) Descriptor() ([]byte, []int) { |
||||
return file_pkg_grpc_proto_llmserver_proto_rawDescGZIP(), []int{1} |
||||
} |
||||
|
||||
func (x *PredictOptions) GetPrompt() string { |
||||
if x != nil { |
||||
return x.Prompt |
||||
} |
||||
return "" |
||||
} |
||||
|
||||
func (x *PredictOptions) GetSeed() int32 { |
||||
if x != nil { |
||||
return x.Seed |
||||
} |
||||
return 0 |
||||
} |
||||
|
||||
func (x *PredictOptions) GetThreads() int32 { |
||||
if x != nil { |
||||
return x.Threads |
||||
} |
||||
return 0 |
||||
} |
||||
|
||||
func (x *PredictOptions) GetTokens() int32 { |
||||
if x != nil { |
||||
return x.Tokens |
||||
} |
||||
return 0 |
||||
} |
||||
|
||||
func (x *PredictOptions) GetTopK() int32 { |
||||
if x != nil { |
||||
return x.TopK |
||||
} |
||||
return 0 |
||||
} |
||||
|
||||
func (x *PredictOptions) GetRepeat() int32 { |
||||
if x != nil { |
||||
return x.Repeat |
||||
} |
||||
return 0 |
||||
} |
||||
|
||||
func (x *PredictOptions) GetBatch() int32 { |
||||
if x != nil { |
||||
return x.Batch |
||||
} |
||||
return 0 |
||||
} |
||||
|
||||
func (x *PredictOptions) GetNKeep() int32 { |
||||
if x != nil { |
||||
return x.NKeep |
||||
} |
||||
return 0 |
||||
} |
||||
|
||||
func (x *PredictOptions) GetTemperature() float32 { |
||||
if x != nil { |
||||
return x.Temperature |
||||
} |
||||
return 0 |
||||
} |
||||
|
||||
func (x *PredictOptions) GetPenalty() float32 { |
||||
if x != nil { |
||||
return x.Penalty |
||||
} |
||||
return 0 |
||||
} |
||||
|
||||
func (x *PredictOptions) GetF16KV() bool { |
||||
if x != nil { |
||||
return x.F16KV |
||||
} |
||||
return false |
||||
} |
||||
|
||||
func (x *PredictOptions) GetDebugMode() bool { |
||||
if x != nil { |
||||
return x.DebugMode |
||||
} |
||||
return false |
||||
} |
||||
|
||||
func (x *PredictOptions) GetStopPrompts() []string { |
||||
if x != nil { |
||||
return x.StopPrompts |
||||
} |
||||
return nil |
||||
} |
||||
|
||||
func (x *PredictOptions) GetIgnoreEOS() bool { |
||||
if x != nil { |
||||
return x.IgnoreEOS |
||||
} |
||||
return false |
||||
} |
||||
|
||||
func (x *PredictOptions) GetTailFreeSamplingZ() float32 { |
||||
if x != nil { |
||||
return x.TailFreeSamplingZ |
||||
} |
||||
return 0 |
||||
} |
||||
|
||||
func (x *PredictOptions) GetTypicalP() float32 { |
||||
if x != nil { |
||||
return x.TypicalP |
||||
} |
||||
return 0 |
||||
} |
||||
|
||||
func (x *PredictOptions) GetFrequencyPenalty() float32 { |
||||
if x != nil { |
||||
return x.FrequencyPenalty |
||||
} |
||||
return 0 |
||||
} |
||||
|
||||
func (x *PredictOptions) GetPresencePenalty() float32 { |
||||
if x != nil { |
||||
return x.PresencePenalty |
||||
} |
||||
return 0 |
||||
} |
||||
|
||||
func (x *PredictOptions) GetMirostat() int32 { |
||||
if x != nil { |
||||
return x.Mirostat |
||||
} |
||||
return 0 |
||||
} |
||||
|
||||
func (x *PredictOptions) GetMirostatETA() float32 { |
||||
if x != nil { |
||||
return x.MirostatETA |
||||
} |
||||
return 0 |
||||
} |
||||
|
||||
func (x *PredictOptions) GetMirostatTAU() float32 { |
||||
if x != nil { |
||||
return x.MirostatTAU |
||||
} |
||||
return 0 |
||||
} |
||||
|
||||
func (x *PredictOptions) GetPenalizeNL() bool { |
||||
if x != nil { |
||||
return x.PenalizeNL |
||||
} |
||||
return false |
||||
} |
||||
|
||||
func (x *PredictOptions) GetLogitBias() string { |
||||
if x != nil { |
||||
return x.LogitBias |
||||
} |
||||
return "" |
||||
} |
||||
|
||||
func (x *PredictOptions) GetPathPromptCache() string { |
||||
if x != nil { |
||||
return x.PathPromptCache |
||||
} |
||||
return "" |
||||
} |
||||
|
||||
func (x *PredictOptions) GetMLock() bool { |
||||
if x != nil { |
||||
return x.MLock |
||||
} |
||||
return false |
||||
} |
||||
|
||||
func (x *PredictOptions) GetMMap() bool { |
||||
if x != nil { |
||||
return x.MMap |
||||
} |
||||
return false |
||||
} |
||||
|
||||
func (x *PredictOptions) GetPromptCacheAll() bool { |
||||
if x != nil { |
||||
return x.PromptCacheAll |
||||
} |
||||
return false |
||||
} |
||||
|
||||
func (x *PredictOptions) GetPromptCacheRO() bool { |
||||
if x != nil { |
||||
return x.PromptCacheRO |
||||
} |
||||
return false |
||||
} |
||||
|
||||
func (x *PredictOptions) GetGrammar() string { |
||||
if x != nil { |
||||
return x.Grammar |
||||
} |
||||
return "" |
||||
} |
||||
|
||||
func (x *PredictOptions) GetMainGPU() string { |
||||
if x != nil { |
||||
return x.MainGPU |
||||
} |
||||
return "" |
||||
} |
||||
|
||||
func (x *PredictOptions) GetTensorSplit() string { |
||||
if x != nil { |
||||
return x.TensorSplit |
||||
} |
||||
return "" |
||||
} |
||||
|
||||
func (x *PredictOptions) GetTopP() float32 { |
||||
if x != nil { |
||||
return x.TopP |
||||
} |
||||
return 0 |
||||
} |
||||
|
||||
func (x *PredictOptions) GetPromptCachePath() string { |
||||
if x != nil { |
||||
return x.PromptCachePath |
||||
} |
||||
return "" |
||||
} |
||||
|
||||
func (x *PredictOptions) GetDebug() bool { |
||||
if x != nil { |
||||
return x.Debug |
||||
} |
||||
return false |
||||
} |
||||
|
||||
// The response message containing the result
|
||||
type Reply struct { |
||||
state protoimpl.MessageState |
||||
sizeCache protoimpl.SizeCache |
||||
unknownFields protoimpl.UnknownFields |
||||
|
||||
Message string `protobuf:"bytes,1,opt,name=message,proto3" json:"message,omitempty"` |
||||
} |
||||
|
||||
func (x *Reply) Reset() { |
||||
*x = Reply{} |
||||
if protoimpl.UnsafeEnabled { |
||||
mi := &file_pkg_grpc_proto_llmserver_proto_msgTypes[2] |
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) |
||||
ms.StoreMessageInfo(mi) |
||||
} |
||||
} |
||||
|
||||
func (x *Reply) String() string { |
||||
return protoimpl.X.MessageStringOf(x) |
||||
} |
||||
|
||||
func (*Reply) ProtoMessage() {} |
||||
|
||||
func (x *Reply) ProtoReflect() protoreflect.Message { |
||||
mi := &file_pkg_grpc_proto_llmserver_proto_msgTypes[2] |
||||
if protoimpl.UnsafeEnabled && x != nil { |
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) |
||||
if ms.LoadMessageInfo() == nil { |
||||
ms.StoreMessageInfo(mi) |
||||
} |
||||
return ms |
||||
} |
||||
return mi.MessageOf(x) |
||||
} |
||||
|
||||
// Deprecated: Use Reply.ProtoReflect.Descriptor instead.
|
||||
func (*Reply) Descriptor() ([]byte, []int) { |
||||
return file_pkg_grpc_proto_llmserver_proto_rawDescGZIP(), []int{2} |
||||
} |
||||
|
||||
func (x *Reply) GetMessage() string { |
||||
if x != nil { |
||||
return x.Message |
||||
} |
||||
return "" |
||||
} |
||||
|
||||
type ModelOptions struct { |
||||
state protoimpl.MessageState |
||||
sizeCache protoimpl.SizeCache |
||||
unknownFields protoimpl.UnknownFields |
||||
|
||||
Model string `protobuf:"bytes,1,opt,name=Model,proto3" json:"Model,omitempty"` |
||||
ContextSize int32 `protobuf:"varint,2,opt,name=ContextSize,proto3" json:"ContextSize,omitempty"` |
||||
Seed int32 `protobuf:"varint,3,opt,name=Seed,proto3" json:"Seed,omitempty"` |
||||
NBatch int32 `protobuf:"varint,4,opt,name=NBatch,proto3" json:"NBatch,omitempty"` |
||||
F16Memory bool `protobuf:"varint,5,opt,name=F16Memory,proto3" json:"F16Memory,omitempty"` |
||||
MLock bool `protobuf:"varint,6,opt,name=MLock,proto3" json:"MLock,omitempty"` |
||||
MMap bool `protobuf:"varint,7,opt,name=MMap,proto3" json:"MMap,omitempty"` |
||||
VocabOnly bool `protobuf:"varint,8,opt,name=VocabOnly,proto3" json:"VocabOnly,omitempty"` |
||||
LowVRAM bool `protobuf:"varint,9,opt,name=LowVRAM,proto3" json:"LowVRAM,omitempty"` |
||||
Embeddings bool `protobuf:"varint,10,opt,name=Embeddings,proto3" json:"Embeddings,omitempty"` |
||||
NUMA bool `protobuf:"varint,11,opt,name=NUMA,proto3" json:"NUMA,omitempty"` |
||||
NGPULayers int32 `protobuf:"varint,12,opt,name=NGPULayers,proto3" json:"NGPULayers,omitempty"` |
||||
MainGPU string `protobuf:"bytes,13,opt,name=MainGPU,proto3" json:"MainGPU,omitempty"` |
||||
TensorSplit string `protobuf:"bytes,14,opt,name=TensorSplit,proto3" json:"TensorSplit,omitempty"` |
||||
} |
||||
|
||||
func (x *ModelOptions) Reset() { |
||||
*x = ModelOptions{} |
||||
if protoimpl.UnsafeEnabled { |
||||
mi := &file_pkg_grpc_proto_llmserver_proto_msgTypes[3] |
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) |
||||
ms.StoreMessageInfo(mi) |
||||
} |
||||
} |
||||
|
||||
func (x *ModelOptions) String() string { |
||||
return protoimpl.X.MessageStringOf(x) |
||||
} |
||||
|
||||
func (*ModelOptions) ProtoMessage() {} |
||||
|
||||
func (x *ModelOptions) ProtoReflect() protoreflect.Message { |
||||
mi := &file_pkg_grpc_proto_llmserver_proto_msgTypes[3] |
||||
if protoimpl.UnsafeEnabled && x != nil { |
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) |
||||
if ms.LoadMessageInfo() == nil { |
||||
ms.StoreMessageInfo(mi) |
||||
} |
||||
return ms |
||||
} |
||||
return mi.MessageOf(x) |
||||
} |
||||
|
||||
// Deprecated: Use ModelOptions.ProtoReflect.Descriptor instead.
|
||||
func (*ModelOptions) Descriptor() ([]byte, []int) { |
||||
return file_pkg_grpc_proto_llmserver_proto_rawDescGZIP(), []int{3} |
||||
} |
||||
|
||||
func (x *ModelOptions) GetModel() string { |
||||
if x != nil { |
||||
return x.Model |
||||
} |
||||
return "" |
||||
} |
||||
|
||||
func (x *ModelOptions) GetContextSize() int32 { |
||||
if x != nil { |
||||
return x.ContextSize |
||||
} |
||||
return 0 |
||||
} |
||||
|
||||
func (x *ModelOptions) GetSeed() int32 { |
||||
if x != nil { |
||||
return x.Seed |
||||
} |
||||
return 0 |
||||
} |
||||
|
||||
func (x *ModelOptions) GetNBatch() int32 { |
||||
if x != nil { |
||||
return x.NBatch |
||||
} |
||||
return 0 |
||||
} |
||||
|
||||
func (x *ModelOptions) GetF16Memory() bool { |
||||
if x != nil { |
||||
return x.F16Memory |
||||
} |
||||
return false |
||||
} |
||||
|
||||
func (x *ModelOptions) GetMLock() bool { |
||||
if x != nil { |
||||
return x.MLock |
||||
} |
||||
return false |
||||
} |
||||
|
||||
func (x *ModelOptions) GetMMap() bool { |
||||
if x != nil { |
||||
return x.MMap |
||||
} |
||||
return false |
||||
} |
||||
|
||||
func (x *ModelOptions) GetVocabOnly() bool { |
||||
if x != nil { |
||||
return x.VocabOnly |
||||
} |
||||
return false |
||||
} |
||||
|
||||
func (x *ModelOptions) GetLowVRAM() bool { |
||||
if x != nil { |
||||
return x.LowVRAM |
||||
} |
||||
return false |
||||
} |
||||
|
||||
func (x *ModelOptions) GetEmbeddings() bool { |
||||
if x != nil { |
||||
return x.Embeddings |
||||
} |
||||
return false |
||||
} |
||||
|
||||
func (x *ModelOptions) GetNUMA() bool { |
||||
if x != nil { |
||||
return x.NUMA |
||||
} |
||||
return false |
||||
} |
||||
|
||||
func (x *ModelOptions) GetNGPULayers() int32 { |
||||
if x != nil { |
||||
return x.NGPULayers |
||||
} |
||||
return 0 |
||||
} |
||||
|
||||
func (x *ModelOptions) GetMainGPU() string { |
||||
if x != nil { |
||||
return x.MainGPU |
||||
} |
||||
return "" |
||||
} |
||||
|
||||
func (x *ModelOptions) GetTensorSplit() string { |
||||
if x != nil { |
||||
return x.TensorSplit |
||||
} |
||||
return "" |
||||
} |
||||
|
||||
type Result struct { |
||||
state protoimpl.MessageState |
||||
sizeCache protoimpl.SizeCache |
||||
unknownFields protoimpl.UnknownFields |
||||
|
||||
Message string `protobuf:"bytes,1,opt,name=message,proto3" json:"message,omitempty"` |
||||
Success bool `protobuf:"varint,2,opt,name=success,proto3" json:"success,omitempty"` |
||||
} |
||||
|
||||
func (x *Result) Reset() { |
||||
*x = Result{} |
||||
if protoimpl.UnsafeEnabled { |
||||
mi := &file_pkg_grpc_proto_llmserver_proto_msgTypes[4] |
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) |
||||
ms.StoreMessageInfo(mi) |
||||
} |
||||
} |
||||
|
||||
func (x *Result) String() string { |
||||
return protoimpl.X.MessageStringOf(x) |
||||
} |
||||
|
||||
func (*Result) ProtoMessage() {} |
||||
|
||||
func (x *Result) ProtoReflect() protoreflect.Message { |
||||
mi := &file_pkg_grpc_proto_llmserver_proto_msgTypes[4] |
||||
if protoimpl.UnsafeEnabled && x != nil { |
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) |
||||
if ms.LoadMessageInfo() == nil { |
||||
ms.StoreMessageInfo(mi) |
||||
} |
||||
return ms |
||||
} |
||||
return mi.MessageOf(x) |
||||
} |
||||
|
||||
// Deprecated: Use Result.ProtoReflect.Descriptor instead.
|
||||
func (*Result) Descriptor() ([]byte, []int) { |
||||
return file_pkg_grpc_proto_llmserver_proto_rawDescGZIP(), []int{4} |
||||
} |
||||
|
||||
func (x *Result) GetMessage() string { |
||||
if x != nil { |
||||
return x.Message |
||||
} |
||||
return "" |
||||
} |
||||
|
||||
func (x *Result) GetSuccess() bool { |
||||
if x != nil { |
||||
return x.Success |
||||
} |
||||
return false |
||||
} |
||||
|
||||
var File_pkg_grpc_proto_llmserver_proto protoreflect.FileDescriptor |
||||
|
||||
var file_pkg_grpc_proto_llmserver_proto_rawDesc = []byte{ |
||||
0x0a, 0x1e, 0x70, 0x6b, 0x67, 0x2f, 0x67, 0x72, 0x70, 0x63, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, |
||||
0x2f, 0x6c, 0x6c, 0x6d, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, |
||||
0x12, 0x03, 0x6c, 0x6c, 0x6d, 0x22, 0x0f, 0x0a, 0x0d, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x4d, |
||||
0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, 0x80, 0x08, 0x0a, 0x0e, 0x50, 0x72, 0x65, 0x64, 0x69, |
||||
0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x16, 0x0a, 0x06, 0x50, 0x72, 0x6f, |
||||
0x6d, 0x70, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x50, 0x72, 0x6f, 0x6d, 0x70, |
||||
0x74, 0x12, 0x12, 0x0a, 0x04, 0x53, 0x65, 0x65, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, |
||||
0x04, 0x53, 0x65, 0x65, 0x64, 0x12, 0x18, 0x0a, 0x07, 0x54, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, |
||||
0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, 0x07, 0x54, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x12, |
||||
0x16, 0x0a, 0x06, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, |
||||
0x06, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x54, 0x6f, 0x70, 0x4b, 0x18, |
||||
0x05, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x54, 0x6f, 0x70, 0x4b, 0x12, 0x16, 0x0a, 0x06, 0x52, |
||||
0x65, 0x70, 0x65, 0x61, 0x74, 0x18, 0x06, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x52, 0x65, 0x70, |
||||
0x65, 0x61, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x42, 0x61, 0x74, 0x63, 0x68, 0x18, 0x07, 0x20, 0x01, |
||||
0x28, 0x05, 0x52, 0x05, 0x42, 0x61, 0x74, 0x63, 0x68, 0x12, 0x14, 0x0a, 0x05, 0x4e, 0x4b, 0x65, |
||||
0x65, 0x70, 0x18, 0x08, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x4e, 0x4b, 0x65, 0x65, 0x70, 0x12, |
||||
0x20, 0x0a, 0x0b, 0x54, 0x65, 0x6d, 0x70, 0x65, 0x72, 0x61, 0x74, 0x75, 0x72, 0x65, 0x18, 0x09, |
||||
0x20, 0x01, 0x28, 0x02, 0x52, 0x0b, 0x54, 0x65, 0x6d, 0x70, 0x65, 0x72, 0x61, 0x74, 0x75, 0x72, |
||||
0x65, 0x12, 0x18, 0x0a, 0x07, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x18, 0x0a, 0x20, 0x01, |
||||
0x28, 0x02, 0x52, 0x07, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x46, |
||||
0x31, 0x36, 0x4b, 0x56, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x08, 0x52, 0x05, 0x46, 0x31, 0x36, 0x4b, |
||||
0x56, 0x12, 0x1c, 0x0a, 0x09, 0x44, 0x65, 0x62, 0x75, 0x67, 0x4d, 0x6f, 0x64, 0x65, 0x18, 0x0c, |
||||
0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x44, 0x65, 0x62, 0x75, 0x67, 0x4d, 0x6f, 0x64, 0x65, 0x12, |
||||
0x20, 0x0a, 0x0b, 0x53, 0x74, 0x6f, 0x70, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x73, 0x18, 0x0d, |
||||
0x20, 0x03, 0x28, 0x09, 0x52, 0x0b, 0x53, 0x74, 0x6f, 0x70, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, |
||||
0x73, 0x12, 0x1c, 0x0a, 0x09, 0x49, 0x67, 0x6e, 0x6f, 0x72, 0x65, 0x45, 0x4f, 0x53, 0x18, 0x0e, |
||||
0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x49, 0x67, 0x6e, 0x6f, 0x72, 0x65, 0x45, 0x4f, 0x53, 0x12, |
||||
0x2c, 0x0a, 0x11, 0x54, 0x61, 0x69, 0x6c, 0x46, 0x72, 0x65, 0x65, 0x53, 0x61, 0x6d, 0x70, 0x6c, |
||||
0x69, 0x6e, 0x67, 0x5a, 0x18, 0x0f, 0x20, 0x01, 0x28, 0x02, 0x52, 0x11, 0x54, 0x61, 0x69, 0x6c, |
||||
0x46, 0x72, 0x65, 0x65, 0x53, 0x61, 0x6d, 0x70, 0x6c, 0x69, 0x6e, 0x67, 0x5a, 0x12, 0x1a, 0x0a, |
||||
0x08, 0x54, 0x79, 0x70, 0x69, 0x63, 0x61, 0x6c, 0x50, 0x18, 0x10, 0x20, 0x01, 0x28, 0x02, 0x52, |
||||
0x08, 0x54, 0x79, 0x70, 0x69, 0x63, 0x61, 0x6c, 0x50, 0x12, 0x2a, 0x0a, 0x10, 0x46, 0x72, 0x65, |
||||
0x71, 0x75, 0x65, 0x6e, 0x63, 0x79, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x18, 0x11, 0x20, |
||||
0x01, 0x28, 0x02, 0x52, 0x10, 0x46, 0x72, 0x65, 0x71, 0x75, 0x65, 0x6e, 0x63, 0x79, 0x50, 0x65, |
||||
0x6e, 0x61, 0x6c, 0x74, 0x79, 0x12, 0x28, 0x0a, 0x0f, 0x50, 0x72, 0x65, 0x73, 0x65, 0x6e, 0x63, |
||||
0x65, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x18, 0x12, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0f, |
||||
0x50, 0x72, 0x65, 0x73, 0x65, 0x6e, 0x63, 0x65, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x12, |
||||
0x1a, 0x0a, 0x08, 0x4d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x18, 0x13, 0x20, 0x01, 0x28, |
||||
0x05, 0x52, 0x08, 0x4d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x12, 0x20, 0x0a, 0x0b, 0x4d, |
||||
0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x45, 0x54, 0x41, 0x18, 0x14, 0x20, 0x01, 0x28, 0x02, |
||||
0x52, 0x0b, 0x4d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x45, 0x54, 0x41, 0x12, 0x20, 0x0a, |
||||
0x0b, 0x4d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x54, 0x41, 0x55, 0x18, 0x15, 0x20, 0x01, |
||||
0x28, 0x02, 0x52, 0x0b, 0x4d, 0x69, 0x72, 0x6f, 0x73, 0x74, 0x61, 0x74, 0x54, 0x41, 0x55, 0x12, |
||||
0x1e, 0x0a, 0x0a, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x4e, 0x4c, 0x18, 0x16, 0x20, |
||||
0x01, 0x28, 0x08, 0x52, 0x0a, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x4e, 0x4c, 0x12, |
||||
0x1c, 0x0a, 0x09, 0x4c, 0x6f, 0x67, 0x69, 0x74, 0x42, 0x69, 0x61, 0x73, 0x18, 0x17, 0x20, 0x01, |
||||
0x28, 0x09, 0x52, 0x09, 0x4c, 0x6f, 0x67, 0x69, 0x74, 0x42, 0x69, 0x61, 0x73, 0x12, 0x28, 0x0a, |
||||
0x0f, 0x50, 0x61, 0x74, 0x68, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, |
||||
0x18, 0x18, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0f, 0x50, 0x61, 0x74, 0x68, 0x50, 0x72, 0x6f, 0x6d, |
||||
0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x4d, 0x4c, 0x6f, 0x63, 0x6b, |
||||
0x18, 0x19, 0x20, 0x01, 0x28, 0x08, 0x52, 0x05, 0x4d, 0x4c, 0x6f, 0x63, 0x6b, 0x12, 0x12, 0x0a, |
||||
0x04, 0x4d, 0x4d, 0x61, 0x70, 0x18, 0x1a, 0x20, 0x01, 0x28, 0x08, 0x52, 0x04, 0x4d, 0x4d, 0x61, |
||||
0x70, 0x12, 0x26, 0x0a, 0x0e, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, |
||||
0x41, 0x6c, 0x6c, 0x18, 0x1b, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0e, 0x50, 0x72, 0x6f, 0x6d, 0x70, |
||||
0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x41, 0x6c, 0x6c, 0x12, 0x24, 0x0a, 0x0d, 0x50, 0x72, 0x6f, |
||||
0x6d, 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x52, 0x4f, 0x18, 0x1c, 0x20, 0x01, 0x28, 0x08, |
||||
0x52, 0x0d, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x52, 0x4f, 0x12, |
||||
0x18, 0x0a, 0x07, 0x47, 0x72, 0x61, 0x6d, 0x6d, 0x61, 0x72, 0x18, 0x1d, 0x20, 0x01, 0x28, 0x09, |
||||
0x52, 0x07, 0x47, 0x72, 0x61, 0x6d, 0x6d, 0x61, 0x72, 0x12, 0x18, 0x0a, 0x07, 0x4d, 0x61, 0x69, |
||||
0x6e, 0x47, 0x50, 0x55, 0x18, 0x1e, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x4d, 0x61, 0x69, 0x6e, |
||||
0x47, 0x50, 0x55, 0x12, 0x20, 0x0a, 0x0b, 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53, 0x70, 0x6c, |
||||
0x69, 0x74, 0x18, 0x1f, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, |
||||
0x53, 0x70, 0x6c, 0x69, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x54, 0x6f, 0x70, 0x50, 0x18, 0x20, 0x20, |
||||
0x01, 0x28, 0x02, 0x52, 0x04, 0x54, 0x6f, 0x70, 0x50, 0x12, 0x28, 0x0a, 0x0f, 0x50, 0x72, 0x6f, |
||||
0x6d, 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x50, 0x61, 0x74, 0x68, 0x18, 0x21, 0x20, 0x01, |
||||
0x28, 0x09, 0x52, 0x0f, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x43, 0x61, 0x63, 0x68, 0x65, 0x50, |
||||
0x61, 0x74, 0x68, 0x12, 0x14, 0x0a, 0x05, 0x44, 0x65, 0x62, 0x75, 0x67, 0x18, 0x22, 0x20, 0x01, |
||||
0x28, 0x08, 0x52, 0x05, 0x44, 0x65, 0x62, 0x75, 0x67, 0x22, 0x21, 0x0a, 0x05, 0x52, 0x65, 0x70, |
||||
0x6c, 0x79, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x01, 0x20, |
||||
0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, 0x82, 0x03, 0x0a, |
||||
0x0c, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x14, 0x0a, |
||||
0x05, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x4d, 0x6f, |
||||
0x64, 0x65, 0x6c, 0x12, 0x20, 0x0a, 0x0b, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x53, 0x69, |
||||
0x7a, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0b, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, |
||||
0x74, 0x53, 0x69, 0x7a, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x53, 0x65, 0x65, 0x64, 0x18, 0x03, 0x20, |
||||
0x01, 0x28, 0x05, 0x52, 0x04, 0x53, 0x65, 0x65, 0x64, 0x12, 0x16, 0x0a, 0x06, 0x4e, 0x42, 0x61, |
||||
0x74, 0x63, 0x68, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x4e, 0x42, 0x61, 0x74, 0x63, |
||||
0x68, 0x12, 0x1c, 0x0a, 0x09, 0x46, 0x31, 0x36, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x18, 0x05, |
||||
0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x46, 0x31, 0x36, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x12, |
||||
0x14, 0x0a, 0x05, 0x4d, 0x4c, 0x6f, 0x63, 0x6b, 0x18, 0x06, 0x20, 0x01, 0x28, 0x08, 0x52, 0x05, |
||||
0x4d, 0x4c, 0x6f, 0x63, 0x6b, 0x12, 0x12, 0x0a, 0x04, 0x4d, 0x4d, 0x61, 0x70, 0x18, 0x07, 0x20, |
||||
0x01, 0x28, 0x08, 0x52, 0x04, 0x4d, 0x4d, 0x61, 0x70, 0x12, 0x1c, 0x0a, 0x09, 0x56, 0x6f, 0x63, |
||||
0x61, 0x62, 0x4f, 0x6e, 0x6c, 0x79, 0x18, 0x08, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x56, 0x6f, |
||||
0x63, 0x61, 0x62, 0x4f, 0x6e, 0x6c, 0x79, 0x12, 0x18, 0x0a, 0x07, 0x4c, 0x6f, 0x77, 0x56, 0x52, |
||||
0x41, 0x4d, 0x18, 0x09, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x4c, 0x6f, 0x77, 0x56, 0x52, 0x41, |
||||
0x4d, 0x12, 0x1e, 0x0a, 0x0a, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x18, |
||||
0x0a, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0a, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, |
||||
0x73, 0x12, 0x12, 0x0a, 0x04, 0x4e, 0x55, 0x4d, 0x41, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x08, 0x52, |
||||
0x04, 0x4e, 0x55, 0x4d, 0x41, 0x12, 0x1e, 0x0a, 0x0a, 0x4e, 0x47, 0x50, 0x55, 0x4c, 0x61, 0x79, |
||||
0x65, 0x72, 0x73, 0x18, 0x0c, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0a, 0x4e, 0x47, 0x50, 0x55, 0x4c, |
||||
0x61, 0x79, 0x65, 0x72, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x4d, 0x61, 0x69, 0x6e, 0x47, 0x50, 0x55, |
||||
0x18, 0x0d, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x4d, 0x61, 0x69, 0x6e, 0x47, 0x50, 0x55, 0x12, |
||||
0x20, 0x0a, 0x0b, 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53, 0x70, 0x6c, 0x69, 0x74, 0x18, 0x0e, |
||||
0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53, 0x70, 0x6c, 0x69, |
||||
0x74, 0x22, 0x3c, 0x0a, 0x06, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x18, 0x0a, 0x07, 0x6d, |
||||
0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, |
||||
0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, |
||||
0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x32, |
||||
0xc4, 0x01, 0x0a, 0x03, 0x4c, 0x4c, 0x4d, 0x12, 0x2a, 0x0a, 0x06, 0x48, 0x65, 0x61, 0x6c, 0x74, |
||||
0x68, 0x12, 0x12, 0x2e, 0x6c, 0x6c, 0x6d, 0x2e, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x4d, 0x65, |
||||
0x73, 0x73, 0x61, 0x67, 0x65, 0x1a, 0x0a, 0x2e, 0x6c, 0x6c, 0x6d, 0x2e, 0x52, 0x65, 0x70, 0x6c, |
||||
0x79, 0x22, 0x00, 0x12, 0x2c, 0x0a, 0x07, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x12, 0x13, |
||||
0x2e, 0x6c, 0x6c, 0x6d, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, |
||||
0x6f, 0x6e, 0x73, 0x1a, 0x0a, 0x2e, 0x6c, 0x6c, 0x6d, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, |
||||
0x00, 0x12, 0x2d, 0x0a, 0x09, 0x4c, 0x6f, 0x61, 0x64, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x11, |
||||
0x2e, 0x6c, 0x6c, 0x6d, 0x2e, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, |
||||
0x73, 0x1a, 0x0b, 0x2e, 0x6c, 0x6c, 0x6d, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, |
||||
0x12, 0x34, 0x0a, 0x0d, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x53, 0x74, 0x72, 0x65, 0x61, |
||||
0x6d, 0x12, 0x13, 0x2e, 0x6c, 0x6c, 0x6d, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, |
||||
0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0a, 0x2e, 0x6c, 0x6c, 0x6d, 0x2e, 0x52, 0x65, 0x70, |
||||
0x6c, 0x79, 0x22, 0x00, 0x30, 0x01, 0x42, 0x57, 0x0a, 0x1b, 0x69, 0x6f, 0x2e, 0x73, 0x6b, 0x79, |
||||
0x6e, 0x65, 0x74, 0x2e, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x61, 0x69, 0x2e, 0x6c, 0x6c, 0x6d, 0x73, |
||||
0x65, 0x72, 0x76, 0x65, 0x72, 0x42, 0x09, 0x4c, 0x4c, 0x4d, 0x53, 0x65, 0x72, 0x76, 0x65, 0x72, |
||||
0x50, 0x01, 0x5a, 0x2b, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x67, |
||||
0x6f, 0x2d, 0x73, 0x6b, 0x79, 0x6e, 0x65, 0x74, 0x2f, 0x4c, 0x6f, 0x63, 0x61, 0x6c, 0x41, 0x49, |
||||
0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x67, 0x72, 0x70, 0x63, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, |
||||
0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, |
||||
} |
||||
|
||||
var ( |
||||
file_pkg_grpc_proto_llmserver_proto_rawDescOnce sync.Once |
||||
file_pkg_grpc_proto_llmserver_proto_rawDescData = file_pkg_grpc_proto_llmserver_proto_rawDesc |
||||
) |
||||
|
||||
func file_pkg_grpc_proto_llmserver_proto_rawDescGZIP() []byte { |
||||
file_pkg_grpc_proto_llmserver_proto_rawDescOnce.Do(func() { |
||||
file_pkg_grpc_proto_llmserver_proto_rawDescData = protoimpl.X.CompressGZIP(file_pkg_grpc_proto_llmserver_proto_rawDescData) |
||||
}) |
||||
return file_pkg_grpc_proto_llmserver_proto_rawDescData |
||||
} |
||||
|
||||
var file_pkg_grpc_proto_llmserver_proto_msgTypes = make([]protoimpl.MessageInfo, 5) |
||||
var file_pkg_grpc_proto_llmserver_proto_goTypes = []interface{}{ |
||||
(*HealthMessage)(nil), // 0: llm.HealthMessage
|
||||
(*PredictOptions)(nil), // 1: llm.PredictOptions
|
||||
(*Reply)(nil), // 2: llm.Reply
|
||||
(*ModelOptions)(nil), // 3: llm.ModelOptions
|
||||
(*Result)(nil), // 4: llm.Result
|
||||
} |
||||
var file_pkg_grpc_proto_llmserver_proto_depIdxs = []int32{ |
||||
0, // 0: llm.LLM.Health:input_type -> llm.HealthMessage
|
||||
1, // 1: llm.LLM.Predict:input_type -> llm.PredictOptions
|
||||
3, // 2: llm.LLM.LoadModel:input_type -> llm.ModelOptions
|
||||
1, // 3: llm.LLM.PredictStream:input_type -> llm.PredictOptions
|
||||
2, // 4: llm.LLM.Health:output_type -> llm.Reply
|
||||
2, // 5: llm.LLM.Predict:output_type -> llm.Reply
|
||||
4, // 6: llm.LLM.LoadModel:output_type -> llm.Result
|
||||
2, // 7: llm.LLM.PredictStream:output_type -> llm.Reply
|
||||
4, // [4:8] is the sub-list for method output_type
|
||||
0, // [0:4] is the sub-list for method input_type
|
||||
0, // [0:0] is the sub-list for extension type_name
|
||||
0, // [0:0] is the sub-list for extension extendee
|
||||
0, // [0:0] is the sub-list for field type_name
|
||||
} |
||||
|
||||
func init() { file_pkg_grpc_proto_llmserver_proto_init() } |
||||
func file_pkg_grpc_proto_llmserver_proto_init() { |
||||
if File_pkg_grpc_proto_llmserver_proto != nil { |
||||
return |
||||
} |
||||
if !protoimpl.UnsafeEnabled { |
||||
file_pkg_grpc_proto_llmserver_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { |
||||
switch v := v.(*HealthMessage); i { |
||||
case 0: |
||||
return &v.state |
||||
case 1: |
||||
return &v.sizeCache |
||||
case 2: |
||||
return &v.unknownFields |
||||
default: |
||||
return nil |
||||
} |
||||
} |
||||
file_pkg_grpc_proto_llmserver_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} { |
||||
switch v := v.(*PredictOptions); i { |
||||
case 0: |
||||
return &v.state |
||||
case 1: |
||||
return &v.sizeCache |
||||
case 2: |
||||
return &v.unknownFields |
||||
default: |
||||
return nil |
||||
} |
||||
} |
||||
file_pkg_grpc_proto_llmserver_proto_msgTypes[2].Exporter = func(v interface{}, i int) interface{} { |
||||
switch v := v.(*Reply); i { |
||||
case 0: |
||||
return &v.state |
||||
case 1: |
||||
return &v.sizeCache |
||||
case 2: |
||||
return &v.unknownFields |
||||
default: |
||||
return nil |
||||
} |
||||
} |
||||
file_pkg_grpc_proto_llmserver_proto_msgTypes[3].Exporter = func(v interface{}, i int) interface{} { |
||||
switch v := v.(*ModelOptions); i { |
||||
case 0: |
||||
return &v.state |
||||
case 1: |
||||
return &v.sizeCache |
||||
case 2: |
||||
return &v.unknownFields |
||||
default: |
||||
return nil |
||||
} |
||||
} |
||||
file_pkg_grpc_proto_llmserver_proto_msgTypes[4].Exporter = func(v interface{}, i int) interface{} { |
||||
switch v := v.(*Result); i { |
||||
case 0: |
||||
return &v.state |
||||
case 1: |
||||
return &v.sizeCache |
||||
case 2: |
||||
return &v.unknownFields |
||||
default: |
||||
return nil |
||||
} |
||||
} |
||||
} |
||||
type x struct{} |
||||
out := protoimpl.TypeBuilder{ |
||||
File: protoimpl.DescBuilder{ |
||||
GoPackagePath: reflect.TypeOf(x{}).PkgPath(), |
||||
RawDescriptor: file_pkg_grpc_proto_llmserver_proto_rawDesc, |
||||
NumEnums: 0, |
||||
NumMessages: 5, |
||||
NumExtensions: 0, |
||||
NumServices: 1, |
||||
}, |
||||
GoTypes: file_pkg_grpc_proto_llmserver_proto_goTypes, |
||||
DependencyIndexes: file_pkg_grpc_proto_llmserver_proto_depIdxs, |
||||
MessageInfos: file_pkg_grpc_proto_llmserver_proto_msgTypes, |
||||
}.Build() |
||||
File_pkg_grpc_proto_llmserver_proto = out.File |
||||
file_pkg_grpc_proto_llmserver_proto_rawDesc = nil |
||||
file_pkg_grpc_proto_llmserver_proto_goTypes = nil |
||||
file_pkg_grpc_proto_llmserver_proto_depIdxs = nil |
||||
} |
@ -0,0 +1,82 @@ |
||||
syntax = "proto3"; |
||||
|
||||
option go_package = "github.com/go-skynet/LocalAI/pkg/grpc/proto"; |
||||
option java_multiple_files = true; |
||||
option java_package = "io.skynet.localai.llmserver"; |
||||
option java_outer_classname = "LLMServer"; |
||||
|
||||
package llm; |
||||
|
||||
// LLM exposes the model lifecycle and inference operations served by a
// LocalAI backend process.
service LLM {
  // Health is a liveness probe; the server answers with a short status reply.
  rpc Health(HealthMessage) returns (Reply) {}
  // Predict runs one inference and returns the complete result.
  rpc Predict(PredictOptions) returns (Reply) {}
  // LoadModel loads a model into the backend using the given options.
  rpc LoadModel(ModelOptions) returns (Result) {}
  // PredictStream runs inference and streams partial results as they arrive.
  rpc PredictStream(PredictOptions) returns (stream Reply) {}
}
||||
|
||||
// HealthMessage is the (empty) request for the Health probe.
message HealthMessage {}
||||
|
||||
// PredictOptions carries the per-request inference parameters
// (prompt, sampling settings, cache paths, GPU placement, ...).
// Not every backend honors every field; unsupported options are
// backend-dependent.
message PredictOptions {
  string Prompt = 1;
  int32 Seed = 2;
  int32 Threads = 3;
  int32 Tokens = 4;
  int32 TopK = 5;
  int32 Repeat = 6;
  int32 Batch = 7;
  int32 NKeep = 8;
  float Temperature = 9;
  float Penalty = 10;
  bool F16KV = 11;
  bool DebugMode = 12;
  repeated string StopPrompts = 13;
  bool IgnoreEOS = 14;
  float TailFreeSamplingZ = 15;
  float TypicalP = 16;
  float FrequencyPenalty = 17;
  float PresencePenalty = 18;
  int32 Mirostat = 19;
  float MirostatETA = 20;
  float MirostatTAU = 21;
  bool PenalizeNL = 22;
  string LogitBias = 23;
  string PathPromptCache = 24;
  bool MLock = 25;
  bool MMap = 26;
  bool PromptCacheAll = 27;
  bool PromptCacheRO = 28;
  string Grammar = 29;
  string MainGPU = 30;
  string TensorSplit = 31;
  float TopP = 32;
  string PromptCachePath = 33;
  bool Debug = 34;
}
||||
|
||||
// Reply carries the generated text for Predict/PredictStream, and a
// short status string for Health.
message Reply {
  string message = 1;
}
||||
|
||||
// ModelOptions configures model loading for LoadModel: model file,
// context/batch sizes, memory mapping/locking, and GPU placement.
message ModelOptions {
  string Model = 1;
  int32 ContextSize = 2;
  int32 Seed = 3;
  int32 NBatch = 4;
  bool F16Memory = 5;
  bool MLock = 6;
  bool MMap = 7;
  bool VocabOnly = 8;
  bool LowVRAM = 9;
  bool Embeddings = 10;
  bool NUMA = 11;
  int32 NGPULayers = 12;
  string MainGPU = 13;
  string TensorSplit = 14;
}
||||
|
||||
// Result reports the outcome of LoadModel: a human-readable message and
// a success flag.
message Result {
  string message = 1;
  bool success = 2;
}
@ -0,0 +1,241 @@ |
||||
// Code generated by protoc-gen-go-grpc. DO NOT EDIT.
|
||||
// versions:
|
||||
// - protoc-gen-go-grpc v1.2.0
|
||||
// - protoc v3.15.8
|
||||
// source: pkg/grpc/proto/llmserver.proto
|
||||
|
||||
package proto |
||||
|
||||
import ( |
||||
context "context" |
||||
grpc "google.golang.org/grpc" |
||||
codes "google.golang.org/grpc/codes" |
||||
status "google.golang.org/grpc/status" |
||||
) |
||||
|
||||
// This is a compile-time assertion to ensure that this generated file
|
||||
// is compatible with the grpc package it is being compiled against.
|
||||
// Requires gRPC-Go v1.32.0 or later.
|
||||
const _ = grpc.SupportPackageIsVersion7 |
||||
|
||||
// LLMClient is the client API for LLM service.
|
||||
//
|
||||
// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
|
||||
type LLMClient interface { |
||||
Health(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*Reply, error) |
||||
Predict(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*Reply, error) |
||||
LoadModel(ctx context.Context, in *ModelOptions, opts ...grpc.CallOption) (*Result, error) |
||||
PredictStream(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (LLM_PredictStreamClient, error) |
||||
} |
||||
|
||||
type lLMClient struct { |
||||
cc grpc.ClientConnInterface |
||||
} |
||||
|
||||
func NewLLMClient(cc grpc.ClientConnInterface) LLMClient { |
||||
return &lLMClient{cc} |
||||
} |
||||
|
||||
func (c *lLMClient) Health(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*Reply, error) { |
||||
out := new(Reply) |
||||
err := c.cc.Invoke(ctx, "/llm.LLM/Health", in, out, opts...) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
return out, nil |
||||
} |
||||
|
||||
func (c *lLMClient) Predict(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*Reply, error) { |
||||
out := new(Reply) |
||||
err := c.cc.Invoke(ctx, "/llm.LLM/Predict", in, out, opts...) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
return out, nil |
||||
} |
||||
|
||||
func (c *lLMClient) LoadModel(ctx context.Context, in *ModelOptions, opts ...grpc.CallOption) (*Result, error) { |
||||
out := new(Result) |
||||
err := c.cc.Invoke(ctx, "/llm.LLM/LoadModel", in, out, opts...) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
return out, nil |
||||
} |
||||
|
||||
func (c *lLMClient) PredictStream(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (LLM_PredictStreamClient, error) { |
||||
stream, err := c.cc.NewStream(ctx, &LLM_ServiceDesc.Streams[0], "/llm.LLM/PredictStream", opts...) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
x := &lLMPredictStreamClient{stream} |
||||
if err := x.ClientStream.SendMsg(in); err != nil { |
||||
return nil, err |
||||
} |
||||
if err := x.ClientStream.CloseSend(); err != nil { |
||||
return nil, err |
||||
} |
||||
return x, nil |
||||
} |
||||
|
||||
type LLM_PredictStreamClient interface { |
||||
Recv() (*Reply, error) |
||||
grpc.ClientStream |
||||
} |
||||
|
||||
type lLMPredictStreamClient struct { |
||||
grpc.ClientStream |
||||
} |
||||
|
||||
func (x *lLMPredictStreamClient) Recv() (*Reply, error) { |
||||
m := new(Reply) |
||||
if err := x.ClientStream.RecvMsg(m); err != nil { |
||||
return nil, err |
||||
} |
||||
return m, nil |
||||
} |
||||
|
||||
// LLMServer is the server API for LLM service.
|
||||
// All implementations must embed UnimplementedLLMServer
|
||||
// for forward compatibility
|
||||
type LLMServer interface { |
||||
Health(context.Context, *HealthMessage) (*Reply, error) |
||||
Predict(context.Context, *PredictOptions) (*Reply, error) |
||||
LoadModel(context.Context, *ModelOptions) (*Result, error) |
||||
PredictStream(*PredictOptions, LLM_PredictStreamServer) error |
||||
mustEmbedUnimplementedLLMServer() |
||||
} |
||||
|
||||
// UnimplementedLLMServer must be embedded to have forward compatible implementations.
|
||||
type UnimplementedLLMServer struct { |
||||
} |
||||
|
||||
func (UnimplementedLLMServer) Health(context.Context, *HealthMessage) (*Reply, error) { |
||||
return nil, status.Errorf(codes.Unimplemented, "method Health not implemented") |
||||
} |
||||
func (UnimplementedLLMServer) Predict(context.Context, *PredictOptions) (*Reply, error) { |
||||
return nil, status.Errorf(codes.Unimplemented, "method Predict not implemented") |
||||
} |
||||
func (UnimplementedLLMServer) LoadModel(context.Context, *ModelOptions) (*Result, error) { |
||||
return nil, status.Errorf(codes.Unimplemented, "method LoadModel not implemented") |
||||
} |
||||
func (UnimplementedLLMServer) PredictStream(*PredictOptions, LLM_PredictStreamServer) error { |
||||
return status.Errorf(codes.Unimplemented, "method PredictStream not implemented") |
||||
} |
||||
func (UnimplementedLLMServer) mustEmbedUnimplementedLLMServer() {} |
||||
|
||||
// UnsafeLLMServer may be embedded to opt out of forward compatibility for this service.
|
||||
// Use of this interface is not recommended, as added methods to LLMServer will
|
||||
// result in compilation errors.
|
||||
type UnsafeLLMServer interface { |
||||
mustEmbedUnimplementedLLMServer() |
||||
} |
||||
|
||||
func RegisterLLMServer(s grpc.ServiceRegistrar, srv LLMServer) { |
||||
s.RegisterService(&LLM_ServiceDesc, srv) |
||||
} |
||||
|
||||
func _LLM_Health_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { |
||||
in := new(HealthMessage) |
||||
if err := dec(in); err != nil { |
||||
return nil, err |
||||
} |
||||
if interceptor == nil { |
||||
return srv.(LLMServer).Health(ctx, in) |
||||
} |
||||
info := &grpc.UnaryServerInfo{ |
||||
Server: srv, |
||||
FullMethod: "/llm.LLM/Health", |
||||
} |
||||
handler := func(ctx context.Context, req interface{}) (interface{}, error) { |
||||
return srv.(LLMServer).Health(ctx, req.(*HealthMessage)) |
||||
} |
||||
return interceptor(ctx, in, info, handler) |
||||
} |
||||
|
||||
func _LLM_Predict_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { |
||||
in := new(PredictOptions) |
||||
if err := dec(in); err != nil { |
||||
return nil, err |
||||
} |
||||
if interceptor == nil { |
||||
return srv.(LLMServer).Predict(ctx, in) |
||||
} |
||||
info := &grpc.UnaryServerInfo{ |
||||
Server: srv, |
||||
FullMethod: "/llm.LLM/Predict", |
||||
} |
||||
handler := func(ctx context.Context, req interface{}) (interface{}, error) { |
||||
return srv.(LLMServer).Predict(ctx, req.(*PredictOptions)) |
||||
} |
||||
return interceptor(ctx, in, info, handler) |
||||
} |
||||
|
||||
func _LLM_LoadModel_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { |
||||
in := new(ModelOptions) |
||||
if err := dec(in); err != nil { |
||||
return nil, err |
||||
} |
||||
if interceptor == nil { |
||||
return srv.(LLMServer).LoadModel(ctx, in) |
||||
} |
||||
info := &grpc.UnaryServerInfo{ |
||||
Server: srv, |
||||
FullMethod: "/llm.LLM/LoadModel", |
||||
} |
||||
handler := func(ctx context.Context, req interface{}) (interface{}, error) { |
||||
return srv.(LLMServer).LoadModel(ctx, req.(*ModelOptions)) |
||||
} |
||||
return interceptor(ctx, in, info, handler) |
||||
} |
||||
|
||||
func _LLM_PredictStream_Handler(srv interface{}, stream grpc.ServerStream) error { |
||||
m := new(PredictOptions) |
||||
if err := stream.RecvMsg(m); err != nil { |
||||
return err |
||||
} |
||||
return srv.(LLMServer).PredictStream(m, &lLMPredictStreamServer{stream}) |
||||
} |
||||
|
||||
type LLM_PredictStreamServer interface { |
||||
Send(*Reply) error |
||||
grpc.ServerStream |
||||
} |
||||
|
||||
type lLMPredictStreamServer struct { |
||||
grpc.ServerStream |
||||
} |
||||
|
||||
func (x *lLMPredictStreamServer) Send(m *Reply) error { |
||||
return x.ServerStream.SendMsg(m) |
||||
} |
||||
|
||||
// LLM_ServiceDesc is the grpc.ServiceDesc for LLM service.
|
||||
// It's only intended for direct use with grpc.RegisterService,
|
||||
// and not to be introspected or modified (even as a copy)
|
||||
var LLM_ServiceDesc = grpc.ServiceDesc{ |
||||
ServiceName: "llm.LLM", |
||||
HandlerType: (*LLMServer)(nil), |
||||
Methods: []grpc.MethodDesc{ |
||||
{ |
||||
MethodName: "Health", |
||||
Handler: _LLM_Health_Handler, |
||||
}, |
||||
{ |
||||
MethodName: "Predict", |
||||
Handler: _LLM_Predict_Handler, |
||||
}, |
||||
{ |
||||
MethodName: "LoadModel", |
||||
Handler: _LLM_LoadModel_Handler, |
||||
}, |
||||
}, |
||||
Streams: []grpc.StreamDesc{ |
||||
{ |
||||
StreamName: "PredictStream", |
||||
Handler: _LLM_PredictStream_Handler, |
||||
ServerStreams: true, |
||||
}, |
||||
}, |
||||
Metadata: "pkg/grpc/proto/llmserver.proto", |
||||
} |
@ -0,0 +1,76 @@ |
||||
package grpc |
||||
|
||||
import ( |
||||
"context" |
||||
"fmt" |
||||
"log" |
||||
"net" |
||||
|
||||
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto" |
||||
"google.golang.org/grpc" |
||||
) |
||||
|
||||
// A GRPC Server that allows to run LLM inference.
|
||||
// It is used by the LLMServices to expose the LLM functionalities that are called by the client.
|
||||
// The GRPC Service is general, trying to encompass all the possible LLM options models.
|
||||
// It depends on the real implementer then what can be done or not.
|
||||
//
|
||||
// The server is implemented as a GRPC service, with the following methods:
|
||||
// - Predict: to run the inference with options
|
||||
// - PredictStream: to run the inference with options and stream the results
|
||||
|
||||
// server is used to implement helloworld.GreeterServer.
|
||||
// server implements pb.LLMServer by delegating every call to a concrete
// backend LLM implementation.
type server struct {
	pb.UnimplementedLLMServer
	llm LLM
}
||||
|
||||
func (s *server) Health(ctx context.Context, in *pb.HealthMessage) (*pb.Reply, error) { |
||||
return &pb.Reply{Message: "OK"}, nil |
||||
} |
||||
|
||||
func (s *server) LoadModel(ctx context.Context, in *pb.ModelOptions) (*pb.Result, error) { |
||||
err := s.llm.Load(in) |
||||
if err != nil { |
||||
return &pb.Result{Message: fmt.Sprintf("Error loading model: %s", err.Error()), Success: false}, err |
||||
} |
||||
return &pb.Result{Message: "Loading succeeded", Success: true}, nil |
||||
} |
||||
|
||||
func (s *server) Predict(ctx context.Context, in *pb.PredictOptions) (*pb.Reply, error) { |
||||
result, err := s.llm.Predict(in) |
||||
return &pb.Reply{Message: result}, err |
||||
} |
||||
|
||||
func (s *server) PredictStream(in *pb.PredictOptions, stream pb.LLM_PredictStreamServer) error { |
||||
|
||||
resultChan := make(chan string) |
||||
|
||||
done := make(chan bool) |
||||
go func() { |
||||
for result := range resultChan { |
||||
stream.Send(&pb.Reply{Message: result}) |
||||
} |
||||
done <- true |
||||
}() |
||||
|
||||
s.llm.PredictStream(in, resultChan) |
||||
<-done |
||||
|
||||
return nil |
||||
} |
||||
|
||||
func StartServer(address string, model LLM) error { |
||||
lis, err := net.Listen("tcp", address) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
s := grpc.NewServer() |
||||
pb.RegisterLLMServer(s, &server{llm: model}) |
||||
log.Printf("gRPC Server listening at %v", lis.Addr()) |
||||
if err := s.Serve(lis); err != nil { |
||||
return err |
||||
} |
||||
|
||||
return nil |
||||
} |
@ -0,0 +1,62 @@ |
||||
package model |
||||
|
||||
import ( |
||||
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto" |
||||
llama "github.com/go-skynet/go-llama.cpp" |
||||
) |
||||
|
||||
// Options collects the configuration used when constructing a model
// backend. It is populated via the With* functional options and read by
// the model loader.
type Options struct {
	backendString string              // name of the backend implementation to use
	modelFile     string              // path (or name) of the model file to load
	llamaOpts     []llama.ModelOption // llama.cpp-specific model options
	threads       uint32              // number of threads requested for inference
	assetDir      string              // directory holding backend assets

	// gRPCOptions are forwarded as-is to the backend's LoadModel call.
	gRPCOptions *pb.ModelOptions
}
||||
|
||||
// Option mutates an Options value; apply a list of them with NewOptions.
type Option func(*Options)
||||
|
||||
func WithBackendString(backend string) Option { |
||||
return func(o *Options) { |
||||
o.backendString = backend |
||||
} |
||||
} |
||||
|
||||
func WithModelFile(modelFile string) Option { |
||||
return func(o *Options) { |
||||
o.modelFile = modelFile |
||||
} |
||||
} |
||||
|
||||
func WithLoadGRPCOpts(opts *pb.ModelOptions) Option { |
||||
return func(o *Options) { |
||||
o.gRPCOptions = opts |
||||
} |
||||
} |
||||
|
||||
func WithLlamaOpts(opts ...llama.ModelOption) Option { |
||||
return func(o *Options) { |
||||
o.llamaOpts = append(o.llamaOpts, opts...) |
||||
} |
||||
} |
||||
|
||||
func WithThreads(threads uint32) Option { |
||||
return func(o *Options) { |
||||
o.threads = threads |
||||
} |
||||
} |
||||
|
||||
func WithAssetDir(assetDir string) Option { |
||||
return func(o *Options) { |
||||
o.assetDir = assetDir |
||||
} |
||||
} |
||||
|
||||
func NewOptions(opts ...Option) *Options { |
||||
o := &Options{} |
||||
for _, opt := range opts { |
||||
opt(o) |
||||
} |
||||
return o |
||||
} |
Loading…
Reference in new issue