syntax = "proto3"; option go_package = "github.com/go-skynet/LocalAI/pkg/grpc/proto"; option java_multiple_files = true; option java_package = "io.skynet.localai.llmserver"; option java_outer_classname = "LLMServer"; package llm; service LLM { rpc Health(HealthMessage) returns (Reply) {} rpc Predict(PredictOptions) returns (Reply) {} rpc LoadModel(ModelOptions) returns (Result) {} rpc PredictStream(PredictOptions) returns (stream Reply) {} } message HealthMessage {} // The request message containing the user's name. message PredictOptions { string Prompt = 1; int32 Seed = 2; int32 Threads = 3; int32 Tokens = 4; int32 TopK = 5; int32 Repeat = 6; int32 Batch = 7; int32 NKeep = 8; float Temperature = 9; float Penalty = 10; bool F16KV = 11; bool DebugMode = 12; repeated string StopPrompts = 13; bool IgnoreEOS = 14; float TailFreeSamplingZ = 15; float TypicalP = 16; float FrequencyPenalty = 17; float PresencePenalty = 18; int32 Mirostat = 19; float MirostatETA = 20; float MirostatTAU = 21; bool PenalizeNL = 22; string LogitBias = 23; string PathPromptCache = 24; bool MLock = 25; bool MMap = 26; bool PromptCacheAll = 27; bool PromptCacheRO = 28; string Grammar = 29; string MainGPU = 30; string TensorSplit = 31; float TopP = 32; string PromptCachePath = 33; bool Debug = 34; } // The response message containing the result message Reply { string message = 1; } message ModelOptions { string Model = 1; int32 ContextSize = 2; int32 Seed = 3; int32 NBatch = 4; bool F16Memory = 5; bool MLock = 6; bool MMap = 7; bool VocabOnly = 8; bool LowVRAM = 9; bool Embeddings = 10; bool NUMA = 11; int32 NGPULayers = 12; string MainGPU = 13; string TensorSplit = 14; } message Result { string message = 1; bool success = 2; }