From 2f5feb48418b153a2fe1cdcf0dcf116ac03a7264 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 20 Jun 2023 20:33:47 +0200 Subject: [PATCH] Add LowVRAM option parameter (#642) --- api/config.go | 1 + api/prediction.go | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/api/config.go b/api/config.go index 50aee20..c6100db 100644 --- a/api/config.go +++ b/api/config.go @@ -35,6 +35,7 @@ type Config struct { NGPULayers int `yaml:"gpu_layers"` MMap bool `yaml:"mmap"` MMlock bool `yaml:"mmlock"` + LowVRAM bool `yaml:"low_vram"` TensorSplit string `yaml:"tensor_split"` MainGPU string `yaml:"main_gpu"` diff --git a/api/prediction.go b/api/prediction.go index e7a8ca4..87eea34 100644 --- a/api/prediction.go +++ b/api/prediction.go @@ -48,6 +48,10 @@ func defaultLLamaOpts(c Config) []llama.ModelOption { llamaOpts = append(llamaOpts, llama.SetNBatch(512)) } + if c.LowVRAM { + llamaOpts = append(llamaOpts, llama.EnabelLowVRAM) + } + return llamaOpts }