feat: update llama, enable NUMA (#684)

renovate/github.com-imdario-mergo-1.x
Ettore Di Giacinto 1 year ago committed by GitHub
parent e130b208ab
commit 3593cb0c87
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 2
      Makefile
  2. 1
      api/config.go
  3. 4
      api/prediction.go

@ -3,7 +3,7 @@ GOTEST=$(GOCMD) test
GOVET=$(GOCMD) vet
BINARY_NAME=local-ai
GOLLAMA_VERSION?=9470597ae75ad8b5f17cfab73805ee4a6685d199
GOLLAMA_VERSION?=f104111358e8098aea69ce408b85b707528179ef
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
GPT4ALL_VERSION?=3417a37c5472fb5111a7bd0ed747c8df749c595e
GOGGMLTRANSFORMERS_VERSION?=a459d2726792132541152c981ed9fbfe28f4fd20

@ -23,6 +23,7 @@ type Config struct {
TrimSpace []string `yaml:"trimspace"`
ContextSize int `yaml:"context_size"`
F16 bool `yaml:"f16"`
NUMA bool `yaml:"numa"`
Threads int `yaml:"threads"`
Debug bool `yaml:"debug"`
Roles map[string]string `yaml:"roles"`

@ -48,6 +48,10 @@ func defaultLLamaOpts(c Config) []llama.ModelOption {
llamaOpts = append(llamaOpts, llama.SetNBatch(512))
}
if c.NUMA {
llamaOpts = append(llamaOpts, llama.EnableNUMA)
}
if c.LowVRAM {
llamaOpts = append(llamaOpts, llama.EnabelLowVRAM)
}

Loading…
Cancel
Save