|
|
@ -34,6 +34,7 @@ const ( |
|
|
|
Gpt4AllMptBackend = "gpt4all-mpt" |
|
|
|
Gpt4AllMptBackend = "gpt4all-mpt" |
|
|
|
Gpt4AllJBackend = "gpt4all-j" |
|
|
|
Gpt4AllJBackend = "gpt4all-j" |
|
|
|
Gpt4All = "gpt4all" |
|
|
|
Gpt4All = "gpt4all" |
|
|
|
|
|
|
|
FalconBackend = "falcon" |
|
|
|
BertEmbeddingsBackend = "bert-embeddings" |
|
|
|
BertEmbeddingsBackend = "bert-embeddings" |
|
|
|
RwkvBackend = "rwkv" |
|
|
|
RwkvBackend = "rwkv" |
|
|
|
WhisperBackend = "whisper" |
|
|
|
WhisperBackend = "whisper" |
|
|
@ -41,7 +42,7 @@ const ( |
|
|
|
LCHuggingFaceBackend = "langchain-huggingface" |
|
|
|
LCHuggingFaceBackend = "langchain-huggingface" |
|
|
|
) |
|
|
|
) |
|
|
|
|
|
|
|
|
|
|
|
var backends []string = []string{ |
|
|
|
var autoLoadBackends []string = []string{ |
|
|
|
LlamaBackend, |
|
|
|
LlamaBackend, |
|
|
|
Gpt4All, |
|
|
|
Gpt4All, |
|
|
|
RwkvBackend, |
|
|
|
RwkvBackend, |
|
|
@ -51,6 +52,7 @@ var backends []string = []string{ |
|
|
|
GPTJBackend, |
|
|
|
GPTJBackend, |
|
|
|
Gpt2Backend, |
|
|
|
Gpt2Backend, |
|
|
|
DollyBackend, |
|
|
|
DollyBackend, |
|
|
|
|
|
|
|
FalconBackend, |
|
|
|
MPTBackend, |
|
|
|
MPTBackend, |
|
|
|
ReplitBackend, |
|
|
|
ReplitBackend, |
|
|
|
StarcoderBackend, |
|
|
|
StarcoderBackend, |
|
|
@ -81,6 +83,10 @@ var gptJ = func(modelFile string) (interface{}, error) { |
|
|
|
return transformers.NewGPTJ(modelFile) |
|
|
|
return transformers.NewGPTJ(modelFile) |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// falcon is a model-constructor callback: it passes modelFile to
// transformers.NewFalcon and returns that call's results as-is, with the
// model value carried in an interface{}.
var falcon = func(modelFile string) (interface{}, error) { |
|
|
|
|
|
|
|
return transformers.NewFalcon(modelFile) |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// bertEmbeddings is a model-constructor callback: it passes modelFile to
// bert.New and returns that call's results as-is, with the model value
// carried in an interface{}.
var bertEmbeddings = func(modelFile string) (interface{}, error) { |
|
|
|
var bertEmbeddings = func(modelFile string) (interface{}, error) { |
|
|
|
return bert.New(modelFile) |
|
|
|
return bert.New(modelFile) |
|
|
|
} |
|
|
|
} |
|
|
@ -144,6 +150,8 @@ func (ml *ModelLoader) BackendLoader(backendString string, modelFile string, lla |
|
|
|
return ml.LoadModel(modelFile, mpt) |
|
|
|
return ml.LoadModel(modelFile, mpt) |
|
|
|
case Gpt2Backend: |
|
|
|
case Gpt2Backend: |
|
|
|
return ml.LoadModel(modelFile, transformersLM) |
|
|
|
return ml.LoadModel(modelFile, transformersLM) |
|
|
|
|
|
|
|
case FalconBackend: |
|
|
|
|
|
|
|
return ml.LoadModel(modelFile, falcon) |
|
|
|
case GPTNeoXBackend: |
|
|
|
case GPTNeoXBackend: |
|
|
|
return ml.LoadModel(modelFile, gptNeoX) |
|
|
|
return ml.LoadModel(modelFile, gptNeoX) |
|
|
|
case ReplitBackend: |
|
|
|
case ReplitBackend: |
|
|
@ -180,7 +188,7 @@ func (ml *ModelLoader) GreedyLoader(modelFile string, llamaOpts []llama.ModelOpt |
|
|
|
ml.mu.Unlock() |
|
|
|
ml.mu.Unlock() |
|
|
|
var err error |
|
|
|
var err error |
|
|
|
|
|
|
|
|
|
|
|
for _, b := range backends { |
|
|
|
for _, b := range autoLoadBackends { |
|
|
|
if b == BloomzBackend || b == WhisperBackend || b == RwkvBackend { // do not autoload bloomz/whisper/rwkv
|
|
|
|
if b == BloomzBackend || b == WhisperBackend || b == RwkvBackend { // do not autoload bloomz/whisper/rwkv
|
|
|
|
continue |
|
|
|
continue |
|
|
|
} |
|
|
|
} |
|
|
|