@@ -10,14 +10,14 @@ import (
 	"sync"
 	"text/template"

-	"github.com/hashicorp/go-multierror"
-	"github.com/rs/zerolog/log"
 	rwkv "github.com/donomii/go-rwkv.cpp"
+	bloomz "github.com/go-skynet/bloomz.cpp"
 	bert "github.com/go-skynet/go-bert.cpp"
 	gpt2 "github.com/go-skynet/go-gpt2.cpp"
 	gptj "github.com/go-skynet/go-gpt4all-j.cpp"
 	llama "github.com/go-skynet/go-llama.cpp"
+	"github.com/hashicorp/go-multierror"
+	"github.com/rs/zerolog/log"
 )
@@ -28,9 +28,12 @@ type ModelLoader struct {
 	gptmodels         map[string]*gptj.GPTJ
 	gpt2models        map[string]*gpt2.GPT2
 	gptstablelmmodels map[string]*gpt2.StableLM
+	dollymodels       map[string]*gpt2.Dolly
+	redpajama         map[string]*gpt2.RedPajama
 	rwkv              map[string]*rwkv.RwkvState
-	bert              map[string]*bert.Bert
+	bloomz            map[string]*bloomz.Bloomz
+	bert              map[string]*bert.Bert
 	promptsTemplates  map[string]*template.Template
 }
@@ -40,8 +43,11 @@ func NewModelLoader(modelPath string) *ModelLoader {
 		gpt2models:        make(map[string]*gpt2.GPT2),
 		gptmodels:         make(map[string]*gptj.GPTJ),
 		gptstablelmmodels: make(map[string]*gpt2.StableLM),
+		dollymodels:       make(map[string]*gpt2.Dolly),
+		redpajama:         make(map[string]*gpt2.RedPajama),
 		models:            make(map[string]*llama.LLama),
 		rwkv:              make(map[string]*rwkv.RwkvState),
+		bloomz:            make(map[string]*bloomz.Bloomz),
 		bert:              make(map[string]*bert.Bert),
 		promptsTemplates:  make(map[string]*template.Template),
 	}
@@ -128,6 +134,70 @@ func (ml *ModelLoader) loadTemplateIfExists(modelName, modelFile string) error {
 	return nil
 }

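+// LoadRedPajama loads a ggml RedPajama model via the go-gpt2.cpp binding
+// and caches it in ml.redpajama so later requests reuse the same instance.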
+func (ml *ModelLoader) LoadRedPajama(modelName string) (*gpt2.RedPajama, error) {
+	ml.mu.Lock()
+	defer ml.mu.Unlock()
+
+	// Check if we already have a loaded model
+	if !ml.ExistsInModelPath(modelName) {
+		return nil, fmt.Errorf("model does not exist")
+	}
+
+	if m, ok := ml.redpajama[modelName]; ok {
+		log.Debug().Msgf("Model already loaded in memory: %s", modelName)
+		return m, nil
+	}
+
+	// Load the model and keep it in memory for later use
+	modelFile := filepath.Join(ml.ModelPath, modelName)
+	log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
+
+	model, err := gpt2.NewRedPajama(modelFile)
+	if err != nil {
+		return nil, err
+	}
+
+	// If there is a prompt template, load it
+	if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
+		return nil, err
+	}
+
+	ml.redpajama[modelName] = model
+	return model, err
+}
+
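+// LoadDollyModel loads a ggml Dolly model via the go-gpt2.cpp binding,
+// following the same check-cache-then-load pattern as the other loaders.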
+func (ml *ModelLoader) LoadDollyModel(modelName string) (*gpt2.Dolly, error) {
+	ml.mu.Lock()
+	defer ml.mu.Unlock()
+
+	// Check if we already have a loaded model
+	if !ml.ExistsInModelPath(modelName) {
+		return nil, fmt.Errorf("model does not exist")
+	}
+
+	if m, ok := ml.dollymodels[modelName]; ok {
+		log.Debug().Msgf("Model already loaded in memory: %s", modelName)
+		return m, nil
+	}
+
+	// Load the model and keep it in memory for later use
+	modelFile := filepath.Join(ml.ModelPath, modelName)
+	log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
+
+	model, err := gpt2.NewDolly(modelFile)
+	if err != nil {
+		return nil, err
+	}
+
+	// If there is a prompt template, load it
+	if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
+		return nil, err
+	}
+
+	ml.dollymodels[modelName] = model
+	return model, err
+}
+
 func (ml *ModelLoader) LoadStableLMModel(modelName string) (*gpt2.StableLM, error) {
 	ml.mu.Lock()
 	defer ml.mu.Unlock()
@@ -192,6 +262,38 @@ func (ml *ModelLoader) LoadBERT(modelName string) (*bert.Bert, error) {
 	return model, err
 }

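+// LoadBloomz loads a ggml BLOOM-family model via the bloomz.cpp binding
+// and caches it in ml.bloomz.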
+func (ml *ModelLoader) LoadBloomz(modelName string) (*bloomz.Bloomz, error) {
+	ml.mu.Lock()
+	defer ml.mu.Unlock()
+
+	// Check if we already have a loaded model
+	if !ml.ExistsInModelPath(modelName) {
+		return nil, fmt.Errorf("model does not exist")
+	}
+
+	if m, ok := ml.bloomz[modelName]; ok {
+		log.Debug().Msgf("Model already loaded in memory: %s", modelName)
+		return m, nil
+	}
+
+	// Load the model and keep it in memory for later use
+	modelFile := filepath.Join(ml.ModelPath, modelName)
+	log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
+
+	model, err := bloomz.New(modelFile)
+	if err != nil {
+		return nil, err
+	}
+
+	// If there is a prompt template, load it
+	if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
+		return nil, err
+	}
+
+	ml.bloomz[modelName] = model
+	return model, err
+}
+
 func (ml *ModelLoader) LoadGPT2Model(modelName string) (*gpt2.GPT2, error) {
 	ml.mu.Lock()
 	defer ml.mu.Unlock()
@@ -329,8 +431,14 @@ func (ml *ModelLoader) BackendLoader(backendString string, modelFile string, lla
 	switch strings.ToLower(backendString) {
 	case "llama":
 		return ml.LoadLLaMAModel(modelFile, llamaOpts...)
+	case "bloomz":
+		return ml.LoadBloomz(modelFile)
 	case "stablelm":
 		return ml.LoadStableLMModel(modelFile)
+	case "dolly":
+		return ml.LoadDollyModel(modelFile)
+	case "redpajama":
+		return ml.LoadRedPajama(modelFile)
 	case "gpt2":
 		return ml.LoadGPT2Model(modelFile)
 	case "gptj":
@@ -391,6 +499,30 @@ func (ml *ModelLoader) GreedyLoader(modelFile string, llamaOpts []llama.ModelOpt
 		err = multierror.Append(err, modelerr)
 	}

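+	// Fall through the newly added backends in turn; each failure is
+	// appended to the accumulated multierror before trying the next one.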
+	model, modelerr = ml.LoadDollyModel(modelFile)
+	if modelerr == nil {
+		updateModels(model)
+		return model, nil
+	} else {
+		err = multierror.Append(err, modelerr)
+	}
+
+	model, modelerr = ml.LoadRedPajama(modelFile)
+	if modelerr == nil {
+		updateModels(model)
+		return model, nil
+	} else {
+		err = multierror.Append(err, modelerr)
+	}
+
+	model, modelerr = ml.LoadBloomz(modelFile)
+	if modelerr == nil {
+		updateModels(model)
+		return model, nil
+	} else {
+		err = multierror.Append(err, modelerr)
+	}
+
 	model, modelerr = ml.LoadRWKV(modelFile, modelFile+tokenizerSuffix, threads)
 	if modelerr == nil {
 		updateModels(model)