FlaskAI/pkg/model/loader.go

package model

import (
	"bytes"
	"fmt"
	"io/ioutil"
	"os"
	"path/filepath"
	"strings"
	"sync"
	"text/template"

	"github.com/hashicorp/go-multierror"
	"github.com/rs/zerolog/log"

	rwkv "github.com/donomii/go-rwkv.cpp"
	bert "github.com/go-skynet/go-bert.cpp"
	gpt2 "github.com/go-skynet/go-gpt2.cpp"
	gptj "github.com/go-skynet/go-gpt4all-j.cpp"
	llama "github.com/go-skynet/go-llama.cpp"
)

// ModelLoader loads models from files located under ModelPath and keeps every
// loaded instance in a per-backend map keyed by model name, so that a model is
// only loaded once. Parsed prompt templates are cached the same way.
//
// The Load* methods and TemplatePrefix lock mu while they read or write the
// maps. Create instances with NewModelLoader so that all maps are initialised.
type ModelLoader struct {
	// ModelPath is the directory that model and template file names are joined onto.
	ModelPath string
	mu        sync.Mutex
	// One cache per backend type, each keyed by model name.
	// TODO: this needs generics
	models            map[string]*llama.LLama
	gptmodels         map[string]*gptj.GPTJ
	gpt2models        map[string]*gpt2.GPT2
	gptstablelmmodels map[string]*gpt2.StableLM
	rwkv              map[string]*rwkv.RwkvState
	bert              map[string]*bert.Bert

	// promptsTemplates holds the parsed "<model name>.tmpl" templates, keyed by model name.
	promptsTemplates map[string]*template.Template
}

// NewModelLoader returns a ModelLoader rooted at modelPath whose per-backend
// model caches and prompt-template cache are all initialised to empty maps.
func NewModelLoader(modelPath string) *ModelLoader {
	loader := &ModelLoader{ModelPath: modelPath}

	// Model caches, one per backend.
	loader.models = map[string]*llama.LLama{}
	loader.gptmodels = map[string]*gptj.GPTJ{}
	loader.gpt2models = map[string]*gpt2.GPT2{}
	loader.gptstablelmmodels = map[string]*gpt2.StableLM{}
	loader.rwkv = map[string]*rwkv.RwkvState{}
	loader.bert = map[string]*bert.Bert{}

	// Parsed prompt templates.
	loader.promptsTemplates = map[string]*template.Template{}

	return loader
}

// ExistsInModelPath reports whether os.Stat succeeds for s joined onto
// ml.ModelPath. Any Stat error, not only "not found", yields false.
func (ml *ModelLoader) ExistsInModelPath(s string) bool {
	target := filepath.Join(ml.ModelPath, s)
	if _, statErr := os.Stat(target); statErr != nil {
		return false
	}
	return true
}

// ListModels returns the names of the entries found directly in ml.ModelPath,
// leaving out prompt templates (.tmpl), YAML files (.yaml, .yml) and .keep
// placeholders. The returned slice is never nil; when the directory cannot be
// read it is empty and the read error is returned with it.
func (ml *ModelLoader) ListModels() ([]string, error) {
	entries, err := ioutil.ReadDir(ml.ModelPath)
	if err != nil {
		return []string{}, err
	}

	// isAuxiliary tells whether a file name carries one of the suffixes that
	// mark it as something other than a model.
	isAuxiliary := func(name string) bool {
		for _, suffix := range [...]string{".tmpl", ".keep", ".yaml", ".yml"} {
			if strings.HasSuffix(name, suffix) {
				return true
			}
		}
		return false
	}

	names := []string{}
	for _, entry := range entries {
		if name := entry.Name(); !isAuxiliary(name) {
			names = append(names, name)
		}
	}

	return names, nil
}

// TemplatePrefix executes the prompt template registered for modelName with in
// as its data and returns the rendered text.
//
// When no template is cached for modelName yet, it is loaded on demand from
// the model path. An error is returned if loading fails, if no template exists
// for the model, or if executing the template fails.
func (ml *ModelLoader) TemplatePrefix(modelName string, in interface{}) (string, error) {
	ml.mu.Lock()
	defer ml.mu.Unlock()

	tmpl, cached := ml.promptsTemplates[modelName]
	if !cached {
		// Not seen before: try to pick the template up from disk now.
		if err := ml.loadTemplateIfExists(modelName, filepath.Join(ml.ModelPath, modelName)); err != nil {
			return "", err
		}
		// Stays nil when there is no template file for this model.
		tmpl = ml.promptsTemplates[modelName]
	}
	if tmpl == nil {
		return "", fmt.Errorf("failed loading any template")
	}

	var rendered bytes.Buffer
	if err := tmpl.Execute(&rendered, in); err != nil {
		return "", err
	}
	return rendered.String(), nil
}

// loadTemplateIfExists parses the file "<modelName>.tmpl" from the model path
// and stores the result in ml.promptsTemplates under modelName.
//
// It is a no-op when a template is already stored for modelName or when the
// template file does not exist; only read and parse failures are reported.
// It does no locking of its own: the methods that call it hold ml.mu.
// modelFile is accepted but not used.
func (ml *ModelLoader) loadTemplateIfExists(modelName, modelFile string) error {
	if _, loaded := ml.promptsTemplates[modelName]; loaded {
		return nil
	}

	// A missing template is not an error - the model simply runs without one.
	templateName := modelName + ".tmpl"
	if !ml.ExistsInModelPath(templateName) {
		return nil
	}

	raw, err := os.ReadFile(filepath.Join(ml.ModelPath, templateName))
	if err != nil {
		return err
	}

	parsed, err := template.New("prompt").Parse(string(raw))
	if err != nil {
		return err
	}

	ml.promptsTemplates[modelName] = parsed
	return nil
}

// LoadStableLMModel returns the StableLM model stored in the file modelName
// inside ml.ModelPath. The model is loaded with gpt2.NewStableLM on first use
// and served from the in-memory cache afterwards.
//
// An error is returned when the file does not exist in the model path, when
// the optional prompt template of the model cannot be read or parsed, or when
// the backend fails to load the file.
func (ml *ModelLoader) LoadStableLMModel(modelName string) (*gpt2.StableLM, error) {
	ml.mu.Lock()
	defer ml.mu.Unlock()

	// The file has to be present on disk, even for a model that is already cached.
	if !ml.ExistsInModelPath(modelName) {
		return nil, fmt.Errorf("model does not exist")
	}

	// Check if we already have a loaded model
	if m, ok := ml.gptstablelmmodels[modelName]; ok {
		log.Debug().Msgf("Model already loaded in memory: %s", modelName)
		return m, nil
	}

	modelFile := filepath.Join(ml.ModelPath, modelName)

	// If there is a prompt template, load it. This happens before the model is
	// loaded so that a broken template aborts early instead of dropping a
	// freshly loaded model that is neither cached nor handed to the caller.
	if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
		return nil, err
	}

	// Load the model and keep it in memory for later use
	log.Debug().Msgf("Loading model in memory from file: %s", modelFile)

	model, err := gpt2.NewStableLM(modelFile)
	if err != nil {
		return nil, err
	}

	ml.gptstablelmmodels[modelName] = model
	return model, nil
}

// LoadBERT returns the BERT model stored in the file modelName inside
// ml.ModelPath. The model is loaded with bert.New on first use and served from
// the in-memory cache afterwards.
//
// An error is returned when the file does not exist in the model path, when
// the optional prompt template of the model cannot be read or parsed, or when
// the backend fails to load the file.
func (ml *ModelLoader) LoadBERT(modelName string) (*bert.Bert, error) {
	ml.mu.Lock()
	defer ml.mu.Unlock()

	// The file has to be present on disk, even for a model that is already cached.
	if !ml.ExistsInModelPath(modelName) {
		return nil, fmt.Errorf("model does not exist")
	}

	// Check if we already have a loaded model
	if m, ok := ml.bert[modelName]; ok {
		log.Debug().Msgf("Model already loaded in memory: %s", modelName)
		return m, nil
	}

	modelFile := filepath.Join(ml.ModelPath, modelName)

	// If there is a prompt template, load it. This happens before the model is
	// loaded so that a broken template aborts early instead of dropping a
	// freshly loaded model that is neither cached nor handed to the caller.
	if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
		return nil, err
	}

	// Load the model and keep it in memory for later use
	log.Debug().Msgf("Loading model in memory from file: %s", modelFile)

	model, err := bert.New(modelFile)
	if err != nil {
		return nil, err
	}

	ml.bert[modelName] = model
	return model, nil
}

// LoadGPT2Model returns the GPT-2 model stored in the file modelName inside
// ml.ModelPath. The model is loaded with gpt2.New on first use and served from
// the in-memory cache afterwards.
//
// An error is returned when the file does not exist in the model path, when
// the optional prompt template of the model cannot be read or parsed, or when
// the backend fails to load the file.
func (ml *ModelLoader) LoadGPT2Model(modelName string) (*gpt2.GPT2, error) {
	ml.mu.Lock()
	defer ml.mu.Unlock()

	// The file has to be present on disk, even for a model that is already cached.
	if !ml.ExistsInModelPath(modelName) {
		return nil, fmt.Errorf("model does not exist")
	}

	// Check if we already have a loaded model
	if m, ok := ml.gpt2models[modelName]; ok {
		log.Debug().Msgf("Model already loaded in memory: %s", modelName)
		return m, nil
	}

	modelFile := filepath.Join(ml.ModelPath, modelName)

	// If there is a prompt template, load it. This happens before the model is
	// loaded so that a broken template aborts early instead of dropping a
	// freshly loaded model that is neither cached nor handed to the caller.
	if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
		return nil, err
	}

	// Load the model and keep it in memory for later use
	log.Debug().Msgf("Loading model in memory from file: %s", modelFile)

	model, err := gpt2.New(modelFile)
	if err != nil {
		return nil, err
	}

	ml.gpt2models[modelName] = model
	return model, nil
}

// LoadGPTJModel returns the GPT-J model stored in the file modelName inside
// ml.ModelPath. The model is loaded with gptj.New on first use and served from
// the in-memory cache afterwards.
//
// An error is returned when the file does not exist in the model path, when
// the optional prompt template of the model cannot be read or parsed, or when
// the backend fails to load the file.
func (ml *ModelLoader) LoadGPTJModel(modelName string) (*gptj.GPTJ, error) {
	ml.mu.Lock()
	defer ml.mu.Unlock()

	// The file has to be present on disk, even for a model that is already cached.
	if !ml.ExistsInModelPath(modelName) {
		return nil, fmt.Errorf("model does not exist")
	}

	// Check if we already have a loaded model
	if m, ok := ml.gptmodels[modelName]; ok {
		log.Debug().Msgf("Model already loaded in memory: %s", modelName)
		return m, nil
	}

	modelFile := filepath.Join(ml.ModelPath, modelName)

	// If there is a prompt template, load it. This happens before the model is
	// loaded so that a broken template aborts early instead of dropping a
	// freshly loaded model that is neither cached nor handed to the caller.
	if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
		return nil, err
	}

	// Load the model and keep it in memory for later use
	log.Debug().Msgf("Loading model in memory from file: %s", modelFile)

	model, err := gptj.New(modelFile)
	if err != nil {
		return nil, err
	}

	ml.gptmodels[modelName] = model
	return model, nil
}

// LoadRWKV returns the RWKV model stored in the file modelName inside
// ml.ModelPath, loading it together with the tokenizer file tokenFile (also
// resolved against ml.ModelPath) on first use and serving the cached state on
// later calls. threads is passed through to rwkv.LoadFiles.
//
// An error is returned when the model file does not exist in the model path
// or when rwkv.LoadFiles returns nil. No prompt template is loaded here.
func (ml *ModelLoader) LoadRWKV(modelName, tokenFile string, threads uint32) (*rwkv.RwkvState, error) {
	ml.mu.Lock()
	defer ml.mu.Unlock()

	log.Debug().Msgf("Loading model name: %s", modelName)

	// The model file must be present on disk, cached or not.
	if exists := ml.ExistsInModelPath(modelName); !exists {
		return nil, fmt.Errorf("model does not exist")
	}

	// Serve the cached state when this model was loaded before.
	if cached, found := ml.rwkv[modelName]; found {
		log.Debug().Msgf("Model already loaded in memory: %s", modelName)
		return cached, nil
	}

	weightsPath := filepath.Join(ml.ModelPath, modelName)
	log.Debug().Msgf("Loading model in memory from file: %s", weightsPath)

	// LoadFiles signals failure with a nil state rather than an error value.
	state := rwkv.LoadFiles(weightsPath, filepath.Join(ml.ModelPath, tokenFile), threads)
	if state == nil {
		return nil, fmt.Errorf("could not load model")
	}

	ml.rwkv[modelName] = state
	return state, nil
}

// LoadLLaMAModel returns the LLaMA model stored in the file modelName inside
// ml.ModelPath. The model is loaded with llama.New, passing opts through, on
// first use and served from the in-memory cache afterwards; opts are ignored
// when the cached instance is returned.
//
// An error is returned when the file does not exist in the model path, when
// the optional prompt template of the model cannot be read or parsed, or when
// the backend fails to load the file.
func (ml *ModelLoader) LoadLLaMAModel(modelName string, opts ...llama.ModelOption) (*llama.LLama, error) {
	ml.mu.Lock()
	defer ml.mu.Unlock()

	log.Debug().Msgf("Loading model name: %s", modelName)

	// The file has to be present on disk, even for a model that is already cached.
	if !ml.ExistsInModelPath(modelName) {
		return nil, fmt.Errorf("model does not exist")
	}

	// Check if we already have a loaded model
	if m, ok := ml.models[modelName]; ok {
		log.Debug().Msgf("Model already loaded in memory: %s", modelName)
		return m, nil
	}

	modelFile := filepath.Join(ml.ModelPath, modelName)

	// If there is a prompt template, load it. This happens before the model is
	// loaded so that a broken template aborts early instead of dropping a
	// freshly loaded model that is neither cached nor handed to the caller.
	if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
		return nil, err
	}

	// Load the model and keep it in memory for later use
	log.Debug().Msgf("Loading model in memory from file: %s", modelFile)

	model, err := llama.New(modelFile, opts...)
	if err != nil {
		return nil, err
	}

	ml.models[modelName] = model
	return model, nil
}

// tokenizerSuffix is appended to a model file name to obtain the name of the
// tokenizer file that is handed to LoadRWKV.
const tokenizerSuffix = ".tokenizer.json"

// Package-level record of the models handed out by GreedyLoader.
var (
	// loadedModels maps a model file name to the model GreedyLoader loaded for it.
	loadedModels = map[string]interface{}{}
	// muModels guards loadedModels.
	muModels sync.Mutex
)

// BackendLoader loads modelFile with the backend named by backendString, which
// is matched case-insensitively against "llama", "gptj", "gpt2", "stablelm",
// "rwkv" and "bert-embeddings".
//
// llamaOpts is only used by the llama backend and threads only by the rwkv
// backend, which also expects its tokenizer in modelFile+tokenizerSuffix.
// Any other backend name yields a "backend unsupported" error.
func (ml *ModelLoader) BackendLoader(backendString string, modelFile string, llamaOpts []llama.ModelOption, threads uint32) (model interface{}, err error) {
	backend := strings.ToLower(backendString)

	switch backend {
	case "rwkv":
		tokenFile := modelFile + tokenizerSuffix
		return ml.LoadRWKV(modelFile, tokenFile, threads)
	case "bert-embeddings":
		return ml.LoadBERT(modelFile)
	case "gptj":
		return ml.LoadGPTJModel(modelFile)
	case "gpt2":
		return ml.LoadGPT2Model(modelFile)
	case "stablelm":
		return ml.LoadStableLMModel(modelFile)
	case "llama":
		return ml.LoadLLaMAModel(modelFile, llamaOpts...)
	}

	// No case matched: report the name exactly as the caller spelled it.
	return nil, fmt.Errorf("backend unsupported: %s", backendString)
}

// GreedyLoader loads modelFile without being told its backend: it tries the
// llama, gptj, gpt2, stablelm, rwkv and bert loaders in that order and returns
// the model produced by the first one that succeeds.
//
// A successful result is remembered in the package-level loadedModels map and
// returned directly on later calls. That map is keyed by modelFile only, so it
// is shared by every ModelLoader in the process.
//
// llamaOpts is passed to the llama loader and threads to the rwkv loader,
// which looks for its tokenizer in modelFile+tokenizerSuffix. When every
// backend fails, the returned error wraps a multierror holding one entry per
// backend, so callers can inspect it with errors.Is / errors.As.
func (ml *ModelLoader) GreedyLoader(modelFile string, llamaOpts []llama.ModelOption, threads uint32) (model interface{}, err error) {
	muModels.Lock()
	cached, exists := loadedModels[modelFile]
	muModels.Unlock()
	if exists {
		return cached, nil
	}

	// Backends in the order they are attempted. Each entry adapts one typed
	// loader to a common signature so the attempts can share a single loop.
	attempts := []func() (interface{}, error){
		func() (interface{}, error) { return ml.LoadLLaMAModel(modelFile, llamaOpts...) },
		func() (interface{}, error) { return ml.LoadGPTJModel(modelFile) },
		func() (interface{}, error) { return ml.LoadGPT2Model(modelFile) },
		func() (interface{}, error) { return ml.LoadStableLMModel(modelFile) },
		func() (interface{}, error) { return ml.LoadRWKV(modelFile, modelFile+tokenizerSuffix, threads) },
		func() (interface{}, error) { return ml.LoadBERT(modelFile) },
	}

	for _, attempt := range attempts {
		loaded, loadErr := attempt()
		if loadErr != nil {
			// Keep the failure and move on to the next backend.
			err = multierror.Append(err, loadErr)
			continue
		}

		muModels.Lock()
		loadedModels[modelFile] = loaded
		muModels.Unlock()
		return loaded, nil
	}

	// %w keeps the message text unchanged while preserving the error chain.
	return nil, fmt.Errorf("could not load model - all backends returned error: %w", err)
}
Small refinements and refactors 2 years ago			`package model`
Make it compatible with openAI api, support multiple models Signed-off-by: mudler <mudler@c3os.io> 2 years ago
			`import (`
Allow to template model prompts inputs 2 years ago			`"bytes"`
Make it compatible with openAI api, support multiple models Signed-off-by: mudler <mudler@c3os.io> 2 years ago			`"fmt"`
Return model list 2 years ago			`"io/ioutil"`
Make it compatible with openAI api, support multiple models Signed-off-by: mudler <mudler@c3os.io> 2 years ago			`"os"`
			`"path/filepath"`
Return model list 2 years ago			`"strings"`
Make it compatible with openAI api, support multiple models Signed-off-by: mudler <mudler@c3os.io> 2 years ago			`"sync"`
Allow to template model prompts inputs 2 years ago			`"text/template"`
Make it compatible with openAI api, support multiple models Signed-off-by: mudler <mudler@c3os.io> 2 years ago
feat: add embeddings for go-llama.cpp backend (#190) 2 years ago			`"github.com/hashicorp/go-multierror"`
Major API enhancements (#44) 2 years ago			`"github.com/rs/zerolog/log"`

feat: add rwkv support (#158) Signed-off-by: mudler <mudler@mocaccino.org> 2 years ago			`rwkv "github.com/donomii/go-rwkv.cpp"`
feat: add bert.cpp embeddings (#222) 2 years ago			`bert "github.com/go-skynet/go-bert.cpp"`
Add support for cerebras (#45) Signed-off-by: mudler <mudler@c3os.io> 2 years ago			`gpt2 "github.com/go-skynet/go-gpt2.cpp"`
Enhancements (#34) Signed-off-by: mudler <mudler@c3os.io> 2 years ago			`gptj "github.com/go-skynet/go-gpt4all-j.cpp"`
Make it compatible with openAI api, support multiple models Signed-off-by: mudler <mudler@c3os.io> 2 years ago			`llama "github.com/go-skynet/go-llama.cpp"`
			`)`

			`type ModelLoader struct {`
feat: config files and SSE (#83) Signed-off-by: mudler <mudler@mocaccino.org> Signed-off-by: Tyler Gillson <tyler.gillson@gmail.com> Co-authored-by: Tyler Gillson <tyler.gillson@gmail.com> 2 years ago			`ModelPath string`
Add support for cerebras (#45) Signed-off-by: mudler <mudler@c3os.io> 2 years ago			`mu sync.Mutex`
feat: add bert.cpp embeddings (#222) 2 years ago			`// TODO: this needs generics`
Add support for stablelm (#48) Signed-off-by: mudler <mudler@mocaccino.org> 2 years ago			`models map[string]*llama.LLama`
			`gptmodels map[string]*gptj.GPTJ`
			`gpt2models map[string]*gpt2.GPT2`
			`gptstablelmmodels map[string]*gpt2.StableLM`
feat: add rwkv support (#158) Signed-off-by: mudler <mudler@mocaccino.org> 2 years ago			`rwkv map[string]*rwkv.RwkvState`
feat: add bert.cpp embeddings (#222) 2 years ago			`bert map[string]*bert.Bert`

			`promptsTemplates map[string]*template.Template`
Make it compatible with openAI api, support multiple models Signed-off-by: mudler <mudler@c3os.io> 2 years ago			`}`

			`func NewModelLoader(modelPath string) *ModelLoader {`
Add support for stablelm (#48) Signed-off-by: mudler <mudler@mocaccino.org> 2 years ago			`return &ModelLoader{`
feat: config files and SSE (#83) Signed-off-by: mudler <mudler@mocaccino.org> Signed-off-by: Tyler Gillson <tyler.gillson@gmail.com> Co-authored-by: Tyler Gillson <tyler.gillson@gmail.com> 2 years ago			`ModelPath: modelPath,`
Add support for stablelm (#48) Signed-off-by: mudler <mudler@mocaccino.org> 2 years ago			`gpt2models: make(map[string]*gpt2.GPT2),`
			`gptmodels: make(map[string]*gptj.GPTJ),`
			`gptstablelmmodels: make(map[string]*gpt2.StableLM),`
			`models: make(map[string]*llama.LLama),`
feat: add rwkv support (#158) Signed-off-by: mudler <mudler@mocaccino.org> 2 years ago			`rwkv: make(map[string]*rwkv.RwkvState),`
feat: add bert.cpp embeddings (#222) 2 years ago			`bert: make(map[string]*bert.Bert),`
Add support for stablelm (#48) Signed-off-by: mudler <mudler@mocaccino.org> 2 years ago			`promptsTemplates: make(map[string]*template.Template),`
			`}`
Make it compatible with openAI api, support multiple models Signed-off-by: mudler <mudler@c3os.io> 2 years ago			`}`

Major API enhancements (#44) 2 years ago			`func (ml *ModelLoader) ExistsInModelPath(s string) bool {`
feat: config files and SSE (#83) Signed-off-by: mudler <mudler@mocaccino.org> Signed-off-by: Tyler Gillson <tyler.gillson@gmail.com> Co-authored-by: Tyler Gillson <tyler.gillson@gmail.com> 2 years ago			`_, err := os.Stat(filepath.Join(ml.ModelPath, s))`
Major API enhancements (#44) 2 years ago			`return err == nil`
			`}`

Return model list 2 years ago			`func (ml *ModelLoader) ListModels() ([]string, error) {`
feat: config files and SSE (#83) Signed-off-by: mudler <mudler@mocaccino.org> Signed-off-by: Tyler Gillson <tyler.gillson@gmail.com> Co-authored-by: Tyler Gillson <tyler.gillson@gmail.com> 2 years ago			`files, err := ioutil.ReadDir(ml.ModelPath)`
Return model list 2 years ago			`if err != nil {`
			`return []string{}, err`
			`}`

			`models := []string{}`
			`for _, file := range files {`
Major API enhancements (#44) 2 years ago			`// Skip templates, YAML and .keep files`
			`if strings.HasSuffix(file.Name(), ".tmpl") \|\| strings.HasSuffix(file.Name(), ".keep") \|\| strings.HasSuffix(file.Name(), ".yaml") \|\| strings.HasSuffix(file.Name(), ".yml") {`
			`continue`
Return model list 2 years ago			`}`
Major API enhancements (#44) 2 years ago
			`models = append(models, file.Name())`
Return model list 2 years ago			`}`

			`return models, nil`
			`}`

Allow to template model prompts inputs 2 years ago			`func (ml *ModelLoader) TemplatePrefix(modelName string, in interface{}) (string, error) {`
			`ml.mu.Lock()`
			`defer ml.mu.Unlock()`

			`m, ok := ml.promptsTemplates[modelName]`
			`if !ok {`
feat: config files and SSE (#83) Signed-off-by: mudler <mudler@mocaccino.org> Signed-off-by: Tyler Gillson <tyler.gillson@gmail.com> Co-authored-by: Tyler Gillson <tyler.gillson@gmail.com> 2 years ago			`modelFile := filepath.Join(ml.ModelPath, modelName)`
			`if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {`
			`return "", err`
			`}`

			`t, exists := ml.promptsTemplates[modelName]`
			`if exists {`
			`m = t`
			`}`
			`}`
			`if m == nil {`
fix: missing returning error and free callback stream (#187) 2 years ago			`return "", fmt.Errorf("failed loading any template")`
Allow to template model prompts inputs 2 years ago			`}`

			`var buf bytes.Buffer`

			`if err := m.Execute(&buf, in); err != nil {`
			`return "", err`
			`}`
			`return buf.String(), nil`
			`}`

Major API enhancements (#44) 2 years ago			`func (ml *ModelLoader) loadTemplateIfExists(modelName, modelFile string) error {`
			`// Check if the template was already loaded`
			`if _, ok := ml.promptsTemplates[modelName]; ok {`
			`return nil`
			`}`
Enhancements (#34) Signed-off-by: mudler <mudler@c3os.io> 2 years ago
			`// Check if the model path exists`
feat: config files and SSE (#83) Signed-off-by: mudler <mudler@mocaccino.org> Signed-off-by: Tyler Gillson <tyler.gillson@gmail.com> Co-authored-by: Tyler Gillson <tyler.gillson@gmail.com> 2 years ago			`// skip any error here - we run anyway if a template does not exist`
Major API enhancements (#44) 2 years ago			`modelTemplateFile := fmt.Sprintf("%s.tmpl", modelName)`

			`if !ml.ExistsInModelPath(modelTemplateFile) {`
Enhancements (#34) Signed-off-by: mudler <mudler@c3os.io> 2 years ago			`return nil`
			`}`

feat: config files and SSE (#83) Signed-off-by: mudler <mudler@mocaccino.org> Signed-off-by: Tyler Gillson <tyler.gillson@gmail.com> Co-authored-by: Tyler Gillson <tyler.gillson@gmail.com> 2 years ago			`dat, err := os.ReadFile(filepath.Join(ml.ModelPath, modelTemplateFile))`
Enhancements (#34) Signed-off-by: mudler <mudler@c3os.io> 2 years ago			`if err != nil {`
			`return err`
			`}`

			`// Parse the template`
			`tmpl, err := template.New("prompt").Parse(string(dat))`
			`if err != nil {`
			`return err`
			`}`
			`ml.promptsTemplates[modelName] = tmpl`

			`return nil`
			`}`

Add support for stablelm (#48) Signed-off-by: mudler <mudler@mocaccino.org> 2 years ago			`func (ml ModelLoader) LoadStableLMModel(modelName string) (gpt2.StableLM, error) {`
			`ml.mu.Lock()`
			`defer ml.mu.Unlock()`

			`// Check if we already have a loaded model`
			`if !ml.ExistsInModelPath(modelName) {`
			`return nil, fmt.Errorf("model does not exist")`
			`}`

			`if m, ok := ml.gptstablelmmodels[modelName]; ok {`
			`log.Debug().Msgf("Model already loaded in memory: %s", modelName)`
			`return m, nil`
			`}`

			`// Load the model and keep it in memory for later use`
feat: config files and SSE (#83) Signed-off-by: mudler <mudler@mocaccino.org> Signed-off-by: Tyler Gillson <tyler.gillson@gmail.com> Co-authored-by: Tyler Gillson <tyler.gillson@gmail.com> 2 years ago			`modelFile := filepath.Join(ml.ModelPath, modelName)`
Add support for stablelm (#48) Signed-off-by: mudler <mudler@mocaccino.org> 2 years ago			`log.Debug().Msgf("Loading model in memory from file: %s", modelFile)`

			`model, err := gpt2.NewStableLM(modelFile)`
			`if err != nil {`
			`return nil, err`
			`}`

			`// If there is a prompt template, load it`
			`if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {`
			`return nil, err`
			`}`

			`ml.gptstablelmmodels[modelName] = model`
			`return model, err`
			`}`

feat: add bert.cpp embeddings (#222) 2 years ago			`func (ml ModelLoader) LoadBERT(modelName string) (bert.Bert, error) {`
			`ml.mu.Lock()`
			`defer ml.mu.Unlock()`

			`// Check if we already have a loaded model`
			`if !ml.ExistsInModelPath(modelName) {`
			`return nil, fmt.Errorf("model does not exist")`
			`}`

			`if m, ok := ml.bert[modelName]; ok {`
			`log.Debug().Msgf("Model already loaded in memory: %s", modelName)`
			`return m, nil`
			`}`

			`// Load the model and keep it in memory for later use`
			`modelFile := filepath.Join(ml.ModelPath, modelName)`
			`log.Debug().Msgf("Loading model in memory from file: %s", modelFile)`

			`model, err := bert.New(modelFile)`
			`if err != nil {`
			`return nil, err`
			`}`

			`// If there is a prompt template, load it`
			`if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {`
			`return nil, err`
			`}`

			`ml.bert[modelName] = model`
			`return model, err`
			`}`

Add support for cerebras (#45) Signed-off-by: mudler <mudler@c3os.io> 2 years ago			`func (ml ModelLoader) LoadGPT2Model(modelName string) (gpt2.GPT2, error) {`
			`ml.mu.Lock()`
			`defer ml.mu.Unlock()`

			`// Check if we already have a loaded model`
			`if !ml.ExistsInModelPath(modelName) {`
			`return nil, fmt.Errorf("model does not exist")`
			`}`

			`if m, ok := ml.gpt2models[modelName]; ok {`
			`log.Debug().Msgf("Model already loaded in memory: %s", modelName)`
			`return m, nil`
			`}`

			`// Load the model and keep it in memory for later use`
feat: config files and SSE (#83) Signed-off-by: mudler <mudler@mocaccino.org> Signed-off-by: Tyler Gillson <tyler.gillson@gmail.com> Co-authored-by: Tyler Gillson <tyler.gillson@gmail.com> 2 years ago			`modelFile := filepath.Join(ml.ModelPath, modelName)`
Add support for cerebras (#45) Signed-off-by: mudler <mudler@c3os.io> 2 years ago			`log.Debug().Msgf("Loading model in memory from file: %s", modelFile)`

			`model, err := gpt2.New(modelFile)`
			`if err != nil {`
			`return nil, err`
			`}`

			`// If there is a prompt template, load it`
			`if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {`
			`return nil, err`
			`}`

			`ml.gpt2models[modelName] = model`
			`return model, err`
			`}`

Bump llama.cpp, downgrade gpt4all-j (#149) 2 years ago			`func (ml ModelLoader) LoadGPTJModel(modelName string) (gptj.GPTJ, error) {`
Make it compatible with openAI api, support multiple models Signed-off-by: mudler <mudler@c3os.io> 2 years ago			`ml.mu.Lock()`
			`defer ml.mu.Unlock()`

			`// Check if we already have a loaded model`
Major API enhancements (#44) 2 years ago			`if !ml.ExistsInModelPath(modelName) {`
			`return nil, fmt.Errorf("model does not exist")`
Make it compatible with openAI api, support multiple models Signed-off-by: mudler <mudler@c3os.io> 2 years ago			`}`

Major API enhancements (#44) 2 years ago			`if m, ok := ml.gptmodels[modelName]; ok {`
			`log.Debug().Msgf("Model already loaded in memory: %s", modelName)`
			`return m, nil`
Make it compatible with openAI api, support multiple models Signed-off-by: mudler <mudler@c3os.io> 2 years ago			`}`

			`// Load the model and keep it in memory for later use`
feat: config files and SSE (#83) Signed-off-by: mudler <mudler@mocaccino.org> Signed-off-by: Tyler Gillson <tyler.gillson@gmail.com> Co-authored-by: Tyler Gillson <tyler.gillson@gmail.com> 2 years ago			`modelFile := filepath.Join(ml.ModelPath, modelName)`
Major API enhancements (#44) 2 years ago			`log.Debug().Msgf("Loading model in memory from file: %s", modelFile)`

Bump llama.cpp, downgrade gpt4all-j (#149) 2 years ago			`model, err := gptj.New(modelFile)`
Make it compatible with openAI api, support multiple models Signed-off-by: mudler <mudler@c3os.io> 2 years ago			`if err != nil {`
			`return nil, err`
			`}`

Allow to template model prompts inputs 2 years ago			`// If there is a prompt template, load it`
Major API enhancements (#44) 2 years ago			`if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {`
Enhancements (#34) Signed-off-by: mudler <mudler@c3os.io> 2 years ago			`return nil, err`
			`}`

Major API enhancements (#44) 2 years ago			`ml.gptmodels[modelName] = model`
Enhancements (#34) Signed-off-by: mudler <mudler@c3os.io> 2 years ago			`return model, err`
			`}`

feat: add rwkv support (#158) Signed-off-by: mudler <mudler@mocaccino.org> 2 years ago			`func (ml ModelLoader) LoadRWKV(modelName, tokenFile string, threads uint32) (rwkv.RwkvState, error) {`
			`ml.mu.Lock()`
			`defer ml.mu.Unlock()`

			`log.Debug().Msgf("Loading model name: %s", modelName)`

			`// Check if we already have a loaded model`
			`if !ml.ExistsInModelPath(modelName) {`
			`return nil, fmt.Errorf("model does not exist")`
			`}`

			`if m, ok := ml.rwkv[modelName]; ok {`
			`log.Debug().Msgf("Model already loaded in memory: %s", modelName)`
			`return m, nil`
			`}`

			`// Load the model and keep it in memory for later use`
			`modelFile := filepath.Join(ml.ModelPath, modelName)`
			`tokenPath := filepath.Join(ml.ModelPath, tokenFile)`
			`log.Debug().Msgf("Loading model in memory from file: %s", modelFile)`

			`model := rwkv.LoadFiles(modelFile, tokenPath, threads)`
			`if model == nil {`
			`return nil, fmt.Errorf("could not load model")`
			`}`

			`ml.rwkv[modelName] = model`
			`return model, nil`
			`}`

Enhancements (#34) Signed-off-by: mudler <mudler@c3os.io> 2 years ago			`func (ml ModelLoader) LoadLLaMAModel(modelName string, opts ...llama.ModelOption) (llama.LLama, error) {`
			`ml.mu.Lock()`
			`defer ml.mu.Unlock()`

Major API enhancements (#44) 2 years ago			`log.Debug().Msgf("Loading model name: %s", modelName)`

Enhancements (#34) Signed-off-by: mudler <mudler@c3os.io> 2 years ago			`// Check if we already have a loaded model`
Major API enhancements (#44) 2 years ago			`if !ml.ExistsInModelPath(modelName) {`
			`return nil, fmt.Errorf("model does not exist")`
			`}`

			`if m, ok := ml.models[modelName]; ok {`
			`log.Debug().Msgf("Model already loaded in memory: %s", modelName)`
Enhancements (#34) Signed-off-by: mudler <mudler@c3os.io> 2 years ago			`return m, nil`
			`}`
Major API enhancements (#44) 2 years ago
Enhancements (#34) Signed-off-by: mudler <mudler@c3os.io> 2 years ago			`// Load the model and keep it in memory for later use`
feat: config files and SSE (#83) Signed-off-by: mudler <mudler@mocaccino.org> Signed-off-by: Tyler Gillson <tyler.gillson@gmail.com> Co-authored-by: Tyler Gillson <tyler.gillson@gmail.com> 2 years ago			`modelFile := filepath.Join(ml.ModelPath, modelName)`
Major API enhancements (#44) 2 years ago			`log.Debug().Msgf("Loading model in memory from file: %s", modelFile)`

Enhancements (#34) Signed-off-by: mudler <mudler@c3os.io> 2 years ago			`model, err := llama.New(modelFile, opts...)`
			`if err != nil {`
			`return nil, err`
			`}`

			`// If there is a prompt template, load it`
Major API enhancements (#44) 2 years ago			`if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {`
Enhancements (#34) Signed-off-by: mudler <mudler@c3os.io> 2 years ago			`return nil, err`
Allow to template model prompts inputs 2 years ago			`}`

Major API enhancements (#44) 2 years ago			`ml.models[modelName] = model`
Make it compatible with openAI api, support multiple models Signed-off-by: mudler <mudler@c3os.io> 2 years ago			`return model, err`
			`}`
feat: add embeddings for go-llama.cpp backend (#190) 2 years ago
			`const tokenizerSuffix = ".tokenizer.json"`

			`var loadedModels map[string]interface{} = map[string]interface{}{}`
			`var muModels sync.Mutex`

			`func (ml *ModelLoader) BackendLoader(backendString string, modelFile string, llamaOpts []llama.ModelOption, threads uint32) (model interface{}, err error) {`
			`switch strings.ToLower(backendString) {`
			`case "llama":`
			`return ml.LoadLLaMAModel(modelFile, llamaOpts...)`
			`case "stablelm":`
			`return ml.LoadStableLMModel(modelFile)`
			`case "gpt2":`
			`return ml.LoadGPT2Model(modelFile)`
			`case "gptj":`
			`return ml.LoadGPTJModel(modelFile)`
feat: add bert.cpp embeddings (#222) 2 years ago			`case "bert-embeddings":`
			`return ml.LoadBERT(modelFile)`
feat: add embeddings for go-llama.cpp backend (#190) 2 years ago			`case "rwkv":`
			`return ml.LoadRWKV(modelFile, modelFile+tokenizerSuffix, threads)`
			`default:`
			`return nil, fmt.Errorf("backend unsupported: %s", backendString)`
			`}`
			`}`

			`func (ml *ModelLoader) GreedyLoader(modelFile string, llamaOpts []llama.ModelOption, threads uint32) (model interface{}, err error) {`
			`updateModels := func(model interface{}) {`
			`muModels.Lock()`
			`defer muModels.Unlock()`
			`loadedModels[modelFile] = model`
			`}`

			`muModels.Lock()`
			`m, exists := loadedModels[modelFile]`
			`if exists {`
			`muModels.Unlock()`
			`return m, nil`
			`}`
			`muModels.Unlock()`

			`model, modelerr := ml.LoadLLaMAModel(modelFile, llamaOpts...)`
			`if modelerr == nil {`
			`updateModels(model)`
			`return model, nil`
			`} else {`
			`err = multierror.Append(err, modelerr)`
			`}`

			`model, modelerr = ml.LoadGPTJModel(modelFile)`
			`if modelerr == nil {`
			`updateModels(model)`
			`return model, nil`
			`} else {`
			`err = multierror.Append(err, modelerr)`
			`}`

			`model, modelerr = ml.LoadGPT2Model(modelFile)`
			`if modelerr == nil {`
			`updateModels(model)`
			`return model, nil`
			`} else {`
			`err = multierror.Append(err, modelerr)`
			`}`

			`model, modelerr = ml.LoadStableLMModel(modelFile)`
			`if modelerr == nil {`
			`updateModels(model)`
			`return model, nil`
			`} else {`
			`err = multierror.Append(err, modelerr)`
			`}`

			`model, modelerr = ml.LoadRWKV(modelFile, modelFile+tokenizerSuffix, threads)`
			`if modelerr == nil {`
			`updateModels(model)`
			`return model, nil`
			`} else {`
			`err = multierror.Append(err, modelerr)`
			`}`

feat: add bert.cpp embeddings (#222) 2 years ago			`model, modelerr = ml.LoadBERT(modelFile)`
			`if modelerr == nil {`
			`updateModels(model)`
			`return model, nil`
			`} else {`
			`err = multierror.Append(err, modelerr)`
			`}`

feat: add embeddings for go-llama.cpp backend (#190) 2 years ago			`return nil, fmt.Errorf("could not load model - all backends returned error: %s", err.Error())`
			`}`