diff --git a/api/openai.go b/api/openai.go
index 08e6373..1afbb06 100644
--- a/api/openai.go
+++ b/api/openai.go
@@ -402,6 +402,8 @@ func chatEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, thread
 		Choices: result,
 		Object:  "chat.completion",
 	}
+	respData, _ := json.Marshal(resp)
+	log.Debug().Msgf("Response: %s", respData)
 
 	// Return the prediction in the response body
 	return c.JSON(resp)
diff --git a/api/prediction.go b/api/prediction.go
index 127a957..1fbb57b 100644
--- a/api/prediction.go
+++ b/api/prediction.go
@@ -129,12 +129,15 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback
 		supportStreams = true
 
 		fn = func() (string, error) {
-			//model.ProcessInput("You are a chatbot that is very good at chatting. blah blah blah")
 			stopWord := "\n"
 			if len(c.StopWords) > 0 {
 				stopWord = c.StopWords[0]
 			}
 
+			if err := model.ProcessInput(s); err != nil {
+				return "", err
+			}
+
 			response := model.GenerateResponse(c.Maxtokens, stopWord, float32(c.Temperature), float32(c.TopP), tokenCallback)
 
 			return response, nil
diff --git a/examples/README.md b/examples/README.md
index d7fc24f..7c93955 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -8,6 +8,7 @@ Here is a list of projects that can easily be integrated with the LocalAI backen
 - [discord-bot](https://github.com/go-skynet/LocalAI/tree/master/examples/discord-bot/) (by [@mudler](https://github.com/mudler))
 - [langchain](https://github.com/go-skynet/LocalAI/tree/master/examples/langchain/) (by [@dave-gray101](https://github.com/dave-gray101))
 - [langchain-python](https://github.com/go-skynet/LocalAI/tree/master/examples/langchain-python/) (by [@mudler](https://github.com/mudler))
+- [rwkv](https://github.com/go-skynet/LocalAI/tree/master/examples/rwkv/) (by [@mudler](https://github.com/mudler))
 - [slack-bot](https://github.com/go-skynet/LocalAI/tree/master/examples/slack-bot/) (by [@mudler](https://github.com/mudler))
 
 ## Want to contribute?
diff --git a/examples/rwkv/Dockerfile.build b/examples/rwkv/Dockerfile.build
new file mode 100644
index 0000000..c62024d
--- /dev/null
+++ b/examples/rwkv/Dockerfile.build
@@ -0,0 +1,10 @@
+FROM python
+
+# convert the model (one-off)
+RUN pip3 install torch numpy
+
+WORKDIR /build
+COPY ./scripts/ .
+
+RUN git clone --recurse-submodules https://github.com/saharNooby/rwkv.cpp && cd rwkv.cpp && cmake . && cmake --build . --config Release
+ENTRYPOINT [ "/build/build.sh" ]
\ No newline at end of file
diff --git a/examples/rwkv/README.md b/examples/rwkv/README.md
new file mode 100644
index 0000000..00ca570
--- /dev/null
+++ b/examples/rwkv/README.md
@@ -0,0 +1,59 @@
+# rwkv
+
+Example of how to run rwkv models.
+
+## Run models
+
+Setup:
+
+```bash
+# Clone LocalAI
+git clone https://github.com/go-skynet/LocalAI
+
+cd LocalAI/examples/rwkv
+
+# (optional) Checkout a specific LocalAI tag
+# git checkout -b build
+
+# build the tooling image to convert an rwkv model locally:
+docker build -t rwkv-converter -f Dockerfile.build .
+
+# download and convert a model (one-off) - it's going to be fast on CPU too!
+docker run -ti --name converter -v $PWD:/data rwkv-converter https://huggingface.co/BlinkDL/rwkv-4-raven/resolve/main/RWKV-4-Raven-1B5-v11-Eng99%25-Other1%25-20230425-ctx4096.pth /data/models/rwkv
+
+# Get the tokenizer
+wget https://raw.githubusercontent.com/saharNooby/rwkv.cpp/5eb8f09c146ea8124633ab041d9ea0b1f1db4459/rwkv/20B_tokenizer.json -O models/rwkv.tokenizer.json
+
+# start with docker-compose
+docker-compose up -d --build
+```
+
+Test it out:
+
+```bash
+curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
+     "model": "gpt-3.5-turbo",
+     "prompt": "A long time ago, in a galaxy far away",
+     "max_tokens": 100,
+     "temperature": 0.9, "top_p": 0.8, "top_k": 80
+   }'
+
+# {"object":"text_completion","model":"gpt-3.5-turbo","choices":[{"text":", there was a small group of five friends: Annie, Bryan, Charlie, Emily, and Jesse."}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}
+
+curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+     "model": "gpt-3.5-turbo",
+     "messages": [{"role": "user", "content": "How are you?"}],
+     "temperature": 0.9, "top_p": 0.8, "top_k": 80
+   }'
+
+# {"object":"chat.completion","model":"gpt-3.5-turbo","choices":[{"message":{"role":"assistant","content":" Good, thanks. I am about to go to bed. I' ll talk to you later.Bye."}}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}
+```
+
+### Fine tuning
+
+See [RWKV-LM](https://github.com/BlinkDL/RWKV-LM#training--fine-tuning). There is also a Google [colab](https://colab.research.google.com/github/resloved/RWKV-notebooks/blob/master/RWKV_v4_RNN_Pile_Fine_Tuning.ipynb).
+
+## See also
+
+- [RWKV-LM](https://github.com/BlinkDL/RWKV-LM)
+- [rwkv.cpp](https://github.com/saharNooby/rwkv.cpp)
\ No newline at end of file
diff --git a/examples/rwkv/docker-compose.yaml b/examples/rwkv/docker-compose.yaml
new file mode 100644
index 0000000..ed3eaec
--- /dev/null
+++ b/examples/rwkv/docker-compose.yaml
@@ -0,0 +1,16 @@
+version: '3.6'
+
+services:
+  api:
+    image: quay.io/go-skynet/local-ai:latest
+    build:
+      context: ../../
+      dockerfile: Dockerfile.dev
+    ports:
+      - 8080:8080
+    environment:
+      - DEBUG=true
+      - MODELS_PATH=/models
+    volumes:
+      - ./models:/models:cached
+    command: ["/usr/bin/local-ai" ]
diff --git a/examples/rwkv/models/gpt-3.5-turbo.yaml b/examples/rwkv/models/gpt-3.5-turbo.yaml
new file mode 100644
index 0000000..0193b72
--- /dev/null
+++ b/examples/rwkv/models/gpt-3.5-turbo.yaml
@@ -0,0 +1,19 @@
+name: gpt-3.5-turbo
+parameters:
+  model: rwkv
+  top_k: 80
+  temperature: 0.9
+  max_tokens: 100
+  top_p: 0.8
+context_size: 1024
+threads: 14
+backend: "rwkv"
+cutwords:
+- "Bob:.*"
+roles:
+  user: "Bob:"
+  system: "Alice:"
+  assistant: "Alice:"
+template:
+  completion: rwkv_completion
+  chat: rwkv_chat
\ No newline at end of file
diff --git a/examples/rwkv/models/rwkv_chat.tmpl b/examples/rwkv/models/rwkv_chat.tmpl
new file mode 100644
index 0000000..d2c0511
--- /dev/null
+++ b/examples/rwkv/models/rwkv_chat.tmpl
@@ -0,0 +1,13 @@
+The following is a verbose detailed conversation between Bob and a woman, Alice. Alice is intelligent, friendly and likeable. Alice is likely to agree with Bob.
+
+Bob: Hello Alice, how are you doing?
+
+Alice: Hi Bob! Thanks, I'm fine. What about you?
+
+Bob: I am very good! It's nice to see you. Would you mind me chatting with you for a while?
+
+Alice: Not at all! I'm listening.
+
+{{.Input}}
+
+Alice:
\ No newline at end of file
diff --git a/examples/rwkv/models/rwkv_completion.tmpl b/examples/rwkv/models/rwkv_completion.tmpl
new file mode 100644
index 0000000..8450377
--- /dev/null
+++ b/examples/rwkv/models/rwkv_completion.tmpl
@@ -0,0 +1 @@
+Complete the following sentence: {{.Input}}
\ No newline at end of file
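A note on the `api/prediction.go` hunk above: the rwkv path now feeds the actual prompt through `ProcessInput` (the old hardcoded call was left commented out) and propagates feed errors to the caller instead of generating from an unfed state. Below is a minimal, self-contained Go sketch of that call shape. The `rwkvModel` interface, the `infer` helper, and the "return true to keep generating" callback semantics are assumptions for illustration only; the real types come from the rwkv bindings and LocalAI's `model.ModelLoader`.

```go
package main

import (
	"fmt"
	"log"
)

// rwkvModel mirrors the two calls the diff relies on. This interface is
// hypothetical: the concrete type in LocalAI comes from the rwkv bindings,
// and its exact signatures may differ.
type rwkvModel interface {
	ProcessInput(prompt string) error
	GenerateResponse(maxTokens int, stopWord string, temperature, topP float32, cb func(token string) bool) string
}

// infer follows the shape of the new fn in the diff: feed the prompt first,
// surface any error, then sample until maxTokens or the stop word is hit.
func infer(m rwkvModel, prompt, stopWord string, maxTokens int, temperature, topP float32) (string, error) {
	if err := m.ProcessInput(prompt); err != nil {
		return "", err
	}
	resp := m.GenerateResponse(maxTokens, stopWord, temperature, topP, func(token string) bool {
		fmt.Print(token) // streaming hooks in here, token by token
		return true      // assumed to mean "continue generating"
	})
	return resp, nil
}

func main() {
	var m rwkvModel // in LocalAI this would come from model.ModelLoader
	if m == nil {
		log.Println("no model wired up; this harness is illustrative only")
		return
	}
	out, err := infer(m, "A long time ago, in a galaxy far away", "\n", 100, 0.9, 0.8)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(out)
}
```

Note the stop-word choice in the diff: it defaults to `"\n"` when no `StopWords` are configured, which fits the Bob:/Alice: turn structure of `rwkv_chat.tmpl`, since each speaker's turn ends at a newline.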