From fdf75c6d0e33bc7cf1a377e52e25b09be34509d1 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@users.noreply.github.com>
Date: Thu, 4 May 2023 17:32:23 +0200
Subject: [PATCH] rwkv fixes and examples (#185)

---
 api/openai.go                             |  2 +
 api/prediction.go                         |  5 +-
 examples/README.md                        |  1 +
 examples/rwkv/Dockerfile.build            | 10 ++++
 examples/rwkv/README.md                   | 59 +++++++++++++++++++++++
 examples/rwkv/docker-compose.yaml         | 16 ++++++
 examples/rwkv/models/gpt-3.5-turbo.yaml   | 19 ++++++++
 examples/rwkv/models/rwkv_chat.tmpl       | 13 +++++
 examples/rwkv/models/rwkv_completion.tmpl |  1 +
 9 files changed, 125 insertions(+), 1 deletion(-)
 create mode 100644 examples/rwkv/Dockerfile.build
 create mode 100644 examples/rwkv/README.md
 create mode 100644 examples/rwkv/docker-compose.yaml
 create mode 100644 examples/rwkv/models/gpt-3.5-turbo.yaml
 create mode 100644 examples/rwkv/models/rwkv_chat.tmpl
 create mode 100644 examples/rwkv/models/rwkv_completion.tmpl
diff --git a/api/openai.go b/api/openai.go
index 08e6373..1afbb06 100644
--- a/api/openai.go
+++ b/api/openai.go
@@ -402,6 +402,8 @@ func chatEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, thread
 			Choices: result,
 			Object:  "chat.completion",
 		}
+		respData, _ := json.Marshal(resp)
+		log.Debug().Msgf("Response: %s", respData)
 
 		// Return the prediction in the response body
 		return c.JSON(resp)
diff --git a/api/prediction.go b/api/prediction.go
index 127a957..1fbb57b 100644
--- a/api/prediction.go
+++ b/api/prediction.go
@@ -129,12 +129,15 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback
 		supportStreams = true
 
 		fn = func() (string, error) {
-			//model.ProcessInput("You are a chatbot that is very good at chatting.  blah blah blah")
 			stopWord := "\n"
 			if len(c.StopWords) > 0 {
 				stopWord = c.StopWords[0]
 			}
 
+			if err := model.ProcessInput(s); err != nil {
+				return "", err
+			}
+
 			response := model.GenerateResponse(c.Maxtokens, stopWord, float32(c.Temperature), float32(c.TopP), tokenCallback)
 
 			return response, nil
diff --git a/examples/README.md b/examples/README.md
index d7fc24f..7c93955 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -8,6 +8,7 @@ Here is a list of projects that can easily be integrated with the LocalAI backen
 - [discord-bot](https://github.com/go-skynet/LocalAI/tree/master/examples/discord-bot/) (by [@mudler](https://github.com/mudler))
 - [langchain](https://github.com/go-skynet/LocalAI/tree/master/examples/langchain/) (by [@dave-gray101](https://github.com/dave-gray101))
 - [langchain-python](https://github.com/go-skynet/LocalAI/tree/master/examples/langchain-python/) (by [@mudler](https://github.com/mudler))
+- [rwkv](https://github.com/go-skynet/LocalAI/tree/master/examples/rwkv/) (by [@mudler](https://github.com/mudler))
 - [slack-bot](https://github.com/go-skynet/LocalAI/tree/master/examples/slack-bot/) (by [@mudler](https://github.com/mudler))
 
 ## Want to contribute?
diff --git a/examples/rwkv/Dockerfile.build b/examples/rwkv/Dockerfile.build
new file mode 100644
index 0000000..c62024d
--- /dev/null
+++ b/examples/rwkv/Dockerfile.build
@@ -0,0 +1,10 @@
+FROM python
+
+# install the Python dependencies used by the (one-off) model conversion
+RUN pip3 install torch numpy
+
+WORKDIR /build
+COPY ./scripts/ .
+
+RUN git clone --recurse-submodules https://github.com/saharNooby/rwkv.cpp && cd rwkv.cpp && cmake . && cmake --build . --config Release
+ENTRYPOINT [ "/build/build.sh" ]
\ No newline at end of file
diff --git a/examples/rwkv/README.md b/examples/rwkv/README.md
new file mode 100644
index 0000000..00ca570
--- /dev/null
+++ b/examples/rwkv/README.md
@@ -0,0 +1,59 @@
+# rwkv
+
+Example of how to convert an RWKV model and run it with LocalAI.
+
+## Run models
+
+Setup:
+
+```bash
+# Clone LocalAI
+git clone https://github.com/go-skynet/LocalAI
+
+cd LocalAI/examples/rwkv
+
+# (optional) Checkout a specific LocalAI tag
+# git checkout -b build <TAG>
+
+# build the tooling image to convert an rwkv model locally:
+docker build -t rwkv-converter -f Dockerfile.build .
+
+# download and convert a model (one-off) - it's going to be fast on CPU too!
+docker run -ti --name converter -v $PWD:/data rwkv-converter https://huggingface.co/BlinkDL/rwkv-4-raven/resolve/main/RWKV-4-Raven-1B5-v11-Eng99%25-Other1%25-20230425-ctx4096.pth /data/models/rwkv
+
+# Get the tokenizer
+wget https://raw.githubusercontent.com/saharNooby/rwkv.cpp/5eb8f09c146ea8124633ab041d9ea0b1f1db4459/rwkv/20B_tokenizer.json -O models/rwkv.tokenizer.json
+
+# start with docker-compose
+docker-compose up -d --build
+```
+
+Test it out:
+
+```bash
+curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
+    "model": "gpt-3.5-turbo",
+    "prompt": "A long time ago, in a galaxy far away",
+    "max_tokens": 100,
+    "temperature": 0.9, "top_p": 0.8, "top_k": 80
+  }'
+
+# {"object":"text_completion","model":"gpt-3.5-turbo","choices":[{"text":", there was a small group of five friends: Annie, Bryan, Charlie, Emily, and Jesse."}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}
+
+curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+     "model": "gpt-3.5-turbo",            
+     "messages": [{"role": "user", "content": "How are you?"}],
+     "temperature": 0.9, "top_p": 0.8, "top_k": 80
+   }'
+
+# {"object":"chat.completion","model":"gpt-3.5-turbo","choices":[{"message":{"role":"assistant","content":" Good, thanks. I am about to go to bed. I' ll talk to you later.Bye."}}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}
+```
+
+### Fine tuning
+
+See [RWKV-LM](https://github.com/BlinkDL/RWKV-LM#training--fine-tuning). There is also a Google Colab [notebook](https://colab.research.google.com/github/resloved/RWKV-notebooks/blob/master/RWKV_v4_RNN_Pile_Fine_Tuning.ipynb) for fine-tuning.
+
+## See also
+
+- [RWKV-LM](https://github.com/BlinkDL/RWKV-LM)
+- [rwkv.cpp](https://github.com/saharNooby/rwkv.cpp)
\ No newline at end of file
diff --git a/examples/rwkv/docker-compose.yaml b/examples/rwkv/docker-compose.yaml
new file mode 100644
index 0000000..ed3eaec
--- /dev/null
+++ b/examples/rwkv/docker-compose.yaml
@@ -0,0 +1,16 @@
+version: '3.6'
+
+services:
+  api:
+    image: quay.io/go-skynet/local-ai:latest
+    build:
+      context: ../../
+      dockerfile: Dockerfile.dev
+    ports:
+      - 8080:8080
+    environment:
+      - DEBUG=true
+      - MODELS_PATH=/models
+    volumes:
+      - ./models:/models:cached
+    command: ["/usr/bin/local-ai" ]
diff --git a/examples/rwkv/models/gpt-3.5-turbo.yaml b/examples/rwkv/models/gpt-3.5-turbo.yaml
new file mode 100644
index 0000000..0193b72
--- /dev/null
+++ b/examples/rwkv/models/gpt-3.5-turbo.yaml
@@ -0,0 +1,19 @@
+name: gpt-3.5-turbo
+parameters:
+  model: rwkv
+  top_k: 80
+  temperature: 0.9
+  max_tokens: 100
+  top_p: 0.8
+context_size: 1024
+threads: 14
+backend: "rwkv"
+cutwords:
+- "Bob:.*"
+roles:
+  user: "Bob:"
+  system: "Alice:"
+  assistant: "Alice:"
+template:
+  completion: rwkv_completion
+  chat: rwkv_chat
\ No newline at end of file
diff --git a/examples/rwkv/models/rwkv_chat.tmpl b/examples/rwkv/models/rwkv_chat.tmpl
new file mode 100644
index 0000000..d2c0511
--- /dev/null
+++ b/examples/rwkv/models/rwkv_chat.tmpl
@@ -0,0 +1,13 @@
+The following is a verbose detailed conversation between Bob and a woman, Alice. Alice is intelligent, friendly and likeable. Alice is likely to agree with Bob.
+
+Bob: Hello Alice, how are you doing?
+
+Alice: Hi Bob! Thanks, I'm fine. What about you?
+
+Bob: I am very good! It's nice to see you. Would you mind me chatting with you for a while?
+
+Alice: Not at all! I'm listening.
+
+{{.Input}}
+
+Alice: 
\ No newline at end of file
diff --git a/examples/rwkv/models/rwkv_completion.tmpl b/examples/rwkv/models/rwkv_completion.tmpl
new file mode 100644
index 0000000..8450377
--- /dev/null
+++ b/examples/rwkv/models/rwkv_completion.tmpl
@@ -0,0 +1 @@
+Complete the following sentence: {{.Input}} 
\ No newline at end of file