diff --git a/README.md b/README.md
index b178a2f..df532d5 100644
--- a/README.md
+++ b/README.md
@@ -70,6 +70,42 @@ curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d
}'
```
+### Example: Use the GPT4All-J model
+
+```bash
+# Clone LocalAI
+git clone https://github.com/go-skynet/LocalAI
+
+cd LocalAI
+
+# Download gpt4all-j to models/
+wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
+
+# Use a template from the examples
+cp prompt-templates/ggml-gpt4all-j.tmpl models/
+
+# (optional) Edit the .env file to set things like context size and threads
+# vim .env
+
+# start with docker-compose
+docker-compose up -d --build
+
+# Now the API is accessible at localhost:8080
+curl http://localhost:8080/v1/models
+# {"object":"list","data":[{"id":"ggml-gpt4all-j","object":"model"}]}
+
+curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+ "model": "ggml-gpt4all-j",
+ "messages": [{"role": "user", "content": "How are you?"}],
+ "temperature": 0.9
+ }'
+
+# {"model":"ggml-gpt4all-j","choices":[{"message":{"role":"assistant","content":"I'm doing well, thanks. How about you?"}}]}
+```
+
## Prompt templates
The API doesn't inject a default prompt for talking to the model. You have to use a prompt similar to what's described in the stanford-alpaca docs: https://github.com/tatsu-lab/stanford_alpaca#data-release.
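+
+Templates are plain text into which the user's input is spliced before being sent to the model. Below is a minimal sketch of creating one; the `{{.Input}}` placeholder is an assumption based on the Go text/template files in `prompt-templates/`, and `mymodel.bin` is a hypothetical model file whose template must share its basename:
+
+```bash
+# Sketch: an alpaca-style template for a hypothetical models/mymodel.bin.
+# Assumption: {{.Input}} is replaced with the prompt text of the incoming request.
+cat > models/mymodel.bin.tmpl <<'EOF'
+Below is an instruction that describes a task. Write a response that appropriately completes the request.
+
+### Instruction:
+{{.Input}}
+
+### Response:
+EOF
+```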
@@ -127,6 +163,7 @@ The API takes the following parameters:
| threads | THREADS | Number of Physical cores | The number of threads to use for text generation. |
| address | ADDRESS | :8080 | The address and port to listen on. |
| context-size | CONTEXT_SIZE | 512 | Default token context size. |
+| debug | DEBUG | false | Enable debug mode. |
Once the server is running, you can start making requests to it over HTTP, following the OpenAI API.
@@ -136,10 +173,16 @@ Once the server is running, you can start making requests to it using HTTP, usin
You can check out the [OpenAI API reference](https://platform.openai.com/docs/api-reference/chat/create).
-Following the list of endpoints/parameters supported.
+Below is the list of supported endpoints and parameters.
+
+Note:
+
+- You can also specify the model as part of the OpenAI token (see the example below).
+- If only one model is available, the API will use it for all requests.
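+
+For example (a minimal sketch, reusing the `ggml-gpt4all-j` model set up above), the model name can be passed as the bearer token instead of in the request body:
+
+```
+curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" \
+  -H "Authorization: Bearer ggml-gpt4all-j" \
+  -d '{"messages": [{"role": "user", "content": "How are you?"}]}'
+```
+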
#### Chat completions
+
For example, to generate a chat completion, you can send a POST request to the `/v1/chat/completions` endpoint with the instruction as the request body:
```
@@ -151,10 +194,12 @@ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/jso
```
Available additional parameters: `top_p`, `top_k`, `max_tokens`
+
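+For instance, a chat request with all three set (the sampling values here are illustrative):
+
+```
+curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+     "model": "ggml-gpt4all-j",
+     "messages": [{"role": "user", "content": "How are you?"}],
+     "top_p": 0.9,
+     "top_k": 40,
+     "max_tokens": 100
+   }'
+```
+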
#### Completions
-For example, to generate a comletion, you can send a POST request to the `/v1/completions` endpoint with the instruction as the request body:
+
+For example, to generate a completion, you can send a POST request to the `/v1/completions` endpoint with the instruction as the request body:
```
curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
"model": "ggml-koala-7b-model-q4_0-r2.bin",
@@ -165,14 +210,19 @@ curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d
Available additional parameters: `top_p`, `top_k`, `max_tokens`
+
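+They behave the same as in the chat endpoint; for instance (the prompt and sampling values here are illustrative):
+
+```
+curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
+     "model": "ggml-koala-7b-model-q4_0-r2.bin",
+     "prompt": "A long time ago in a galaxy far, far away",
+     "top_p": 0.9,
+     "top_k": 40,
+     "max_tokens": 100
+   }'
+```
+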
#### List models
+
You can list all the models available with:
```
curl http://localhost:8080/v1/models
```
+
## Using other models
gpt4all (https://github.com/nomic-ai/gpt4all) works as well, however the original model needs to be converted (same applies for old alpaca models, too):
diff --git a/api/api.go b/api/api.go
index 05b0af2..5c401ad 100644
--- a/api/api.go
+++ b/api/api.go
@@ -18,6 +18,18 @@ import (
"github.com/rs/zerolog/log"
)
+// APIError provides error information returned by the OpenAI API.
+type APIError struct {
+ Code any `json:"code,omitempty"`
+ Message string `json:"message"`
+ Param *string `json:"param,omitempty"`
+ Type string `json:"type"`
+}
+
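+// ErrorResponse is the JSON body returned to clients on error, following the OpenAI error format.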
+type ErrorResponse struct {
+ Error *APIError `json:"error,omitempty"`
+}
+
type OpenAIResponse struct {
Created int `json:"created,omitempty"`
Object string `json:"chat.completion,omitempty"`
@@ -395,9 +407,11 @@ func App(loader *model.ModelLoader, threads, ctxSize int, f16 bool, debug, disab
}
// Send custom error page
- return ctx.Status(code).JSON(struct {
- Error string `json:"error"`
- }{Error: err.Error()})
+ return ctx.Status(code).JSON(
+ ErrorResponse{
+ Error: &APIError{Message: err.Error(), Code: code},
+ },
+ )
},
})
diff --git a/api/api_test.go b/api/api_test.go
index 7b5baab..53d1516 100644
--- a/api/api_test.go
+++ b/api/api_test.go
@@ -49,5 +49,10 @@ var _ = Describe("API test", func() {
Expect(len(resp.Choices)).To(Equal(1))
Expect(resp.Choices[0].Text).ToNot(BeEmpty())
})
+ It("returns errors", func() {
+ _, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: "abcdedfghikl"})
+ Expect(err).To(HaveOccurred())
+ Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: llama: model does not exist"))
+ })
})
})