From ad301e6ed724123fb5c4b1c86d9499f66f5a690f Mon Sep 17 00:00:00 2001
From: mudler
Date: Fri, 5 May 2023 21:56:31 +0200
Subject: [PATCH] example(add): document query example

---
 examples/query_data/.gitignore                |  1 +
 examples/query_data/README.md                 | 49 +++++++++++++++++++
 examples/query_data/data/.keep                |  0
 examples/query_data/docker-compose.yml        | 15 ++++++
 examples/query_data/models/completion.tmpl    |  1 +
 examples/query_data/models/embeddings.yaml    | 18 +++++++
 examples/query_data/models/gpt-3.5-turbo.yaml | 18 +++++++
 examples/query_data/models/wizardlm.tmpl      |  3 ++
 examples/query_data/query.py                  | 32 ++++++++++++
 examples/query_data/store.py                  | 25 ++++++++++
 10 files changed, 162 insertions(+)
 create mode 100644 examples/query_data/.gitignore
 create mode 100644 examples/query_data/README.md
 create mode 100644 examples/query_data/data/.keep
 create mode 100644 examples/query_data/docker-compose.yml
 create mode 100644 examples/query_data/models/completion.tmpl
 create mode 100644 examples/query_data/models/embeddings.yaml
 create mode 100644 examples/query_data/models/gpt-3.5-turbo.yaml
 create mode 100644 examples/query_data/models/wizardlm.tmpl
 create mode 100644 examples/query_data/query.py
 create mode 100644 examples/query_data/store.py

diff --git a/examples/query_data/.gitignore b/examples/query_data/.gitignore
new file mode 100644
index 0000000..29ea9d5
--- /dev/null
+++ b/examples/query_data/.gitignore
@@ -0,0 +1 @@
+storage/
\ No newline at end of file
diff --git a/examples/query_data/README.md b/examples/query_data/README.md
new file mode 100644
index 0000000..fb32442
--- /dev/null
+++ b/examples/query_data/README.md
@@ -0,0 +1,49 @@
+# Data query example
+
+This example makes use of [Llama-Index](https://gpt-index.readthedocs.io/en/stable/getting_started/installation.html) to enable question answering on a set of documents.
+
+It loosely follows [the quickstart](https://gpt-index.readthedocs.io/en/stable/guides/primer/usage_pattern.html).
+
+## Requirements
+
+For this example to work, you will need a model compatible with the `llama.cpp` backend. Note that it will not work with gpt4all models.
+
+The example uses `WizardLM`. Edit the config files in `models/` to point at the model you use (replace `HERE` with the model file name).
+
+You will also need a set of documents to index. Copy them into the `data` directory.
+
+## Setup
+
+Start the API:
+
+```bash
+# Clone LocalAI
+git clone https://github.com/go-skynet/LocalAI
+
+cd LocalAI/examples/query_data
+
+# Copy your models, edit config files accordingly
+
+# start with docker-compose
+docker-compose up -d --build
+```
+
+### Create the vector index
+
+```bash
+export OPENAI_API_BASE=http://localhost:8080/v1
+export OPENAI_API_KEY=sk-
+
+python store.py
+```
+
+Once it finishes, a `storage/` directory will be created containing the vector index.
+
+## Query
+
+```bash
+export OPENAI_API_BASE=http://localhost:8080/v1
+export OPENAI_API_KEY=sk-
+
+python query.py
+```
\ No newline at end of file
diff --git a/examples/query_data/data/.keep b/examples/query_data/data/.keep
new file mode 100644
index 0000000..e69de29
diff --git a/examples/query_data/docker-compose.yml b/examples/query_data/docker-compose.yml
new file mode 100644
index 0000000..a59edfc
--- /dev/null
+++ b/examples/query_data/docker-compose.yml
@@ -0,0 +1,15 @@
+version: '3.6'
+
+services:
+  api:
+    image: quay.io/go-skynet/local-ai:latest
+    build:
+      context: .
+      dockerfile: Dockerfile
+    ports:
+      - 8080:8080
+    env_file:
+      - .env
+    volumes:
+      - ./models:/models:cached
+    command: ["/usr/bin/local-ai"]
diff --git a/examples/query_data/models/completion.tmpl b/examples/query_data/models/completion.tmpl
new file mode 100644
index 0000000..9867cfc
--- /dev/null
+++ b/examples/query_data/models/completion.tmpl
@@ -0,0 +1 @@
+{{.Input}}
\ No newline at end of file
diff --git a/examples/query_data/models/embeddings.yaml b/examples/query_data/models/embeddings.yaml
new file mode 100644
index 0000000..2173975
--- /dev/null
+++ b/examples/query_data/models/embeddings.yaml
@@ -0,0 +1,18 @@
+name: text-embedding-ada-002
+parameters:
+  model: HERE
+  top_k: 80
+  temperature: 0.2
+  top_p: 0.7
+context_size: 1024
+threads: 14
+stopwords:
+- "HUMAN:"
+- "GPT:"
+roles:
+  user: " "
+  system: " "
+embeddings: true
+template:
+  completion: completion
+  chat: gpt4all
diff --git a/examples/query_data/models/gpt-3.5-turbo.yaml b/examples/query_data/models/gpt-3.5-turbo.yaml
new file mode 100644
index 0000000..9cdb4a2
--- /dev/null
+++ b/examples/query_data/models/gpt-3.5-turbo.yaml
@@ -0,0 +1,18 @@
+name: gpt-3.5-turbo
+parameters:
+  model: HERE
+  top_k: 80
+  temperature: 0.2
+  top_p: 0.7
+context_size: 1024
+threads: 14
+embeddings: true
+stopwords:
+- "HUMAN:"
+- "GPT:"
+roles:
+  user: " "
+  system: " "
+template:
+  completion: completion
+  chat: wizardlm
diff --git a/examples/query_data/models/wizardlm.tmpl b/examples/query_data/models/wizardlm.tmpl
new file mode 100644
index 0000000..e7b1985
--- /dev/null
+++ b/examples/query_data/models/wizardlm.tmpl
@@ -0,0 +1,3 @@
+{{.Input}}
+
+### Response:
\ No newline at end of file
diff --git a/examples/query_data/query.py b/examples/query_data/query.py
new file mode 100644
index 0000000..05a288f
--- /dev/null
+++ b/examples/query_data/query.py
@@ -0,0 +1,32 @@
+import os
+
+# Uncomment to specify your OpenAI API key here (local testing only, not in production!), or add corresponding environment variable (recommended)
+# os.environ['OPENAI_API_KEY']= ""
+
+from llama_index import LLMPredictor, PromptHelper, ServiceContext
+from langchain.llms.openai import OpenAI
+from llama_index import StorageContext, load_index_from_storage
+
+
+# Point the LLM at the gpt-3.5-turbo model served by LocalAI; change model_name if desired
+llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_base="http://localhost:8080/v1"))
+
+# Configure prompt parameters and initialise helper
+max_input_size = 1024
+num_output = 256
+max_chunk_overlap = 20
+
+prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)
+
+# Build the service context with the local LLM and prompt helper
+service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper)
+
+# Rebuild the storage context from the persisted index
+storage_context = StorageContext.from_defaults(persist_dir='./storage')
+
+# Load the index
+index = load_index_from_storage(storage_context, service_context=service_context)
+
+query_engine = index.as_query_engine()
+response = query_engine.query("XXXXXX your question here XXXXX")
+print(response)
\ No newline at end of file
diff --git a/examples/query_data/store.py b/examples/query_data/store.py
new file mode 100644
index 0000000..56d83bb
--- /dev/null
+++ b/examples/query_data/store.py
@@ -0,0 +1,25 @@
+import os
+
+# Uncomment to specify your OpenAI API key here (local testing only, not in production!), or add corresponding environment variable (recommended)
+# os.environ['OPENAI_API_KEY']= ""
+
+from llama_index import GPTVectorStoreIndex, SimpleDirectoryReader, LLMPredictor, PromptHelper, ServiceContext
+from langchain.llms.openai import OpenAI
+from llama_index import StorageContext, load_index_from_storage
+
+# Point the LLM at the gpt-3.5-turbo model served by LocalAI; change model_name if desired
+llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_base="http://localhost:8080/v1"))
+
+# Configure prompt parameters and initialise helper
+max_input_size = 256
+num_output = 256
+max_chunk_overlap = 10
+
+prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)
+
+# Load documents from the 'data' directory, build the vector index and persist it to ./storage
+documents = SimpleDirectoryReader('data').load_data()
+service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper, chunk_size_limit=257)
+index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context)
+index.storage_context.persist(persist_dir="./storage")
+
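
As a companion note to the example added above (not part of the patch itself): before running `store.py`, it can be useful to confirm that the LocalAI container actually answers on the two OpenAI-compatible endpoints the scripts rely on, `/v1/embeddings` while building the index and `/v1/chat/completions` when answering queries. The snippet below is a minimal sanity-check sketch, assuming the docker-compose setup above is listening on `localhost:8080` and that the model names match the `name:` fields in `models/embeddings.yaml` and `models/gpt-3.5-turbo.yaml`; the prompt text, timeouts, and the dummy API key are illustrative only.

```python
# sanity_check.py - quick probe of the LocalAI endpoints this example uses.
# Assumes LocalAI is running via the docker-compose file above on localhost:8080.
import requests

BASE_URL = "http://localhost:8080/v1"
HEADERS = {"Authorization": "Bearer sk-"}  # dummy key, matching the README

# 1. Embeddings endpoint, used by store.py while indexing the documents in data/.
emb = requests.post(
    f"{BASE_URL}/embeddings",
    headers=HEADERS,
    json={"model": "text-embedding-ada-002", "input": "hello world"},
    timeout=120,
)
emb.raise_for_status()
print("embedding length:", len(emb.json()["data"][0]["embedding"]))

# 2. Chat completions endpoint, used by query.py when answering questions.
chat = requests.post(
    f"{BASE_URL}/chat/completions",
    headers=HEADERS,
    json={
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": "Say hello"}],
    },
    timeout=120,
)
chat.raise_for_status()
print("chat reply:", chat.json()["choices"][0]["message"]["content"])
```

If both calls succeed, the model file referenced by `HERE` in each YAML config is loading correctly, and `store.py` / `query.py` should be able to reach the API.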