docs: fix langchain-chroma example (#298)

Signed-off-by: Tyler Gillson <tyler.gillson@gmail.com>
swagger2
Tyler Gillson 2 years ago committed by GitHub
parent 5a6d9d4e5b
commit 549a01b62e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 13
      .vscode/launch.json
  2. 5
      examples/langchain-chroma/.env.example
  3. 4
      examples/langchain-chroma/.gitignore
  4. 11
      examples/langchain-chroma/README.md
  5. 15
      examples/langchain-chroma/docker-compose.yml
  6. 1
      examples/langchain-chroma/models/embeddings.yaml
  7. 9
      examples/langchain-chroma/query.py
  8. 5
      examples/langchain-chroma/store.py
  9. 2
      examples/query_data/docker-compose.yml

@ -1,6 +1,19 @@
{ {
"version": "0.2.0", "version": "0.2.0",
"configurations": [ "configurations": [
{
  // Launches whichever Python file is open in the editor, with the
  // langchain-chroma example directory as the working directory.
  "name": "Python: Current File",
  "request": "launch",
  "type": "python",
  "program": "${file}",
  "cwd": "${workspaceFolder}/examples/langchain-chroma",
  "console": "integratedTerminal",
  "justMyCode": false,
  "env": {
    // Placeholder key; presumably the local endpoint does not validate it (confirm).
    "OPENAI_API_KEY": "abc",
    // Same URL the example scripts fall back to when this variable is unset.
    "OPENAI_API_BASE": "http://localhost:8080/v1"
  }
},
{ {
"name": "Launch Go", "name": "Launch Go",
"type": "go", "type": "go",

@ -0,0 +1,5 @@
# Presumably turns on verbose/debug output in the API (confirm).
DEBUG=true
# Path models are loaded from; docker-compose mounts ./models at this location.
MODELS_PATH=/models
# Presumably the model context window size (confirm).
CONTEXT_SIZE=512
# Must not exceed the number of CPU cores on the host machine.
THREADS=4
# BUILD_TYPE=generic

@ -0,0 +1,4 @@
# Model files downloaded with wget as described in the README
models/ggml-gpt4all-j
models/bert
# Source document loaded by store.py
state_of_the_union.txt
# Vector index directory created when store.py finishes
db/

@ -10,13 +10,20 @@ Download the models and start the API:
# Clone LocalAI # Clone LocalAI
git clone https://github.com/go-skynet/LocalAI git clone https://github.com/go-skynet/LocalAI
cd LocalAI/examples/query_data cd LocalAI/examples/langchain-chroma
wget https://huggingface.co/skeskinen/ggml/resolve/main/all-MiniLM-L6-v2/ggml-model-q4_0.bin -O models/bert wget https://huggingface.co/skeskinen/ggml/resolve/main/all-MiniLM-L6-v2/ggml-model-q4_0.bin -O models/bert
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
# configure your .env
# NOTE: ensure that THREADS does not exceed your machine's CPU cores
mv .env.example .env
# start with docker-compose # start with docker-compose
docker-compose up -d --build docker-compose up -d --build
# tail the logs & wait until the build completes
docker logs -f langchain-chroma-api-1
``` ```
### Python requirements ### Python requirements
@ -37,7 +44,7 @@ wget https://raw.githubusercontent.com/hwchase17/chat-your-data/master/state_of_
python store.py python store.py
``` ```
After it finishes, a directory "storage" will be created with the vector index database. After it finishes, a directory "db" will be created with the vector index database.
## Query ## Query

@ -0,0 +1,15 @@
# Compose file for the langchain-chroma example: runs the LocalAI API
# that the example scripts reach on port 8080.
version: '3.6'

services:
  api:
    image: quay.io/go-skynet/local-ai:latest
    # The Dockerfile lives at the repository root, two levels above this
    # example directory.
    build:
      context: ../../
      dockerfile: Dockerfile
    ports:
      # Quoted so the host:container mapping is always read as a string.
      - '8080:8080'
    # Read the .env created from .env.example in this directory (see the
    # README); the previous ../../.env path ignored that file.
    env_file:
      - .env
    volumes:
      # Mounted at /models to match MODELS_PATH in the env file.
      - ./models:/models:cached
    command: ["/usr/bin/local-ai"]

@ -1,5 +1,6 @@
name: text-embedding-ada-002 name: text-embedding-ada-002
parameters: parameters:
model: bert model: bert
threads: 4
backend: bert-embeddings backend: bert-embeddings
embeddings: true embeddings: true

@ -2,8 +2,9 @@
import os import os
from langchain.vectorstores import Chroma from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI from langchain.chat_models import ChatOpenAI
from langchain.chains import VectorDBQA from langchain.chains import RetrievalQA
from langchain.vectorstores.base import VectorStoreRetriever
base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1') base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
@ -12,8 +13,10 @@ embedding = OpenAIEmbeddings()
persist_directory = 'db' persist_directory = 'db'
# Now we can load the persisted database from disk, and use it as normal. # Now we can load the persisted database from disk, and use it as normal.
llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_base=base_path)
vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding) vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding)
qa = VectorDBQA.from_chain_type(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_base=base_path), chain_type="stuff", vectorstore=vectordb) retriever = VectorStoreRetriever(vectorstore=vectordb)
qa = RetrievalQA.from_llm(llm=llm, retriever=retriever)
query = "What the president said about taxes ?" query = "What the president said about taxes ?"
print(qa.run(query)) print(qa.run(query))

@ -2,9 +2,7 @@
import os import os
from langchain.vectorstores import Chroma from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter,TokenTextSplitter,CharacterTextSplitter from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import VectorDBQA
from langchain.document_loaders import TextLoader from langchain.document_loaders import TextLoader
base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1') base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
@ -14,7 +12,6 @@ loader = TextLoader('state_of_the_union.txt')
documents = loader.load() documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=300, chunk_overlap=70) text_splitter = CharacterTextSplitter(chunk_size=300, chunk_overlap=70)
#text_splitter = TokenTextSplitter()
texts = text_splitter.split_documents(documents) texts = text_splitter.split_documents(documents)
# Embed and store the texts # Embed and store the texts

@ -4,7 +4,7 @@ services:
api: api:
image: quay.io/go-skynet/local-ai:latest image: quay.io/go-skynet/local-ai:latest
build: build:
context: . context: ../../
dockerfile: Dockerfile dockerfile: Dockerfile
ports: ports:
- 8080:8080 - 8080:8080

Loading…
Cancel
Save