You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
31 lines
1.1 KiB
31 lines
1.1 KiB
|
|
import os
|
|
from langchain.vectorstores import Chroma
|
|
from langchain.embeddings import OpenAIEmbeddings
|
|
from langchain.text_splitter import RecursiveCharacterTextSplitter,CharacterTextSplitter
|
|
from langchain.llms import OpenAI
|
|
from langchain.chains import VectorDBQA
|
|
from langchain.document_loaders import TextLoader
|
|
|
|
# Base URL of the OpenAI-compatible API. Falls back to a local server when
# the OPENAI_API_BASE environment variable is not set.
base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')

# Directory holding the Chroma index that was previously persisted to disk.
# This script only queries that index, so the source text is not loaded or
# split here: the chunks produced by that step were never used, and loading
# made the query fail whenever the text file was absent.
persist_directory = 'db'

# Send embedding requests to the same endpoint as the LLM. Constructed with
# no arguments, the embeddings client is never given the localhost fallback
# chosen above, so the two clients could end up talking to different servers.
embedding = OpenAIEmbeddings(openai_api_base=base_path)

# Now we can load the persisted database from disk, and use it as normal.
vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding)

# Question-answering chain over the vector store, using the "stuff" chain type.
qa = VectorDBQA.from_chain_type(
    llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_base=base_path),
    chain_type="stuff",
    vectorstore=vectordb,
)

query = "What the president said about taxes ?"
print(qa.run(query))
|
|
|
|
|