chore: linting
app/app.py  (+18 -28)
--- a/app/app.py
+++ b/app/app.py
@@ -1,8 +1,9 @@
 # Chroma compatibility issue resolution
 # https://docs.trychroma.com/troubleshooting#sqlite
-__import__(
+__import__("pysqlite3")
 import sys
-
+
+sys.modules["sqlite3"] = sys.modules.pop("pysqlite3")
 
 from tempfile import NamedTemporaryFile
 from typing import List
@@ -11,7 +12,7 @@ import chainlit as cl
 from chainlit.types import AskFileResponse
 import chromadb
 from chromadb.config import Settings
-from langchain.chains import
+from langchain.chains import RetrievalQAWithSourcesChain
 from langchain.chat_models import ChatOpenAI
 from langchain.document_loaders import PDFPlumberLoader
 from langchain.embeddings.openai import OpenAIEmbeddings
@@ -31,7 +32,7 @@ def process_file(*, file: AskFileResponse) -> List[Document]:
 
     Args:
         file (AskFileResponse): input file to be processed
-
+
     Raises:
         ValueError: when we fail to process PDF files. We consider PDF file
            processing failure when there's no text returned. For example, PDFs
@@ -51,8 +52,7 @@ def process_file(*, file: AskFileResponse) -> List[Document]:
     documents = loader.load()
 
     text_splitter = RecursiveCharacterTextSplitter(
-        chunk_size=3000,
-        chunk_overlap=100
+        chunk_size=3000, chunk_overlap=100
     )
     docs = text_splitter.split_documents(documents)
 
@@ -66,7 +66,9 @@ def process_file(*, file: AskFileResponse) -> List[Document]:
     return docs
 
 
-def create_search_engine(*, docs: List[Document], embeddings: Embeddings) -> VectorStore:
+def create_search_engine(
+    *, docs: List[Document], embeddings: Embeddings
+) -> VectorStore:
     """Takes a list of Langchain Documents and an embedding model API wrapper
     and build a search index using a VectorStore.
 
@@ -80,27 +82,21 @@ def create_search_engine(*, docs: List[Document], embeddings: Embeddings) -> VectorStore:
     """
     # Initialize Chromadb client to enable resetting and disable telemtry
     client = chromadb.EphemeralClient()
-    client_settings=Settings(
-        allow_reset=True,
-        anonymized_telemetry=False
-    )
+    client_settings = Settings(allow_reset=True, anonymized_telemetry=False)
 
     # Reset the search engine to ensure we don't use old copies.
     # NOTE: we do not need this for production
-    search_engine = Chroma(
-        client=client,
-        client_settings=client_settings
-    )
+    search_engine = Chroma(client=client, client_settings=client_settings)
     search_engine._client.reset()
     search_engine = Chroma.from_documents(
         client=client,
         documents=docs,
         embedding=embeddings,
-        client_settings=client_settings
+        client_settings=client_settings,
     )
 
     return search_engine
-
+
 
 @cl.on_chat_start
 async def on_chat_start():
@@ -123,20 +119,17 @@ async def on_chat_start():
     # Process and save data in the user session
     msg = cl.Message(content=f"Processing `{file.name}`...")
     await msg.send()
-
+
     docs = process_file(file=file)
     cl.user_session.set("docs", docs)
     msg.content = f"`{file.name}` processed. Loading ..."
     await msg.update()
 
     # Indexing documents into our search engine
-    embeddings = OpenAIEmbeddings(
-        model="text-embedding-ada-002"
-    )
+    embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")
     try:
         search_engine = await cl.make_async(create_search_engine)(
-            docs=docs,
-            embeddings=embeddings
+            docs=docs, embeddings=embeddings
         )
     except Exception as e:
         await cl.Message(content=f"Error: {e}").send()
@@ -145,9 +138,7 @@ async def on_chat_start():
     await msg.update()
 
     model = ChatOpenAI(
-        model="gpt-3.5-turbo-16k-0613",
-        temperature=0,
-        streaming=True
+        model="gpt-3.5-turbo-16k-0613", temperature=0, streaming=True
     )
 
     chain = RetrievalQAWithSourcesChain.from_chain_type(
@@ -164,13 +155,12 @@ async def on_chat_start():
 
 @cl.on_message
 async def main(message: cl.Message):
-
     # Let's load the chain from user_session
     chain = cl.user_session.get("chain")  # type: RetrievalQAWithSourcesChain
 
     response = await chain.acall(
         message.content,
-        callbacks=[cl.AsyncLangchainCallbackHandler(stream_final_answer=True)]
+        callbacks=[cl.AsyncLangchainCallbackHandler(stream_final_answer=True)],
     )
     answer = response["answer"]
     sources = response["sources"].strip()
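
Note on the reformatted signature: create_search_engine is keyword-only (the leading * in its parameter list), so both arguments must be passed by name. A minimal usage sketch, assuming it runs inside this module (app/app.py) with a valid OPENAI_API_KEY set and the pre-0.1 LangChain import layout used by this file; the sample Document is a made-up stand-in for the chunks that process_file() returns:

    from langchain.docstore.document import Document
    from langchain.embeddings.openai import OpenAIEmbeddings

    # Hypothetical stand-in for the chunks produced by process_file(file=...).
    docs = [
        Document(
            page_content="Example chunk of PDF text.",
            metadata={"source": "example.pdf"},
        )
    ]
    embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")

    # Both arguments are keyword-only; a positional call raises a TypeError.
    search_engine = create_search_engine(docs=docs, embeddings=embeddings)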