feat: 02_10
app/app.py  CHANGED  (+64 -21)
@@ -12,12 +12,11 @@ from chainlit.types import AskFileResponse
 
 import chromadb
 from chromadb.config import Settings
-from langchain.chains import LLMChain
+from langchain.chains import LLMChain, RetrievalQAWithSourcesChain
 from langchain.chat_models import ChatOpenAI
 from langchain.document_loaders import PDFPlumberLoader
 from langchain.embeddings.openai import OpenAIEmbeddings
-from langchain.
-from langchain.schema import Document, StrOutputParser
+from langchain.schema import Document
 from langchain.schema.embeddings import Embeddings
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.vectorstores import Chroma
@@ -144,24 +143,28 @@ async def on_chat_start():
     msg.content = f"`{file.name}` loaded. You can now ask questions!"
     await msg.update()
 
+    ##########################################################################
+    # Exercise 1a:
+    # Now we have search engine setup, our Chat with PDF application can do
+    # RAG architecture pattern. Please use the appropriate RetrievalQA Chain
+    # from Langchain.
+    #
+    # Remember, we would want to set the model temperature to
+    # 0 to ensure model outputs do not vary across runs, and we would want to
+    # also return sources to our answers.
+    ##########################################################################
     model = ChatOpenAI(
         model="gpt-3.5-turbo-16k-0613",
+        temperature=0,
         streaming=True
     )
 
-
-
-
-
-                "You are Chainlit GPT, a helpful assistant.",
-            ),
-            (
-                "human",
-                "{question}"
-            ),
-        ]
+    chain = RetrievalQAWithSourcesChain.from_chain_type(
+        llm=model,
+        chain_type="stuff",
+        retriever=search_engine.as_retriever(max_tokens_limit=4097),
     )
-
+    ##########################################################################
 
     # We are saving the chain in user_session, so we do not have to rebuild
     # it every single time.
@@ -172,11 +175,51 @@ async def on_chat_start():
 async def main(message: cl.Message):
 
     # Let's load the chain from user_session
-    chain = cl.user_session.get("chain")  # type:
+    chain = cl.user_session.get("chain")  # type: RetrievalQAWithSourcesChain
 
-    response = await chain.
-
+    response = await chain.acall(
+        message.content,
+        callbacks=[cl.AsyncLangchainCallbackHandler(stream_final_answer=True)]
     )
-
-
-
+    answer = response["answer"]
+    sources = response["sources"].strip()
+    source_elements = []
+
+    # Get the documents from the user session
+    docs = cl.user_session.get("docs")
+    metadatas = [doc.metadata for doc in docs]
+    all_sources = [m["source"] for m in metadatas]
+
+    ##########################################################################
+    # Exercise 1a:
+    # Now we have search engine setup, our Chat with PDF application can do
+    # RAG architecture pattern. Please use the appropriate RetrievalQA Chain
+    # from Langchain.
+    #
+    # Remember, we would want to set the model temperature to
+    # 0 to ensure model outputs do not vary across runs, and we would want to
+    # also return sources to our answers.
+    ##########################################################################
+    # Adding sources to the answer
+    if sources:
+        found_sources = []
+
+        # Add the sources to the message
+        for source in sources.split(","):
+            source_name = source.strip().replace(".", "")
+            # Get the index of the source
+            try:
+                index = all_sources.index(source_name)
+            except ValueError:
+                continue
+            text = docs[index].page_content
+            found_sources.append(source_name)
+            # Create the text element referenced in the message
+            source_elements.append(cl.Text(content=text, name=source_name))
+
+        if found_sources:
+            answer += f"\nSources: {', '.join(found_sources)}"
+        else:
+            answer += "\nNo sources found"
+
+    await cl.Message(content=answer, elements=source_elements).send()