HanLee committed
Commit 64fa517 · 1 Parent(s): fd20dfc

feat: 02_10

Files changed (1)
  1. app/app.py  +64 -21
app/app.py CHANGED
@@ -12,12 +12,11 @@ from chainlit.types import AskFileResponse
 
 import chromadb
 from chromadb.config import Settings
-from langchain.chains import LLMChain
+from langchain.chains import LLMChain, RetrievalQAWithSourcesChain
 from langchain.chat_models import ChatOpenAI
 from langchain.document_loaders import PDFPlumberLoader
 from langchain.embeddings.openai import OpenAIEmbeddings
-from langchain.prompts import ChatPromptTemplate
-from langchain.schema import Document, StrOutputParser
+from langchain.schema import Document
 from langchain.schema.embeddings import Embeddings
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.vectorstores import Chroma
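For orientation (not part of this commit): the new RetrievalQAWithSourcesChain needs a retriever, and the hunks below assume a `search_engine` vector store that is built elsewhere in app.py. Below is a minimal sketch of how such a store is typically assembled from the imports above; the helper name, file path, and chunk sizes are made-up placeholders.

from langchain.document_loaders import PDFPlumberLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

def build_search_engine(pdf_path: str) -> Chroma:
    # Load the PDF and split it into overlapping chunks for retrieval.
    docs = PDFPlumberLoader(pdf_path).load()
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    chunks = splitter.split_documents(docs)
    # Embed the chunks and index them in a Chroma vector store.
    return Chroma.from_documents(chunks, OpenAIEmbeddings())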
@@ -144,24 +143,28 @@ async def on_chat_start():
     msg.content = f"`{file.name}` loaded. You can now ask questions!"
     await msg.update()
 
+    ##########################################################################
+    # Exercise 1a:
+    # Now that we have the search engine set up, our Chat with PDF
+    # application can follow the RAG architecture pattern. Please use the
+    # appropriate RetrievalQA chain from LangChain.
+    #
+    # Remember, we want to set the model temperature to 0 to ensure model
+    # outputs do not vary across runs, and we also want to return sources
+    # with our answers.
+    ##########################################################################
     model = ChatOpenAI(
         model="gpt-3.5-turbo-16k-0613",
+        temperature=0,
         streaming=True
     )
 
-    prompt = ChatPromptTemplate.from_messages(
-        [
-            (
-                "system",
-                "You are Chainlit GPT, a helpful assistant.",
-            ),
-            (
-                "human",
-                "{question}"
-            ),
-        ]
+    chain = RetrievalQAWithSourcesChain.from_chain_type(
+        llm=model,
+        chain_type="stuff",
+        retriever=search_engine.as_retriever(max_tokens_limit=4097),
     )
-    chain = LLMChain(llm=model, prompt=prompt, output_parser=StrOutputParser())
+    ##########################################################################
 
     # We are saving the chain in user_session, so we do not have to rebuild
     # it every single time.
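The hunk above swaps the plain LLMChain for a retrieval chain. A minimal sketch of the same construction (not part of the commit; `build_qa_chain` is a hypothetical name), assuming `search_engine` is the Chroma store created in on_chat_start(). Unlike LLMChain, RetrievalQAWithSourcesChain returns a dict with "answer" and "sources" keys, which is why main() changes in the next hunk.

from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chat_models import ChatOpenAI
from langchain.vectorstores import Chroma

def build_qa_chain(search_engine: Chroma) -> RetrievalQAWithSourcesChain:
    # temperature=0 keeps answers reproducible across runs, per the exercise.
    model = ChatOpenAI(model="gpt-3.5-turbo-16k-0613", temperature=0, streaming=True)
    return RetrievalQAWithSourcesChain.from_chain_type(
        llm=model,
        chain_type="stuff",  # stuff all retrieved chunks into a single prompt
        retriever=search_engine.as_retriever(max_tokens_limit=4097),
    )

# Example shape of a call (hypothetical question):
# result = build_qa_chain(search_engine)({"question": "What is this PDF about?"})
# result["answer"]   -> the generated answer text
# result["sources"]  -> comma-separated names of the chunks it drew from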
@@ -172,11 +175,51 @@ async def main(message: cl.Message):
 async def main(message: cl.Message):
 
     # Let's load the chain from user_session
-    chain = cl.user_session.get("chain") # type: LLMChain
+    chain = cl.user_session.get("chain") # type: RetrievalQAWithSourcesChain
 
-    response = await chain.arun(
-        question=message.content, callbacks=[cl.LangchainCallbackHandler()]
+    response = await chain.acall(
+        message.content,
+        callbacks=[cl.AsyncLangchainCallbackHandler(stream_final_answer=True)]
     )
-
-    await cl.Message(content=response).send()
-
+    answer = response["answer"]
+    sources = response["sources"].strip()
+    source_elements = []
+
+    # Get the documents from the user session
+    docs = cl.user_session.get("docs")
+    metadatas = [doc.metadata for doc in docs]
+    all_sources = [m["source"] for m in metadatas]
+
+    ##########################################################################
+    # Exercise 1a:
+    # Now that we have the search engine set up, our Chat with PDF
+    # application can follow the RAG architecture pattern. Please use the
+    # appropriate RetrievalQA chain from LangChain.
+    #
+    # Remember, we want to set the model temperature to 0 to ensure model
+    # outputs do not vary across runs, and we also want to return sources
+    # with our answers.
+    ##########################################################################
+    # Adding sources to the answer
+    if sources:
+        found_sources = []
+
+        # Add the sources to the message
+        for source in sources.split(","):
+            source_name = source.strip().replace(".", "")
+            # Get the index of the source
+            try:
+                index = all_sources.index(source_name)
+            except ValueError:
+                continue
+            text = docs[index].page_content
+            found_sources.append(source_name)
+            # Create the text element referenced in the message
+            source_elements.append(cl.Text(content=text, name=source_name))
+
+        if found_sources:
+            answer += f"\nSources: {', '.join(found_sources)}"
+        else:
+            answer += "\nNo sources found"
+
+    await cl.Message(content=answer, elements=source_elements).send()
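The new handler parses the chain's "sources" string and attaches each cited passage to the reply as a cl.Text element. A standalone sketch of that matching step (not part of the commit; `match_sources` is a made-up helper name), which may make the behaviour easier to follow:

from typing import List, Tuple

from langchain.schema import Document

def match_sources(sources: str, docs: List[Document]) -> List[Tuple[str, str]]:
    """Return (source_name, page_content) for every source the chain cited."""
    all_sources = [doc.metadata["source"] for doc in docs]
    matched = []
    for source in sources.split(","):
        # The model sometimes appends punctuation, so normalise the name first.
        source_name = source.strip().replace(".", "")
        try:
            index = all_sources.index(source_name)
        except ValueError:
            continue  # the chain cited something that was never indexed
        matched.append((source_name, docs[index].page_content))
    return matched

In main() each matched pair becomes a cl.Text element on the final cl.Message, so the cited passages are shown next to the answer.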