CyranoB committed
Commit 8882a59 · 1 Parent(s): 615fe60
Files changed (2):
  1. messages.py +3 -0
  2. search_agent.py +15 -6
messages.py CHANGED

```diff
@@ -98,6 +98,9 @@ def get_rag_prompt_template():
     - Synthesize the retrieved information into a clear, informative answer to the question
     - Format your answer in Markdown, using heading levels 2-3 as needed
     - Include a "References" section at the end with the full citations and link for each source you used
+
+    If you cannot answer the question with confidence just say: "I'm not sure about the answer to be honest"
+    If the provided context is not relevant to the question, just say: "The context provided is not relevant to the question"
     """
     )
 )
```
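The two new fallback lines are a common RAG guardrail: they give the model an explicit refusal path instead of pressuring it to synthesize an answer from weak or off-topic context. As a rough sketch of how the surrounding template could be assembled (the diff only shows the tail of the template, so the `PromptTemplate` wrapper, the preamble, and the variable names below are assumptions):

```python
from langchain.prompts import PromptTemplate

def get_rag_prompt_template():
    # Hypothetical reconstruction: only the bullet list and the two new
    # fallback lines are confirmed by the diff; everything else is a guess.
    return PromptTemplate(
        input_variables=["query", "context"],
        template="""Answer the question using only the provided context.

Question: {query}

Context:
{context}

- Synthesize the retrieved information into a clear, informative answer to the question
- Format your answer in Markdown, using heading levels 2-3 as needed
- Include a "References" section at the end with the full citations and link for each source you used

If you cannot answer the question with confidence just say: "I'm not sure about the answer to be honest"
If the provided context is not relevant to the question, just say: "The context provided is not relevant to the question"
""",
    )
```

Later in search_agent.py the template is consumed via `get_rag_prompt_template().format(query=question, context=context)`, which works with a plain `PromptTemplate` like the one sketched above.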
search_agent.py CHANGED

```diff
@@ -180,26 +180,27 @@ def get_links_contents(sources):
     # Filter out None results
     return [result for result in results if result is not None]
 
-def vectorize(contents, text_chunk_size=1000,text_chunk_overlap=200,):
+def vectorize(contents, text_chunk_size=500,text_chunk_overlap=50):
     documents = []
     for content in contents:
         page_content = content['snippet']
-        if 'htlm' in content:
+        if 'html' in content:
             page_content = content['html']
         if 'pdf_content' in content:
-           page_content = content['pdf_content']
+            page_content = content['pdf_content']
         try:
             metadata = {'title': content['title'], 'source': content['link']}
             doc = Document(page_content=page_content, metadata=metadata)
             documents.append(doc)
         except Exception as e:
             console.log(f"[gray]Error processing content for {content['link']}: {e}")
-
+
     text_splitter = RecursiveCharacterTextSplitter(
         chunk_size=text_chunk_size,
         chunk_overlap=text_chunk_overlap
     )
     docs = text_splitter.split_documents(documents)
+    console.log(f"Vectorizing {len(docs)} document chunks")
     embeddings = OpenAIEmbeddings()
     store = FAISS.from_documents(docs, embeddings)
     return store
```
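Besides fixing the `'htlm'` typo (which previously made the HTML branch unreachable, so every page fell back to its short snippet), this hunk halves the chunk size (1000 → 500) and trims the overlap (200 → 50), biasing retrieval toward smaller, more precise hits at the cost of context per chunk. A self-contained sketch of the same chunk-then-embed flow (the sample document and query are made up, and import paths vary across LangChain versions):

```python
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

# One fake "page" standing in for the scraped content dicts.
documents = [Document(
    page_content="FAISS is a library for efficient similarity search over dense vectors.",
    metadata={"title": "FAISS docs", "source": "https://example.com/faiss"},
)]

# Same parameters as the new vectorize() defaults.
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunks = splitter.split_documents(documents)

# Embed each chunk and build an in-memory FAISS index.
store = FAISS.from_documents(chunks, OpenAIEmbeddings())
hits = store.similarity_search("what is FAISS?", k=4)  # top-4 closest chunks
```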
```diff
@@ -216,7 +217,7 @@ def format_docs(docs):
     return docs_as_json
 
 
-def query_rag(chat_llm, question, search_query, vectorstore):
+def multi_query_rag(chat_llm, question, search_query, vectorstore):
     retriever_from_llm = MultiQueryRetriever.from_llm(
         retriever=vectorstore.as_retriever(), llm=chat_llm, include_original=True,
     )
```
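`MultiQueryRetriever` is what makes the renamed `multi_query_rag` "multi": it asks the LLM to rewrite the question several ways, runs a vector search for each rewrite, and returns the deduplicated union, which helps when the user's wording doesn't match the documents. A minimal sketch of that piece in isolation (the model choice and the query are placeholders; `vector_store` is assumed to be the FAISS index returned by `vectorize()`):

```python
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain_openai import ChatOpenAI

chat_llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

retriever = MultiQueryRetriever.from_llm(
    retriever=vector_store.as_retriever(),  # wraps the FAISS index
    llm=chat_llm,
    include_original=True,  # also search with the original, unrewritten question
)
docs = retriever.get_relevant_documents("how do I speed up FAISS indexing?")
```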
```diff
@@ -229,6 +230,14 @@ def query_rag(chat_llm, question, search_query, vectorstore):
     return response.content
 
 
+def query_rag(chat_llm, question, search_query, vectorstore):
+    retriver = vectorstore.as_retriever()
+    unique_docs = retriver.get_relevant_documents(search_query, callbacks=callbacks, verbose=True)
+    context = format_docs(unique_docs)
+    prompt = get_rag_prompt_template().format(query=question, context=context)
+    response = chat_llm.invoke(prompt, config={"callbacks": callbacks})
+    return response.content
+
 
 
 console = Console()
```
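The re-added single-pass `query_rag` skips LLM query expansion entirely: one similarity search with the already-optimized search query, then the formatted documents are pasted into the RAG prompt. The committed version reads the module-level `callbacks` global; a sketch that takes it as a parameter instead (my variant, not the committed code) might look like:

```python
def query_rag(chat_llm, question, search_query, vectorstore, callbacks=None):
    # Single vector-store lookup; no MultiQueryRetriever round-trips.
    retriever = vectorstore.as_retriever()
    docs = retriever.get_relevant_documents(search_query, callbacks=callbacks)
    context = format_docs(docs)  # serialize the hits for the prompt
    prompt = get_rag_prompt_template().format(query=question, context=context)
    response = chat_llm.invoke(prompt, config={"callbacks": callbacks})
    return response.content
```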
```diff
@@ -278,7 +287,7 @@ if __name__ == '__main__':
     vector_store = vectorize(contents)
 
     with console.status("[bold green]Querying LLM relevant context", spinner='dots8Bit'):
-        respomse = query_rag(chat, query, optimize_search_query, vector_store)
+        respomse = multi_query_rag(chat, query, optimize_search_query, vector_store)
 
     console.rule(f"[bold green]Response from {provider}")
     if output == "text":
```
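Put together, the `__main__` flow after this commit is: fetch and clean the search hits, index them, then answer through the multi-query path. Condensed from the diff context (`sources`, `chat`, `query`, and `optimize_search_query` come from earlier steps not shown here, and I use the standard spelling `response` in this sketch):

```python
contents = get_links_contents(sources)   # scrape each hit (HTML or PDF)
vector_store = vectorize(contents)       # chunk, embed, build the FAISS index

with console.status("[bold green]Querying LLM relevant context", spinner='dots8Bit'):
    response = multi_query_rag(chat, query, optimize_search_query, vector_store)

console.rule(f"[bold green]Response from {provider}")
```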
 