Spaces:
Running
Running
Fix typo
Browse files
- messages.py +3 -0
- search_agent.py +15 -6
messages.py
CHANGED
@@ -98,6 +98,9 @@ def get_rag_prompt_template():
|
|
98 |
- Synthesize the retrieved information into a clear, informative answer to the question
|
99 |
- Format your answer in Markdown, using heading levels 2-3 as needed
|
100 |
- Include a "References" section at the end with the full citations and link for each source you used
|
|
|
|
|
|
|
101 |
"""
|
102 |
)
|
103 |
)
|
|
|
98 |
- Synthesize the retrieved information into a clear, informative answer to the question
|
99 |
- Format your answer in Markdown, using heading levels 2-3 as needed
|
100 |
- Include a "References" section at the end with the full citations and link for each source you used
|
101 |
+
|
102 |
+
If you cannot answer the question with confidence just say: "I'm not sure about the answer to be honest"
|
103 |
+
If the provided context is not relevant to the question, just say: "The context provided is not relevant to the question"
|
104 |
"""
|
105 |
)
|
106 |
)
|
search_agent.py
CHANGED
@@ -180,26 +180,27 @@ def get_links_contents(sources):
|
|
180 |
# Filter out None results
|
181 |
return [result for result in results if result is not None]
|
182 |
|
183 |
-
def vectorize(contents, text_chunk_size=
|
184 |
documents = []
|
185 |
for content in contents:
|
186 |
page_content = content['snippet']
|
187 |
-
if '
|
188 |
page_content = content['html']
|
189 |
if 'pdf_content' in content:
|
190 |
-
page_content = content['pdf_content']
|
191 |
try:
|
192 |
metadata = {'title': content['title'], 'source': content['link']}
|
193 |
doc = Document(page_content=page_content, metadata=metadata)
|
194 |
documents.append(doc)
|
195 |
except Exception as e:
|
196 |
console.log(f"[gray]Error processing content for {content['link']}: {e}")
|
197 |
-
|
198 |
text_splitter = RecursiveCharacterTextSplitter(
|
199 |
chunk_size=text_chunk_size,
|
200 |
chunk_overlap=text_chunk_overlap
|
201 |
)
|
202 |
docs = text_splitter.split_documents(documents)
|
|
|
203 |
embeddings = OpenAIEmbeddings()
|
204 |
store = FAISS.from_documents(docs, embeddings)
|
205 |
return store
|
@@ -216,7 +217,7 @@ def format_docs(docs):
|
|
216 |
return docs_as_json
|
217 |
|
218 |
|
219 |
-
def
|
220 |
retriever_from_llm = MultiQueryRetriever.from_llm(
|
221 |
retriever=vectorstore.as_retriever(), llm=chat_llm, include_original=True,
|
222 |
)
|
@@ -229,6 +230,14 @@ def query_rag(chat_llm, question, search_query, vectorstore):
|
|
229 |
return response.content
|
230 |
|
231 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
232 |
|
233 |
|
234 |
console = Console()
|
@@ -278,7 +287,7 @@ if __name__ == '__main__':
|
|
278 |
vector_store = vectorize(contents)
|
279 |
|
280 |
with console.status("[bold green]Querying LLM relevant context", spinner='dots8Bit'):
|
281 |
-
respomse =
|
282 |
|
283 |
console.rule(f"[bold green]Response from {provider}")
|
284 |
if output == "text":
|
|
|
180 |
# Filter out None results
|
181 |
return [result for result in results if result is not None]
|
182 |
|
183 |
+
def vectorize(contents, text_chunk_size=500,text_chunk_overlap=50):
|
184 |
documents = []
|
185 |
for content in contents:
|
186 |
page_content = content['snippet']
|
187 |
+
if 'html' in content:
|
188 |
page_content = content['html']
|
189 |
if 'pdf_content' in content:
|
190 |
+
page_content = content['pdf_content']
|
191 |
try:
|
192 |
metadata = {'title': content['title'], 'source': content['link']}
|
193 |
doc = Document(page_content=page_content, metadata=metadata)
|
194 |
documents.append(doc)
|
195 |
except Exception as e:
|
196 |
console.log(f"[gray]Error processing content for {content['link']}: {e}")
|
197 |
+
|
198 |
text_splitter = RecursiveCharacterTextSplitter(
|
199 |
chunk_size=text_chunk_size,
|
200 |
chunk_overlap=text_chunk_overlap
|
201 |
)
|
202 |
docs = text_splitter.split_documents(documents)
|
203 |
+
console.log(f"Vectorizing {len(docs)} document chunks")
|
204 |
embeddings = OpenAIEmbeddings()
|
205 |
store = FAISS.from_documents(docs, embeddings)
|
206 |
return store
|
|
|
217 |
return docs_as_json
|
218 |
|
219 |
|
220 |
+
def multi_query_rag(chat_llm, question, search_query, vectorstore):
|
221 |
retriever_from_llm = MultiQueryRetriever.from_llm(
|
222 |
retriever=vectorstore.as_retriever(), llm=chat_llm, include_original=True,
|
223 |
)
|
|
|
230 |
return response.content
|
231 |
|
232 |
|
233 |
+
def query_rag(chat_llm, question, search_query, vectorstore):
|
234 |
+
retriver = vectorstore.as_retriever()
|
235 |
+
unique_docs = retriver.get_relevant_documents(search_query, callbacks=callbacks, verbose=True)
|
236 |
+
context = format_docs(unique_docs)
|
237 |
+
prompt = get_rag_prompt_template().format(query=question, context=context)
|
238 |
+
response = chat_llm.invoke(prompt, config={"callbacks": callbacks})
|
239 |
+
return response.content
|
240 |
+
|
241 |
|
242 |
|
243 |
console = Console()
|
|
|
287 |
vector_store = vectorize(contents)
|
288 |
|
289 |
with console.status("[bold green]Querying LLM relevant context", spinner='dots8Bit'):
|
290 |
+
respomse = multi_query_rag(chat, query, optimize_search_query, vector_store)
|
291 |
|
292 |
console.rule(f"[bold green]Response from {provider}")
|
293 |
if output == "text":
|