Update agent.py

agent.py CHANGED
@@ -37,7 +37,7 @@ import re
 from langchain_community.document_loaders import TextLoader, PyMuPDFLoader
 from docx import Document as DocxDocument
 import openpyxl
-
+from io import StringIO
 
 load_dotenv()
 
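The only code change here is the new StringIO import; its use is not visible in this diff. A minimal sketch of the usual pattern it supports, assuming agent.py later parses CSV text held in memory (the csv_text value below is hypothetical):

from io import StringIO

import pandas as pd

# Hypothetical usage: wrap an in-memory string so pd.read_csv can treat it
# as a file, avoiding a temporary file on disk.
csv_text = "question\nWhat is FAISS?\nWhat is LangChain?"
df = pd.read_csv(StringIO(csv_text))
print(df["question"].tolist())  # ['What is FAISS?', 'What is LangChain?']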
@@ -312,16 +312,54 @@ for task in tasks:
 # Step 4: Set up HuggingFace Embeddings and FAISS VectorStore
 # -------------------------------
 # Initialize HuggingFace Embedding model
-embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
+#embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
+embedding_model = HuggingFaceEmbeddings(model_name="BAAI/bge-base-en-v1.5")
 
 
 from langchain_community.document_loaders import WikipediaLoader
 
-
-
+# -----------------------------
+# Step 1: Load CSV Questions
+# -----------------------------
+csv_path = "questions.csv"  # Change to your CSV file
+df = pd.read_csv(csv_path)
+
+docs = []
+for _, row in df.iterrows():
+    question = str(row.get("question", "")).strip()
+    if question:
+        docs.append(Document(page_content=question, metadata={"source": "csv"}))
+
+# -----------------------------
+# Step 2: Add Wikipedia Docs
+# -----------------------------
+wiki_docs = []
+for doc in docs:
+    try:
+        wiki_results = WikipediaLoader(query=doc.page_content, load_max_docs=1).load()
+        wiki_docs.extend(wiki_results)
+    except Exception as e:
+        print(f"Failed to load Wikipedia for: {doc.page_content} — {e}")
+
+all_docs = docs + wiki_docs
+
+# -----------------------------
+# Step 3: Build FAISS Index
+# -----------------------------
 vector_store = FAISS.from_documents(all_docs, embedding_model)
 vector_store.save_local("faiss_index")
 
+# -----------------------------
+# Step 4: Create Retriever Tool
+# -----------------------------
+retriever = vector_store.as_retriever()
+
+question_retriever_tool = create_retriever_tool(
+    retriever=retriever,
+    name="Question_Search",
+    description="A tool to retrieve documents related to a user's question."
+)
+
 
 
 # -------------------------------
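This hunk switches the embedding model from all-mpnet-base-v2 to BAAI/bge-base-en-v1.5 and builds the FAISS index from the CSV questions plus up to one Wikipedia page per question. A minimal sketch of loading the saved index back for querying, assuming the embeddings class comes from langchain_community as elsewhere in the file (the query string is made up):

from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

# Reload the index; the embedding model must match the one used to build it,
# since FAISS stores only the vectors, not the encoder.
embedding_model = HuggingFaceEmbeddings(model_name="BAAI/bge-base-en-v1.5")
vector_store = FAISS.load_local(
    "faiss_index",
    embedding_model,
    allow_dangerous_deserialization=True,  # required by recent langchain_community
)
retriever = vector_store.as_retriever(search_kwargs={"k": 4})
for doc in retriever.invoke("What is the capital of France?"):
    print(doc.metadata.get("source"), "-", doc.page_content[:80])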
@@ -336,6 +374,26 @@ question_retriever_tool = create_retriever_tool(
     description="A tool to retrieve documents related to a user's question."
 )
 
+
+def retriever(state: MessagesState):
+    """Retriever node using similarity scores for filtering"""
+    query = state["messages"][0].content
+    results = vector_store.similarity_search_with_score(query, k=4)  # top 4 matches
+
+    # Filter by score (lower is more similar; adjust threshold as needed)
+    threshold = 0.8
+    filtered = [doc for doc, score in results if score < threshold]
+
+    if not filtered:
+        example_msg = HumanMessage(content="No relevant documents found.")
+    else:
+        content = "\n\n".join(doc.page_content for doc in filtered)
+        example_msg = HumanMessage(
+            content=f"Here are relevant reference documents:\n\n{content}"
+        )
+
+    return {"messages": [sys_msg] + state["messages"] + [example_msg]}
+
 
 
 tools = [
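The new node keeps only hits whose score falls below a threshold. With LangChain's FAISS wrapper, similarity_search_with_score returns a raw distance by default, so lower means more similar, which is what the score < threshold filter assumes. A small self-contained check of that convention (the sample sentences are illustrative only):

from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document

embedding_model = HuggingFaceEmbeddings(model_name="BAAI/bge-base-en-v1.5")
store = FAISS.from_documents(
    [
        Document(page_content="Paris is the capital of France."),
        Document(page_content="FAISS indexes dense vectors for similarity search."),
    ],
    embedding_model,
)
for doc, score in store.similarity_search_with_score("capital of France", k=2):
    print(f"{score:.3f}  {doc.page_content}")
# The Paris sentence should print with the smaller (closer) score, which is
# why the node keeps documents whose score falls below the threshold.

Because raw distances are model-dependent, the 0.8 threshold is worth re-tuning whenever the embedding model changes.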
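The node's signature, MessagesState in and a messages update out, follows the LangGraph convention, so it presumably feeds a StateGraph built elsewhere in agent.py. A minimal sketch of that wiring, with stand-ins for sys_msg and the model call; the node function is named retrieve_context here so it does not shadow the retriever variable created by vector_store.as_retriever() above:

from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
from langgraph.graph import StateGraph, MessagesState, START, END

sys_msg = SystemMessage(content="You are a helpful assistant.")  # stand-in

def retrieve_context(state: MessagesState):
    # Stand-in for the diff's retriever node: prepend the system prompt and
    # append retrieved reference material as a human message.
    query = state["messages"][0].content
    context = HumanMessage(content=f"Here are relevant reference documents for: {query}")
    return {"messages": [sys_msg] + state["messages"] + [context]}

def assistant(state: MessagesState):
    # Placeholder for the LLM node; a real graph would invoke a chat model here.
    return {"messages": [AIMessage(content="(model response)")]}

builder = StateGraph(MessagesState)
builder.add_node("retrieve_context", retrieve_context)
builder.add_node("assistant", assistant)
builder.add_edge(START, "retrieve_context")
builder.add_edge("retrieve_context", "assistant")
builder.add_edge("assistant", END)
graph = builder.compile()

result = graph.invoke({"messages": [HumanMessage(content="What is FAISS?")]})
print(result["messages"][-1].content)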