wt002 committed · Commit b7e4e52 · verified · 1 Parent(s): 0a24bc8

Update agent.py

Files changed (1):
  agent.py +42 -24
agent.py CHANGED
@@ -21,10 +21,17 @@ from langchain.embeddings.base import Embeddings
 from typing import List
 import numpy as np
 
+
 import pandas as pd
 import uuid
 from langchain_community.vectorstores import FAISS
 from langchain.schema import Document
+import requests
+import json
+#from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.vectorstores import FAISS
+from langchain.schema import Document
+#from langchain.agents import create_retriever_tool
 
 
 load_dotenv()
@@ -137,32 +144,42 @@ sys_msg = SystemMessage(content=system_prompt)
 # Step 1: Load documents from CSV file (max 165 rows)
 # -------------------------------
 
+
+
 # -------------------------------
-# Step 1: Load documents from CSV URL (max 165 rows)
+# Step 1: Load JSON data from URL
 # -------------------------------
-csv_url = "https://huggingface.co/spaces/wt002/Final_Assignment_Project/blob/main/documents.csv"  # Replace with your actual URL
-df = pd.read_csv(csv_url).head(165)
-
-# Check if 'content' column exists
-assert 'content' in df.columns, "'content' column is required in the CSV file."
-
-# Add 'id' and 'metadata' column
-df['id'] = [str(uuid.uuid4()) for _ in range(len(df))]
-if 'metadata' not in df.columns:
-    df['metadata'] = [{} for _ in range(len(df))]
-else:
-    # If metadata is a JSON string, convert it to dict
-    import json
-    df['metadata'] = df['metadata'].apply(lambda x: json.loads(x) if isinstance(x, str) else x)
-
-# Convert each row into a Document
-docs = [
-    Document(page_content=row['content'], metadata={'id': row['id'], **row['metadata']})
-    for _, row in df.iterrows()
-]
+json_url = "https://huggingface.co/spaces/wt002/Final_Assignment_Project/blob/main/questions.json"  # Replace with your actual JSON URL
+response = requests.get(json_url)
+
+# Ensure the request was successful
+if response.status_code != 200:
+    raise Exception(f"Failed to load JSON from {json_url}. Status code: {response.status_code}")
+
+# Parse the JSON content
+data = response.json()
+
+# Make sure we have the correct structure in the JSON
+assert isinstance(data, list), "The JSON should contain a list of documents."
 
 # -------------------------------
-# Step 2: Set up HuggingFace Embeddings and FAISS VectorStore
+# Step 2: Prepare documents
+# -------------------------------
+docs = []
+for doc in data:
+    # Ensure the document has a 'content' field
+    content = doc.get('content', "").strip()
+    if not content:
+        continue  # Skip documents with no content
+
+    # Ensure a unique ID for each document
+    doc['id'] = str(uuid.uuid4())
+
+    # Create Document objects from the data
+    docs.append(Document(page_content=content, metadata=doc))
+
+# -------------------------------
+# Step 3: Set up HuggingFace Embeddings and FAISS VectorStore
 # -------------------------------
 # Initialize HuggingFace Embedding model
 embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
@@ -173,10 +190,10 @@ vector_store = FAISS.from_documents(docs, embedding_model)
 # Save the FAISS index locally
 vector_store.save_local("faiss_index")
 
-#print("✅ FAISS index created and saved locally.")
+print("✅ FAISS index created and saved locally.")
 
 # -------------------------------
-# Step 3: Create Retriever Tool (for use in LangChain)
+# Step 4: Create Retriever Tool (for use in LangChain)
 # -------------------------------
 retriever = vector_store.as_retriever()
 
@@ -190,6 +207,7 @@ question_retriever_tool = create_retriever_tool(
 
 
 
+
 tools = [
     multiply,
     add,
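One caveat on the new loader (the old csv_url had the same problem): blob/main URLs on huggingface.co return the HTML file-viewer page, not the file itself, so response.json() will raise when it tries to parse HTML. A minimal sketch of fetching the raw file instead, assuming questions.json actually lives at the root of the Space repo:

import requests

# 'resolve/main' serves the raw file bytes; 'blob/main' serves an HTML viewer page
json_url = "https://huggingface.co/spaces/wt002/Final_Assignment_Project/resolve/main/questions.json"
response = requests.get(json_url, timeout=30)
response.raise_for_status()  # raise on any non-2xx status instead of checking manually
data = response.json()       # now parses actual JSON, not HTML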
 
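Because the index is persisted with save_local, a separate process can rebuild the retriever without re-embedding the documents. A rough usage sketch, assuming a reasonably recent langchain_community (older releases lack the allow_dangerous_deserialization flag, and the HuggingFaceEmbeddings import location varies by version):

from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

# Must match the model used when the index was built
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

# save_local writes a pickle alongside the index; the flag opts in to loading it
vector_store = FAISS.load_local(
    "faiss_index", embedding_model, allow_dangerous_deserialization=True
)

retriever = vector_store.as_retriever()
for doc in retriever.invoke("example question"):
    print(doc.metadata.get("id"), doc.page_content[:80])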
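The last hunk's context line shows the retriever being wrapped as a tool (question_retriever_tool = create_retriever_tool(), but the call's arguments fall outside the diff. For orientation only, a hypothetical call in the shape LangChain's helper expects; the name and description values below are illustrative placeholders, not the ones in agent.py:

from langchain.tools.retriever import create_retriever_tool

# Placeholder name/description: the real arguments are not shown in this diff
question_retriever_tool = create_retriever_tool(
    retriever,
    name="question_search",
    description="Search the FAISS index of loaded documents for relevant passages.",
)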