wt002 committed · Commit b7e4e52 · verified · 1 Parent(s): 0a24bc8

Update agent.py

Files changed (1):
  agent.py +42 -24
agent.py CHANGED
@@ -21,10 +21,17 @@ from langchain.embeddings.base import Embeddings
 from typing import List
 import numpy as np
 
+
 import pandas as pd
 import uuid
 from langchain_community.vectorstores import FAISS
 from langchain.schema import Document
+import requests
+import json
+#from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.vectorstores import FAISS
+from langchain.schema import Document
+#from langchain.agents import create_retriever_tool
 
 
 load_dotenv()
@@ -137,32 +144,42 @@ sys_msg = SystemMessage(content=system_prompt)
 # Step 1: Load documents from CSV file (max 165 rows)
 # -------------------------------
 
+
+
 # -------------------------------
-# Step 1: Load documents from CSV URL (max 165 rows)
+# Step 1: Load JSON data from URL
 # -------------------------------
-csv_url = "https://huggingface.co/spaces/wt002/Final_Assignment_Project/blob/main/documents.csv"  # Replace with your actual URL
-df = pd.read_csv(csv_url).head(165)
-
-# Check if 'content' column exists
-assert 'content' in df.columns, "'content' column is required in the CSV file."
-
-# Add 'id' and 'metadata' column
-df['id'] = [str(uuid.uuid4()) for _ in range(len(df))]
-if 'metadata' not in df.columns:
-    df['metadata'] = [{} for _ in range(len(df))]
-else:
-    # If metadata is a JSON string, convert it to dict
-    import json
-    df['metadata'] = df['metadata'].apply(lambda x: json.loads(x) if isinstance(x, str) else x)
-
-# Convert each row into a Document
-docs = [
-    Document(page_content=row['content'], metadata={'id': row['id'], **row['metadata']})
-    for _, row in df.iterrows()
-]
+json_url = "https://huggingface.co/spaces/wt002/Final_Assignment_Project/blob/main/questions.json"  # Replace with your actual JSON URL
+response = requests.get(json_url)
+
+# Ensure the request was successful
+if response.status_code != 200:
+    raise Exception(f"Failed to load JSON from {json_url}. Status code: {response.status_code}")
+
+# Parse the JSON content
+data = response.json()
+
+# Make sure we have the correct structure in the JSON
+assert isinstance(data, list), "The JSON should contain a list of documents."
 
 # -------------------------------
-# Step 2: Set up HuggingFace Embeddings and FAISS VectorStore
+# Step 2: Prepare documents
+# -------------------------------
+docs = []
+for doc in data:
+    # Ensure the document has a 'content' field
+    content = doc.get('content', "").strip()
+    if not content:
+        continue  # Skip documents with no content
+
+    # Ensure a unique ID for each document
+    doc['id'] = str(uuid.uuid4())
+
+    # Create Document objects from the data
+    docs.append(Document(page_content=content, metadata=doc))
+
+# -------------------------------
+# Step 3: Set up HuggingFace Embeddings and FAISS VectorStore
 # -------------------------------
 # Initialize HuggingFace Embedding model
 embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
@@ -173,10 +190,10 @@ vector_store = FAISS.from_documents(docs, embedding_model)
 # Save the FAISS index locally
 vector_store.save_local("faiss_index")
 
-#print("✅ FAISS index created and saved locally.")
+print("✅ FAISS index created and saved locally.")
 
 # -------------------------------
-# Step 3: Create Retriever Tool (for use in LangChain)
+# Step 4: Create Retriever Tool (for use in LangChain)
 # -------------------------------
 retriever = vector_store.as_retriever()
 
@@ -190,6 +207,7 @@ question_retriever_tool = create_retriever_tool(
 
 
 
+
 tools = [
     multiply,
     add,
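One caveat on the new loader (the old csv_url had the same problem): blob/main URLs on huggingface.co return the HTML file-viewer page, not the file itself, so response.json() will raise when it tries to parse HTML. A minimal sketch of fetching the raw file instead, assuming questions.json actually lives at the root of the Space repo:

import requests

# 'resolve/main' serves the raw file bytes; 'blob/main' serves an HTML viewer page
json_url = "https://huggingface.co/spaces/wt002/Final_Assignment_Project/resolve/main/questions.json"
response = requests.get(json_url, timeout=30)
response.raise_for_status()  # raise on any non-2xx status instead of checking manually
data = response.json()       # now parses actual JSON, not HTML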
 
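Because the index is persisted with save_local, a separate process can rebuild the retriever without re-embedding the documents. A rough usage sketch, assuming a reasonably recent langchain_community (older releases lack the allow_dangerous_deserialization flag, and the HuggingFaceEmbeddings import location varies by version):

from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

# Must match the model used when the index was built
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

# save_local writes a pickle alongside the index; the flag opts in to loading it
vector_store = FAISS.load_local(
    "faiss_index", embedding_model, allow_dangerous_deserialization=True
)

retriever = vector_store.as_retriever()
for doc in retriever.invoke("example question"):
    print(doc.metadata.get("id"), doc.page_content[:80])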
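The last hunk's context line shows the retriever being wrapped as a tool (question_retriever_tool = create_retriever_tool(), but the call's arguments fall outside the diff. For orientation only, a hypothetical call in the shape LangChain's helper expects; the name and description values below are illustrative placeholders, not the ones in agent.py:

from langchain.tools.retriever import create_retriever_tool

# Placeholder name/description: the real arguments are not shown in this diff
question_retriever_tool = create_retriever_tool(
    retriever,
    name="question_search",
    description="Search the FAISS index of loaded documents for relevant passages.",
)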