wt002 committed
Commit b280aa5 · verified · 1 Parent(s): 9eb90d2

Update agent.py

Files changed (1)
  1. agent.py +22 -44
agent.py CHANGED
@@ -140,61 +140,39 @@ sys_msg = SystemMessage(content=system_prompt)
 
 
 # -------------------------------
-# Step 1: Load documents from CSV file (max 165 rows)
+# Step 1: Load the JSON from a URL
 # -------------------------------
 
-
-
-# -------------------------------
-# Step 1: Load JSON data from URL
-# -------------------------------
-jsonl_url = "https://huggingface.co/spaces/wt002/Final_Assignment_Project/blob/main/metedata.jsonl"  # Replace with your actual JSONL URL
-response = requests.get(jsonl_url)
-
-# Ensure the request was successful
-if response.status_code != 200:
-    raise Exception(f"Failed to load JSONL from {jsonl_url}. Status code: {response.status_code}")
-
+json_url = "https://agents-course-unit4-scoring.hf.space/questions"  # Replace with your actual JSON file URL
+response = requests.get(json_url)
 
 # Ensure the request was successful
 if response.status_code != 200:
-    raise Exception(f"Failed to load JSONL from {jsonl_url}. Status code: {response.status_code}")
-
-# Read and parse the JSONL file line by line
-docs = []
-for line_number, line in enumerate(response.text.splitlines(), 1):
-    try:
-        doc = json.loads(line)  # Parse each line as a separate JSON object
-        content = doc.get('content', "").strip()
-        if not content:
-            continue  # Skip documents with no content
+    raise Exception(f"Failed to load JSON from {json_url}. Status code: {response.status_code}")
 
-        # Add unique ID to each document
-        doc['id'] = str(uuid.uuid4())
-
-        # Convert the document into a Document object
-        docs.append(Document(page_content=content, metadata=doc))
-
-    except json.JSONDecodeError as e:
-        print(f"Skipping malformed JSONL line at line {line_number}: {line}")
-        print(f"Error: {e}")
+# Parse the JSON object
+data = response.json()
 
+# Ensure the required field 'question' exists
+if 'question' not in data:
+    raise ValueError("The JSON object must contain a 'question' field.")
 
 # -------------------------------
-# Step 2: Prepare documents
-# -------------------------------
-docs = []
-for doc in data:
-    # Ensure the document has 'content' field
-    content = doc.get('content', "").strip()
-    if not content:
-        continue  # Skip documents with no content
+# Step 2: Create a Document from the JSON Object
+# -------------------------------
+content = data.get('question', "").strip()
+if not content:
+    raise ValueError("The 'question' field in the JSON object cannot be empty.")
+
+# Create a document and add metadata from the JSON object
+document = Document(
+    page_content=content,
+    metadata=data
+)
 
-    # Ensure unique ID for each document
-    doc['id'] = str(uuid.uuid4())
+# Wrap the document in a list to work with LangChain (as it expects a list of documents)
+docs = [document]
 
-    # Create Document objects from the data
-    docs.append(Document(page_content=content, metadata=doc))
 
 # -------------------------------
 # Step 3: Set up HuggingFace Embeddings and FAISS VectorStore
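Pulled out of the diff, the added lines amount to the following standalone flow (a minimal runnable sketch; the two imports are assumptions, since the diff does not show the file's import block):

import requests
from langchain_core.documents import Document  # assumed import path; not shown in the diff

# Step 1: Load the JSON from a URL
json_url = "https://agents-course-unit4-scoring.hf.space/questions"
response = requests.get(json_url)
if response.status_code != 200:
    raise Exception(f"Failed to load JSON from {json_url}. Status code: {response.status_code}")

# Step 2: Create a Document from the JSON object
data = response.json()
if 'question' not in data:
    raise ValueError("The JSON object must contain a 'question' field.")

content = data.get('question', "").strip()
if not content:
    raise ValueError("The 'question' field in the JSON object cannot be empty.")

docs = [Document(page_content=content, metadata=data)]

The net effect of the commit: the old JSONL loader built many Documents (one per line, each with a generated uuid), while this version fetches one JSON object and indexes exactly one Document, reusing the whole payload as its metadata.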
@@ -208,7 +186,7 @@ vector_store = FAISS.from_documents(docs, embedding_model)
 # Save the FAISS index locally
 vector_store.save_local("faiss_index")
 
-print("✅ FAISS index created and saved locally.")
+#print("✅ FAISS index created and saved locally.")
 
 # -------------------------------
 # Step 4: Create Retriever Tool (for use in LangChain)
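For context, the Step 3 and Step 4 blocks that these hunks reference but do not change might look like the sketch below. The import paths, the embedding model name, and the tool name/description are assumptions; the diff itself only shows vector_store = FAISS.from_documents(docs, embedding_model) and vector_store.save_local("faiss_index").

from langchain_community.embeddings import HuggingFaceEmbeddings  # assumed import path
from langchain_community.vectorstores import FAISS                # assumed import path
from langchain.tools.retriever import create_retriever_tool       # assumed import path

# Step 3: Set up HuggingFace Embeddings and FAISS VectorStore
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"  # placeholder model, not named in the diff
)
vector_store = FAISS.from_documents(docs, embedding_model)
vector_store.save_local("faiss_index")

# Step 4: Create Retriever Tool (for use in LangChain)
retriever = vector_store.as_retriever()
retriever_tool = create_retriever_tool(
    retriever,
    name="question_search",  # hypothetical tool name
    description="Search the indexed question(s) for relevant context.",  # hypothetical description
)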
 
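A closing caveat on the new Step 1: the committed code treats the /questions response as a single JSON object with one 'question' field. If the endpoint instead returned a list of question records (the diff gives no evidence either way), the 'question' not in data membership test would be true for a list and the code would stop with a ValueError even when every record contains a question. A defensive variant, reusing the imports from the first sketch, with the list branch as a labeled assumption:

# Hypothetical guard: accept either a single JSON object or a list of records.
payload = response.json()
records = payload if isinstance(payload, list) else [payload]

docs = []
for record in records:
    content = record.get('question', "").strip()
    if content:
        docs.append(Document(page_content=content, metadata=record))

if not docs:
    raise ValueError("No non-empty 'question' fields found in the JSON payload.")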