wt002 committed
Commit b280aa5 · verified · 1 Parent(s): 9eb90d2

Update agent.py

Files changed (1)
  1. agent.py +22 -44
agent.py CHANGED
@@ -140,61 +140,39 @@ sys_msg = SystemMessage(content=system_prompt)
 
 
 # -------------------------------
-# Step 1: Load documents from CSV file (max 165 rows)
+# Step 1: Load the JSON from a URL
 # -------------------------------
 
-
-
-# -------------------------------
-# Step 1: Load JSON data from URL
-# -------------------------------
-jsonl_url = "https://huggingface.co/spaces/wt002/Final_Assignment_Project/blob/main/metedata.jsonl"  # Replace with your actual JSONL URL
-response = requests.get(jsonl_url)
-
-# Ensure the request was successful
-if response.status_code != 200:
-    raise Exception(f"Failed to load JSONL from {jsonl_url}. Status code: {response.status_code}")
-
+json_url = "https://agents-course-unit4-scoring.hf.space/questions"  # Replace with your actual JSON file URL
+response = requests.get(json_url)
 
 # Ensure the request was successful
 if response.status_code != 200:
-    raise Exception(f"Failed to load JSONL from {jsonl_url}. Status code: {response.status_code}")
-
-# Read and parse the JSONL file line by line
-docs = []
-for line_number, line in enumerate(response.text.splitlines(), 1):
-    try:
-        doc = json.loads(line)  # Parse each line as a separate JSON object
-        content = doc.get('content', "").strip()
-        if not content:
-            continue  # Skip documents with no content
+    raise Exception(f"Failed to load JSON from {json_url}. Status code: {response.status_code}")
 
-        # Add unique ID to each document
-        doc['id'] = str(uuid.uuid4())
-
-        # Convert the document into a Document object
-        docs.append(Document(page_content=content, metadata=doc))
-
-    except json.JSONDecodeError as e:
-        print(f"Skipping malformed JSONL line at line {line_number}: {line}")
-        print(f"Error: {e}")
+# Parse the JSON object
+data = response.json()
 
+# Ensure the required field 'question' exists
+if 'question' not in data:
+    raise ValueError("The JSON object must contain a 'question' field.")
 
 # -------------------------------
-# Step 2: Prepare documents
-# -------------------------------
-docs = []
-for doc in data:
-    # Ensure the document has 'content' field
-    content = doc.get('content', "").strip()
-    if not content:
-        continue  # Skip documents with no content
+# Step 2: Create a Document from the JSON Object
+# -------------------------------
+content = data.get('question', "").strip()
+if not content:
+    raise ValueError("The 'question' field in the JSON object cannot be empty.")
+
+# Create a document and add metadata from the JSON object
+document = Document(
+    page_content=content,
+    metadata=data
+)
 
-    # Ensure unique ID for each document
-    doc['id'] = str(uuid.uuid4())
+# Wrap the document in a list to work with LangChain (as it expects a list of documents)
+docs = [document]
 
-    # Create Document objects from the data
-    docs.append(Document(page_content=content, metadata=doc))
 
 # -------------------------------
 # Step 3: Set up HuggingFace Embeddings and FAISS VectorStore
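Pulled out of the diff, the added lines amount to the following standalone flow (a minimal runnable sketch; the two imports are assumptions, since the diff does not show the file's import block):

import requests
from langchain_core.documents import Document  # assumed import path; not shown in the diff

# Step 1: Load the JSON from a URL
json_url = "https://agents-course-unit4-scoring.hf.space/questions"
response = requests.get(json_url)
if response.status_code != 200:
    raise Exception(f"Failed to load JSON from {json_url}. Status code: {response.status_code}")

# Step 2: Create a Document from the JSON object
data = response.json()
if 'question' not in data:
    raise ValueError("The JSON object must contain a 'question' field.")

content = data.get('question', "").strip()
if not content:
    raise ValueError("The 'question' field in the JSON object cannot be empty.")

docs = [Document(page_content=content, metadata=data)]

The net effect of the commit: the old JSONL loader built many Documents (one per line, each with a generated uuid), while this version fetches one JSON object and indexes exactly one Document, reusing the whole payload as its metadata.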
@@ -208,7 +186,7 @@ vector_store = FAISS.from_documents(docs, embedding_model)
 # Save the FAISS index locally
 vector_store.save_local("faiss_index")
 
-print("✅ FAISS index created and saved locally.")
+#print("✅ FAISS index created and saved locally.")
 
 # -------------------------------
 # Step 4: Create Retriever Tool (for use in LangChain)
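For context, the Step 3 and Step 4 blocks that these hunks reference but do not change might look like the sketch below. The import paths, the embedding model name, and the tool name/description are assumptions; the diff itself only shows vector_store = FAISS.from_documents(docs, embedding_model) and vector_store.save_local("faiss_index").

from langchain_community.embeddings import HuggingFaceEmbeddings  # assumed import path
from langchain_community.vectorstores import FAISS                # assumed import path
from langchain.tools.retriever import create_retriever_tool       # assumed import path

# Step 3: Set up HuggingFace Embeddings and FAISS VectorStore
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"  # placeholder model, not named in the diff
)
vector_store = FAISS.from_documents(docs, embedding_model)
vector_store.save_local("faiss_index")

# Step 4: Create Retriever Tool (for use in LangChain)
retriever = vector_store.as_retriever()
retriever_tool = create_retriever_tool(
    retriever,
    name="question_search",  # hypothetical tool name
    description="Search the indexed question(s) for relevant context.",  # hypothetical description
)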
 
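A closing caveat on the new Step 1: the committed code treats the /questions response as a single JSON object with one 'question' field. If the endpoint instead returned a list of question records (the diff gives no evidence either way), the 'question' not in data membership test would be true for a list and the code would stop with a ValueError even when every record contains a question. A defensive variant, reusing the imports from the first sketch, with the list branch as a labeled assumption:

# Hypothetical guard: accept either a single JSON object or a list of records.
payload = response.json()
records = payload if isinstance(payload, list) else [payload]

docs = []
for record in records:
    content = record.get('question', "").strip()
    if content:
        docs.append(Document(page_content=content, metadata=record))

if not docs:
    raise ValueError("No non-empty 'question' fields found in the JSON payload.")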