Spaces:
Running
Running
Update agent.py
Browse files
agent.py
CHANGED
@@ -29,7 +29,6 @@ from langchain.schema import Document
|
|
29 |
import requests
|
30 |
import json
|
31 |
#from langchain.embeddings import HuggingFaceEmbeddings
|
32 |
-
from langchain.vectorstores import FAISS
|
33 |
from langchain.schema import Document
|
34 |
#from langchain.agents import create_retriever_tool
|
35 |
|
@@ -149,18 +148,33 @@ sys_msg = SystemMessage(content=system_prompt)
|
|
149 |
# -------------------------------
|
150 |
# Step 1: Load JSON data from URL
|
151 |
# -------------------------------
|
152 |
-
|
153 |
-
response = requests.get(
|
154 |
|
155 |
# Ensure the request was successful
|
156 |
if response.status_code != 200:
|
157 |
-
raise Exception(f"Failed to load
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
158 |
|
159 |
-
# Parse the JSON content
|
160 |
-
data = response.json()
|
161 |
|
162 |
-
# Make sure we have the correct structure in the JSON
|
163 |
-
assert isinstance(data, list), "The JSON should contain a list of documents."
|
164 |
|
165 |
# -------------------------------
|
166 |
# Step 2: Prepare documents
|
|
|
29 |
import requests
|
30 |
import json
|
31 |
#from langchain.embeddings import HuggingFaceEmbeddings
|
|
|
32 |
from langchain.schema import Document
|
33 |
#from langchain.agents import create_retriever_tool
|
34 |
|
|
|
148 |
# -------------------------------
|
149 |
# Step 1: Load JSONL data from URL
|
150 |
# -------------------------------
|
151 |
+
jsonl_url = "https://example.com/documents.jsonl" # Replace with your actual JSONL URL
|
152 |
+
response = requests.get(jsonl_url)
|
153 |
|
154 |
# Ensure the request was successful
|
155 |
if response.status_code != 200:
|
156 |
+
raise Exception(f"Failed to load JSONL from {jsonl_url}. Status code: {response.status_code}")
|
157 |
+
|
158 |
+
|
159 |
+
# Read and parse the JSONL file line by line
|
160 |
+
docs = []
|
161 |
+
for line in response.text.splitlines():
|
162 |
+
try:
|
163 |
+
doc = json.loads(line) # Parse each line as a separate JSON object
|
164 |
+
content = doc.get('content', "").strip()
|
165 |
+
if not content:
|
166 |
+
continue # Skip documents with no content
|
167 |
+
|
168 |
+
# Add unique ID to each document
|
169 |
+
doc['id'] = str(uuid.uuid4())
|
170 |
+
|
171 |
+
# Convert the document into a Document object
|
172 |
+
docs.append(Document(page_content=content, metadata=doc))
|
173 |
+
|
174 |
+
except json.JSONDecodeError:
|
175 |
+
print("Skipping malformed JSONL line.")
|
176 |
|
|
|
|
|
177 |
|
|
|
|
|
178 |
|
179 |
# -------------------------------
|
180 |
# Step 2: Prepare documents
|