Spaces:
Sleeping
Sleeping
Update agent.py
Browse files
agent.py
CHANGED
@@ -368,18 +368,18 @@ def reverse_text(text: str) -> str:
|
|
368 |
return text[::-1].replace("\\", "").strip() if text.startswith(('.', ',')) else text
|
369 |
|
370 |
# 3. Unified Document Creation
|
371 |
-
def create_documents(data_source: str, data:
|
372 |
"""Handle both Gradio chat and JSON questions"""
|
373 |
docs = []
|
374 |
|
375 |
for item in data:
|
|
|
376 |
# Process different data sources
|
377 |
-
if data_source == "
|
378 |
-
content = "\n".join(item["messages"])
|
379 |
-
elif data_source == "json":
|
380 |
raw_question = item.get("question", "")
|
381 |
-
content =
|
382 |
else:
|
|
|
383 |
continue
|
384 |
|
385 |
# Ensure metadata type safety
|
@@ -389,14 +389,28 @@ def create_documents(data_source: str, data: List[dict]) -> List[Document]:
|
|
389 |
"file_name": str(item.get("file_name", ""))
|
390 |
}
|
391 |
|
392 |
-
#
|
393 |
if content.strip(): # Only append non-empty content
|
394 |
docs.append(Document(page_content=content, metadata=metadata))
|
395 |
else:
|
396 |
-
print(f"Skipping invalid entry: {item}")
|
397 |
|
398 |
return docs
|
399 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
400 |
|
401 |
|
402 |
|
@@ -420,7 +434,7 @@ class MyVector_Store:
|
|
420 |
# -----------------------------
|
421 |
# Process JSON data and create documents
|
422 |
# -----------------------------
|
423 |
-
file_path = "/home/wendy/
|
424 |
|
425 |
try:
|
426 |
with open(file_path, "r", encoding="utf-8") as f:
|
|
|
368 |
return text[::-1].replace("\\", "").strip() if text.startswith(('.', ',')) else text
|
369 |
|
370 |
# 3. Unified Document Creation
|
371 |
+
def create_documents(data_source: str, data: list) -> list:
|
372 |
"""Handle both Gradio chat and JSON questions"""
|
373 |
docs = []
|
374 |
|
375 |
for item in data:
|
376 |
+
content = ""
|
377 |
# Process different data sources
|
378 |
+
if data_source == "json":
|
|
|
|
|
379 |
raw_question = item.get("question", "")
|
380 |
+
content = raw_question # Adjust as per your content processing logic
|
381 |
else:
|
382 |
+
print(f"Skipping invalid data source: {data_source}")
|
383 |
continue
|
384 |
|
385 |
# Ensure metadata type safety
|
|
|
389 |
"file_name": str(item.get("file_name", ""))
|
390 |
}
|
391 |
|
392 |
+
# Check if content is non-empty
|
393 |
if content.strip(): # Only append non-empty content
|
394 |
docs.append(Document(page_content=content, metadata=metadata))
|
395 |
else:
|
396 |
+
print(f"Skipping invalid entry with empty content: {item}")
|
397 |
|
398 |
return docs
|
399 |
|
400 |
+
# Path to your data.json
|
401 |
+
file_path = "/home/wendy/my_hf_agent_course_projects/src/data.json"
|
402 |
+
|
403 |
+
# Check if the file exists
|
404 |
+
if os.path.exists(file_path):
|
405 |
+
# Load the data from the JSON file
|
406 |
+
with open(file_path, "r", encoding="utf-8") as f:
|
407 |
+
data = json.load(f)
|
408 |
+
|
409 |
+
# Create documents from the loaded data
|
410 |
+
docs = create_documents("json", data)
|
411 |
+
print(f"Documents created: {len(docs)}")
|
412 |
+
else:
|
413 |
+
print(f"Error: File {file_path} not found.")
|
414 |
|
415 |
|
416 |
|
|
|
434 |
# -----------------------------
|
435 |
# Process JSON data and create documents
|
436 |
# -----------------------------
|
437 |
+
file_path = "/home/wendy/Downloads/questions.json"
|
438 |
|
439 |
try:
|
440 |
with open(file_path, "r", encoding="utf-8") as f:
|