Spaces:
Sleeping
Sleeping
Update agent.py
Browse files
agent.py
CHANGED
@@ -367,8 +367,57 @@ def reverse_text(text: str) -> str:
|
|
367 |
"""Fix reversed text patterns"""
|
368 |
return text[::-1].replace("\\", "").strip() if text.startswith(('.', ',')) else text
|
369 |
|
|
|
370 |
# 3. Unified Document Creation
|
371 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
372 |
|
373 |
|
374 |
# 4. Vector Store Integration
|
@@ -392,6 +441,20 @@ class MyVector_Store:
|
|
392 |
# Process JSON data and create documents
|
393 |
# -----------------------------
|
394 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
395 |
|
396 |
# -----------------------------
|
397 |
# Initialize embedding model
|
|
|
367 |
"""Fix reversed text patterns"""
|
368 |
return text[::-1].replace("\\", "").strip() if text.startswith(('.', ',')) else text
|
369 |
|
370 |
+
|
371 |
# 3. Unified Document Creation
|
372 |
|
373 |
+
def create_documents(data_source: str, data: list) -> list:
    """Build ``Document`` objects from a list of question records.

    Only the ``"json"`` data source is implemented; any other value is
    reported on stdout and yields an empty list.

    Args:
        data_source: Kind of input. Only ``"json"`` is handled.
        data: Records to convert, expected to be dicts. The text is read
            from the ``"question"`` key; ``"task_id"``, ``"Level"`` and
            ``"file_name"`` are copied into the metadata as strings.
            ``None`` is treated as an empty list.

    Returns:
        One ``Document`` per record whose question is a non-blank string,
        in input order. Records without usable text are reported on
        stdout and skipped.
    """
    # The source kind cannot change between items, so check it once
    # instead of printing the same message for every record.
    if data_source != "json":
        print(f"Skipping invalid data source: {data_source}")
        return []

    docs = []
    for item in data or []:
        raw_question = item.get("question", "") if isinstance(item, dict) else None
        # A JSON null or a non-string value has no .strip(); treat it as
        # empty content so the record is skipped rather than crashing.
        content = raw_question if isinstance(raw_question, str) else ""

        if not content.strip():
            print(f"Skipping invalid entry with empty content: {item}")
            continue

        # Metadata values are forced to str so every document carries the
        # same value types regardless of what the JSON held.
        metadata = {
            "task_id": str(item.get("task_id", "")),
            "level": str(item.get("Level", "")),
            "file_name": str(item.get("file_name", "")),
        }
        docs.append(Document(page_content=content, metadata=metadata))

    return docs
|
401 |
+
|
402 |
+
# Path to your data.json
|
403 |
+
# NOTE(review): machine-specific absolute path; the same literal is assigned to file_path again further down in this file — confirm which assignment is intended.
file_path = "/home/wendy/Downloads/data.json"
|
404 |
+
|
405 |
+
def load_data(file_path: str) -> list[dict]:
    """Load and parse the JSON data file at *file_path*.

    Args:
        file_path: Path to a file whose name ends in ``.json`` (any
            letter case).

    Returns:
        The parsed JSON content. The top-level value is returned exactly
        as parsed; its shape is not validated against the annotation.

    Raises:
        FileNotFoundError: If *file_path* does not exist.
        ValueError: If the name does not end in ``.json`` or the content
            is not valid JSON (the ``JSONDecodeError`` is chained as the
            cause).
        RuntimeError: If the file cannot be opened, read, or decoded as
            UTF-8 (the underlying error is chained as the cause).
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"Data file not found: {file_path}")

    # Compare case-insensitively so "DATA.JSON" is accepted as well.
    if not file_path.lower().endswith(".json"):
        raise ValueError("Invalid file format. Only JSON files supported")

    try:
        with open(file_path, "r", encoding="utf-8") as f:
            return json.load(f)
    except json.JSONDecodeError as err:
        # Chain the cause so the line/column of the syntax error survives.
        raise ValueError("Invalid JSON format in data file") from err
    except (OSError, UnicodeDecodeError) as err:
        # Only I/O and decoding failures are wrapped; unrelated errors
        # propagate unchanged instead of being relabelled.
        raise RuntimeError(f"Error loading data: {err}") from err
|
420 |
+
|
421 |
|
422 |
|
423 |
# 4. Vector Store Integration
|
|
|
441 |
# Process JSON data and create documents
|
442 |
# -----------------------------
|
443 |
|
444 |
+
# NOTE(review): machine-specific absolute path — confirm it exists wherever this module runs.
file_path = "/home/wendy/Downloads/data.json"

# Start from an empty dataset: the handlers below only print, so without
# this binding a failed load would leave `data` undefined and the
# create_documents call would raise NameError.
data = []
try:
    with open(file_path, "r", encoding="utf-8") as f:
        data = json.load(f)
except FileNotFoundError as e:
    print(f"Error: {e}")
except json.JSONDecodeError as e:
    print(f"Error decoding JSON: {e}")
else:
    # Echo the loaded records only when the load succeeded.
    print(data)

# Convert the records to documents and keep their raw text alongside.
docs = create_documents("json", data)
texts = [doc.page_content for doc in docs]
|
457 |
+
|
458 |
|
459 |
# -----------------------------
|
460 |
# Initialize embedding model
|