wt002 committed on
Commit
73ff364
·
verified ·
1 Parent(s): 8a16fdc

Update agent.py

Browse files
Files changed (1) hide show
  1. agent.py +63 -0
agent.py CHANGED
@@ -367,8 +367,57 @@ def reverse_text(text: str) -> str:
367
  """Fix reversed text patterns"""
368
  return text[::-1].replace("\\", "").strip() if text.startswith(('.', ',')) else text
369
 
 
370
  # 3. Unified Document Creation
371
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
372
 
373
 
374
  # 4. Vector Store Integration
@@ -392,6 +441,20 @@ class MyVector_Store:
392
  # Process JSON data and create documents
393
  # -----------------------------
394
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
395
 
396
  # -----------------------------
397
  # Initialize embedding model
 
367
  """Fix reversed text patterns"""
368
  return text[::-1].replace("\\", "").strip() if text.startswith(('.', ',')) else text
369
 
370
+
371
  # 3. Unified Document Creation
372
 
373
def create_documents(data_source: str, data: list) -> list:
    """Build ``Document`` objects from raw question records.

    Only the ``"json"`` data source is handled; any other value is reported
    on stdout and produces an empty list.

    Args:
        data_source: Identifier of the input format. Only ``"json"`` is
            processed.
        data: Records to convert. Each record is expected to be a dict whose
            ``"question"`` value becomes the page content, while
            ``"task_id"``, ``"Level"`` and ``"file_name"`` are copied into
            the metadata as strings.

    Returns:
        One ``Document`` per record that has non-blank question text, in
        input order. Malformed or empty records are skipped with a message.
    """
    # The source check does not depend on the record, so do it once up front
    # instead of printing the same message for every item.
    if data_source != "json":
        print(f"Skipping invalid data source: {data_source}")
        return []

    docs = []
    for item in data:
        # Records come from external JSON; guard against non-dict entries
        # rather than failing on ``.get``.
        if not isinstance(item, dict):
            print(f"Skipping non-dict entry: {item}")
            continue

        raw_question = item.get("question", "")
        # A JSON ``null`` (or any non-string) question would break ``.strip()``;
        # treat it as empty content so the entry is skipped cleanly.
        content = raw_question if isinstance(raw_question, str) else ""

        if not content.strip():
            print(f"Skipping invalid entry with empty content: {item}")
            continue

        # Ensure metadata type safety: every value is stored as a string.
        metadata = {
            "task_id": str(item.get("task_id", "")),
            "level": str(item.get("Level", "")),
            "file_name": str(item.get("file_name", "")),
        }
        docs.append(Document(page_content=content, metadata=metadata))

    return docs
401
+
402
# Path to your data.json
# NOTE: machine-specific absolute path; adjust it for your environment.
file_path = "/home/wendy/Downloads/data.json"


def load_data(file_path: str) -> list[dict]:
    """Load and parse a JSON data file.

    Args:
        file_path: Path to the JSON file. This parameter shadows the
            module-level ``file_path`` constant inside the function.

    Returns:
        The parsed JSON content, returned exactly as ``json.load`` produces
        it (no shape validation is performed).

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        ValueError: If the file does not have a ``.json`` extension, or its
            content is not valid JSON.
        RuntimeError: If the file cannot be read or decoded as UTF-8.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"Data file not found: {file_path}")

    # Compare case-insensitively so names like ``DATA.JSON`` are accepted.
    if not file_path.lower().endswith(".json"):
        raise ValueError("Invalid file format. Only JSON files supported")

    try:
        with open(file_path, "r", encoding="utf-8") as f:
            return json.load(f)
    except json.JSONDecodeError as err:
        # Chain the cause so the offending line/column stays in the traceback.
        raise ValueError("Invalid JSON format in data file") from err
    except (OSError, UnicodeDecodeError) as err:
        # Covers read failures (permissions, directory, file removed after
        # the existence check) and non-UTF-8 content.
        raise RuntimeError(f"Error loading data: {err}") from err
420
+
421
 
422
 
423
  # 4. Vector Store Integration
 
441
  # Process JSON data and create documents
442
  # -----------------------------
443
 
444
# NOTE: machine-specific absolute path; adjust it for your environment.
file_path = "/home/wendy/Downloads/data.json"

# Start from an empty dataset so the document-building step below still runs
# (and yields no documents) when the file is missing or malformed, instead of
# failing on an unbound ``data``.
data = []
try:
    with open(file_path, "r", encoding="utf-8") as f:
        data = json.load(f)
except FileNotFoundError as e:
    print(f"Error: {e}")
except json.JSONDecodeError as e:
    print(f"Error decoding JSON: {e}")
else:
    # Echo the loaded records only when loading succeeded.
    print(data)

# Convert the records to documents and collect their text content.
docs = create_documents("json", data)
texts = [doc.page_content for doc in docs]
457
+
458
 
459
  # -----------------------------
460
  # Initialize embedding model