wt002 committed on
Commit
59d0991
·
verified ·
1 Parent(s): bd864cc

Update agent.py

Browse files
Files changed (1) hide show
  1. agent.py +22 -8
agent.py CHANGED
@@ -368,18 +368,18 @@ def reverse_text(text: str) -> str:
368
  return text[::-1].replace("\\", "").strip() if text.startswith(('.', ',')) else text
369
 
370
  # 3. Unified Document Creation
371
- def create_documents(data_source: str, data: List[dict]) -> List[Document]:
372
  """Handle both Gradio chat and JSON questions"""
373
  docs = []
374
 
375
  for item in data:
 
376
  # Process different data sources
377
- if data_source == "gradio":
378
- content = "\n".join(item["messages"])
379
- elif data_source == "json":
380
  raw_question = item.get("question", "")
381
- content = reverse_text(process_content(raw_question))
382
  else:
 
383
  continue
384
 
385
  # Ensure metadata type safety
@@ -389,14 +389,28 @@ def create_documents(data_source: str, data: List[dict]) -> List[Document]:
389
  "file_name": str(item.get("file_name", ""))
390
  }
391
 
392
- # Make sure that the content and metadata are valid
393
  if content.strip(): # Only append non-empty content
394
  docs.append(Document(page_content=content, metadata=metadata))
395
  else:
396
- print(f"Skipping invalid entry: {item}")
397
 
398
  return docs
399
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
400
 
401
 
402
 
@@ -420,7 +434,7 @@ class MyVector_Store:
420
  # -----------------------------
421
  # Process JSON data and create documents
422
  # -----------------------------
423
- file_path = "/home/wendy/my_hf_agent_course_projects/src/data.json"
424
 
425
  try:
426
  with open(file_path, "r", encoding="utf-8") as f:
 
368
  return text[::-1].replace("\\", "").strip() if text.startswith(('.', ',')) else text
369
 
370
  # 3. Unified Document Creation
371
+ def create_documents(data_source: str, data: list) -> list:
372
  """Handle both Gradio chat and JSON questions"""
373
  docs = []
374
 
375
  for item in data:
376
+ content = ""
377
  # Process different data sources
378
+ if data_source == "json":
 
 
379
  raw_question = item.get("question", "")
380
+ content = raw_question # Adjust as per your content processing logic
381
  else:
382
+ print(f"Skipping invalid data source: {data_source}")
383
  continue
384
 
385
  # Ensure metadata type safety
 
389
  "file_name": str(item.get("file_name", ""))
390
  }
391
 
392
+ # Check if content is non-empty
393
  if content.strip(): # Only append non-empty content
394
  docs.append(Document(page_content=content, metadata=metadata))
395
  else:
396
+ print(f"Skipping invalid entry with empty content: {item}")
397
 
398
  return docs
399
 
400
+ # Path to your data.json
401
+ file_path = "/home/wendy/my_hf_agent_course_projects/src/data.json"
402
+
403
+ # Check if the file exists
404
+ if os.path.exists(file_path):
405
+ # Load the data from the JSON file
406
+ with open(file_path, "r", encoding="utf-8") as f:
407
+ data = json.load(f)
408
+
409
+ # Create documents from the loaded data
410
+ docs = create_documents("json", data)
411
+ print(f"Documents created: {len(docs)}")
412
+ else:
413
+ print(f"Error: File {file_path} not found.")
414
 
415
 
416
 
 
434
  # -----------------------------
435
  # Process JSON data and create documents
436
  # -----------------------------
437
+ file_path = "/home/wendy/Downloads/questions.json"
438
 
439
  try:
440
  with open(file_path, "r", encoding="utf-8") as f: