wt002 committed on
Commit
57de9d7
·
verified ·
1 Parent(s): 7f0bbf2

Update agent.py

Browse files
Files changed (1) hide show
  1. agent.py +19 -14
agent.py CHANGED
@@ -368,18 +368,23 @@ class MyVectorStore:
368
 
369
  import json
370
  from langchain.schema import Document
371
-
372
- def get_question(raw_question) -> str:
373
- """Convert list to string and fix reversed text"""
374
- # Handle lists (join with spaces)
375
- if isinstance(raw_question, list):
376
- text = " ".join(raw_question)
377
- else:
378
- text = str(raw_question)
 
379
 
380
- # Fix reversed text like ".rewsna..."
 
 
 
381
  if text.startswith(('.', ',')):
382
- return text[::-1].replace("\\", "").strip()
 
383
  return text
384
 
385
  with open("questions.json", "r", encoding="utf-8") as f:
@@ -387,15 +392,15 @@ with open("questions.json", "r", encoding="utf-8") as f:
387
 
388
  docs = [
389
  Document(
390
- page_content=get_question(item["question"]),
391
  metadata={
392
  "task_id": item["task_id"],
393
- "level": item.get("Level", "unknown"),
394
- "file_name": item.get("file_name", "")
395
  }
396
  )
397
  for item in data
398
- if "question" in item and item["question"] # Skip missing/empty
399
  ]
400
 
401
  texts = [doc.page_content for doc in docs]
 
368
 
369
  import json
370
  from langchain.schema import Document
371
+ from typing import Any
372
+
373
+ def process_content(raw_content: Any) -> str:
374
+ """Convert any input to a clean string, handling lists and reversed text"""
375
+ # Flatten nested lists
376
+ def flatten(nested):
377
+ if isinstance(nested, list):
378
+ return " ".join(flatten(e) for e in nested)
379
+ return str(nested)
380
 
381
+ # Convert to string and clean
382
+ text = flatten(raw_content)
383
+
384
+ # Fix reversed text patterns like ".rewsna..."
385
  if text.startswith(('.', ',')):
386
+ text = text[::-1].replace("\\", "").strip()
387
+
388
  return text
389
 
390
  with open("questions.json", "r", encoding="utf-8") as f:
 
392
 
393
  docs = [
394
  Document(
395
+ page_content=process_content(item["question"]),
396
  metadata={
397
  "task_id": item["task_id"],
398
+ "level": str(item.get("Level", "")), # Force string conversion
399
+ "file_name": str(item.get("file_name", ""))
400
  }
401
  )
402
  for item in data
403
+ if "question" in item # Skip items without questions
404
  ]
405
 
406
  texts = [doc.page_content for doc in docs]