wt002 committed on
Commit
f740521
·
verified ·
1 Parent(s): 850193d

Update agent.py

Browse files
Files changed (1) hide show
  1. agent.py +8 -17
agent.py CHANGED
@@ -370,35 +370,26 @@ import json
370
  from langchain.schema import Document
371
 
372
  def reverse_text(text: str) -> str:
373
- return text[::-1].replace("\\", "") # Handle escaped quotes
 
374
 
375
- # Load the JSON file
376
  with open("questions.json", "r", encoding="utf-8") as f:
377
  data = json.load(f)
378
 
379
- # Convert each question into a Document
380
  docs = [
381
  Document(
382
- page_content=(
383
- str(reverse_text(item["question"]))
384
- if isinstance(item["question"], (list, bytes))
385
- else reverse_text(item["question"])
386
- if item["question"].startswith(('.', ','))
387
- else item["question"]
388
- ),
389
  metadata={
390
  "task_id": item["task_id"],
391
- "level": item["Level"],
392
- "file_name": item["file_name"] # Added from your URL example
393
  }
394
  )
395
- for item in data
396
- if "question" in item and item["question"] # Skip empty questions
397
  ]
398
 
399
-
400
- # Now extract texts
401
- texts = [doc.page_content for doc in docs]
402
 
403
  # Initialize the embedding model
404
  embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
 
370
  from langchain.schema import Document
371
 
372
  def reverse_text(text: str) -> str:
373
+ """Fix backward questions like '.rewsna...'"""
374
+ return text[::-1].replace("\\", "") if text.startswith(('.', ',')) else text
375
 
 
376
  with open("questions.json", "r", encoding="utf-8") as f:
377
  data = json.load(f)
378
 
 
379
  docs = [
380
  Document(
381
+ page_content=reverse_text(str(item["question"])), # Ensure string + fix reversed text
 
 
 
 
 
 
382
  metadata={
383
  "task_id": item["task_id"],
384
+ "level": item.get("Level", "Unknown"),
385
+ "file_name": item.get("file_name", "")
386
  }
387
  )
388
+ for item in data
389
+ if "question" in item and item["question"] # Skip missing/empty questions
390
  ]
391
 
392
+ texts = [doc.page_content for doc in docs] # Now this will work
 
 
393
 
394
  # Initialize the embedding model
395
  embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")