Spaces:
Sleeping
Sleeping
Update agent.py
Browse files
agent.py
CHANGED
@@ -368,18 +368,23 @@ class MyVectorStore:
|
|
368 |
|
369 |
import json
|
370 |
from langchain.schema import Document
|
371 |
-
|
372 |
-
|
373 |
-
|
374 |
-
|
375 |
-
|
376 |
-
|
377 |
-
|
378 |
-
|
|
|
379 |
|
380 |
-
#
|
|
|
|
|
|
|
381 |
if text.startswith(('.', ',')):
|
382 |
-
|
|
|
383 |
return text
|
384 |
|
385 |
with open("questions.json", "r", encoding="utf-8") as f:
|
@@ -387,15 +392,15 @@ with open("questions.json", "r", encoding="utf-8") as f:
|
|
387 |
|
388 |
docs = [
|
389 |
Document(
|
390 |
-
page_content=
|
391 |
metadata={
|
392 |
"task_id": item["task_id"],
|
393 |
-
"level": item.get("Level", "
|
394 |
-
"file_name": item.get("file_name", "")
|
395 |
}
|
396 |
)
|
397 |
for item in data
|
398 |
-
if "question" in item
|
399 |
]
|
400 |
|
401 |
texts = [doc.page_content for doc in docs]
|
|
|
368 |
|
369 |
import json
|
370 |
from langchain.schema import Document
|
371 |
+
from typing import Any
|
372 |
+
|
373 |
+
def process_content(raw_content: Any) -> str:
    """Convert arbitrary JSON-like input into a single clean string.

    Lists and tuples (nested to any depth) are flattened and their
    non-empty items joined with single spaces. ``None`` contributes
    nothing, and every other value is converted with ``str()``.
    Surrounding whitespace is always removed.

    Text that starts with ``.`` or ``,`` is treated as written backwards
    (for example ``".rewsna eht"``): it is reversed and backslashes are
    dropped. This is only a heuristic, so genuine text that happens to
    begin with one of those characters is reversed as well.

    Args:
        raw_content: A string, number, ``None``, or an arbitrarily nested
            list/tuple of such values.

    Returns:
        The flattened, stripped and, where detected, un-reversed text.
    """
    def flatten(nested: Any) -> str:
        # A null value must not leak into the text as the literal "None".
        if nested is None:
            return ""
        if isinstance(nested, (list, tuple)):
            parts = (flatten(element) for element in nested)
            # Skip empty pieces so the join never produces doubled spaces.
            return " ".join(part for part in parts if part)
        return str(nested)

    # Strip before the check so leading whitespace cannot hide a reversed
    # sentence, and so ordinary text is cleaned as well.
    text = flatten(raw_content).strip()

    # Fix reversed text patterns like ".rewsna..."
    if text.startswith((".", ",")):
        text = text[::-1].replace("\\", "").strip()

    return text
|
389 |
|
390 |
with open("questions.json", "r", encoding="utf-8") as f:
|
|
|
392 |
|
393 |
docs = [
|
394 |
Document(
|
395 |
+
page_content=process_content(item["question"]),
|
396 |
metadata={
|
397 |
"task_id": item["task_id"],
|
398 |
+
"level": str(item.get("Level", "")), # Force string conversion
|
399 |
+
"file_name": str(item.get("file_name", ""))
|
400 |
}
|
401 |
)
|
402 |
for item in data
|
403 |
+
if "question" in item # Skip items without questions
|
404 |
]
|
405 |
|
406 |
texts = [doc.page_content for doc in docs]
|