Spaces:
Sleeping
Sleeping
Update agent.py
Browse files
agent.py
CHANGED
@@ -366,42 +366,55 @@ class MyVectorStore:
|
|
366 |
# -----------------------------
|
367 |
# Define the URL where the JSON file is hosted
|
368 |
|
369 |
-
import
|
|
|
370 |
from langchain.schema import Document
|
371 |
-
|
372 |
-
|
373 |
-
|
374 |
-
|
375 |
-
|
376 |
-
|
377 |
-
|
378 |
-
|
379 |
-
|
380 |
-
|
381 |
-
# Convert to string and clean
|
382 |
-
text = flatten(raw_content)
|
383 |
-
|
384 |
-
# Fix reversed text patterns like ".rewsna..."
|
385 |
-
if text.startswith(('.', ',')):
|
386 |
-
text = text[::-1].replace("\\", "").strip()
|
387 |
-
|
388 |
-
return text
|
389 |
|
390 |
-
|
391 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
392 |
|
393 |
-
|
394 |
-
|
395 |
-
|
396 |
-
|
397 |
-
"task_id": item["task_id"],
|
398 |
-
"level": str(item.get("Level", "")), # Force string conversion
|
399 |
"file_name": str(item.get("file_name", ""))
|
400 |
}
|
401 |
-
|
402 |
-
|
403 |
-
|
404 |
-
|
405 |
|
406 |
texts = [doc.page_content for doc in docs]
|
407 |
|
|
|
366 |
# -----------------------------
|
367 |
# Define the URL where the JSON file is hosted
|
368 |
|
369 |
+
from typing import TypedDict, Annotated, List
|
370 |
+
import gradio as gr
|
371 |
from langchain.schema import Document
|
372 |
+
import json
|
373 |
+
|
374 |
+
# 1. Type-Checked State for Gradio
|
375 |
+
class ChatState(TypedDict):
    """Typed shape of the chat state used with the Gradio interface."""

    # Chat history, one string per message.  The ``Annotated`` metadata carries
    # a non-rendered ``gr.State`` component and a human-readable description.
    messages: Annotated[
        List[str],
        gr.State(render=False),
        "Stores chat history as list of strings",
    ]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
381 |
|
382 |
+
# 2. Content Processing Utilities
|
383 |
+
def process_content(raw_content) -> str:
    """Convert arbitrary input into a single clean string.

    Args:
        raw_content: A string, a (possibly nested) list/tuple of values, or any
            other object.

    Returns:
        ``""`` for ``None``; the space-joined, recursively flattened items for
        a list or tuple (``None`` items are skipped); ``str(raw_content)`` for
        anything else.
    """
    # A missing value must not turn into the literal text "None".
    if raw_content is None:
        return ""
    if isinstance(raw_content, (list, tuple)):
        # Recurse so nested sequences are flattened instead of being rendered
        # as their Python repr (e.g. "['b', 'c']").
        return " ".join(
            process_content(item) for item in raw_content if item is not None
        )
    return str(raw_content)
|
388 |
+
|
389 |
+
def reverse_text(text: str) -> str:
    """Undo reversed text such as ``".rewsna"``.

    Text whose first non-whitespace character is ``.`` or ``,`` is treated as
    reversed: it is reversed back, backslashes are removed and surrounding
    whitespace is stripped.  Any other text is returned unchanged.

    Args:
        text: The string to inspect.

    Returns:
        The corrected string, or ``text`` itself when no reversal marker is
        found.
    """
    # Look past leading whitespace so padded input is still recognised as
    # reversed; a plain startswith() on the raw text would miss it.
    if not text.lstrip().startswith((".", ",")):
        return text
    return text[::-1].replace("\\", "").strip()
|
392 |
+
|
393 |
+
# 3. Unified Document Creation
|
394 |
+
def create_documents(data_source: str, data: List[dict]) -> List[Document]:
    """Build ``Document`` objects from Gradio chat records or JSON questions.

    Args:
        data_source: ``"gradio"`` to join each record's ``"messages"`` with
            newlines, or ``"json"`` to run each record's ``"question"`` through
            ``process_content`` and ``reverse_text``.  Any other value produces
            an empty result.
        data: The records to convert.

    Returns:
        One ``Document`` per record.  Its metadata holds ``task_id``, ``level``
        (read from the ``"Level"`` key) and ``file_name``, each coerced to
        ``str`` and defaulting to ``""`` when the key is absent.
    """
    # The source is fixed for the whole call, so an unsupported one is rejected
    # once here rather than skipped again for every record.
    if data_source not in ("gradio", "json"):
        return []

    docs = []
    for item in data:
        if data_source == "gradio":
            # Tolerate a missing/empty "messages" entry, mirroring the lenient
            # .get() lookups used for the JSON fields.
            messages = item.get("messages") or []
            # A bare string would otherwise be joined character by character.
            if isinstance(messages, str):
                messages = [messages]
            content = "\n".join(str(message) for message in messages)
        else:
            raw_question = item.get("question", "")
            content = reverse_text(process_content(raw_question))

        # Coerce every metadata value to str so all documents carry uniformly
        # typed fields.
        metadata = {
            "task_id": str(item.get("task_id", "")),
            "level": str(item.get("Level", "")),
            "file_name": str(item.get("file_name", "")),
        }
        docs.append(Document(page_content=content, metadata=metadata))

    return docs
|
418 |
|
419 |
texts = [doc.page_content for doc in docs]
|
420 |
|