wt002 committed on
Commit
00163ca
·
verified ·
1 Parent(s): 57de9d7

Update agent.py

Browse files
Files changed (1) hide show
  1. agent.py +44 -31
agent.py CHANGED
@@ -366,42 +366,55 @@ class MyVectorStore:
366
  # -----------------------------
367
  # Define the URL where the JSON file is hosted
368
 
369
- import json
 
370
  from langchain.schema import Document
371
- from typing import Any
372
-
373
- def process_content(raw_content: Any) -> str:
374
- """Convert any input to a clean string, handling lists and reversed text"""
375
- # Flatten nested lists
376
- def flatten(nested):
377
- if isinstance(nested, list):
378
- return " ".join(flatten(e) for e in nested)
379
- return str(nested)
380
-
381
- # Convert to string and clean
382
- text = flatten(raw_content)
383
-
384
- # Fix reversed text patterns like ".rewsna..."
385
- if text.startswith(('.', ',')):
386
- text = text[::-1].replace("\\", "").strip()
387
-
388
- return text
389
 
390
- with open("questions.json", "r", encoding="utf-8") as f:
391
- data = json.load(f)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
392
 
393
- docs = [
394
- Document(
395
- page_content=process_content(item["question"]),
396
- metadata={
397
- "task_id": item["task_id"],
398
- "level": str(item.get("Level", "")), # Force string conversion
399
  "file_name": str(item.get("file_name", ""))
400
  }
401
- )
402
- for item in data
403
- if "question" in item # Skip items without questions
404
- ]
405
 
406
  texts = [doc.page_content for doc in docs]
407
 
 
366
  # -----------------------------
367
  # Define the URL where the JSON file is hosted
368
 
369
+ from typing import TypedDict, Annotated, List
370
+ import gradio as gr
371
  from langchain.schema import Document
372
+ import json
373
+
374
+ # 1. Type-Checked State for Gradio
375
class ChatState(TypedDict):
    """Dictionary shape for chat state: a single ``messages`` key.

    NOTE(review): ``TypedDict`` only informs static type checkers; nothing
    in this class validates values at runtime.
    """

    # Chat history as a list of strings. The extra ``Annotated`` arguments
    # are metadata only: a ``gr.State(render=False)`` instance (created when
    # this class body executes) and a human-readable description.
    messages: Annotated[
        List[str],
        gr.State(render=False),
        "Stores chat history as list of strings"
    ]
 
 
 
 
 
 
 
 
 
381
 
382
+ # 2. Content Processing Utilities
383
def process_content(raw_content) -> str:
    """Convert arbitrary input into a single clean string.

    Lists are flattened depth-first, so nested lists contribute their
    individual elements rather than their ``repr``; the pieces are joined
    with single spaces. Any non-list value is passed through ``str()``.

    Args:
        raw_content: A string, a (possibly nested) list, or any other value.

    Returns:
        The flattened, space-joined text; ``""`` for an empty list.
    """
    if isinstance(raw_content, list):
        # Recurse so that ["a", ["b", "c"]] yields "a b c" instead of
        # "a ['b', 'c']".
        return " ".join(process_content(part) for part in raw_content)
    return str(raw_content)
388
+
389
def reverse_text(text: str) -> str:
    """Undo character-reversed text.

    A leading ``.`` or ``,`` is taken as the sign that the string was
    written backwards. Such strings are reversed, stripped of backslashes
    and of surrounding whitespace; every other string is returned as is.
    """
    looks_reversed = text.startswith((".", ","))
    if not looks_reversed:
        return text

    restored = text[::-1]
    without_backslashes = restored.replace("\\", "")
    return without_backslashes.strip()
392
+
393
+ # 3. Unified Document Creation
394
def create_documents(data_source: str, data: List[dict]) -> List[Document]:
    """Build ``Document`` objects from Gradio chat states or JSON questions.

    Args:
        data_source: ``"gradio"`` - each item carries a ``"messages"`` list
            of strings, joined with newlines to form the page content.
            ``"json"`` - each item carries a ``"question"`` that is
            normalised with ``process_content`` and ``reverse_text``.
            Any other value produces an empty result.
        data: The records to convert.

    Returns:
        One ``Document`` per usable record. Metadata holds ``task_id``,
        ``level`` (read from the ``"Level"`` key) and ``file_name``, each
        coerced to ``str`` and defaulting to ``""`` when absent.

    Raises:
        KeyError: If a ``"gradio"`` item has no ``"messages"`` key.
    """
    # The source is loop-invariant: reject unknown sources once, up front.
    if data_source not in ("gradio", "json"):
        return []

    docs = []
    for item in data:
        if data_source == "gradio":
            content = "\n".join(item["messages"])
        else:
            # Records without a question would otherwise become
            # empty-content documents; skip them instead.
            if "question" not in item:
                continue
            content = reverse_text(process_content(item["question"]))

        # Force every metadata value to str so all documents share one
        # value type.
        metadata = {
            "task_id": str(item.get("task_id", "")),
            "level": str(item.get("Level", "")),
            "file_name": str(item.get("file_name", "")),
        }
        docs.append(Document(page_content=content, metadata=metadata))

    return docs
418
 
419
  texts = [doc.page_content for doc in docs]
420