Spaces:
Runtime error
Runtime error
Upload 2 files
Browse files- app.py +468 -164
- requirements.txt +2 -1
app.py
CHANGED
@@ -17,6 +17,8 @@ import pdfplumber
|
|
17 |
import docx
|
18 |
import speech_recognition as sr
|
19 |
import base64
|
|
|
|
|
20 |
|
21 |
from io import BytesIO, StringIO
|
22 |
from dotenv import load_dotenv
|
@@ -46,89 +48,124 @@ llm = AzureOpenAI(
|
|
46 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
47 |
|
48 |
# File parsing tool
|
49 |
-
def parse_file(file_url: str,
|
50 |
try:
|
51 |
-
#
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
# --- TXT ---
|
67 |
-
if file_type == ".txt":
|
68 |
-
text = content.decode(errors='ignore')
|
69 |
-
return f"Text File Content:\n{text[:3500]}"
|
70 |
-
|
71 |
-
# --- PDF ---
|
72 |
-
if file_type == ".pdf" and pdfplumber:
|
73 |
-
with pdfplumber.open(BytesIO(content)) as pdf:
|
74 |
-
text = "\n".join(page.extract_text() or "" for page in pdf.pages)
|
75 |
-
return f"PDF Content (first 3500 chars):\n{text[:3500]}"
|
76 |
-
|
77 |
-
# --- DOCX ---
|
78 |
-
if file_type == ".docx" and docx:
|
79 |
-
d = docx.Document(BytesIO(content))
|
80 |
-
text = "\n".join(p.text for p in d.paragraphs)
|
81 |
-
return f"DOCX Content (first 3500 chars):\n{text[:3500]}"
|
82 |
-
|
83 |
-
# --- MP3 (Audio to Text) ---
|
84 |
-
if file_type == ".mp3" and sr:
|
85 |
-
# Save MP3 to local
|
86 |
-
mp3_path = "temp.mp3"
|
87 |
-
with open(mp3_path, "wb") as f:
|
88 |
-
f.write(content)
|
89 |
-
try:
|
90 |
-
# Convert MP3 to WAV using pydub if available
|
91 |
-
wav_path = "temp.wav"
|
92 |
try:
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
except Exception:
|
98 |
-
audio_file = mp3_path # Try raw mp3 if conversion fails
|
99 |
-
|
100 |
-
recognizer = sr.Recognizer()
|
101 |
-
with sr.AudioFile(audio_file) as source:
|
102 |
-
audio = recognizer.record(source)
|
103 |
-
transcript = recognizer.recognize_google(audio)
|
104 |
-
# Clean up
|
105 |
-
if os.path.exists(mp3_path): os.remove(mp3_path)
|
106 |
-
if os.path.exists(wav_path): os.remove(wav_path)
|
107 |
-
return f"Audio Transcript:\n{transcript}"
|
108 |
-
except Exception as e:
|
109 |
-
if os.path.exists(mp3_path): os.remove(mp3_path)
|
110 |
-
if os.path.exists("temp.wav"): os.remove("temp.wav")
|
111 |
-
return f"Could not transcribe audio: {e}"
|
112 |
-
|
113 |
-
# --- Python file ---
|
114 |
-
if file_type == ".py":
|
115 |
-
text = content.decode(errors='ignore')
|
116 |
-
return f"Python Script Content:\n{text[:3500]}"
|
117 |
-
|
118 |
-
# --- Fallback ---
|
119 |
-
return f"File type {file_type} is not supported yet, or required package is missing."
|
120 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
121 |
except Exception as e:
|
122 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
123 |
|
|
|
|
|
|
|
|
|
124 |
# YouTube transcript tool
|
125 |
def get_youtube_transcript(url: str) -> str:
|
126 |
try:
|
127 |
-
video_id = url.split("v=")[-1]
|
128 |
transcript = YouTubeTranscriptApi.get_transcript(video_id)
|
129 |
return " ".join([e['text'] for e in transcript])
|
130 |
-
except
|
131 |
-
return "No transcript available
|
|
|
|
|
|
|
132 |
|
133 |
# ------------ DuckDuckGo Search and Extract -------------------------
|
134 |
def scrape_text_from_url(url: str, max_chars=4000) -> str:
|
@@ -142,35 +179,63 @@ def scrape_text_from_url(url: str, max_chars=4000) -> str:
|
|
142 |
except Exception as e:
|
143 |
return f"Could not scrape {url}: {e}"
|
144 |
|
145 |
-
def duckduckgo_search_and_scrape(
|
|
|
|
|
|
|
|
|
|
|
146 |
"""
|
147 |
-
|
|
|
|
|
|
|
|
|
148 |
"""
|
149 |
-
# Step 1: Search
|
150 |
ddg_spec = DuckDuckGoSearchToolSpec()
|
151 |
-
results = ddg_spec.duckduckgo_full_search(question)
|
152 |
-
|
|
|
153 |
return "No search results found."
|
154 |
-
|
155 |
-
|
156 |
-
|
|
|
157 |
href = entry.get("href", "")
|
158 |
-
if href:
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
168 |
return (
|
169 |
-
|
170 |
-
|
171 |
-
"
|
|
|
172 |
)
|
173 |
|
|
|
|
|
174 |
# ------------ Image Processing Tool Functions -------------------------
|
175 |
# MIME type mapping for images
|
176 |
MIME_MAP = {
|
@@ -226,9 +291,163 @@ def process_image(file_url: str, question: str) -> str:
|
|
226 |
except Exception as e:
|
227 |
return f"Vision API error: {e}"
|
228 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
229 |
# ------------------------------
|
230 |
# 2. BasicAgent Class Definition
|
231 |
# ------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
232 |
class BasicAgent:
|
233 |
def __init__(self):
|
234 |
"""Initialize the BasicAgent with all tools and agent workflow."""
|
@@ -246,29 +465,34 @@ class BasicAgent:
|
|
246 |
|
247 |
# Define routing instruction
|
248 |
self.routing_instruction = (
|
249 |
-
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
263 |
-
|
264 |
-
|
265 |
-
|
266 |
-
|
267 |
-
|
268 |
-
|
269 |
-
|
270 |
-
|
|
|
|
|
|
|
|
|
271 |
)
|
|
|
272 |
|
273 |
def _setup_tools(self):
|
274 |
"""Initialize all the tools."""
|
@@ -278,7 +502,10 @@ class BasicAgent:
|
|
278 |
self.ddg_tool = FunctionTool.from_defaults(
|
279 |
fn=duckduckgo_search_and_scrape,
|
280 |
name="web_search",
|
281 |
-
description=
|
|
|
|
|
|
|
282 |
)
|
283 |
|
284 |
self.image_processing_tool = FunctionTool.from_defaults(
|
@@ -289,16 +516,29 @@ class BasicAgent:
|
|
289 |
|
290 |
def _setup_agents(self):
|
291 |
"""Initialize all the specialized agents."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
292 |
# File Parsing ReActAgent
|
293 |
self.file_agent = ReActAgent(
|
294 |
name="file_agent",
|
295 |
description="Expert at reading and extracting info from files",
|
296 |
-
system_prompt="""You are
|
297 |
-
|
298 |
-
|
299 |
-
|
300 |
-
|
301 |
-
|
|
|
|
|
|
|
|
|
302 |
tools=[self.file_parser_tool],
|
303 |
llm=self.llm,
|
304 |
)
|
@@ -307,7 +547,17 @@ class BasicAgent:
|
|
307 |
self.youtube_agent = ReActAgent(
|
308 |
name="youtube_agent",
|
309 |
description="Expert at extracting info from YouTube videos by transcript.",
|
310 |
-
system_prompt="
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
311 |
tools=[self.youtube_transcript_tool],
|
312 |
llm=self.llm,
|
313 |
)
|
@@ -315,26 +565,42 @@ class BasicAgent:
|
|
315 |
# DuckDuckGo Web Search ReActAgent
|
316 |
self.search_agent = ReActAgent(
|
317 |
name="websearch_agent",
|
318 |
-
description="Web search expert.
|
319 |
system_prompt=(
|
320 |
-
"You are
|
321 |
-
"
|
322 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
323 |
),
|
324 |
tools=[self.ddg_tool],
|
325 |
llm=self.llm,
|
326 |
)
|
327 |
|
|
|
328 |
# Image Agent
|
329 |
self.image_agent = ReActAgent(
|
330 |
name="image_agent",
|
331 |
description="Analyzes images and answers questions using the image_processing tool.",
|
332 |
system_prompt=(
|
333 |
-
"
|
334 |
-
|
335 |
-
|
336 |
-
|
337 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
338 |
),
|
339 |
tools=[self.image_processing_tool],
|
340 |
llm=self.llm,
|
@@ -343,22 +609,29 @@ class BasicAgent:
|
|
343 |
def _setup_workflow(self):
|
344 |
"""Initialize the agent workflow."""
|
345 |
self.agentflow = AgentWorkflow(
|
346 |
-
agents=[self.
|
347 |
-
|
|
|
|
|
|
|
|
|
348 |
)
|
349 |
-
|
350 |
-
def _extract_final_answer(self, response_text: str) -> str:
|
351 |
-
"""Extract the final answer from the response, removing 'FINAL ANSWER:' prefix if present."""
|
352 |
-
# Look for FINAL ANSWER: pattern and extract what comes after
|
353 |
-
if "FINAL ANSWER:" in response_text:
|
354 |
-
parts = response_text.split("FINAL ANSWER:", 1)
|
355 |
-
if len(parts) > 1:
|
356 |
-
return parts[1].strip()
|
357 |
|
358 |
-
# If no FINAL ANSWER: pattern found, return the full response stripped
|
359 |
-
return response_text.strip()
|
360 |
|
361 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
362 |
"""
|
363 |
Main method to process a question and return an answer.
|
364 |
This method will be called by the evaluation system.
|
@@ -366,7 +639,8 @@ class BasicAgent:
|
|
366 |
Args:
|
367 |
question (str): The question to answer
|
368 |
task_id (str, optional): Task ID for file retrieval
|
369 |
-
|
|
|
370 |
Returns:
|
371 |
str: The answer to the question
|
372 |
"""
|
@@ -375,28 +649,56 @@ class BasicAgent:
|
|
375 |
# The evaluation system should provide file info in the question or via task_id
|
376 |
enhanced_question = question
|
377 |
|
378 |
-
|
379 |
-
|
380 |
-
|
381 |
-
|
382 |
-
# You might need to adjust this logic based on how files are provided
|
383 |
-
enhanced_question += f"\nFile URL: {file_url}"
|
384 |
-
|
385 |
-
# Construct the full prompt with routing instructions
|
386 |
-
full_prompt = f"{self.routing_instruction}\n\nUser Question:\n{enhanced_question}"
|
387 |
|
388 |
-
# Run the agent workflow
|
389 |
-
response = asyncio.run(self.agentflow.run(user_msg=full_prompt))
|
390 |
|
391 |
-
#
|
392 |
-
|
393 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
394 |
return final_answer
|
395 |
|
396 |
except Exception as e:
|
397 |
print(f"Error in BasicAgent.__call__: {e}")
|
398 |
return f"Error processing question: {str(e)}"
|
399 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
400 |
# ------------------------------
|
401 |
# 3. Modified answer_questions_batch function (kept for reference)
|
402 |
# ------------------------------
|
@@ -412,10 +714,11 @@ async def answer_questions_batch(questions_data):
|
|
412 |
question = question_data.get("question", "")
|
413 |
file_name = question_data.get("file_name", "")
|
414 |
task_id = question_data.get("task_id", "")
|
|
|
415 |
|
416 |
try:
|
417 |
# Let the BasicAgent handle the question processing
|
418 |
-
answer = agent(question, task_id)
|
419 |
|
420 |
answers.append({
|
421 |
"task_id": task_id,
|
@@ -504,13 +807,14 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
504 |
try:
|
505 |
# Prepare enhanced question with file information if present
|
506 |
enhanced_question = question_text
|
507 |
-
if file_name:
|
508 |
file_type = Path(file_name).suffix.lower().split("?")[0]
|
509 |
file_url = f"{api_url}/files/{task_id}"
|
510 |
enhanced_question += f"\nThis question relates to the file at {file_url} (filename: {file_name} and file type: {file_type}). Please analyze its contents using the appropriate tool."
|
511 |
-
|
|
|
512 |
# Call the agent
|
513 |
-
submitted_answer = agent(enhanced_question, task_id)
|
514 |
|
515 |
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
516 |
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
|
|
|
17 |
import docx
|
18 |
import speech_recognition as sr
|
19 |
import base64
|
20 |
+
import tempfile
|
21 |
+
import re
|
22 |
|
23 |
from io import BytesIO, StringIO
|
24 |
from dotenv import load_dotenv
|
|
|
48 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
49 |
|
50 |
# File parsing tool
|
51 |
+
def parse_file(file_url: str, file_name: str) -> str:
|
52 |
try:
|
53 |
+
# Determine file type from file_name or URL
|
54 |
+
if len(file_name)>0:
|
55 |
+
file_type = Path(file_name).suffix.lower()
|
56 |
+
file_type = file_type.split("?")[0]
|
57 |
+
else:
|
58 |
+
file_type = None
|
59 |
+
# Remove query params
|
60 |
+
if file_type:
|
61 |
+
resp = requests.get(file_url, timeout=30)
|
62 |
+
resp.raise_for_status()
|
63 |
+
content = resp.content
|
64 |
+
|
65 |
+
# --- Excel Files ---
|
66 |
+
if file_type in [".xlsx", ".xls"]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
try:
|
68 |
+
df = pd.read_excel(BytesIO(content))
|
69 |
+
return f"Excel Content:\n{df.head(5).to_string(index=False)}" # Only first 5 rows
|
70 |
+
except Exception as e:
|
71 |
+
return f"Excel parsing error: {str(e)}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
|
73 |
+
# --- CSV Files ---
|
74 |
+
elif file_type == ".csv":
|
75 |
+
try:
|
76 |
+
df = pd.read_csv(BytesIO(content))
|
77 |
+
return f"CSV Content:\n{df.head(5).to_string(index=False)}" # Only first 5 rows
|
78 |
+
except Exception as e:
|
79 |
+
return f"CSV parsing error: {str(e)}"
|
80 |
+
|
81 |
+
# --- Text Files ---
|
82 |
+
elif file_type == ".txt":
|
83 |
+
text = content.decode(errors='ignore')
|
84 |
+
return f"Text Content:\n{text[:3500]}"
|
85 |
+
|
86 |
+
# --- PDF Files ---
|
87 |
+
elif file_type == ".pdf":
|
88 |
+
try:
|
89 |
+
with pdfplumber.open(BytesIO(content)) as pdf:
|
90 |
+
text = "\n".join(page.extract_text() or "" for page in pdf.pages[:3]) # First 3 pages
|
91 |
+
return f"PDF Content:\n{text[:3500]}"
|
92 |
+
except Exception as e:
|
93 |
+
return f"PDF parsing error: {str(e)}"
|
94 |
+
|
95 |
+
# --- DOCX Files ---
|
96 |
+
elif file_type == ".docx":
|
97 |
+
try:
|
98 |
+
d = docx.Document(BytesIO(content))
|
99 |
+
text = "\n".join(p.text for p in d.paragraphs[:50]) # First 50 paragraphs
|
100 |
+
return f"DOCX Content:\n{text[:3500]}"
|
101 |
+
except Exception as e:
|
102 |
+
return f"DOCX parsing error: {str(e)}"
|
103 |
+
|
104 |
+
# --- MP3 Files ---
|
105 |
+
elif file_type == ".mp3":
|
106 |
+
return transcribe_audio(content) # Use helper function
|
107 |
+
|
108 |
+
# --- Python Files ---
|
109 |
+
elif file_type == ".py":
|
110 |
+
text = content.decode(errors='ignore')
|
111 |
+
return f"Python Code:\n{text[:3500]}"
|
112 |
+
|
113 |
+
# --- Unsupported Types ---
|
114 |
+
else:
|
115 |
+
return f"Unsupported file type: {file_type}"
|
116 |
+
else:
|
117 |
+
return "No file type provided or file URL is invalid."
|
118 |
except Exception as e:
|
119 |
+
print(f"[parse_file] ERROR: {e}")
|
120 |
+
return f"File parsing failed: {str(e)}"
|
121 |
+
|
122 |
+
# Audio transcription helper
|
123 |
+
def transcribe_audio(content: bytes) -> str:
|
124 |
+
try:
|
125 |
+
# Create temp files
|
126 |
+
with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as mp3_tmp:
|
127 |
+
mp3_tmp.write(content)
|
128 |
+
mp3_path = mp3_tmp.name
|
129 |
+
|
130 |
+
wav_path = mp3_path.replace(".mp3", ".wav")
|
131 |
+
|
132 |
+
# Convert to WAV
|
133 |
+
try:
|
134 |
+
from pydub import AudioSegment
|
135 |
+
audio = AudioSegment.from_mp3(mp3_path)
|
136 |
+
audio.export(wav_path, format="wav")
|
137 |
+
audio_file = wav_path
|
138 |
+
except ImportError:
|
139 |
+
audio_file = mp3_path # Fallback to MP3 if pydub not available
|
140 |
+
|
141 |
+
# Transcribe audio
|
142 |
+
recognizer = sr.Recognizer()
|
143 |
+
with sr.AudioFile(audio_file) as source:
|
144 |
+
audio = recognizer.record(source)
|
145 |
+
transcript = recognizer.recognize_google(audio)
|
146 |
+
|
147 |
+
# Cleanup
|
148 |
+
for path in [mp3_path, wav_path]:
|
149 |
+
if os.path.exists(path):
|
150 |
+
os.remove(path)
|
151 |
+
|
152 |
+
return f"Audio Transcript:\n{transcript}"
|
153 |
|
154 |
+
except Exception as e:
|
155 |
+
print(f"Audio transcription error: {e}")
|
156 |
+
return "Could not transcribe audio"
|
157 |
+
|
158 |
# YouTube transcript tool
|
159 |
def get_youtube_transcript(url: str) -> str:
|
160 |
try:
|
161 |
+
video_id = url.split("v=")[-1].split("&")[0] # Clean video ID
|
162 |
transcript = YouTubeTranscriptApi.get_transcript(video_id)
|
163 |
return " ".join([e['text'] for e in transcript])
|
164 |
+
except NoTranscriptFound:
|
165 |
+
return "No transcript available for this video"
|
166 |
+
except Exception as e:
|
167 |
+
return f"Error retrieving transcript: {str(e)}"
|
168 |
+
|
169 |
|
170 |
# ------------ DuckDuckGo Search and Extract -------------------------
|
171 |
def scrape_text_from_url(url: str, max_chars=4000) -> str:
|
|
|
179 |
except Exception as e:
|
180 |
return f"Could not scrape {url}: {e}"
|
181 |
|
182 |
+
def duckduckgo_search_and_scrape(
|
183 |
+
question: str,
|
184 |
+
max_results: int = 10,
|
185 |
+
min_chars: int = 400, # treat shorter pages as βunscrapableβ
|
186 |
+
max_chars: int = 4000 # final truncate length
|
187 |
+
) -> str:
|
188 |
"""
|
189 |
+
DuckDuckGo β scrape β fallback.
|
190 |
+
|
191 |
+
1. Try up to max_results links; return the first page that gives
|
192 |
+
β₯ min_chars of visible text.
|
193 |
+
2. If none succeed, compose an answer from the DDG result metadata.
|
194 |
"""
|
|
|
195 |
ddg_spec = DuckDuckGoSearchToolSpec()
|
196 |
+
results = ddg_spec.duckduckgo_full_search(question) or []
|
197 |
+
|
198 |
+
if not isinstance(results, list):
|
199 |
return "No search results found."
|
200 |
+
|
201 |
+
cleaned_pages = []
|
202 |
+
|
203 |
+
for entry in results[:max_results]:
|
204 |
href = entry.get("href", "")
|
205 |
+
if not href:
|
206 |
+
continue
|
207 |
+
|
208 |
+
# --- attempt to scrape ------------------------------------------------
|
209 |
+
text = scrape_text_from_url(href, max_chars=max_chars)
|
210 |
+
if text.startswith("Could not scrape") or len(text) < min_chars:
|
211 |
+
continue # treat as failure β try next result
|
212 |
+
# success!
|
213 |
+
return (
|
214 |
+
f"Here is content scraped from {href}:\n\n"
|
215 |
+
f"{text}\n\n"
|
216 |
+
"Based on this, please answer the original question."
|
217 |
+
)
|
218 |
+
|
219 |
+
# ---------------- fallback: build summary from DDG metadata --------------
|
220 |
+
if not results:
|
221 |
+
return "No search results found."
|
222 |
+
|
223 |
+
summary_lines = []
|
224 |
+
for idx, entry in enumerate(results[:max_results], start=1):
|
225 |
+
title = entry.get("title") or "Untitled result"
|
226 |
+
snippet = (entry.get("body") or "").replace("\n", " ")[:160]
|
227 |
+
href = entry.get("href")
|
228 |
+
summary_lines.append(f"{idx}. {title} β {snippet} ({href})")
|
229 |
+
|
230 |
return (
|
231 |
+
"I could not successfully scrape any of the top pages. "
|
232 |
+
"Here are the top DuckDuckGo results:\n\n"
|
233 |
+
+ "\n".join(summary_lines)
|
234 |
+
+ "\n\nPlease answer the original question using this list."
|
235 |
)
|
236 |
|
237 |
+
|
238 |
+
|
239 |
# ------------ Image Processing Tool Functions -------------------------
|
240 |
# MIME type mapping for images
|
241 |
MIME_MAP = {
|
|
|
291 |
except Exception as e:
|
292 |
return f"Vision API error: {e}"
|
293 |
|
294 |
+
|
295 |
+
# βββ formatter.py (or inline in your module) βββββββββββββββββββββββββ
|
296 |
+
from pydantic import BaseModel, ValidationError
|
297 |
+
from openai import AzureOpenAI
|
298 |
+
|
299 |
+
FALLBACK = "ANSWER_NOT_FOUND" # single source of truth, keep as plain text
|
300 |
+
|
301 |
+
SYSTEM_PROMPT = (
|
302 |
+
"You are an answer-formatter. I will give you:\n"
|
303 |
+
" β’ the user question\n"
|
304 |
+
" β’ a raw multi-agent trace that may contain Thoughts, Actions, tool "
|
305 |
+
" outputs, and possibly a FINAL ANSWER.\n\n"
|
306 |
+
|
307 |
+
"Your job:\n"
|
308 |
+
"1. Extract the true answer if it is present anywhere in the trace.\n"
|
309 |
+
"2. Output exactly one line in this template:\n"
|
310 |
+
" FINAL ANSWER: <ANSWER>\n\n"
|
311 |
+
"If the trace contains no FINAL ANSWER **but the question itself already contains enough information**, deduce the answer on your own."
|
312 |
+
"Return a FINAL ANSWER line in the usual format.\n"
|
313 |
+
|
314 |
+
|
315 |
+
"Rules for <ANSWER>:\n"
|
316 |
+
"β’ Number β digits only, no commas, no currency/percent signs unless "
|
317 |
+
" explicitly asked for.\n"
|
318 |
+
"β’ String β as short as possible, no articles unless required.\n"
|
319 |
+
"β’ List β comma-separated values following the above rules; if no order "
|
320 |
+
" is specified, sort alphabetically.\n"
|
321 |
+
"β’ If rounding or units are requested in the question, apply before "
|
322 |
+
" formatting and include the unit with **no preceding space**.\n\n"
|
323 |
+
f"If you cannot find a valid answer, output:\n"
|
324 |
+
f" FINAL ANSWER: {FALLBACK}\n\n"
|
325 |
+
|
326 |
+
"Examples (follow exactly)\n"
|
327 |
+
"###\n"
|
328 |
+
"Q: Reverse this word: elppa\n"
|
329 |
+
"Trace: (no FINAL ANSWER)\n"
|
330 |
+
"A: FINAL ANSWER: apple\n"
|
331 |
+
"Q: What is 2+3?\n"
|
332 |
+
"Trace: Thought: need a calculator\n"
|
333 |
+
"A: FINAL ANSWER: 5\n"
|
334 |
+
"Q: How many planets? Trace: β¦ FINAL ANSWER: 8\n"
|
335 |
+
"A: FINAL ANSWER: 8\n"
|
336 |
+
"###\n"
|
337 |
+
"Q: Give the colour. Trace: β¦ blue.\n"
|
338 |
+
"A: FINAL ANSWER: blue\n"
|
339 |
+
"###\n"
|
340 |
+
"Q: Name the three vowels. Trace: β¦ a, e, i, o, u.\n"
|
341 |
+
"A: FINAL ANSWER: a,e,i,o,u\n"
|
342 |
+
"###\n"
|
343 |
+
"Q: Whatβs the speed? (units requested) Trace: β¦ 3.0 m/s.\n"
|
344 |
+
"A: FINAL ANSWER: 3.0m/s\n"
|
345 |
+
"###\n"
|
346 |
+
"Q: Any answer? Trace: β¦ tool failure β¦\n"
|
347 |
+
f"A: FINAL ANSWER: {FALLBACK}"
|
348 |
+
)
|
349 |
+
|
350 |
+
|
351 |
+
class Result(BaseModel):
|
352 |
+
final_answer: str
|
353 |
+
|
354 |
+
|
355 |
+
def format_final_answer(question: str,
|
356 |
+
raw_trace: str,
|
357 |
+
*,
|
358 |
+
api_key: str,
|
359 |
+
api_version: str,
|
360 |
+
endpoint: str,
|
361 |
+
deployment: str,
|
362 |
+
temperature: float = 0.0) -> str:
|
363 |
+
"""
|
364 |
+
Second-pass LLM call that converts an unstructured agent trace into the
|
365 |
+
strict 'FINAL ANSWER: β¦' template. On any error returns the FALLBACK.
|
366 |
+
"""
|
367 |
+
try:
|
368 |
+
from openai import AzureOpenAI
|
369 |
+
client = AzureOpenAI(
|
370 |
+
api_key=api_key,
|
371 |
+
api_version=api_version,
|
372 |
+
azure_endpoint=endpoint,
|
373 |
+
)
|
374 |
+
|
375 |
+
messages = [
|
376 |
+
{"role": "system", "content": SYSTEM_PROMPT},
|
377 |
+
{"role": "user", "content": f"Question: {question}\nTrace: {raw_trace}"}
|
378 |
+
]
|
379 |
+
|
380 |
+
rsp = client.chat.completions.create(
|
381 |
+
model=deployment,
|
382 |
+
messages=messages,
|
383 |
+
temperature=temperature,
|
384 |
+
max_tokens=120,
|
385 |
+
)
|
386 |
+
|
387 |
+
out = rsp.choices[0].message.content.strip()
|
388 |
+
|
389 |
+
# Remove the label for downstream code (keep only the value)
|
390 |
+
if out.lower().startswith("final answer:"):
|
391 |
+
out = out.split(":", 1)[1].strip()
|
392 |
+
|
393 |
+
# basic schema check β non-empty string
|
394 |
+
Result(final_answer=out)
|
395 |
+
return out or FALLBACK
|
396 |
+
|
397 |
+
except (ValidationError, Exception):
|
398 |
+
return FALLBACK
|
399 |
+
|
400 |
# ------------------------------
|
401 |
# 2. BasicAgent Class Definition
|
402 |
# ------------------------------
|
403 |
+
REASONING_PROMPT = """
|
404 |
+
You are the Router-&-Reasoning-Agent.
|
405 |
+
|
406 |
+
NEVER output filler like βCould you please provide more contextβ.
|
407 |
+
|
408 |
+
If the answer is not already in the question, DELEGATE:
|
409 |
+
|
410 |
+
β’ Any external fact β WebSearch-Agent
|
411 |
+
β’ YouTube link β YouTube-Agent
|
412 |
+
β’ File link (PDFβ¦) β File-Agent
|
413 |
+
β’ Image link β Image-Agent
|
414 |
+
|
415 |
+
How to delegate
|
416 |
+
βββββββββββββββ
|
417 |
+
Call the special tool `handoff` **once** with JSON:
|
418 |
+
{"to_agent":"<agent_name>","reason":"<why>"}
|
419 |
+
|
420 |
+
When to answer directly
|
421 |
+
βββββββββββββββββββββββ
|
422 |
+
β’ The question already contains all information needed (e.g. reversed text,
|
423 |
+
Caesar cipher, mental arithmetic, pure logic).
|
424 |
+
β’ You are 100 % certain no external resource is required.
|
425 |
+
|
426 |
+
Output format
|
427 |
+
βββββββββββββ
|
428 |
+
β’ If you delegate β return the tool call only; the delegated agent will finish.
|
429 |
+
β’ If you answer yourself β one line:
|
430 |
+
FINAL ANSWER: <clean answer>
|
431 |
+
Follow the global rules (digits only, short strings, comma-lists, etc.).
|
432 |
+
|
433 |
+
Never
|
434 |
+
βββββ
|
435 |
+
β’ Never try to scrape the web or parse files yourself.
|
436 |
+
β’ Never add filler like βThinkingβ¦β or βAwaiting responseβ.
|
437 |
+
β’ Never answer if the question clearly needs a specialised agent.
|
438 |
+
|
439 |
+
Example
|
440 |
+
βββββββ
|
441 |
+
Example (self-contained)
|
442 |
+
Q: .rewsna eht sa "tfel" β¦ β reversed
|
443 |
+
A: FINAL ANSWER: right
|
444 |
+
Example (delegation)
|
445 |
+
Q: Who wrote the novel Dune?
|
446 |
+
A: Action: handoff
|
447 |
+
Action Input: {"to_agent":"websearch_agent","reason":"needs web"}
|
448 |
+
"""
|
449 |
+
|
450 |
+
|
451 |
class BasicAgent:
|
452 |
def __init__(self):
|
453 |
"""Initialize the BasicAgent with all tools and agent workflow."""
|
|
|
465 |
|
466 |
# Define routing instruction
|
467 |
self.routing_instruction = (
|
468 |
+
"You are a multi-agent AI system that routes questions **and** produces "
|
469 |
+
"the final answer.\n\n"
|
470 |
+
|
471 |
+
"β If the question already *contains* the needed information "
|
472 |
+
"(e.g. encoded, reversed, maths puzzle), **answer directly** β "
|
473 |
+
"no tools, no sub-agents.\n\n"
|
474 |
+
|
475 |
+
"You have four specialised agents:\n"
|
476 |
+
"β’ File-Agent β files (PDF, DOCX, β¦)\n"
|
477 |
+
"β’ YouTube-Agent β video transcripts\n"
|
478 |
+
"β’ WebSearch-Agent β fresh/general web info\n"
|
479 |
+
"β’ Image-Agent β vision questions\n\n"
|
480 |
+
|
481 |
+
"When you delegate, do **not** add commentary such as "
|
482 |
+
"'I will await the agent's response'.\n"
|
483 |
+
"When you answer yourself, end with:\n"
|
484 |
+
" FINAL ANSWER: <clean answer>\n\n"
|
485 |
+
|
486 |
+
"Example β (self-contained)\n"
|
487 |
+
'Q: "opposite of north"..."\n'
|
488 |
+
"A: FINAL ANSWER: south\n\n"
|
489 |
+
|
490 |
+
"Example β (delegation)\n"
|
491 |
+
"Q: Who wrote Dune?\n"
|
492 |
+
"A: Action: handoff\n"
|
493 |
+
'Action Input: {"to_agent":"websearch_agent","reason":"needs web"}\n'
|
494 |
)
|
495 |
+
|
496 |
|
497 |
def _setup_tools(self):
|
498 |
"""Initialize all the tools."""
|
|
|
502 |
self.ddg_tool = FunctionTool.from_defaults(
|
503 |
fn=duckduckgo_search_and_scrape,
|
504 |
name="web_search",
|
505 |
+
description=(
|
506 |
+
"Performs a DuckDuckGo search, attempts to scrape each top result, "
|
507 |
+
"and falls back to result metadata if scraping fails."
|
508 |
+
)
|
509 |
)
|
510 |
|
511 |
self.image_processing_tool = FunctionTool.from_defaults(
|
|
|
516 |
|
517 |
def _setup_agents(self):
|
518 |
"""Initialize all the specialized agents."""
|
519 |
+
|
520 |
+
self.reasoning_agent = ReActAgent(
|
521 |
+
name="reasoning_agent",
|
522 |
+
description="Router and on-board reasoning.",
|
523 |
+
system_prompt=REASONING_PROMPT,
|
524 |
+
tools=[], # no direct tools β only `handoff` is implicit
|
525 |
+
llm=self.llm,
|
526 |
+
)
|
527 |
+
|
528 |
# File Parsing ReActAgent
|
529 |
self.file_agent = ReActAgent(
|
530 |
name="file_agent",
|
531 |
description="Expert at reading and extracting info from files",
|
532 |
+
system_prompt="""You are File-Agent.
|
533 |
+
A router has already chosen you because the userβs question involves a
|
534 |
+
non-image file (PDF, DOCX, XLSX, CSV, TXT, MP3, β¦).
|
535 |
+
Rules
|
536 |
+
1. ALWAYS call the tool `parse_file(file_url, file_type?)` **once** to read
|
537 |
+
the file.
|
538 |
+
2. Use ONLY the file content to answer the user.
|
539 |
+
3. NEVER hand the task to another agent and NEVER mention you are using a tool.
|
540 |
+
4. When you are done, reply with one line in this exact format:
|
541 |
+
FINAL ANSWER: <clean answer text>""",
|
542 |
tools=[self.file_parser_tool],
|
543 |
llm=self.llm,
|
544 |
)
|
|
|
547 |
self.youtube_agent = ReActAgent(
|
548 |
name="youtube_agent",
|
549 |
description="Expert at extracting info from YouTube videos by transcript.",
|
550 |
+
system_prompt="""
|
551 |
+
You are YouTube-Agent.
|
552 |
+
The router picked you because the question references a YouTube video.
|
553 |
+
|
554 |
+
Rules
|
555 |
+
1. ALWAYS call `get_youtube_transcript(url)` once.
|
556 |
+
2. Base your answer ONLY on the transcript you receive.
|
557 |
+
3. Do NOT search the web, do NOT invoke other tools.
|
558 |
+
4. End with:
|
559 |
+
FINAL ANSWER: <clean answer text>
|
560 |
+
""",
|
561 |
tools=[self.youtube_transcript_tool],
|
562 |
llm=self.llm,
|
563 |
)
|
|
|
565 |
# DuckDuckGo Web Search ReActAgent
|
566 |
self.search_agent = ReActAgent(
|
567 |
name="websearch_agent",
|
568 |
+
description="Web search expert.",
|
569 |
system_prompt=(
|
570 |
+
"You are WebSearch-Agent.\n"
|
571 |
+
"1. ALWAYS call the tool `web_search` exactly once.\n"
|
572 |
+
"2. Read the text the tool returns and craft a concise answer to the user.\n"
|
573 |
+
"3. Do NOT quote the entire extract; use only the facts needed.\n"
|
574 |
+
"4. Finish with:\n"
|
575 |
+
" FINAL ANSWER: <clean answer text>"
|
576 |
+
"...\n"
|
577 |
+
"Example\n"
|
578 |
+
"User: Who wrote the novel Dune?\n"
|
579 |
+
"Tool output: Here is content scraped from https://en.wikipedia.org/wiki/Dune_(novel): ... Frank Herbert ... Based on this, please answer the original question.\n"
|
580 |
+
"Assistant: FINAL ANSWER: Frank Herbert\n"
|
581 |
),
|
582 |
tools=[self.ddg_tool],
|
583 |
llm=self.llm,
|
584 |
)
|
585 |
|
586 |
+
|
587 |
# Image Agent
|
588 |
self.image_agent = ReActAgent(
|
589 |
name="image_agent",
|
590 |
description="Analyzes images and answers questions using the image_processing tool.",
|
591 |
system_prompt=(
|
592 |
+
"""
|
593 |
+
You are Image-Agent.
|
594 |
+
The router picked you because the question involves an image file.
|
595 |
+
|
596 |
+
Rules
|
597 |
+
1. ALWAYS call the tool `image_processing(file_url, question)` exactly once.
|
598 |
+
2. Use ONLY the image content to answer the user.
|
599 |
+
3. NEVER hand the task to another agent and NEVER mention you are using a tool.
|
600 |
+
4. When you are done, reply with one line in this exact format:
|
601 |
+
FINAL ANSWER: <clean answer text>
|
602 |
+
"""
|
603 |
+
|
604 |
),
|
605 |
tools=[self.image_processing_tool],
|
606 |
llm=self.llm,
|
|
|
609 |
def _setup_workflow(self):
|
610 |
"""Initialize the agent workflow."""
|
611 |
self.agentflow = AgentWorkflow(
|
612 |
+
agents=[self.reasoning_agent,
|
613 |
+
self.file_agent,
|
614 |
+
self.youtube_agent,
|
615 |
+
self.search_agent,
|
616 |
+
self.image_agent],
|
617 |
+
root_agent=self.reasoning_agent.name # start with pure reasoning
|
618 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
619 |
|
|
|
|
|
620 |
|
621 |
+
# βββ BasicAgent._extract_final_answer ββββββββββββββββββββββββββββββββββββββββββ
|
622 |
+
def _extract_final_answer(self, question: str, agent_resp) -> str:
|
623 |
+
raw_trace = "\n".join(block.text for block in agent_resp.response.blocks)
|
624 |
+
return format_final_answer(
|
625 |
+
question,
|
626 |
+
raw_trace,
|
627 |
+
api_key=api_key,
|
628 |
+
api_version=azure_api_version,
|
629 |
+
endpoint=azure_endpoint,
|
630 |
+
deployment=azure_model_name,
|
631 |
+
)
|
632 |
+
|
633 |
+
|
634 |
+
def __call__(self, question: str, task_id: str, file_name: str, file_type=None) -> str:
    """
    Main method to process a question and return an answer.
    This method will be called by the evaluation system.

    Args:
        question (str): The question to answer
        task_id (str): Task ID for file retrieval
        file_name (str): Name of the file associated with the question ("" if none)
        file_type (str, optional): File extension of the file (e.g. ".pdf", ".docx")

    Returns:
        str: The answer to the question, or an error message on failure.
    """
    try:
        # The evaluation system should provide file info in the question or via task_id
        enhanced_question = question

        # Attach file context so the router can pick the right specialist agent.
        if file_name:
            file_url = f"{DEFAULT_API_URL}/files/{task_id}"
            print(f"Processing file: {file_name} with type {file_type} at URL {file_url}")
            enhanced_question += (
                f"\nThis question relates to the file at {file_url} "
                f"(filename: {file_name} and file type: {file_type}). "
                "Please analyze its contents using the appropriate tool."
            )

        # Construct the full prompt with routing instructions
        full_prompt = f"\n\nUser Question:\n{enhanced_question}"

        # Run the agent workflow with proper async handling
        agent_resp = self._run_async_workflow(full_prompt)
        print(f"Agent response received:\n{question}\n---\n{agent_resp}")

        # Extract & return the clean final answer (was printed twice before; once is enough).
        final_answer = self._extract_final_answer(question, agent_resp)
        print(f"Final answer extracted: {final_answer}")
        print("------------------------------------------------------------------------------------------------")
        return final_answer

    except Exception as e:
        print(f"Error in BasicAgent.__call__: {e}")
        return f"Error processing question: {str(e)}"
|
676 |
|
677 |
+
# βββ keep just ONE runner ββββββββββββββββββββββββββββββββββββββββββββ
|
678 |
+
def _run_async_workflow(self, prompt: str):
|
679 |
+
"""
|
680 |
+
Call `agentflow.run()` until the response STOPs containing an
|
681 |
+
Action/Thought line. Works with older llama-index that has no
|
682 |
+
`.initialize() / .run_step()`.
|
683 |
+
"""
|
684 |
+
async def _step(msg):
|
685 |
+
return await self.agentflow.run(user_msg=msg)
|
686 |
+
|
687 |
+
async def _inner():
|
688 |
+
rsp = await _step(prompt) # first turn
|
689 |
+
# If the last block is still a tool-call, keep asking βcontinueβ
|
690 |
+
while rsp.response.blocks[-1].text.lstrip().lower().startswith(("action:", "thought:")):
|
691 |
+
rsp = await _step("continue")
|
692 |
+
return rsp
|
693 |
+
|
694 |
+
try:
|
695 |
+
loop = asyncio.get_running_loop() # running inside Gradio
|
696 |
+
except RuntimeError: # plain Python
|
697 |
+
return asyncio.run(_inner())
|
698 |
+
else:
|
699 |
+
return asyncio.run_coroutine_threadsafe(_inner(), loop).result()
|
700 |
+
|
701 |
+
|
702 |
# ------------------------------
|
703 |
# 3. Modified answer_questions_batch function (kept for reference)
|
704 |
# ------------------------------
|
|
|
714 |
question = question_data.get("question", "")
|
715 |
file_name = question_data.get("file_name", "")
|
716 |
task_id = question_data.get("task_id", "")
|
717 |
+
file_type = Path(file_name).suffix.lower().split("?")[0] if len(file_name)> 0 else None
|
718 |
|
719 |
try:
|
720 |
# Let the BasicAgent handle the question processing
|
721 |
+
answer = agent(question, task_id, file_name, file_type)
|
722 |
|
723 |
answers.append({
|
724 |
"task_id": task_id,
|
|
|
807 |
try:
|
808 |
# Prepare enhanced question with file information if present
|
809 |
enhanced_question = question_text
|
810 |
+
if len(file_name) > 0:
|
811 |
file_type = Path(file_name).suffix.lower().split("?")[0]
|
812 |
file_url = f"{api_url}/files/{task_id}"
|
813 |
enhanced_question += f"\nThis question relates to the file at {file_url} (filename: {file_name} and file type: {file_type}). Please analyze its contents using the appropriate tool."
|
814 |
+
else:
|
815 |
+
file_type = None
|
816 |
# Call the agent
|
817 |
+
submitted_answer = agent(enhanced_question, task_id, file_name, file_type)
|
818 |
|
819 |
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
820 |
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
|
requirements.txt
CHANGED
@@ -13,4 +13,5 @@ docx==0.2.4
|
|
13 |
llama-index-embeddings-azure-openai==0.3.5
|
14 |
llama-index-llms-azure-openai==0.3.2
|
15 |
beautifulsoup4
|
16 |
-
python-dotenv
|
|
|
|
13 |
llama-index-embeddings-azure-openai==0.3.5
|
14 |
llama-index-llms-azure-openai==0.3.2
|
15 |
beautifulsoup4
|
16 |
+
python-dotenv
|
17 |
+
gradio[oauth]
|