Burcin committed on
Commit
4773feb
·
verified ·
1 Parent(s): 800ba42

Upload 2 files

Files changed (2)
  1. app.py +468 -164
  2. requirements.txt +2 -1
app.py CHANGED
@@ -17,6 +17,8 @@ import pdfplumber
17
  import docx
18
  import speech_recognition as sr
19
  import base64
 
 
20
 
21
  from io import BytesIO, StringIO
22
  from dotenv import load_dotenv
@@ -46,89 +48,124 @@ llm = AzureOpenAI(
46
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
47
 
48
  # File parsing tool
49
- def parse_file(file_url: str, file_type: str) -> str:
50
  try:
51
- # Download file
52
- resp = requests.get(file_url, timeout=30)
53
- resp.raise_for_status()
54
- content = resp.content
55
-
56
- # --- XLSX ---
57
- if file_type == ".xlsx":
58
- df = pd.read_excel(BytesIO(content))
59
- return f"Excel Sheet Content:\n{df.to_string(index=False)}"
60
-
61
- # --- CSV ---
62
- if file_type == ".csv":
63
- df = pd.read_csv(StringIO(content.decode()))
64
- return f"CSV File Content:\n{df.to_string(index=False)}"
65
-
66
- # --- TXT ---
67
- if file_type == ".txt":
68
- text = content.decode(errors='ignore')
69
- return f"Text File Content:\n{text[:3500]}"
70
-
71
- # --- PDF ---
72
- if file_type == ".pdf" and pdfplumber:
73
- with pdfplumber.open(BytesIO(content)) as pdf:
74
- text = "\n".join(page.extract_text() or "" for page in pdf.pages)
75
- return f"PDF Content (first 3500 chars):\n{text[:3500]}"
76
-
77
- # --- DOCX ---
78
- if file_type == ".docx" and docx:
79
- d = docx.Document(BytesIO(content))
80
- text = "\n".join(p.text for p in d.paragraphs)
81
- return f"DOCX Content (first 3500 chars):\n{text[:3500]}"
82
-
83
- # --- MP3 (Audio to Text) ---
84
- if file_type == ".mp3" and sr:
85
- # Save MP3 to local
86
- mp3_path = "temp.mp3"
87
- with open(mp3_path, "wb") as f:
88
- f.write(content)
89
- try:
90
- # Convert MP3 to WAV using pydub if available
91
- wav_path = "temp.wav"
92
  try:
93
- from pydub import AudioSegment
94
- sound = AudioSegment.from_mp3(mp3_path)
95
- sound.export(wav_path, format="wav")
96
- audio_file = wav_path
97
- except Exception:
98
- audio_file = mp3_path # Try raw mp3 if conversion fails
99
-
100
- recognizer = sr.Recognizer()
101
- with sr.AudioFile(audio_file) as source:
102
- audio = recognizer.record(source)
103
- transcript = recognizer.recognize_google(audio)
104
- # Clean up
105
- if os.path.exists(mp3_path): os.remove(mp3_path)
106
- if os.path.exists(wav_path): os.remove(wav_path)
107
- return f"Audio Transcript:\n{transcript}"
108
- except Exception as e:
109
- if os.path.exists(mp3_path): os.remove(mp3_path)
110
- if os.path.exists("temp.wav"): os.remove("temp.wav")
111
- return f"Could not transcribe audio: {e}"
112
-
113
- # --- Python file ---
114
- if file_type == ".py":
115
- text = content.decode(errors='ignore')
116
- return f"Python Script Content:\n{text[:3500]}"
117
-
118
- # --- Fallback ---
119
- return f"File type {file_type} is not supported yet, or required package is missing."
120
 
 
121
  except Exception as e:
122
- return f"Failed to parse file: {e}"
 
 
124
  # YouTube transcript tool
125
  def get_youtube_transcript(url: str) -> str:
126
  try:
127
- video_id = url.split("v=")[-1]
128
  transcript = YouTubeTranscriptApi.get_transcript(video_id)
129
  return " ".join([e['text'] for e in transcript])
130
- except Exception:
131
- return "No transcript available."
 
132
 
133
  # ------------ DuckDuckGo Search and Extract -------------------------
134
  def scrape_text_from_url(url: str, max_chars=4000) -> str:
@@ -142,35 +179,63 @@ def scrape_text_from_url(url: str, max_chars=4000) -> str:
142
  except Exception as e:
143
  return f"Could not scrape {url}: {e}"
144
 
145
- def duckduckgo_search_and_scrape(question: str) -> str:
 
146
  """
147
- Performs a DuckDuckGo search, scrapes the top relevant link, and returns the scraped content for LLM-based answering.
 
148
  """
149
- # Step 1: Search
150
  ddg_spec = DuckDuckGoSearchToolSpec()
151
- results = ddg_spec.duckduckgo_full_search(question)
152
- if not results or not isinstance(results, list):
 
153
  return "No search results found."
154
-
155
- # Step 2: Find first Wikipedia or Discogs or similar music data site
156
- for entry in results:
 
157
  href = entry.get("href", "")
158
- if href:
159
- text = scrape_text_from_url(href)
160
- # Step 3: Compose output for LLM or direct answer
161
- return (
162
- f"Here is content scraped from {href}:\n\n"
163
- f"{text}\n\n"
164
- "Based on this, please answer the original question."
165
- )
166
- # If no "trusted" link found, fallback to first result
167
- text = scrape_text_from_url(results[0]["href"])
 
 
168
  return (
169
- f"Here is content scraped from {results[0]['href']}:\n\n"
170
- f"{text}\n\n"
171
- "Based on this, please answer the original question."
 
172
  )
173
 
174
  # ------------ Image Processing Tool Functions -------------------------
175
  # MIME type mapping for images
176
  MIME_MAP = {
@@ -226,9 +291,163 @@ def process_image(file_url: str, question: str) -> str:
226
  except Exception as e:
227
  return f"Vision API error: {e}"
228
 
 
229
  # ------------------------------
230
  # 2. BasicAgent Class Definition
231
# ------------------------------
 
 
232
  class BasicAgent:
233
  def __init__(self):
234
  """Initialize the BasicAgent with all tools and agent workflow."""
@@ -246,29 +465,34 @@ class BasicAgent:
246
 
247
  # Define routing instruction
248
  self.routing_instruction = (
249
- "You are a multi-agent AI system responsible for routing and answering diverse user questions.\n"
250
- "You have access to the following specialized agents:\n"
251
- "- File Parser Agent → handles structured documents like PDFs, DOCXs, CSVs, etc.\n"
252
- "- YouTube Transcript Agent → answers questions about YouTube video content.\n"
253
- "- Web Search Agent → retrieves general or real-time information using web search.\n"
254
- "- Image Agent → analyzes image files and answer visual questions.\n\n"
255
- "Your responsibilities:\n"
256
- "1. Analyze the user question and any attached file if any.\n"
257
- "2. Select and route the task to the most appropriate agent.\n"
258
- "3. Return ONLY the final answer from the selected agent.\n"
259
- "4. If the file type is image file(png,jpg,jpeg, webp, gif etc..), you must immediately forward the question and the provided image path to the Image Agent. Once you hand off to the Image Agent, that agent will call the image_processing tool to fetch and analyze the image.\n\n"
260
- "5. For all other file types(pdf, docx, xlsx, txt, mp3, wav, mov etc..), use the File Parser Agent to extract content and answer the question.\n"
261
- "Strict guidelines:\n"
262
- "- NEVER reply with filler phrases like 'please wait', 'I will await Agent's response.' or 'awaiting response', wait until the response received from any agent.\n"
263
- "- ALWAYS use the most appropriate agent based on the task.\n"
264
- "- For ambiguous, encoded, or reversed questions, attempt to interpret and resolve them logically.\n"
265
- "- Do NOT skip or ignore input unless it clearly violates safety policies.\n\n"
266
- "Answer formatting:\n"
267
- "- Final responses must end with: FINAL ANSWER: [your answer].\n"
268
- "- The answer must be a clean string, number, or comma-separated list — no currency symbols, percentages, or unnecessary words.\n"
269
- "- Do NOT include the phrase 'FINAL ANSWER:' in the output. Return only the final clean answer string.\n\n"
270
- "Now analyze and respond to the user question appropriately."
 
271
  )
 
272
 
273
  def _setup_tools(self):
274
  """Initialize all the tools."""
@@ -278,7 +502,10 @@ class BasicAgent:
278
  self.ddg_tool = FunctionTool.from_defaults(
279
  fn=duckduckgo_search_and_scrape,
280
  name="web_search",
281
- description="Performs a DuckDuckGo search and scrapes the top relevant link and fetch the webpage content of the link to answer the question."
 
282
  )
283
 
284
  self.image_processing_tool = FunctionTool.from_defaults(
@@ -289,16 +516,29 @@ class BasicAgent:
289
 
290
  def _setup_agents(self):
291
"""Initialize all the specialized agents."""
 
 
292
  # File Parsing ReActAgent
293
  self.file_agent = ReActAgent(
294
  name="file_agent",
295
  description="Expert at reading and extracting info from files",
296
- system_prompt="""You are a precise file analyst.
297
- Steps to follow:
298
- 1. Check if there's a file URL in the question
299
- 2. Use parse_file tool to examine the file
300
- 3. For all file types except images, extract content and answer
301
- 4. Never attempt to analyze images yourself""",
 
302
  tools=[self.file_parser_tool],
303
  llm=self.llm,
304
  )
@@ -307,7 +547,17 @@ class BasicAgent:
307
  self.youtube_agent = ReActAgent(
308
  name="youtube_agent",
309
  description="Expert at extracting info from YouTube videos by transcript.",
310
- system_prompt="You are a video analyst. For YouTube questions, fetch and summarize or quote the video transcript.",
 
 
311
  tools=[self.youtube_transcript_tool],
312
  llm=self.llm,
313
  )
@@ -315,26 +565,42 @@ class BasicAgent:
315
  # DuckDuckGo Web Search ReActAgent
316
  self.search_agent = ReActAgent(
317
  name="websearch_agent",
318
- description="Web search expert. ALWAYS use the web search tool for any question you receive. Do NOT just say you are searching.",
319
  system_prompt=(
320
- "You are a web researcher. For any question, always use the DuckDuckGo search tool to get an answer. "
321
- "Never ask the user to wait. Do not simply state that you are searching. "
322
- "Return the answer from the tool as your only reply."
 
 
323
  ),
324
  tools=[self.ddg_tool],
325
  llm=self.llm,
326
  )
327
 
 
328
  # Image Agent
329
  self.image_agent = ReActAgent(
330
  name="image_agent",
331
  description="Analyzes images and answers questions using the image_processing tool.",
332
  system_prompt=(
333
- "You are a vision specialist. For *every* user query involving an image, "
334
- "you **must** issue exactly one tool call:\n\n"
335
- "```\nAction: image_processing\n"
336
- "Action Input: {\"file_url\": <url>, \"question\": <user question>}\n```"
337
- "\nThen immediately return *only* the tool's output."
 
 
338
  ),
339
  tools=[self.image_processing_tool],
340
  llm=self.llm,
@@ -343,22 +609,29 @@ class BasicAgent:
343
  def _setup_workflow(self):
344
  """Initialize the agent workflow."""
345
  self.agentflow = AgentWorkflow(
346
- agents=[self.file_agent, self.youtube_agent, self.search_agent, self.image_agent],
347
- root_agent=self.file_agent.name, # the file_agent will detect image types and delegate to image_agent
 
348
  )
349
-
350
- def _extract_final_answer(self, response_text: str) -> str:
351
- """Extract the final answer from the response, removing 'FINAL ANSWER:' prefix if present."""
352
- # Look for FINAL ANSWER: pattern and extract what comes after
353
- if "FINAL ANSWER:" in response_text:
354
- parts = response_text.split("FINAL ANSWER:", 1)
355
- if len(parts) > 1:
356
- return parts[1].strip()
357
 
358
- # If no FINAL ANSWER: pattern found, return the full response stripped
359
- return response_text.strip()
360
 
361
- def __call__(self, question: str, task_id: str = None) -> str:
 
 
362
  """
363
  Main method to process a question and return an answer.
364
  This method will be called by the evaluation system.
@@ -366,7 +639,8 @@ class BasicAgent:
366
  Args:
367
  question (str): The question to answer
368
  task_id (str, optional): Task ID for file retrieval
369
-
 
370
  Returns:
371
  str: The answer to the question
372
  """
@@ -375,28 +649,56 @@ class BasicAgent:
375
  # The evaluation system should provide file info in the question or via task_id
376
  enhanced_question = question
377
 
378
- # If task_id is provided, we might need to construct file URL
379
- if task_id:
380
- # This assumes the evaluation system follows the same pattern
381
- file_url = f"{self.api_url}/files/{task_id}"
382
- # You might need to adjust this logic based on how files are provided
383
- enhanced_question += f"\nFile URL: {file_url}"
384
-
385
- # Construct the full prompt with routing instructions
386
- full_prompt = f"{self.routing_instruction}\n\nUser Question:\n{enhanced_question}"
387
 
388
- # Run the agent workflow
389
- response = asyncio.run(self.agentflow.run(user_msg=full_prompt))
390
 
391
- # Extract and clean the final answer
392
- final_answer = self._extract_final_answer(response.response.blocks[0].text)
393
 
 
 
 
 
 
 
 
 
 
 
394
  return final_answer
395
 
396
  except Exception as e:
397
  print(f"Error in BasicAgent.__call__: {e}")
398
  return f"Error processing question: {str(e)}"
399
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
400
  # ------------------------------
401
  # 3. Modified answer_questions_batch function (kept for reference)
402
  # ------------------------------
@@ -412,10 +714,11 @@ async def answer_questions_batch(questions_data):
412
  question = question_data.get("question", "")
413
  file_name = question_data.get("file_name", "")
414
  task_id = question_data.get("task_id", "")
 
415
 
416
  try:
417
  # Let the BasicAgent handle the question processing
418
- answer = agent(question, task_id)
419
 
420
  answers.append({
421
  "task_id": task_id,
@@ -504,13 +807,14 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
504
  try:
505
  # Prepare enhanced question with file information if present
506
  enhanced_question = question_text
507
- if file_name:
508
  file_type = Path(file_name).suffix.lower().split("?")[0]
509
  file_url = f"{api_url}/files/{task_id}"
510
  enhanced_question += f"\nThis question relates to the file at {file_url} (filename: {file_name} and file type: {file_type}). Please analyze its contents using the appropriate tool."
511
-
 
512
  # Call the agent
513
- submitted_answer = agent(enhanced_question, task_id)
514
 
515
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
516
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
 
17
  import docx
18
  import speech_recognition as sr
19
  import base64
20
+ import tempfile
21
+ import re
22
 
23
  from io import BytesIO, StringIO
24
  from dotenv import load_dotenv
 
48
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
49
 
50
  # File parsing tool
51
+ def parse_file(file_url: str, file_name: str) -> str:
52
  try:
53
+ # Determine file type from file_name or URL
54
+ if len(file_name)>0:
55
+ file_type = Path(file_name).suffix.lower()
56
+ file_type = file_type.split("?")[0]
57
+ else:
58
+ file_type = None
59
+ # Remove query params
60
+ if file_type:
61
+ resp = requests.get(file_url, timeout=30)
62
+ resp.raise_for_status()
63
+ content = resp.content
64
+
65
+ # --- Excel Files ---
66
+ if file_type in [".xlsx", ".xls"]:
 
 
67
  try:
68
+ df = pd.read_excel(BytesIO(content))
69
+ return f"Excel Content:\n{df.head(5).to_string(index=False)}" # Only first 5 rows
70
+ except Exception as e:
71
+ return f"Excel parsing error: {str(e)}"
 
 
72
 
73
+ # --- CSV Files ---
74
+ elif file_type == ".csv":
75
+ try:
76
+ df = pd.read_csv(BytesIO(content))
77
+ return f"CSV Content:\n{df.head(5).to_string(index=False)}" # Only first 5 rows
78
+ except Exception as e:
79
+ return f"CSV parsing error: {str(e)}"
80
+
81
+ # --- Text Files ---
82
+ elif file_type == ".txt":
83
+ text = content.decode(errors='ignore')
84
+ return f"Text Content:\n{text[:3500]}"
85
+
86
+ # --- PDF Files ---
87
+ elif file_type == ".pdf":
88
+ try:
89
+ with pdfplumber.open(BytesIO(content)) as pdf:
90
+ text = "\n".join(page.extract_text() or "" for page in pdf.pages[:3]) # First 3 pages
91
+ return f"PDF Content:\n{text[:3500]}"
92
+ except Exception as e:
93
+ return f"PDF parsing error: {str(e)}"
94
+
95
+ # --- DOCX Files ---
96
+ elif file_type == ".docx":
97
+ try:
98
+ d = docx.Document(BytesIO(content))
99
+ text = "\n".join(p.text for p in d.paragraphs[:50]) # First 50 paragraphs
100
+ return f"DOCX Content:\n{text[:3500]}"
101
+ except Exception as e:
102
+ return f"DOCX parsing error: {str(e)}"
103
+
104
+ # --- MP3 Files ---
105
+ elif file_type == ".mp3":
106
+ return transcribe_audio(content) # Use helper function
107
+
108
+ # --- Python Files ---
109
+ elif file_type == ".py":
110
+ text = content.decode(errors='ignore')
111
+ return f"Python Code:\n{text[:3500]}"
112
+
113
+ # --- Unsupported Types ---
114
+ else:
115
+ return f"Unsupported file type: {file_type}"
116
+ else:
117
+ return "No file type provided or file URL is invalid."
118
  except Exception as e:
119
+ print(f"[parse_file] ERROR: {e}")
120
+ return f"File parsing failed: {str(e)}"
121
+
122
+ # Audio transcription helper
123
+ def transcribe_audio(content: bytes) -> str:
124
+ try:
125
+ # Create temp files
126
+ with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as mp3_tmp:
127
+ mp3_tmp.write(content)
128
+ mp3_path = mp3_tmp.name
129
+
130
+ wav_path = mp3_path.replace(".mp3", ".wav")
131
+
132
+ # Convert to WAV
133
+ try:
134
+ from pydub import AudioSegment
135
+ audio = AudioSegment.from_mp3(mp3_path)
136
+ audio.export(wav_path, format="wav")
137
+ audio_file = wav_path
138
+ except ImportError:
139
+ audio_file = mp3_path # Fallback to MP3 if pydub not available
140
+
141
+ # Transcribe audio
142
+ recognizer = sr.Recognizer()
143
+ with sr.AudioFile(audio_file) as source:
144
+ audio = recognizer.record(source)
145
+ transcript = recognizer.recognize_google(audio)
146
+
147
+ # Cleanup
148
+ for path in [mp3_path, wav_path]:
149
+ if os.path.exists(path):
150
+ os.remove(path)
151
+
152
+ return f"Audio Transcript:\n{transcript}"
153
 
154
+ except Exception as e:
155
+ print(f"Audio transcription error: {e}")
156
+ return "Could not transcribe audio"
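A minimal usage sketch of the new parse_file(file_url, file_name) signature (not part of this commit; the URL and file names below are hypothetical, and the MP3 branch additionally assumes pydub with an ffmpeg binary on PATH plus network access for recognize_google):

# Hypothetical local check of parse_file(file_url, file_name); suffix of the
# file name picks the branch, then the file is downloaded and parsed.
if __name__ == "__main__":
    sample_url = "https://example.com/files/some-task-id"   # placeholder URL, not a real endpoint
    print(parse_file(sample_url, "report.xlsx"))   # .xlsx suffix routes to the Excel branch
    print(parse_file(sample_url, "notes.txt"))     # .txt suffix routes to the plain-text branch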
157
+
158
  # YouTube transcript tool
159
  def get_youtube_transcript(url: str) -> str:
160
  try:
161
+ video_id = url.split("v=")[-1].split("&")[0] # Clean video ID
162
  transcript = YouTubeTranscriptApi.get_transcript(video_id)
163
  return " ".join([e['text'] for e in transcript])
164
+ except NoTranscriptFound:
165
+ return "No transcript available for this video"
166
+ except Exception as e:
167
+ return f"Error retrieving transcript: {str(e)}"
168
+
169
 
170
  # ------------ DuckDuckGo Search and Extract -------------------------
171
  def scrape_text_from_url(url: str, max_chars=4000) -> str:
 
179
  except Exception as e:
180
  return f"Could not scrape {url}: {e}"
181
 
182
+ def duckduckgo_search_and_scrape(
183
+ question: str,
184
+ max_results: int = 10,
185
+ min_chars: int = 400, # treat shorter pages as “unscrapable”
186
+ max_chars: int = 4000 # final truncate length
187
+ ) -> str:
188
  """
189
+ DuckDuckGo → scrape → fallback.
190
+
191
+ 1. Try up to max_results links; return the first page that gives
192
+ ≥ min_chars of visible text.
193
+ 2. If none succeed, compose an answer from the DDG result metadata.
194
  """
 
195
  ddg_spec = DuckDuckGoSearchToolSpec()
196
+ results = ddg_spec.duckduckgo_full_search(question) or []
197
+
198
+ if not isinstance(results, list):
199
  return "No search results found."
200
+
201
+ cleaned_pages = []
202
+
203
+ for entry in results[:max_results]:
204
  href = entry.get("href", "")
205
+ if not href:
206
+ continue
207
+
208
+ # --- attempt to scrape ------------------------------------------------
209
+ text = scrape_text_from_url(href, max_chars=max_chars)
210
+ if text.startswith("Could not scrape") or len(text) < min_chars:
211
+ continue # treat as failure – try next result
212
+ # success!
213
+ return (
214
+ f"Here is content scraped from {href}:\n\n"
215
+ f"{text}\n\n"
216
+ "Based on this, please answer the original question."
217
+ )
218
+
219
+ # ---------------- fallback: build summary from DDG metadata --------------
220
+ if not results:
221
+ return "No search results found."
222
+
223
+ summary_lines = []
224
+ for idx, entry in enumerate(results[:max_results], start=1):
225
+ title = entry.get("title") or "Untitled result"
226
+ snippet = (entry.get("body") or "").replace("\n", " ")[:160]
227
+ href = entry.get("href")
228
+ summary_lines.append(f"{idx}. {title} – {snippet} ({href})")
229
+
230
  return (
231
+ "I could not successfully scrape any of the top pages. "
232
+ "Here are the top DuckDuckGo results:\n\n"
233
+ + "\n".join(summary_lines)
234
+ + "\n\nPlease answer the original question using this list."
235
  )
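scrape_text_from_url is called above but its body sits outside this hunk; a minimal implementation consistent with how it is used (and with the "Could not scrape ..." error format) might look like the following sketch, which is an assumption rather than the repository's actual code:

import requests
from bs4 import BeautifulSoup

def scrape_text_from_url_sketch(url: str, max_chars: int = 4000) -> str:
    try:
        resp = requests.get(url, timeout=15, headers={"User-Agent": "Mozilla/5.0"})
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, "html.parser")
        text = " ".join(soup.get_text(separator=" ").split())  # strip tags, collapse whitespace
        return text[:max_chars]
    except Exception as e:
        return f"Could not scrape {url}: {e}"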
236
 
237
+
238
+
239
  # ------------ Image Processing Tool Functions -------------------------
240
  # MIME type mapping for images
241
  MIME_MAP = {
 
291
  except Exception as e:
292
  return f"Vision API error: {e}"
293
 
294
+
295
+ # ─── formatter.py (or inline in your module) ─────────────────────────
296
+ from pydantic import BaseModel, ValidationError
297
+ from openai import AzureOpenAI
298
+
299
+ FALLBACK = "ANSWER_NOT_FOUND" # single source of truth, keep as plain text
300
+
301
+ SYSTEM_PROMPT = (
302
+ "You are an answer-formatter. I will give you:\n"
303
+ " • the user question\n"
304
+ " • a raw multi-agent trace that may contain Thoughts, Actions, tool "
305
+ " outputs, and possibly a FINAL ANSWER.\n\n"
306
+
307
+ "Your job:\n"
308
+ "1. Extract the true answer if it is present anywhere in the trace.\n"
309
+ "2. Output exactly one line in this template:\n"
310
+ " FINAL ANSWER: <ANSWER>\n\n"
311
+ "If the trace contains no FINAL ANSWER **but the question itself already contains enough information**, deduce the answer on your own."
312
+ "Return a FINAL ANSWER line in the usual format.\n"
313
+
314
+
315
+ "Rules for <ANSWER>:\n"
316
+ "• Number → digits only, no commas, no currency/percent signs unless "
317
+ " explicitly asked for.\n"
318
+ "• String → as short as possible, no articles unless required.\n"
319
+ "• List → comma-separated values following the above rules; if no order "
320
+ " is specified, sort alphabetically.\n"
321
+ "• If rounding or units are requested in the question, apply before "
322
+ " formatting and include the unit with **no preceding space**.\n\n"
323
+ f"If you cannot find a valid answer, output:\n"
324
+ f" FINAL ANSWER: {FALLBACK}\n\n"
325
+
326
+ "Examples (follow exactly)\n"
327
+ "###\n"
328
+ "Q: Reverse this word: elppa\n"
329
+ "Trace: (no FINAL ANSWER)\n"
330
+ "A: FINAL ANSWER: apple\n"
331
+ "Q: What is 2+3?\n"
332
+ "Trace: Thought: need a calculator\n"
333
+ "A: FINAL ANSWER: 5\n"
334
+ "Q: How many planets? Trace: … FINAL ANSWER: 8\n"
335
+ "A: FINAL ANSWER: 8\n"
336
+ "###\n"
337
+ "Q: Give the colour. Trace: … blue.\n"
338
+ "A: FINAL ANSWER: blue\n"
339
+ "###\n"
340
+ "Q: Name the three vowels. Trace: … a, e, i, o, u.\n"
341
+ "A: FINAL ANSWER: a,e,i,o,u\n"
342
+ "###\n"
343
+ "Q: What’s the speed? (units requested) Trace: … 3.0 m/s.\n"
344
+ "A: FINAL ANSWER: 3.0m/s\n"
345
+ "###\n"
346
+ "Q: Any answer? Trace: … tool failure …\n"
347
+ f"A: FINAL ANSWER: {FALLBACK}"
348
+ )
349
+
350
+
351
+ class Result(BaseModel):
352
+ final_answer: str
353
+
354
+
355
+ def format_final_answer(question: str,
356
+ raw_trace: str,
357
+ *,
358
+ api_key: str,
359
+ api_version: str,
360
+ endpoint: str,
361
+ deployment: str,
362
+ temperature: float = 0.0) -> str:
363
+ """
364
+ Second-pass LLM call that converts an unstructured agent trace into the
365
+ strict 'FINAL ANSWER: …' template. On any error returns the FALLBACK.
366
+ """
367
+ try:
368
+ from openai import AzureOpenAI
369
+ client = AzureOpenAI(
370
+ api_key=api_key,
371
+ api_version=api_version,
372
+ azure_endpoint=endpoint,
373
+ )
374
+
375
+ messages = [
376
+ {"role": "system", "content": SYSTEM_PROMPT},
377
+ {"role": "user", "content": f"Question: {question}\nTrace: {raw_trace}"}
378
+ ]
379
+
380
+ rsp = client.chat.completions.create(
381
+ model=deployment,
382
+ messages=messages,
383
+ temperature=temperature,
384
+ max_tokens=120,
385
+ )
386
+
387
+ out = rsp.choices[0].message.content.strip()
388
+
389
+ # Remove the label for downstream code (keep only the value)
390
+ if out.lower().startswith("final answer:"):
391
+ out = out.split(":", 1)[1].strip()
392
+
393
+ # basic schema check – non-empty string
394
+ Result(final_answer=out)
395
+ return out or FALLBACK
396
+
397
+ except (ValidationError, Exception):
398
+ return FALLBACK
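A hedged usage sketch of the second-pass formatter (the credentials, API version, and deployment name below are placeholders, not values from this repository):

answer = format_final_answer(
    question="How many studio albums did the band release?",
    raw_trace="Thought: need the discography...\nObservation: ...five studio albums...\nFINAL ANSWER: 5",
    api_key="<azure-api-key>",
    api_version="<api-version>",
    endpoint="https://<resource>.openai.azure.com/",
    deployment="<deployment-name>",
)
# answer == "5" on success, or "ANSWER_NOT_FOUND" if anything goes wrong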
399
+
400
  # ------------------------------
401
  # 2. BasicAgent Class Definition
402
  # ------------------------------
403
+ REASONING_PROMPT = """
404
+ You are the Router-&-Reasoning-Agent.
405
+
406
+ NEVER output filler like “Could you please provide more context”.
407
+
408
+ If the answer is not already in the question, DELEGATE:
409
+
410
+ • Any external fact → WebSearch-Agent
411
+ • YouTube link → YouTube-Agent
412
+ • File link (PDF…) → File-Agent
413
+ • Image link → Image-Agent
414
+
415
+ How to delegate
416
+ ───────────────
417
+ Call the special tool `handoff` **once** with JSON:
418
+ {"to_agent":"<agent_name>","reason":"<why>"}
419
+
420
+ When to answer directly
421
+ ───────────────────────
422
+ • The question already contains all information needed (e.g. reversed text,
423
+ Caesar cipher, mental arithmetic, pure logic).
424
+ • You are 100 % certain no external resource is required.
425
+
426
+ Output format
427
+ ─────────────
428
+ • If you delegate → return the tool call only; the delegated agent will finish.
429
+ • If you answer yourself → one line:
430
+ FINAL ANSWER: <clean answer>
431
+ Follow the global rules (digits only, short strings, comma-lists, etc.).
432
+
433
+ Never
434
+ ─────
435
+ • Never try to scrape the web or parse files yourself.
436
+ • Never add filler like “Thinking…” or “Awaiting response”.
437
+ • Never answer if the question clearly needs a specialised agent.
438
+
439
+ Example
440
+ ───────
441
+ Example (self-contained)
442
+ Q: .rewsna eht sa "tfel" … ← reversed
443
+ A: FINAL ANSWER: right
444
+ Example (delegation)
445
+ Q: Who wrote the novel Dune?
446
+ A: Action: handoff
447
+ Action Input: {"to_agent":"websearch_agent","reason":"needs web"}
448
+ """
449
+
450
+
451
  class BasicAgent:
452
  def __init__(self):
453
  """Initialize the BasicAgent with all tools and agent workflow."""
 
465
 
466
  # Define routing instruction
467
  self.routing_instruction = (
468
+ "You are a multi-agent AI system that routes questions **and** produces "
469
+ "the final answer.\n\n"
470
+
471
+ "– If the question already *contains* the needed information "
472
+ "(e.g. encoded, reversed, maths puzzle), **answer directly** – "
473
+ "no tools, no sub-agents.\n\n"
474
+
475
+ "You have four specialised agents:\n"
476
+ "• File-Agent – files (PDF, DOCX, …)\n"
477
+ "• YouTube-Agent – video transcripts\n"
478
+ "• WebSearch-Agent – fresh/general web info\n"
479
+ "• Image-Agent – vision questions\n\n"
480
+
481
+ "When you delegate, do **not** add commentary such as "
482
+ "'I will await the agent's response'.\n"
483
+ "When you answer yourself, end with:\n"
484
+ " FINAL ANSWER: <clean answer>\n\n"
485
+
486
+ "Example ➊ (self-contained)\n"
487
+ 'Q: "opposite of north"..."\n'
488
+ "A: FINAL ANSWER: south\n\n"
489
+
490
+ "Example ➋ (delegation)\n"
491
+ "Q: Who wrote Dune?\n"
492
+ "A: Action: handoff\n"
493
+ 'Action Input: {"to_agent":"websearch_agent","reason":"needs web"}\n'
494
  )
495
+
496
 
497
  def _setup_tools(self):
498
  """Initialize all the tools."""
 
502
  self.ddg_tool = FunctionTool.from_defaults(
503
  fn=duckduckgo_search_and_scrape,
504
  name="web_search",
505
+ description=(
506
+ "Performs a DuckDuckGo search, attempts to scrape each top result, "
507
+ "and falls back to result metadata if scraping fails."
508
+ )
509
  )
510
 
511
  self.image_processing_tool = FunctionTool.from_defaults(
 
516
 
517
  def _setup_agents(self):
518
  """Initialize all the specialized agents."""
519
+
520
+ self.reasoning_agent = ReActAgent(
521
+ name="reasoning_agent",
522
+ description="Router and on-board reasoning.",
523
+ system_prompt=REASONING_PROMPT,
524
+ tools=[], # no direct tools – only `handoff` is implicit
525
+ llm=self.llm,
526
+ )
527
+
528
  # File Parsing ReActAgent
529
  self.file_agent = ReActAgent(
530
  name="file_agent",
531
  description="Expert at reading and extracting info from files",
532
+ system_prompt="""You are File-Agent.
533
+ A router has already chosen you because the user’s question involves a
534
+ non-image file (PDF, DOCX, XLSX, CSV, TXT, MP3, …).
535
+ Rules
536
+ 1. ALWAYS call the tool `parse_file(file_url, file_type?)` **once** to read
537
+ the file.
538
+ 2. Use ONLY the file content to answer the user.
539
+ 3. NEVER hand the task to another agent and NEVER mention you are using a tool.
540
+ 4. When you are done, reply with one line in this exact format:
541
+ FINAL ANSWER: <clean answer text>""",
542
  tools=[self.file_parser_tool],
543
  llm=self.llm,
544
  )
 
547
  self.youtube_agent = ReActAgent(
548
  name="youtube_agent",
549
  description="Expert at extracting info from YouTube videos by transcript.",
550
+ system_prompt="""
551
+ You are YouTube-Agent.
552
+ The router picked you because the question references a YouTube video.
553
+
554
+ Rules
555
+ 1. ALWAYS call `get_youtube_transcript(url)` once.
556
+ 2. Base your answer ONLY on the transcript you receive.
557
+ 3. Do NOT search the web, do NOT invoke other tools.
558
+ 4. End with:
559
+ FINAL ANSWER: <clean answer text>
560
+ """,
561
  tools=[self.youtube_transcript_tool],
562
  llm=self.llm,
563
  )
 
565
  # DuckDuckGo Web Search ReActAgent
566
  self.search_agent = ReActAgent(
567
  name="websearch_agent",
568
+ description="Web search expert.",
569
  system_prompt=(
570
+ "You are WebSearch-Agent.\n"
571
+ "1. ALWAYS call the tool `web_search` exactly once.\n"
572
+ "2. Read the text the tool returns and craft a concise answer to the user.\n"
573
+ "3. Do NOT quote the entire extract; use only the facts needed.\n"
574
+ "4. Finish with:\n"
575
+ " FINAL ANSWER: <clean answer text>"
576
+ "...\n"
577
+ "Example\n"
578
+ "User: Who wrote the novel Dune?\n"
579
+ "Tool output: Here is content scraped from https://en.wikipedia.org/wiki/Dune_(novel): ... Frank Herbert ... Based on this, please answer the original question.\n"
580
+ "Assistant: FINAL ANSWER: Frank Herbert\n"
581
  ),
582
  tools=[self.ddg_tool],
583
  llm=self.llm,
584
  )
585
 
586
+
587
  # Image Agent
588
  self.image_agent = ReActAgent(
589
  name="image_agent",
590
  description="Analyzes images and answers questions using the image_processing tool.",
591
  system_prompt=(
592
+ """
593
+ You are Image-Agent.
594
+ The router picked you because the question involves an image file.
595
+
596
+ Rules
597
+ 1. ALWAYS call the tool `image_processing(file_url, question)` exactly once.
598
+ 2. Use ONLY the image content to answer the user.
599
+ 3. NEVER hand the task to another agent and NEVER mention you are using a tool.
600
+ 4. When you are done, reply with one line in this exact format:
601
+ FINAL ANSWER: <clean answer text>
602
+ """
603
+
604
  ),
605
  tools=[self.image_processing_tool],
606
  llm=self.llm,
 
609
  def _setup_workflow(self):
610
  """Initialize the agent workflow."""
611
  self.agentflow = AgentWorkflow(
612
+ agents=[self.reasoning_agent,
613
+ self.file_agent,
614
+ self.youtube_agent,
615
+ self.search_agent,
616
+ self.image_agent],
617
+ root_agent=self.reasoning_agent.name # start with pure reasoning
618
)
620
 
621
+ # ─── BasicAgent._extract_final_answer ──────────────────────────────────────────
622
+ def _extract_final_answer(self, question: str, agent_resp) -> str:
623
+ raw_trace = "\n".join(block.text for block in agent_resp.response.blocks)
624
+ return format_final_answer(
625
+ question,
626
+ raw_trace,
627
+ api_key=api_key,
628
+ api_version=azure_api_version,
629
+ endpoint=azure_endpoint,
630
+ deployment=azure_model_name,
631
+ )
632
+
633
+
634
+ def __call__(self, question: str, task_id: str, file_name: str, file_type = None) -> str:
635
  """
636
  Main method to process a question and return an answer.
637
  This method will be called by the evaluation system.
 
639
  Args:
640
  question (str): The question to answer
641
  task_id (str, optional): Task ID for file retrieval
642
+ file_name (str, optional): Name of the file associated with the question
643
+ file_type (str, optional): Type of the file (e.g., .pdf, .docx, etc.)
644
  Returns:
645
  str: The answer to the question
646
  """
 
649
  # The evaluation system should provide file info in the question or via task_id
650
  enhanced_question = question
651
 
652
+ if len(file_name) > 0:
653
+ file_url = f"{DEFAULT_API_URL}/files/{task_id}"
654
+ print(f"Processing file: {file_name} with type {file_type} at URL {file_url}")
655
+ enhanced_question += f"\nThis question relates to the file at {file_url} (filename: {file_name} and file type: {file_type}). Please analyze its contents using the appropriate tool."
657
 
658
+ # Construct the full prompt with routing instructions
659
+ full_prompt = f"\n\nUser Question:\n{enhanced_question}"
660
 
661
+ # Run the agent workflow with proper async handling
662
+ agent_resp = self._run_async_workflow(full_prompt)
663
+ print(f"Agent response received:\n{question}\n---\n{agent_resp}")
664
+
665
+ # Extract & return
666
+ final_answer = self._extract_final_answer(question, agent_resp)
667
+ print("Final answer extracted:", final_answer)
668
+ print(f"Final answer extracted: {final_answer}")
669
+ print("------------------------------------------------------------------------------------------------")
670
+ print('****************************************************************************')
671
  return final_answer
672
 
673
  except Exception as e:
674
  print(f"Error in BasicAgent.__call__: {e}")
675
  return f"Error processing question: {str(e)}"
676
 
677
+ # ─── keep just ONE runner ────────────────────────────────────────────
678
+ def _run_async_workflow(self, prompt: str):
679
+ """
680
+ Call `agentflow.run()` until the response STOPs containing an
681
+ Action/Thought line. Works with older llama-index that has no
682
+ `.initialize() / .run_step()`.
683
+ """
684
+ async def _step(msg):
685
+ return await self.agentflow.run(user_msg=msg)
686
+
687
+ async def _inner():
688
+ rsp = await _step(prompt) # first turn
689
+ # If the last block is still a tool-call, keep asking “continue”
690
+ while rsp.response.blocks[-1].text.lstrip().lower().startswith(("action:", "thought:")):
691
+ rsp = await _step("continue")
692
+ return rsp
693
+
694
+ try:
695
+ loop = asyncio.get_running_loop() # running inside Gradio
696
+ except RuntimeError: # plain Python
697
+ return asyncio.run(_inner())
698
+ else:
699
+ return asyncio.run_coroutine_threadsafe(_inner(), loop).result()
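One hedged aside on the runner: if __call__ is reached from the very thread that owns the running event loop, run_coroutine_threadsafe(...).result() would block that loop. A self-contained alternative sketch (an assumption, not the committed code) runs the coroutine on a worker thread instead:

import asyncio
from concurrent.futures import ThreadPoolExecutor

def run_coro_blocking(coro):
    """Run a coroutine to completion whether or not a loop is already running in this thread."""
    try:
        asyncio.get_running_loop()        # raises RuntimeError when no loop runs in this thread
    except RuntimeError:
        return asyncio.run(coro)          # plain script: create and own a fresh loop here
    with ThreadPoolExecutor(max_workers=1) as pool:
        return pool.submit(asyncio.run, coro).result()  # fresh loop on a worker thread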
700
+
701
+
702
  # ------------------------------
703
  # 3. Modified answer_questions_batch function (kept for reference)
704
  # ------------------------------
 
714
  question = question_data.get("question", "")
715
  file_name = question_data.get("file_name", "")
716
  task_id = question_data.get("task_id", "")
717
+ file_type = Path(file_name).suffix.lower().split("?")[0] if len(file_name)> 0 else None
718
 
719
  try:
720
  # Let the BasicAgent handle the question processing
721
+ answer = agent(question, task_id, file_name, file_type)
722
 
723
  answers.append({
724
  "task_id": task_id,
 
807
  try:
808
  # Prepare enhanced question with file information if present
809
  enhanced_question = question_text
810
+ if len(file_name) > 0:
811
  file_type = Path(file_name).suffix.lower().split("?")[0]
812
  file_url = f"{api_url}/files/{task_id}"
813
  enhanced_question += f"\nThis question relates to the file at {file_url} (filename: {file_name} and file type: {file_type}). Please analyze its contents using the appropriate tool."
814
+ else:
815
+ file_type = None
816
  # Call the agent
817
+ submitted_answer = agent(enhanced_question, task_id, file_name, file_type)
818
 
819
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
820
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
requirements.txt CHANGED
@@ -13,4 +13,5 @@ docx==0.2.4
13
  llama-index-embeddings-azure-openai==0.3.5
14
  llama-index-llms-azure-openai==0.3.2
15
  beautifulsoup4
16
- python-dotenv
 
 
13
  llama-index-embeddings-azure-openai==0.3.5
14
  llama-index-llms-azure-openai==0.3.2
15
  beautifulsoup4
16
+ python-dotenv
17
+ gradio[oauth]