Prabhjotschugh committed on
Commit
a5da8b4
·
verified ·
1 Parent(s): 81917a3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +302 -17
app.py CHANGED
@@ -1,23 +1,270 @@
1
- import os
2
  import gradio as gr
3
- import requests
4
- import inspect
5
  import pandas as pd
 
 
 
 
 
 
 
 
 
 
6
 
7
  # (Keep Constants as is)
8
  # --- Constants ---
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
11
  # --- Basic Agent Definition ---
12
- # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
13
class BasicAgent:
    """Placeholder agent that replies to every question with one fixed sentence."""

    def __init__(self):
        # Announce construction on stdout.
        print("BasicAgent initialized.")

    def __call__(self, question: str) -> str:
        """
        Log the first 50 characters of the question and return the canned reply.

        Args:
            question (str): The question text; only used for logging.
        Returns:
            str: Always "This is a default answer.".
        """
        preview = question[:50]
        print(f"Agent received question (first 50 chars): {preview}...")
        reply = "This is a default answer."
        print(f"Agent returning fixed answer: {reply}")
        return reply
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
  def run_and_submit_all( profile: gr.OAuthProfile | None):
23
  """
@@ -40,11 +287,16 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
40
 
41
  # 1. Instantiate Agent ( modify this part to create your agent)
42
  try:
43
- agent = BasicAgent()
 
 
 
 
 
44
  except Exception as e:
45
  print(f"Error instantiating agent: {e}")
46
  return f"Error initializing agent: {e}", None
47
- # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
48
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
49
  print(agent_code)
50
 
@@ -76,11 +328,46 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
76
  for item in questions_data:
77
  task_id = item.get("task_id")
78
  question_text = item.get("question")
 
79
  if not task_id or question_text is None:
80
  print(f"Skipping item with missing task_id or question: {item}")
81
  continue
82
  try:
83
- submitted_answer = agent(question_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
85
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
86
  except Exception as e:
@@ -146,11 +433,9 @@ with gr.Blocks() as demo:
146
  gr.Markdown(
147
  """
148
  **Instructions:**
149
-
150
  1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
151
  2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
152
  3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
153
-
154
  ---
155
  **Disclaimers:**
156
  Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
 
 
1
  import gradio as gr
 
 
2
  import pandas as pd
3
+ from smolagents import CodeAgent, OpenAIServerModel, tool
4
+ import os, subprocess
5
+ from bs4 import BeautifulSoup
6
+ from duckduckgo_search import DDGS
7
+ import csv
8
+ import json
9
+ import requests
10
+ import whisper
11
+ from typing import Optional
12
+ import openpyxl
13
 
14
  # (Keep Constants as is)
15
  # --- Constants ---
16
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
17
 
18
  # --- Basic Agent Definition ---
19
+ # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
20
def download_file(file_name: str) -> None:
    """
    Download a file from the scoring API unless it already exists locally.

    The remote resource is addressed by the part of ``file_name`` that precedes
    its first dot: the request goes to ``{DEFAULT_API_URL}/files/<that part>``
    and the response body is written to ``file_name``.

    Args:
        file_name (str): Local path to write to; also used to derive the remote id.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server responds with an HTTP error status.
    """
    if os.path.exists(file_name):
        return  # already on disk, nothing to fetch

    url = f"{DEFAULT_API_URL}/files/{file_name.split('.')[0]}"
    # A timeout keeps a single unresponsive request from stalling the whole run.
    response = requests.get(url, timeout=30)
    # Check the status before writing: otherwise a 4xx/5xx error body would be
    # saved under file_name and treated as the real file on every later call.
    response.raise_for_status()
    with open(file_name, "wb") as f:
        f.write(response.content)
26
+
27
@tool
def open_file_as_text(file_name: str, filetype: Optional[str] = "txt") -> str:
    """
    Opens a file and returns its content as readable text.
    Supports 'txt', 'json', 'csv', 'xlsx', and 'mp3' (transcribes speech to text).
    Args:
        file_name (str): The path or name of the file.
        filetype (Optional[str]): Type of file ('txt', 'json', 'csv', 'xlsx', 'mp3'). Case-insensitive, a leading dot is ignored. Defaults to 'txt'.
    Returns:
        str: The content of the file as text, or transcribed speech if 'mp3'.
    """
    # Normalise the selector so None, "CSV" or ".csv" reach the intended branch
    # instead of falling through to the "unsupported" message.
    kind = (filetype or "txt").strip().lower().lstrip(".")

    try:
        # Fetch inside the try so download failures are reported through the
        # same string-based error path as every other failure of this tool.
        download_file(file_name)

        if kind == "txt":
            with open(file_name, "r", encoding="utf-8") as f:
                return f.read()

        elif kind == "json":
            with open(file_name, "r", encoding="utf-8") as f:
                data = json.load(f)
            # Re-serialise with indentation so nested structures are readable.
            return json.dumps(data, indent=2)

        elif kind == "csv":
            # newline="" lets the csv module do its own newline handling, which
            # is required for quoted fields containing line breaks.
            with open(file_name, "r", encoding="utf-8", newline="") as f:
                rows = list(csv.reader(f))
            return "\n".join(", ".join(row) for row in rows)

        elif kind == "xlsx":
            # data_only=True yields cached cell values rather than formulas.
            wb = openpyxl.load_workbook(file_name, data_only=True)
            sheet = wb.active  # only the active worksheet is rendered
            content = [
                ", ".join(str(cell) if cell is not None else "" for cell in row)
                for row in sheet.iter_rows(values_only=True)
            ]
            return "\n".join(content)

        elif kind == "mp3":
            w = whisper.load_model("base")
            res = w.transcribe(file_name)
            return res["text"]

        else:
            return f"Unsupported filetype '{filetype}'. Supported types are 'txt', 'json', 'csv', 'xlsx', and 'mp3'."

    except FileNotFoundError:
        return f"File '{file_name}' not found."
    except Exception as e:
        # Tool boundary: report the problem as text instead of raising.
        return f"Error opening file '{file_name}': {str(e)}"
75
+
76
@tool
def web_search(query: str) -> str:
    """
    Runs a DuckDuckGo web search and returns the top result snippets.
    Args:
        query (str): The search query string.
    Returns:
        str: Up to three results, each with title, snippet, and URL, separated by blank lines.
    """
    try:
        with DDGS() as ddgs:
            hits = ddgs.text(query, max_results=3)
            if not hits:
                return "No results found."
            formatted = []
            for hit in hits:
                formatted.append(
                    f"Title: {hit['title']}\nSnippet: {hit['body']}\nURL: {hit['href']}"
                )
            return "\n\n".join(formatted)
    except Exception as e:
        # Any search failure is reported as text rather than raised.
        return f"Error during search: {str(e)}"
93
+
94
def parse_wikipedia_table(table) -> str:
    """
    Parses a Wikipedia table into a clean, readable text format.

    Each output line is one table row with its cells joined by " | ". If the
    table has a <thead>, the texts of all its <th> cells are emitted first as a
    single header line. Data rows come from the first <tbody>, or from the whole
    table when no <tbody> tag is present. <sup class="reference"> footnote
    markers are removed from the parsed tree before cell text is read, so the
    passed-in table is modified.

    Args:
        table (Tag): BeautifulSoup Tag for the table.
    Returns:
        str: Formatted table as readable text.
    """
    rows = []

    # Try to get headers
    thead = table.find('thead')
    header_emitted = False
    if thead:
        headers = [th.get_text(separator=" ", strip=True) for th in thead.find_all('th')]
        if headers:
            rows.append(" | ".join(headers))
            header_emitted = True

    # Parse table body rows
    tbody = table.find('tbody')
    scanning_whole_table = not tbody
    if scanning_whole_table:
        tbody = table  # fallback: some tables have no tbody explicitly

    for tr in tbody.find_all('tr'):
        # In the fallback case the scan also reaches the rows inside <thead>.
        # Their cells were already written as the header line, so skip them
        # to avoid printing the header twice.
        if (
            scanning_whole_table
            and header_emitted
            and any(parent is thead for parent in tr.parents)
        ):
            continue

        cell_texts = []
        for cell in tr.find_all(['th', 'td']):
            # Clean references like [7], [note 1], etc.
            for sup in cell.find_all('sup', class_='reference'):
                sup.decompose()
            cell_texts.append(cell.get_text(separator=" ", strip=True))

        if cell_texts:
            rows.append(" | ".join(cell_texts))

    return "\n".join(rows)
135
+
136
@tool
def read_wikipedia_page(url: str) -> str:
    """
    Fetches a Wikipedia article and returns its main content as clean, sectioned text.
    Headings, paragraphs, lists and tables are included in page order.
    Args:
        url (str): The Wikipedia page URL.
    Returns:
        str: The readable text of the article, or 'Content not found.' when the page has no article body.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36"
    }
    # Network and HTTP-status errors are not caught here; they propagate to the caller.
    resp = requests.get(url, headers=headers, timeout=10)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "html.parser")

    content_div = soup.find('div', id='mw-content-text')
    if not content_div:
        return "Content not found."

    container_tags = ('ul', 'ol', 'table')

    def _inside_emitted_container(elem) -> bool:
        """Return True if elem sits inside a list or table within the article body."""
        for parent in elem.parents:
            if parent is content_div:
                return False
            if parent.name in container_tags:
                return True
        return False

    parts = []
    for elem in content_div.find_all(['h2', 'h3', 'p', 'ul', 'ol', 'table']):
        # find_all is recursive, so it also yields elements nested inside a
        # list or table that is itself yielded. The outer element's text
        # already contains theirs, so emitting them again would only
        # duplicate content (common in infoboxes and navboxes).
        if _inside_emitted_container(elem):
            continue

        if elem.name in ['h2', 'h3']:
            # Surround headings with blank lines so sections stand out.
            parts.append("\n\n" + elem.get_text(strip=True) + "\n")
        elif elem.name in ['p', 'ul', 'ol']:
            parts.append(elem.get_text(strip=True))
        elif elem.name == 'table':
            parts.append(parse_wikipedia_table(elem))

    return "\n".join(parts)
168
+
169
@tool
def smart_paginate_around_query(full_text: str, query: str) -> list:
    """
    Splits text into windows around each occurrence of the query.
    Matching is case-insensitive. Each window covers up to 1000 characters before the match and up to 3000 characters after it. The search resumes after the end of each window, so a match inside an earlier window does not start a new one. An empty query returns an empty list.
    Args:
        full_text (str): The full text to search within.
        query (str): The search query.
    Returns:
        list: List of relevant text windows (pages).
    """
    before_chars = 1000
    after_chars = 3000

    # An empty needle matches at every position, including the very end of the
    # text, so the scan below would never reach -1 and would loop forever.
    if not query:
        return []

    full_text_lower = full_text.lower()
    query_lower = query.lower()
    query_len = len(query_lower)
    text_len = len(full_text)

    pages = []
    search_pos = 0
    while True:
        match_pos = full_text_lower.find(query_lower, search_pos)
        if match_pos == -1:
            break  # no more matches

        # Define window around match, clamped to the text boundaries
        start = max(0, match_pos - before_chars)
        end = min(text_len, match_pos + query_len + after_chars)
        pages.append(full_text[start:end])

        # Move search pointer to AFTER current window
        search_pos = end

    return pages
206
+
207
@tool
def reverse_sentence(text: str) -> str:
    """
    Returns the input text with its characters in reverse order.
    Args:
        text (str): The input string to be reversed.
    Returns:
        str: The reversed string.
    """
    # Walk the characters back to front and glue them together again.
    return "".join(reversed(text))
217
+
218
@tool
def run_python_code(file_name: str) -> str:
    """
    Executes a Python file and returns its printed final output.
    Args:
        file_name (str): Name of the Python file.
    Returns:
        str: The final printed output.
    """
    import sys  # local import: only needed to locate the running interpreter

    try:
        # Fetch inside the try so a failed download is reported as text,
        # like every other failure of this tool.
        download_file(file_name)

        # NOTE(security): this runs downloaded code with the privileges of the
        # app process; only the 10-second timeout limits it.
        # Use the interpreter running this app rather than whatever "python"
        # resolves to on PATH (which may be missing or lack the app's packages).
        result = subprocess.run(
            [sys.executable or "python", file_name],
            capture_output=True,
            text=True,
            timeout=10,  # seconds
        )
    except subprocess.TimeoutExpired:
        return "Execution timed out."
    except Exception as e:
        return f"Error: {str(e)}"

    if result.returncode != 0:
        return f"Error running code: {result.stderr.strip()}"

    return result.stdout.strip()
248
+
249
# Tools handed to the agent.
# NOTE(review): run_python_code is defined in this file but is not listed
# here - confirm whether leaving it out is intentional.
tools = [
    open_file_as_text,
    web_search,
    read_wikipedia_page,
    smart_paginate_around_query,
    reverse_sentence,
]

# Language model backing the agent; the API key is read from the environment
# (None when OPENAI_API_KEY is unset).
model = OpenAIServerModel(
    model_id="gpt-4o",
    api_key=os.environ.get("OPENAI_API_KEY"),
    temperature=0,
)

# Module-level agent instance built from the model and tools above.
agent = CodeAgent(
    tools=tools,
    model=model,
    additional_authorized_imports=[
        "pandas",
        "numpy",
        "datetime",
        "json",
        "re",
        "math",
        "os",
        "requests",
        "csv",
        "urllib",
    ],
)
268
 
269
  def run_and_submit_all( profile: gr.OAuthProfile | None):
270
  """
 
287
 
288
  # 1. Instantiate Agent ( modify this part to create your agent)
289
  try:
290
+ agent = CodeAgent(
291
+ model=model,
292
+ tools=tools,
293
+ additional_authorized_imports=["pandas", "numpy", "datetime", "json", "re", "math", "os", "requests", "csv",
294
+ "urllib"]
295
+ )
296
  except Exception as e:
297
  print(f"Error instantiating agent: {e}")
298
  return f"Error initializing agent: {e}", None
299
+ # In the case of an app running as a hugging Face space, this link points toward your codebase (useful for others so please keep it public)
300
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
301
  print(agent_code)
302
 
 
328
  for item in questions_data:
329
  task_id = item.get("task_id")
330
  question_text = item.get("question")
331
+ file_name = item.get("file_name")
332
  if not task_id or question_text is None:
333
  print(f"Skipping item with missing task_id or question: {item}")
334
  continue
335
  try:
336
+ full_prompt = f"""You are a highly precise answering agent.
337
+ When given a question:
338
+ - If necessary, perform a web search using the tool `web_search` to find possible sources of information.
339
+ - If the web search only returns titles and short snippets, you MUST visit the actual webpage to read the full content before answering.
340
+ - Use the `read_wikipedia_page` tool to fetch and read the Wikipedia page when necessary.
341
+ - You just have the ability to read Wikipedia pages only.
342
+ - You MUST paginate the content using `smart_paginate_around_query`.
343
+ - When using `smart_paginate_around_query`, you must select a short, general query based on the main keywords only. Avoid using full questions or long phrases. Use 1–3 essential words.
344
+ - If the task requires reversing the order of words, letters, phrases, or any text, you must use the `reverse_sentence` tool to perform the operation.
345
+ - Never reverse text manually inside your code. Always call the tool instead.
346
+ - If the task requires reading, listening, or analyzing a file, you must use the file specified in the `file_name` field of the task metadata, not the file name mentioned casually inside the question text.
347
+ - Comma separated lists MUST contain a single space after each comma.
348
+ - If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
349
+ - If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
350
+ - If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
351
+ - Only answer after you have gathered enough information by reading the actual page contents.
352
+ - Once you have the final answer, you must call `final_answer("your_answer")` immediately after printing it.
353
+ - Do not retry or execute anything else after calling `final_answer`.
354
+ - `final_answer` must wrap the exact printed value.
355
+ Provide ONLY the precise answer requested.
356
+ Do not include explanations, steps, reasoning, or additional text.
357
+ Be direct and specific. GAIA benchmark requires exact matching answers.
358
+ Example: if asked "What is the capital of France?", respond exactly:
359
+ Thoughts: I need to retrieve the capital of France from Wikipedia and output it directly.
360
+ Code:
361
+ ```py
362
+ print("Paris")
363
+ ```<end_code>
364
+ Based on the above guidelines, answer the following question:
365
+ --begin of question--
366
+ {question_text}
367
+ --end of question--
368
+ If the questions mentions the need to use a file, use the following `file_name` value as the `file_name` parameter in any function calls:
369
+ file_name: {file_name}"""
370
+ submitted_answer = agent.run(full_prompt)
371
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
372
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
373
  except Exception as e:
 
433
  gr.Markdown(
434
  """
435
  **Instructions:**
 
436
  1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
437
  2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
438
  3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
 
439
  ---
440
  **Disclaimers:**
441
  Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).