lucasnseq committed on
Commit f61048a · verified · 1 Parent(s): e0dbb96

Upload 8 files

Files changed (8)
  1. .gitattributes +35 -35
  2. .gitignore +2 -0
  3. README.md +14 -14
  4. agent.py +102 -0
  5. app.py +184 -264
  6. consts.py +2 -0
  7. requirements.txt +9 -9
  8. tools.py +47 -0
.gitattributes CHANGED
@@ -1,35 +1,35 @@
- *.7z filter=lfs diff=lfs merge=lfs -text
- *.arrow filter=lfs diff=lfs merge=lfs -text
- *.bin filter=lfs diff=lfs merge=lfs -text
- *.bz2 filter=lfs diff=lfs merge=lfs -text
- *.ckpt filter=lfs diff=lfs merge=lfs -text
- *.ftz filter=lfs diff=lfs merge=lfs -text
- *.gz filter=lfs diff=lfs merge=lfs -text
- *.h5 filter=lfs diff=lfs merge=lfs -text
- *.joblib filter=lfs diff=lfs merge=lfs -text
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
- *.model filter=lfs diff=lfs merge=lfs -text
- *.msgpack filter=lfs diff=lfs merge=lfs -text
- *.npy filter=lfs diff=lfs merge=lfs -text
- *.npz filter=lfs diff=lfs merge=lfs -text
- *.onnx filter=lfs diff=lfs merge=lfs -text
- *.ot filter=lfs diff=lfs merge=lfs -text
- *.parquet filter=lfs diff=lfs merge=lfs -text
- *.pb filter=lfs diff=lfs merge=lfs -text
- *.pickle filter=lfs diff=lfs merge=lfs -text
- *.pkl filter=lfs diff=lfs merge=lfs -text
- *.pt filter=lfs diff=lfs merge=lfs -text
- *.pth filter=lfs diff=lfs merge=lfs -text
- *.rar filter=lfs diff=lfs merge=lfs -text
- *.safetensors filter=lfs diff=lfs merge=lfs -text
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
- *.tar.* filter=lfs diff=lfs merge=lfs -text
- *.tar filter=lfs diff=lfs merge=lfs -text
- *.tflite filter=lfs diff=lfs merge=lfs -text
- *.tgz filter=lfs diff=lfs merge=lfs -text
- *.wasm filter=lfs diff=lfs merge=lfs -text
- *.xz filter=lfs diff=lfs merge=lfs -text
- *.zip filter=lfs diff=lfs merge=lfs -text
- *.zst filter=lfs diff=lfs merge=lfs -text
- *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,2 @@
+ .venv
+ .env
README.md CHANGED
@@ -1,15 +1,15 @@
- ---
- title: Template Final Assignment
- emoji: 🕵🏻‍♂️
- colorFrom: indigo
- colorTo: indigo
- sdk: gradio
- sdk_version: 5.25.2
- app_file: app.py
- pinned: false
- hf_oauth: true
- # optional, default duration is 8 hours/480 minutes. Max duration is 30 days/43200 minutes.
- hf_oauth_expiration_minutes: 480
- ---
-
+ ---
+ title: Template Final Assignment
+ emoji: 🕵🏻‍♂️
+ colorFrom: indigo
+ colorTo: indigo
+ sdk: gradio
+ sdk_version: 5.25.2
+ app_file: app.py
+ pinned: false
+ hf_oauth: true
+ # optional, default duration is 8 hours/480 minutes. Max duration is 30 days/43200 minutes.
+ hf_oauth_expiration_minutes: 480
+ ---
+
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
agent.py ADDED
@@ -0,0 +1,102 @@
+ # Libs
+ import os
+ import yaml
+ import importlib.resources
+
+ from smolagents import (
+     CodeAgent,
+     DuckDuckGoSearchTool,
+     VisitWebpageTool,
+     WikipediaSearchTool,
+     OpenAIServerModel,
+     SpeechToTextTool,
+     FinalAnswerTool,
+ )
+
+ # Local
+ from tools import GetTaskFileTool, LoadXlsxFileTool, LoadTextFileTool
+
+ sys_instruction = (
+     "You are a general AI assistant. Answer each question by reporting your thoughts, "
+     "then submit ONLY a concise text using the 'final_answer' tool. "
+     "Final answer MUST be a number, a few words, or a comma-separated list of numbers and/or strings. "
+     "For numbers, avoid commas and units unless specified. For strings, avoid articles and abbreviations, "
+     "and write digits in full unless stated otherwise. Apply these rules for list elements as well."
+ )
+
+ # Prepend the answer-format instruction to the stock CodeAgent system prompt
+ prompts = yaml.safe_load(
+     importlib.resources.files("smolagents.prompts").joinpath("code_agent.yaml").read_text()
+ )
+ prompts["system_prompt"] = sys_instruction + prompts["system_prompt"]
+
+ req_instruction = (
+     "You are an expert and helpful agent named {{name}}.\n"
+     "A valued client has assigned you the following task:\n"
+     "---\n"
+     "Task:\n"
+     "{{task}}\n"
+     "---\n"
+     "Use your tools as needed. Before completing the task, plan your actions carefully.\n"
+     "While completing the task, think step by step, and after completing it, carefully double-check your solution.\n\n"
+     "If you respond correctly, you will be rewarded with a very high bonus.\n\n"
+     "Your final_answer MUST be:\n"
+     "- a number,\n"
+     "- a short phrase,\n"
+     "- or a comma-separated list of numbers or strings (no articles or abbreviations).\n\n"
+     "Only the content passed to the final_answer tool will be preserved—everything else will be discarded."
+ )
+ prompts['managed_agent']['task'] = req_instruction
+ prompts['managed_agent']['report'] = "{{final_answer}}"
+
+ # print(prompts["system_prompt"])
+ # print(prompts['planning'])
+ # print(prompts['managed_agent'])
+ # print(prompts['final_answer'])
+
+ def get_model(
+     model_id: str = "gpt-4.1-mini",
+     model_temperature: float = 0.7,
+ ):
+     """
+     Create and return an OpenAIServerModel instance with the specified model ID and temperature.
+     """
+     # Load the model
+     model = OpenAIServerModel(
+         model_id=model_id,
+         api_key=os.getenv("OPENAI_API_KEY"),
+         temperature=model_temperature,
+     )
+     return model
+
+ def get_agent(
+     model_id: str = "gpt-4.1-mini",
+     model_temperature: float = 0.7,
+     agent_max_steps: int = 15,
+ ):
+     """
+     Create and return a CodeAgent instance with the specified model and tools.
+     """
+     # Define the agent with the tools
+     agent = CodeAgent(
+         tools=[
+             DuckDuckGoSearchTool(),
+             VisitWebpageTool(),
+             WikipediaSearchTool(),
+             GetTaskFileTool(),
+             SpeechToTextTool(),
+             LoadXlsxFileTool(),
+             LoadTextFileTool(),
+             FinalAnswerTool(),
+         ],
+         model=get_model(
+             model_id=model_id,
+             model_temperature=model_temperature,
+         ),
+         prompt_templates=prompts,
+         max_steps=agent_max_steps,
+         additional_authorized_imports=["pandas"],
+         name="GAIAAgent",
+     )
+     return agent
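For reference, a minimal sketch of how get_agent could be exercised locally, assuming OPENAI_API_KEY is set in the environment; the question below is a placeholder, not a benchmark task. app.py calls the agent the same way (agent(question_text)) inside run_and_submit_all.

from agent import get_agent

agent = get_agent(model_id="gpt-4.1-mini", model_temperature=0.7, agent_max_steps=15)

question = "What is the capital of France?"  # placeholder question for illustration only
answer = agent(question)  # same call style app.py uses in run_and_submit_all
print(answer)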
app.py CHANGED
@@ -1,265 +1,185 @@
- import os
- import gradio as gr
- import requests
- import pandas as pd
- import yaml
- import importlib
-
- from smolagents import (
-     CodeAgent,
-     DuckDuckGoSearchTool,
-     VisitWebpageTool,
-     WikipediaSearchTool,
-     Tool,
-     OpenAIServerModel,
-     SpeechToTextTool,
-     FinalAnswerTool,
- )
-
- DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-
- class GetTaskFileTool(Tool):
-     name = "get_task_file_tool"
-     description = """This tool downloads the file content associated with the given task_id if exists. Returns absolute file path"""
-     inputs = {
-         "task_id": {"type": "string", "description": "Task id"},
-         "file_name": {"type": "string", "description": "File name"},
-     }
-     output_type = "string"
-
-     def forward(self, task_id: str, file_name: str) -> str:
-         response = requests.get(f"{DEFAULT_API_URL}/files/{task_id}", timeout=15)
-         response.raise_for_status()
-         with open(file_name, 'wb') as file:
-             file.write(response.content)
-         return os.path.abspath(file_name)
-
- class LoadXlsxFileTool(Tool):
-     name = "load_xlsx_file_tool"
-     description = """This tool loads xlsx file into pandas and returns it"""
-     inputs = {
-         "file_path": {"type": "string", "description": "File path"}
-     }
-     output_type = "object"
-
-     def forward(self, file_path: str) -> object:
-         return pd.read_excel(file_path)
-
- class LoadTextFileTool(Tool):
-     name = "load_text_file_tool"
-     description = """This tool loads any text file"""
-     inputs = {
-         "file_path": {"type": "string", "description": "File path"}
-     }
-     output_type = "string"
-
-     def forward(self, file_path: str) -> object:
-         with open(file_path, 'r', encoding='utf-8') as file:
-             return file.read()
-
- prompts = yaml.safe_load(
-     importlib.resources.files("smolagents.prompts").joinpath("code_agent.yaml").read_text()
- )
- prompts["system_prompt"] = ("You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer using the 'final_answer' tool. Your final answer MUST be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string. "
-                             + prompts["system_prompt"])
- print(prompts["system_prompt"])
-
- def init_agent():
-     model = OpenAIServerModel(
-         model_id="gpt-4.1",
-         api_key=os.getenv("OPENAI_API_KEY"),
-         temperature=0.7
-     )
-     agent = CodeAgent(
-         tools=[
-             DuckDuckGoSearchTool(),
-             VisitWebpageTool(),
-             WikipediaSearchTool(),
-             GetTaskFileTool(),
-             SpeechToTextTool(),
-             LoadXlsxFileTool(),
-             LoadTextFileTool(),
-             FinalAnswerTool(),
-         ],
-         model=model,
-         prompt_templates=prompts,
-         max_steps=15,
-         additional_authorized_imports = ["pandas"]
-     )
-     return agent
-
- def run_and_submit_all( profile: gr.OAuthProfile | None):
-     """
-     Fetches all questions, runs the BasicAgent on them, submits all answers,
-     and displays the results.
-     """
-     # --- Determine HF Space Runtime URL and Repo URL ---
-     space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
-
-     if profile:
-         username= f"{profile.username}"
-         print(f"User logged in: {username}")
-     else:
-         print("User not logged in.")
-         return "Please Login to Hugging Face with the button.", None
-
-     api_url = DEFAULT_API_URL
-     questions_url = f"{api_url}/questions"
-     submit_url = f"{api_url}/submit"
-
-     # 1. Instantiate Agent ( modify this part to create your agent)
-     try:
-         agent = init_agent()
-     except Exception as e:
-         print(f"Error instantiating agent: {e}")
-         return f"Error initializing agent: {e}", None
-     # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
-     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
-     print(agent_code)
-
-     # 2. Fetch Questions
-     print(f"Fetching questions from: {questions_url}")
-     try:
-         response = requests.get(questions_url, timeout=15)
-         response.raise_for_status()
-         questions_data = response.json()
-         if not questions_data:
-             print("Fetched questions list is empty.")
-             return "Fetched questions list is empty or invalid format.", None
-         print(f"Fetched {len(questions_data)} questions.")
-     except requests.exceptions.RequestException as e:
-         print(f"Error fetching questions: {e}")
-         return f"Error fetching questions: {e}", None
-     except requests.exceptions.JSONDecodeError as e:
-         print(f"Error decoding JSON response from questions endpoint: {e}")
-         print(f"Response text: {response.text[:500]}")
-         return f"Error decoding server response for questions: {e}", None
-     except Exception as e:
-         print(f"An unexpected error occurred fetching questions: {e}")
-         return f"An unexpected error occurred fetching questions: {e}", None
-
-     # 3. Run your Agent
-     results_log = []
-     answers_payload = []
-     print(f"Running agent on {len(questions_data)} questions...")
-     for item in questions_data:
-         task_id = item.get("task_id")
-         question_text = item.get("question")
-         if not task_id or question_text is None:
-             print(f"Skipping item with missing task_id or question: {item}")
-             continue
-         try:
-             submitted_answer = agent(question_text)
-             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
-             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
-         except Exception as e:
-             print(f"Error running agent on task {task_id}: {e}")
-             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
-
-     if not answers_payload:
-         print("Agent did not produce any answers to submit.")
-         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
-
-     # 4. Prepare Submission
-     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
-     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
-     print(status_update)
-
-     # 5. Submit
-     print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
-     try:
-         response = requests.post(submit_url, json=submission_data, timeout=60)
-         response.raise_for_status()
-         result_data = response.json()
-         final_status = (
-             f"Submission Successful!\n"
-             f"User: {result_data.get('username')}\n"
-             f"Overall Score: {result_data.get('score', 'N/A')}% "
-             f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
-             f"Message: {result_data.get('message', 'No message received.')}"
-         )
-         print("Submission successful.")
-         results_df = pd.DataFrame(results_log)
-         return final_status, results_df
-     except requests.exceptions.HTTPError as e:
-         error_detail = f"Server responded with status {e.response.status_code}."
-         try:
-             error_json = e.response.json()
-             error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
-         except requests.exceptions.JSONDecodeError:
-             error_detail += f" Response: {e.response.text[:500]}"
-         status_message = f"Submission Failed: {error_detail}"
-         print(status_message)
-         results_df = pd.DataFrame(results_log)
-         return status_message, results_df
-     except requests.exceptions.Timeout:
-         status_message = "Submission Failed: The request timed out."
-         print(status_message)
-         results_df = pd.DataFrame(results_log)
-         return status_message, results_df
-     except requests.exceptions.RequestException as e:
-         status_message = f"Submission Failed: Network error - {e}"
-         print(status_message)
-         results_df = pd.DataFrame(results_log)
-         return status_message, results_df
-     except Exception as e:
-         status_message = f"An unexpected error occurred during submission: {e}"
-         print(status_message)
-         results_df = pd.DataFrame(results_log)
-         return status_message, results_df
-
-
- # --- Build Gradio Interface using Blocks ---
- with gr.Blocks() as demo:
-     gr.Markdown("# Basic Agent Evaluation Runner")
-     gr.Markdown(
-         """
-         **Instructions:**
-
-         1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
-         2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
-         3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
-
-         ---
-         **Disclaimers:**
-         Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
-         This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
-         """
-     )
-
-     gr.LoginButton()
-
-     run_button = gr.Button("Run Evaluation & Submit All Answers")
-
-     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
-     # Removed max_rows=10 from DataFrame constructor
-     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
-
-     run_button.click(
-         fn=run_and_submit_all,
-         outputs=[status_output, results_table]
-     )
-
- if __name__ == "__main__":
-     print("\n" + "-"*30 + " App Starting " + "-"*30)
-     # Check for SPACE_HOST and SPACE_ID at startup for information
-     space_host_startup = os.getenv("SPACE_HOST")
-     space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
-
-     if space_host_startup:
-         print(f"✅ SPACE_HOST found: {space_host_startup}")
-         print(f"   Runtime URL should be: https://{space_host_startup}.hf.space")
-     else:
-         print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
-
-     if space_id_startup: # Print repo URLs if SPACE_ID is found
-         print(f"✅ SPACE_ID found: {space_id_startup}")
-         print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
-         print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
-     else:
-         print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
-
-     print("-"*(60 + len(" App Starting ")) + "\n")
-
-     print("Launching Gradio Interface for Basic Agent Evaluation...")
+ # Libs
+ import os
+ import gradio as gr
+ import requests
+ import pandas as pd
+
+ # Local
+ from consts import DEFAULT_API_URL, MODEL_ID
+ from agent import get_agent
+
+ def run_and_submit_all(profile: gr.OAuthProfile | None):
+     """
+     Fetches all questions, runs the agent on them, submits all answers,
+     and displays the results.
+     """
+     # --- Determine HF Space Runtime URL and Repo URL ---
+     space_id = os.getenv("SPACE_ID")  # Get the SPACE_ID for sending a link to the code
+
+     if profile:
+         username = f"{profile.username}"
+         print(f"User logged in: {username}")
+     else:
+         print("User not logged in.")
+         return "Please log in to Hugging Face with the button.", None
+
+     api_url = DEFAULT_API_URL
+     questions_url = f"{api_url}/questions"
+     submit_url = f"{api_url}/submit"
+
+     # 1. Instantiate Agent (modify this part to create your agent)
+     try:
+         agent = get_agent(model_id=MODEL_ID, model_temperature=0.7, agent_max_steps=15)
+     except Exception as e:
+         print(f"Error instantiating agent: {e}")
+         return f"Error initializing agent: {e}", None
+     # For an app running as a Hugging Face Space, this link points to your codebase (useful for others, so please keep it public)
+     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
+     print(agent_code)
+
+     # 2. Fetch Questions
+     print(f"Fetching questions from: {questions_url}")
+     try:
+         response = requests.get(questions_url, timeout=15)
+         response.raise_for_status()
+         questions_data = response.json()
+         if not questions_data:
+             print("Fetched questions list is empty.")
+             return "Fetched questions list is empty or invalid format.", None
+         print(f"Fetched {len(questions_data)} questions.")
+     except requests.exceptions.RequestException as e:
+         print(f"Error fetching questions: {e}")
+         return f"Error fetching questions: {e}", None
+     except requests.exceptions.JSONDecodeError as e:
+         print(f"Error decoding JSON response from questions endpoint: {e}")
+         print(f"Response text: {response.text[:500]}")
+         return f"Error decoding server response for questions: {e}", None
+     except Exception as e:
+         print(f"An unexpected error occurred fetching questions: {e}")
+         return f"An unexpected error occurred fetching questions: {e}", None
+
+     # 3. Run your Agent
+     results_log = []
+     answers_payload = []
+     print(f"Running agent on {len(questions_data)} questions...")
+     for item in questions_data:
+         task_id = item.get("task_id")
+         question_text = item.get("question")
+         if not task_id or question_text is None:
+             print(f"Skipping item with missing task_id or question: {item}")
+             continue
+         try:
+             submitted_answer = agent(question_text)
+             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
+             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
+         except Exception as e:
+             print(f"Error running agent on task {task_id}: {e}")
+             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
+
+     if not answers_payload:
+         print("Agent did not produce any answers to submit.")
+         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
+
+     # 4. Prepare Submission
+     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
+     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
+     print(status_update)
+
+     # 5. Submit
+     print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
+     try:
+         response = requests.post(submit_url, json=submission_data, timeout=60)
+         response.raise_for_status()
+         result_data = response.json()
+         final_status = (
+             f"Submission Successful!\n"
+             f"User: {result_data.get('username')}\n"
+             f"Overall Score: {result_data.get('score', 'N/A')}% "
+             f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
+             f"Message: {result_data.get('message', 'No message received.')}"
+         )
+         print("Submission successful.")
+         results_df = pd.DataFrame(results_log)
+         return final_status, results_df
+     except requests.exceptions.HTTPError as e:
+         error_detail = f"Server responded with status {e.response.status_code}."
+         try:
+             error_json = e.response.json()
+             error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
+         except requests.exceptions.JSONDecodeError:
+             error_detail += f" Response: {e.response.text[:500]}"
+         status_message = f"Submission Failed: {error_detail}"
+         print(status_message)
+         results_df = pd.DataFrame(results_log)
+         return status_message, results_df
+     except requests.exceptions.Timeout:
+         status_message = "Submission Failed: The request timed out."
+         print(status_message)
+         results_df = pd.DataFrame(results_log)
+         return status_message, results_df
+     except requests.exceptions.RequestException as e:
+         status_message = f"Submission Failed: Network error - {e}"
+         print(status_message)
+         results_df = pd.DataFrame(results_log)
+         return status_message, results_df
+     except Exception as e:
+         status_message = f"An unexpected error occurred during submission: {e}"
+         print(status_message)
+         results_df = pd.DataFrame(results_log)
+         return status_message, results_df
+
+
+ # --- Build Gradio Interface using Blocks ---
+ with gr.Blocks() as demo:
+     gr.Markdown("# Basic Agent Evaluation Runner")
+     gr.Markdown(
+         """
+         **Instructions:**
+
+         1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc.
+         2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
+         3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
+
+         ---
+         **Disclaimers:**
+         Once you click the submit button, it can take quite some time (this is the time the agent needs to go through all the questions).
+         This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance, to avoid the long wait on the submit button, you could cache the answers and submit them in a separate action, or answer the questions asynchronously.
+         """
+     )
+
+     gr.LoginButton()
+
+     run_button = gr.Button("Run Evaluation & Submit All Answers")
+
+     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
+     # Removed max_rows=10 from DataFrame constructor
+     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
+
+     run_button.click(
+         fn=run_and_submit_all,
+         outputs=[status_output, results_table]
+     )
+
+ if __name__ == "__main__":
+     print("\n" + "-"*30 + " App Starting " + "-"*30)
+     # Check for SPACE_HOST and SPACE_ID at startup for information
+     space_host_startup = os.getenv("SPACE_HOST")
+     space_id_startup = os.getenv("SPACE_ID")  # Get SPACE_ID at startup
+
+     if space_host_startup:
+         print(f"✅ SPACE_HOST found: {space_host_startup}")
+         print(f"   Runtime URL should be: https://{space_host_startup}.hf.space")
+     else:
+         print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
+
+     if space_id_startup:  # Print repo URLs if SPACE_ID is found
+         print(f"✅ SPACE_ID found: {space_id_startup}")
+         print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
+         print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
+     else:
+         print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
+
+     print("-"*(60 + len(" App Starting ")) + "\n")
+
+     print("Launching Gradio Interface for Basic Agent Evaluation...")
  demo.launch(debug=True, share=False)
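For clarity, a minimal sketch of the payload run_and_submit_all posts to the scoring endpoint, assuming a logged-in username and a single answered task; every value below is a placeholder, not real submission data.

import requests

DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

submission_data = {
    "username": "your-hf-username",  # placeholder; the app takes this from the OAuth profile
    "agent_code": "https://huggingface.co/spaces/your-space-id/tree/main",  # placeholder Space link
    "answers": [
        {"task_id": "example-task-id", "submitted_answer": "42"},  # placeholder answer
    ],
}

response = requests.post(f"{DEFAULT_API_URL}/submit", json=submission_data, timeout=60)
response.raise_for_status()
print(response.json().get("score", "N/A"))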
consts.py ADDED
@@ -0,0 +1,2 @@
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+ MODEL_ID = "gpt-4.1"
requirements.txt CHANGED
@@ -1,10 +1,10 @@
- gradio
- requests
- smolagents
- wikipedia-api
- smolagents[openai]
- pyyaml[resolver]
- transformers
- smolagents[transformers]
- smolagents[audio]
+ gradio
+ requests
+ smolagents
+ wikipedia-api
+ smolagents[openai]
+ pyyaml[resolver]
+ transformers
+ smolagents[transformers]
+ smolagents[audio]
  openpyxl
tools.py ADDED
@@ -0,0 +1,47 @@
+ # Libs
+ import os
+ import requests
+ import pandas as pd
+ from smolagents import Tool
+
+ # Local
+ from consts import DEFAULT_API_URL
+
+ class GetTaskFileTool(Tool):
+     name = "get_task_file_tool"
+     description = """This tool downloads the file associated with the given task_id, if it exists. Returns the absolute file path."""
+     inputs = {
+         "task_id": {"type": "string", "description": "Task id"},
+         "file_name": {"type": "string", "description": "File name"},
+     }
+     output_type = "string"
+
+     def forward(self, task_id: str, file_name: str) -> str:
+         response = requests.get(f"{DEFAULT_API_URL}/files/{task_id}", timeout=15)
+         response.raise_for_status()
+         with open(file_name, 'wb') as file:
+             file.write(response.content)
+         return os.path.abspath(file_name)
+
+ class LoadXlsxFileTool(Tool):
+     name = "load_xlsx_file_tool"
+     description = """This tool loads an xlsx file into a pandas DataFrame and returns it."""
+     inputs = {
+         "file_path": {"type": "string", "description": "File path"}
+     }
+     output_type = "object"
+
+     def forward(self, file_path: str) -> object:
+         return pd.read_excel(file_path)
+
+ class LoadTextFileTool(Tool):
+     name = "load_text_file_tool"
+     description = """This tool loads any text file and returns its content."""
+     inputs = {
+         "file_path": {"type": "string", "description": "File path"}
+     }
+     output_type = "string"
+
+     def forward(self, file_path: str) -> str:
+         with open(file_path, 'r', encoding='utf-8') as file:
+             return file.read()
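A minimal sketch of using these tools outside the agent, assuming a valid task_id from the /questions endpoint with a downloadable xlsx attachment; the id and file name below are hypothetical.

from tools import GetTaskFileTool, LoadXlsxFileTool, LoadTextFileTool

task_id = "example-task-id"    # hypothetical id, replace with a real one from /questions
file_name = "attachment.xlsx"  # hypothetical file name reported by the API

path = GetTaskFileTool().forward(task_id=task_id, file_name=file_name)  # downloads the file, returns its absolute path
df = LoadXlsxFileTool().forward(file_path=path)  # pandas DataFrame
print(df.head())

# For plain-text attachments, LoadTextFileTool returns the file content as a string:
# text = LoadTextFileTool().forward(file_path=path)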