pateas committed
Commit 89cdc9f · unverified · 1 Parent(s): 81917a3

initial commit 7/20

Files changed (8):
  1. .gitignore +21 -0
  2. agent.py +208 -0
  3. app.py +50 -29
  4. app_dev.py +133 -0
  5. fetch.py +139 -0
  6. pyproject.toml +20 -0
  7. requirements.txt +0 -0
  8. uv.lock +0 -0
.gitignore ADDED
@@ -0,0 +1,21 @@
+ *.csv
+ *.xlsx
+ *.py
+ *.png
+ *.jpg
+ *.jpeg
+ *.gif
+ *.mp3
+ *.wav
+ *.mp4
+ *.avi
+ *.mov
+ __pycache__/
+ *.pyc
+ *.pyo
+ *.pyd
+ *.db
+ *.env
+ *.envrc
+ *.log
+ .vscode/
agent.py ADDED
@@ -0,0 +1,208 @@
+ import base64
+ import logging
+ import os
+ from io import BytesIO
+ from typing import Any
+
+ from smolagents import (
+     CodeAgent,
+     DuckDuckGoSearchTool,
+     OpenAIServerModel,
+     SpeechToTextTool,
+     VisitWebpageTool,
+     WikipediaSearchTool,
+     tool,
+ )
+
+ system_prompt = """You are an AI Agent that is tasked to answer questions in a concise and accurate manner.
+ I will ask you a question and provide you with additional context if available.
+ Context can be in the form of Data(data), Code(code), Audio(audio), or Images(image_url).
+ Context is provided by specifying the content type followed by the content itself.
+
+ For example: code: print("Hello World") or Data: [1, 2, 3, 4, 5] or audio: [base64 encoded audio] or image_url: [base64 encoded image].
+
+ YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
+ DO NOT use formatting such as bold, italics, or code blocks in your final answer.
+ DO NOT use sources, references, or abbreviations in your final answer.
+ If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
+ If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
+ If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
+ If you are asked for a specific number format, follow the instructions carefully.
+ If you are asked for a number only answer with the number itself, without any additional text or formatting.
+ If you are asked for a string only answer with the string itself, without any additional text or formatting.
+ If you are asked for a list only answer with the list itself, without any additional text or formatting.
+
+ Finish your Answer with the following template:
+ FINAL ANSWER: [YOUR FINAL ANSWER]
+
+ For example, if the question is "What is the capital of France?", you should answer:
+ FINAL ANSWER: Paris
+
+ If the question is "What is 2 + 2?", you should answer:
+ FINAL ANSWER: 4
+
+ If the question is "What is 1 divided by 2, answer with 2 digits after the decimal point?", you should answer:
+ FINAL ANSWER: 0.50
+
+ Think step by step, and use the tools provided to gather information if necessary.
+ Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER].
+ """
+
+
+ # def is_correct_format(answer: str, _) -> bool:
+ #     """Check if the answer contains a final answer in the correct format.
+
+ #     Args:
+ #         answer: The answer to check.
+
+ #     Returns:
+ #         True if the answer contains a final answer, False otherwise.
+ #         This ensures the final output is in the correct format.
+ #     """
+ #     return (
+ #         "ANSWER:" in answer
+ #         or "FINAL ANSWER:" in answer
+ #         or "Answer:" in answer
+ #         or "Final Answer:" in answer
+ #         or "answer:" in answer
+ #         or "final answer:" in answer
+ #         or "answer:" in answer.lower()
+ #         or "final answer:" in answer.lower()
+ #     )
+
+
+ @tool
+ def wikipedia_suggested_page(query: str) -> str:
+     """Search Wikipedia for suggested pages based on the query.
+
+     Args:
+         query: The search query. The query should be coarse and not provide too many details.
+             E.g. "Python programming" or "Artificial Intelligence".
+
+     Returns:
+         A list of suggested page titles. Pages are \n separated.
+     """
+     from wikipedia import suggest
+
+     try:
+         return suggest(query)
+     except Exception as e:
+         logging.error(f"Error fetching Wikipedia suggestions for '{query}': {e}")
+         return f"Error fetching suggestions: {e}"
+
+
+ @tool
+ def wikipedia_page(title: str) -> str:
+     """Search Wikipedia for a page based on the title.
+
+     Args:
+         title: The title of the Wikipedia page to search for.
+
+     Returns:
+         The content of the Wikipedia page.
+     """
+     from wikipedia import page
+
+     try:
+         return page(title, auto_suggest=True).content
+     except Exception as e:
+         logging.error(f"Error fetching Wikipedia page for '{title}': {e}")
+         return f"Error fetching page: {e}"
+
+
+ class BasicAgent:
+     def __init__(self):
+         model = OpenAIServerModel(
+             model_id="gpt-4o-mini",
+             api_key=os.getenv("OPENAI_API_KEY"),
+         )
+         search = DuckDuckGoSearchTool(max_results=5)
+         # speech_to_text = SpeechToTextTool()
+         visitor = VisitWebpageTool(max_output_length=4000)
+         wiki_search = WikipediaSearchTool()
+         self.agent = CodeAgent(
+             max_steps=5,
+             verbosity_level=0,
+             tools=[
+                 search,
+                 # speech_to_text,
+                 visitor,
+                 wiki_search,
+                 wikipedia_suggested_page,
+                 wikipedia_page,
+             ],
+             model=model,
+             instructions=system_prompt,
+             additional_authorized_imports=["pandas", "numpy"],
+             use_structured_outputs_internally=True,
+             add_base_tools=True,
+             # final_answer_checks=[is_correct_format],
+         )
+         logging.info(
+             f"System prompt set for BasicAgent: {self.agent.memory.system_prompt}"
+         )
+
+     def __call__(self, question: str, content, content_type) -> Any:
+         match content_type:
+             case "xlsx":
+                 additional_args = {"data": content}
+             case "py":
+                 additional_args = {"code": content}
+             case "audio":
+                 additional_args = {"audio": content}
+             case "png":
+                 buffer = BytesIO()
+                 content.save(buffer, format="PNG")
+                 buffer.seek(0)
+                 image_content = (
+                     "data:image/png;base64,"
+                     + base64.b64encode(buffer.getvalue()).decode("utf-8")
+                 )
+                 additional_args = {"image_url": image_content}
+             case _:
+                 additional_args = None
+         response = self.agent.run(
+             question,
+             additional_args=additional_args,
+             images=[content] if content_type == "png" else None,
+             reset=True,
+         )
+         return response
+
+     @staticmethod
+     def formatting(answer: str) -> str:
+         """Extract the final answer from the response."""
+         if "FINAL ANSWER:" in answer:
+             answer = answer.split("FINAL ANSWER:")[-1].strip()
+         if "ANSWER:" in answer:
+             answer = answer.split("ANSWER:")[-1].strip()
+         if "Answer:" in answer:
+             answer = answer.split("Answer:")[-1].strip()
+         if "Final Answer:" in answer:
+             answer = answer.split("Final Answer:")[-1].strip()
+         if "answer:" in answer.lower():
+             answer = answer.split("answer:")[-1].strip()
+         if "final answer:" in answer.lower():
+             answer = answer.split("final answer:")[-1].strip()
+         if "answer is:" in answer.lower():
+             answer = answer.split("answer is:")[-1].strip()
+         if "is:" in answer.lower():
+             answer = answer.split("is:")[-1].strip()
+         if "**" in answer:
+             answer = answer.split("**")[-1].strip().replace("**", "")
+         if "```" in answer:
+             answer = answer.split("```")[-1].strip().replace("```", "")
+         if "```python" in answer:
+             answer = answer.split("```python")[-1].strip().replace("```", "")
+         if "```json" in answer:
+             answer = answer.split("```json")[-1].strip().replace("```", "")
+         if "```yaml" in answer:
+             answer = answer.split("```yaml")[-1].strip().replace("```", "")
+         if "```txt" in answer:
+             answer = answer.split("```txt")[-1].strip().replace("```", "")
+         answer = answer.capitalize()
+
+         answer = answer.replace('"', '').strip()
+         answer = answer.replace("'", "").strip()
+         answer = answer.replace("[", "").replace("]", "").strip()
+         return answer.strip()  # Fallback to return the whole answer if no specific format found
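
For orientation, a minimal local smoke test of the class above (a sketch only, not committed code): it assumes `OPENAI_API_KEY` is exported and the dependencies from `pyproject.toml` are installed; the `content`/`content_type` arguments mirror what `fetch.py` attaches to each question.

```python
# Hypothetical local check of agent.py (sketch, not part of the commit).
from agent import BasicAgent

agent = BasicAgent()
# No attached file for this question, so content/content_type stay None.
raw = agent("What is the capital of France?", content=None, content_type=None)
print(BasicAgent.formatting(str(raw)))  # expected to print something like: Paris
```
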
app.py CHANGED
@@ -1,34 +1,38 @@
+ import inspect
  import os
+
  import gradio as gr
- import requests
- import inspect
  import pandas as pd
+ import requests

  # (Keep Constants as is)
  # --- Constants ---
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

+
  # --- Basic Agent Definition ---
  # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
  class BasicAgent:
      def __init__(self):
          print("BasicAgent initialized.")
+
      def __call__(self, question: str) -> str:
          print(f"Agent received question (first 50 chars): {question[:50]}...")
          fixed_answer = "This is a default answer."
          print(f"Agent returning fixed answer: {fixed_answer}")
          return fixed_answer

- def run_and_submit_all( profile: gr.OAuthProfile | None):
+
+ def run_and_submit_all(profile: gr.OAuthProfile | None):
      """
      Fetches all questions, runs the BasicAgent on them, submits all answers,
      and displays the results.
      """
      # --- Determine HF Space Runtime URL and Repo URL ---
-     space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
+     space_id = os.getenv("SPACE_ID")  # Get the SPACE_ID for sending link to the code

      if profile:
-         username= f"{profile.username}"
+         username = f"{profile.username}"
          print(f"User logged in: {username}")
      else:
          print("User not logged in.")
@@ -55,16 +59,12 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
          response.raise_for_status()
          questions_data = response.json()
          if not questions_data:
-              print("Fetched questions list is empty.")
-              return "Fetched questions list is empty or invalid format.", None
+             print("Fetched questions list is empty.")
+             return "Fetched questions list is empty or invalid format.", None
          print(f"Fetched {len(questions_data)} questions.")
      except requests.exceptions.RequestException as e:
          print(f"Error fetching questions: {e}")
          return f"Error fetching questions: {e}", None
-     except requests.exceptions.JSONDecodeError as e:
-         print(f"Error decoding JSON response from questions endpoint: {e}")
-         print(f"Response text: {response.text[:500]}")
-         return f"Error decoding server response for questions: {e}", None
      except Exception as e:
          print(f"An unexpected error occurred fetching questions: {e}")
          return f"An unexpected error occurred fetching questions: {e}", None
@@ -81,18 +81,36 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
              continue
          try:
              submitted_answer = agent(question_text)
-             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
-             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
+             answers_payload.append(
+                 {"task_id": task_id, "submitted_answer": submitted_answer}
+             )
+             results_log.append(
+                 {
+                     "Task ID": task_id,
+                     "Question": question_text,
+                     "Submitted Answer": submitted_answer,
+                 }
+             )
          except Exception as e:
-              print(f"Error running agent on task {task_id}: {e}")
-              results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
+             print(f"Error running agent on task {task_id}: {e}")
+             results_log.append(
+                 {
+                     "Task ID": task_id,
+                     "Question": question_text,
+                     "Submitted Answer": f"AGENT ERROR: {e}",
+                 }
+             )

      if not answers_payload:
          print("Agent did not produce any answers to submit.")
          return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

-     # 4. Prepare Submission
-     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
+     # 4. Prepare Submission
+     submission_data = {
+         "username": username.strip(),
+         "agent_code": agent_code,
+         "answers": answers_payload,
+     }
      status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
      print(status_update)

@@ -162,20 +180,19 @@ with gr.Blocks() as demo:

      run_button = gr.Button("Run Evaluation & Submit All Answers")

-     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
+     status_output = gr.Textbox(
+         label="Run Status / Submission Result", lines=5, interactive=False
+     )
      # Removed max_rows=10 from DataFrame constructor
      results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

-     run_button.click(
-         fn=run_and_submit_all,
-         outputs=[status_output, results_table]
-     )
+     run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])

  if __name__ == "__main__":
-     print("\n" + "-"*30 + " App Starting " + "-"*30)
+     print("\n" + "-" * 30 + " App Starting " + "-" * 30)
      # Check for SPACE_HOST and SPACE_ID at startup for information
      space_host_startup = os.getenv("SPACE_HOST")
-     space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
+     space_id_startup = os.getenv("SPACE_ID")  # Get SPACE_ID at startup

      if space_host_startup:
          print(f"✅ SPACE_HOST found: {space_host_startup}")
@@ -183,14 +200,18 @@ if __name__ == "__main__":
      else:
          print("ℹ️ SPACE_HOST environment variable not found (running locally?).")

-     if space_id_startup: # Print repo URLs if SPACE_ID is found
+     if space_id_startup:  # Print repo URLs if SPACE_ID is found
          print(f"✅ SPACE_ID found: {space_id_startup}")
          print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
-         print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
+         print(
+             f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main"
+         )
      else:
-         print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
+         print(
+             "ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined."
+         )

-     print("-"*(60 + len(" App Starting ")) + "\n")
+     print("-" * (60 + len(" App Starting ")) + "\n")

      print("Launching Gradio Interface for Basic Agent Evaluation...")
-     demo.launch(debug=True, share=False)
+     demo.launch(debug=True, share=False)
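
The reworked `run_and_submit_all(profile: gr.OAuthProfile | None)` signature leans on Gradio's OAuth injection: because the parameter is typed `gr.OAuthProfile`, the click handler receives the logged-in profile even though it is not listed in `inputs`. A stand-alone sketch of that pattern, assuming a Space with OAuth enabled (locally, Gradio mocks the login):

```python
# Sketch of the OAuth wiring used by app.py (illustrative, not part of the commit).
import gradio as gr


def whoami(profile: gr.OAuthProfile | None) -> str:
    # Gradio fills in `profile` automatically when the visitor has logged in
    # via gr.LoginButton(); it is None otherwise.
    return f"Logged in as {profile.username}" if profile else "Please log in first."


with gr.Blocks() as demo:
    gr.LoginButton()
    check_button = gr.Button("Who am I?")
    status = gr.Textbox(label="Status", interactive=False)
    # No explicit inputs: the OAuth profile is supplied from the session.
    check_button.click(fn=whoami, outputs=status)

if __name__ == "__main__":
    demo.launch()
```
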
app_dev.py ADDED
@@ -0,0 +1,133 @@
+ import os
+
+ import gradio as gr
+ import pandas as pd
+ import requests
+ from langfuse import get_client
+ from openinference.instrumentation.smolagents import SmolagentsInstrumentor
+
+ from agent import BasicAgent
+ from fetch import DEFAULT_API_URL, fetch_questions, run_agent
+
+ submit_url = f"{DEFAULT_API_URL}/submit"
+
+ langfuse = get_client()
+
+ # Verify connection
+ if langfuse.auth_check():
+     print("Langfuse client is authenticated and ready!")
+ else:
+     print("Authentication failed. Please check your credentials and host.")
+
+ SmolagentsInstrumentor().instrument()
+ def run_and_submit_all(profile: gr.OAuthProfile | None):
+     """
+     Fetches all questions, runs the BasicAgent on them, submits all answers,
+     and displays the results.
+     """
+     # --- Determine HF Space Runtime URL and Repo URL ---
+     space_id = os.getenv("SPACE_ID")  # Get the SPACE_ID for sending link to the code
+
+     if profile:
+         username = f"{profile.username}"
+         print(f"User logged in: {username}")
+     else:
+         print("User not logged in.")
+         return "Please Login to Hugging Face with the button.", None
+     # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
+     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
+     print(agent_code)
+     agent = BasicAgent()
+     questions_data = fetch_questions()
+     answers_payload, results_log = run_agent(agent, questions_data)
+     # 4. Prepare Submission
+     submission_data = {
+         "username": username.strip(),
+         "agent_code": agent_code,
+         "answers": answers_payload,
+     }
+     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
+     print(status_update)
+
+     # 5. Submit
+     print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
+     try:
+         response = requests.post(submit_url, json=submission_data, timeout=60)
+         response.raise_for_status()
+         result_data = response.json()
+         final_status = (
+             f"Submission Successful!\n"
+             f"User: {result_data.get('username')}\n"
+             f"Overall Score: {result_data.get('score', 'N/A')}% "
+             f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
+             f"Message: {result_data.get('message', 'No message received.')}"
+         )
+         print("Submission successful.")
+         results_df = pd.DataFrame(results_log)
+         return final_status, results_df
+     except requests.exceptions.HTTPError as e:
+         error_detail = f"Server responded with status {e.response.status_code}."
+         try:
+             error_json = e.response.json()
+             error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
+         except requests.exceptions.JSONDecodeError:
+             error_detail += f" Response: {e.response.text[:500]}"
+         status_message = f"Submission Failed: {error_detail}"
+         print(status_message)
+         results_df = pd.DataFrame(results_log)
+         return status_message, results_df
+     except requests.exceptions.Timeout:
+         status_message = "Submission Failed: The request timed out."
+         print(status_message)
+         results_df = pd.DataFrame(results_log)
+         return status_message, results_df
+     except requests.exceptions.RequestException as e:
+         status_message = f"Submission Failed: Network error - {e}"
+         print(status_message)
+         results_df = pd.DataFrame(results_log)
+         return status_message, results_df
+     except Exception as e:
+         status_message = f"An unexpected error occurred during submission: {e}"
+         print(status_message)
+         results_df = pd.DataFrame(results_log)
+         return status_message, results_df
+
+
+ # --- Build Gradio Interface using Blocks ---
+ with gr.Blocks() as demo:
+     gr.Markdown("# Basic Agent Evaluation Runner")
+     gr.Markdown(
+         """
+         **Instructions:**
+
+         1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
+         2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
+         3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
+
+         ---
+         **Disclaimers:**
+         Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
+         This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
+         """
+     )
+
+     gr.LoginButton()
+
+     run_button = gr.Button("Run Evaluation & Submit All Answers")
+
+     status_output = gr.Textbox(
+         label="Run Status / Submission Result", lines=5, interactive=False
+     )
+     # Removed max_rows=10 from DataFrame constructor
+     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
+
+     run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
+
+
+ if __name__ == "__main__":
+     print("\n" + "-" * 30 + " App Starting " + "-" * 30)
+     # Check for SPACE_HOST and SPACE_ID at startup for information
+     space_host_startup = os.getenv("SPACE_HOST")
+     space_id_startup = os.getenv("SPACE_ID")  # Get SPACE_ID at startup
+
+     demo.launch(debug=True, share=False)
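
`get_client()` and `auth_check()` above read their credentials from the environment; nothing in this file passes keys explicitly. A sketch of the variables a run of `app_dev.py` presumably needs (standard Langfuse settings; the values below are placeholders, not real keys):

```python
# Placeholder environment for the Langfuse client used in app_dev.py (a sketch;
# set real keys in the Space secrets or a local .env instead of hard-coding them).
import os

os.environ.setdefault("LANGFUSE_PUBLIC_KEY", "pk-lf-<your-public-key>")
os.environ.setdefault("LANGFUSE_SECRET_KEY", "sk-lf-<your-secret-key>")
os.environ.setdefault("LANGFUSE_HOST", "https://cloud.langfuse.com")
os.environ.setdefault("OPENAI_API_KEY", "<your-openai-key>")  # used by agent.py

from langfuse import get_client

langfuse = get_client()
print("auth ok" if langfuse.auth_check() else "auth failed")
```
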
fetch.py ADDED
@@ -0,0 +1,139 @@
+ import logging
+ from io import BytesIO
+
+ import pandas as pd
+ import requests
+ from PIL import Image
+
+ from agent import BasicAgent
+
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+
+ questions_url = f"{DEFAULT_API_URL}/questions"
+ files_url = f"{DEFAULT_API_URL}/files"
+
+
+ def fetch_questions():
+     """
+     Fetches questions from the API.
+     Returns a list of questions or an error message.
+     """
+     try:
+         response = requests.get(questions_url, timeout=15)
+         response.raise_for_status()
+         questions_data = response.json()
+         if not questions_data:
+             logging.warning("Fetched questions list is empty.")
+             return None
+         logging.info(f"Fetched {len(questions_data)} questions.")
+
+         for question in questions_data:
+             content, content_type = _load_files(question)
+             if content is not None:
+                 question["file_content"] = content
+                 question["file_type"] = content_type
+         return questions_data
+     except requests.exceptions.RequestException as e:
+         logging.error(f"Error fetching questions: {e}")
+         return None
+     except Exception as e:
+         logging.error(f"An unexpected error occurred fetching questions: {e}")
+         return None
+     return questions_data
+
+
+ def _load_files(question_data: dict):
+     if file_name := question_data.get("file_name"):
+         extension = file_name.split(".")[-1]
+         if extension not in ["xlsx", "png", "py", "mp3", "wav"]:
+             logging.warning(
+                 f"File {file_name} has an unsupported extension. Skipping file loading."
+             )
+             return None, None  # Ensure a tuple is always returned
+         if task_id := question_data.get("task_id"):
+             try:
+                 if extension == "mp3" or extension == "wav":
+                     return f"{files_url}/{task_id}", "audio"
+                 response = requests.get(f"{files_url}/{task_id}", timeout=15)
+                 response.raise_for_status()
+                 if response.status_code == 200:
+                     # extensions: xlsx, png, py, else ignore
+                     match extension:
+                         case "xlsx":
+                             if (
+                                 response.headers.get("Content-Type")
+                                 == "application/octet-stream"
+                             ):
+                                 logging.info(f"Processing Excel file: {file_name}")
+                                 return pd.read_excel(response.content).to_json(), "xlsx"
+                         case "png":
+                             if response.headers.get("Content-Type") == "image/png":
+                                 logging.info(f"Processing image file: {file_name}")
+                                 return Image.open(BytesIO(response.content)).convert("RGB"), "png"
+                         case "py":
+                             if response.headers.get("Content-Type", "").startswith(
+                                 "text/x-python"
+                             ):
+                                 logging.info(f"Processing Python file: {file_name}")
+                                 return response.content.decode(
+                                     "utf-8"
+                                 ), "py"  # Load Python file if needed
+             except requests.exceptions.RequestException as e:
+                 logging.error(f"Error fetching file for task {task_id}: {e}")
+                 raise e
+             except Exception as e:
+                 logging.error(
+                     f"An unexpected error occurred fetching file for task {task_id}: {e}"
+                 )
+                 raise e
+             return None, None
+     return None, None  # Always return a tuple
+
+
+ def run_agent(agent, questions_data):
+     results_log = []
+     answers_payload = []
+     logging.info(f"Running agent on {len(questions_data)} questions...")
+     for item in questions_data:
+         payload, log_item = run_agent_on_question(agent, item)
+         if payload is not None:
+             answers_payload.append(payload)
+         if log_item is not None:
+             results_log.append(log_item)
+     if not answers_payload:
+         logging.info("Agent did not produce any answers to submit.")
+         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
+     return answers_payload, pd.DataFrame(results_log)
+
+
+ def run_agent_on_question(agent: BasicAgent, question):
+     """
+     Runs the agent on a single question and returns the answer.
+     """
+     task_id = question.get("task_id")
+     question_text = question.get("question")
+     content = question.get("file_content")
+     content_type = question.get("file_type")
+     if not task_id or question_text is None:
+         logging.warning(f"Skipping item with missing task_id or question: {question}")
+         return None, None
+     try:
+         submitted_answer = agent(question_text, content=content, content_type=content_type)
+         return (
+             {"task_id": task_id, "submitted_answer": submitted_answer},
+             {
+                 "Task ID": task_id,
+                 "Question": question_text,
+                 "Submitted Answer": submitted_answer,
+             },
+         )
+     except Exception as e:
+         logging.error(f"Error running agent on task {task_id}: {e}")
+         return (
+             {
+                 "Task ID": task_id,
+                 "Question": question_text,
+                 "Submitted Answer": f"AGENT ERROR: {e}",
+             },
+             None,
+         )
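
`run_agent()` and `run_agent_on_question()` only require a callable with the `(question, content=..., content_type=...)` signature, so the contract can be exercised offline with a stub standing in for `BasicAgent`. A sketch under that assumption, with the project dependencies installed since `fetch.py` imports `agent.py` (the real flow feeds in `fetch_questions()` output instead):

```python
# Offline sketch of the run_agent() contract with a stub agent (illustration only).
from fetch import run_agent


class EchoAgent:
    def __call__(self, question, content=None, content_type=None):
        # Pretend to answer by echoing part of the question.
        return f"FINAL ANSWER: {question[:20]}"


questions = [
    {"task_id": "demo-1", "question": "What is 2 + 2?"},
    {"task_id": "demo-2", "question": "Name the capital of France."},
]
answers_payload, results_df = run_agent(EchoAgent(), questions)
print(answers_payload)  # [{'task_id': 'demo-1', 'submitted_answer': ...}, ...]
print(results_df[["Task ID", "Submitted Answer"]])
```
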
pyproject.toml ADDED
@@ -0,0 +1,20 @@
+ [project]
+ name = "final-assignment-template"
+ version = "0.1.0"
+ description = "Add your description here"
+ readme = "README.md"
+ requires-python = ">=3.12"
+ dependencies = [
+     "chess>=1.11.2",
+     "google-genai>=1.23.0",
+     "gradio[oauth]>=5.34.1",
+     "langfuse>=3.0.7",
+     "openpyxl>=3.1.5",
+     "pandas>=2.3.0",
+     "requests>=2.32.4",
+     "ruff>=0.12.0",
+     "smolagents[audio,litellm,openai,telemetry,toolkit,transformers]>=1.18.0",
+     "transformers>=4.53.0",
+     "wikipedia>=1.4.0",
+     "wikipedia-api>=0.8.1",
+ ]
requirements.txt CHANGED
The diff for this file is too large to render. See raw diff
 
uv.lock ADDED
The diff for this file is too large to render. See raw diff