Yago Bolivar committed on
Commit
b785644
·
1 Parent(s): 42ae606

feat: implement run_and_submit_all function for agent evaluation and submission with Gradio UI integration

Browse files
Files changed (1) hide show
  1. app.py +173 -15
app.py CHANGED
@@ -1,6 +1,11 @@
1
  from smolagents import CodeAgent, HfApiModel
2
  from Gradio_UI import GradioUI
3
  import yaml
 
 
 
 
 
4
 
5
  # Import tool CLASSES from the src directory
6
  from src.final_answer_tool import FinalAnswerTool
@@ -82,21 +87,174 @@ agent = EnhancedCodeAgent(
82
  prompt_templates=prompt_templates
83
  )
84
 
85
- # Launch the Gradio UI
86
- if __name__ == '__main__':
87
- print("Launching Gradio UI...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  try:
89
- ui = GradioUI(agent)
90
- ui.launch()
 
91
  except Exception as e:
92
- print(f"Failed to launch Gradio UI: {e}")
93
- print("Attempting to run agent with a sample query directly...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  try:
95
- # This is a placeholder for a direct test if Gradio fails.
96
- # You might want to load a question from common_questions.json here.
97
- sample_query = "What is the capital of France and reverse the answer?"
98
- print(f"Testing agent with query: {sample_query}")
99
- response = agent.run(sample_query)
100
- print(f"Agent response: {response}")
101
- except Exception as agent_e:
102
- print(f"Error running agent directly: {agent_e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from smolagents import CodeAgent, HfApiModel
2
  from Gradio_UI import GradioUI
3
  import yaml
4
+ import os
5
+ import requests
6
+ import pandas as pd
7
+ import gradio as gr
8
+ import time
9
 
10
  # Import tool CLASSES from the src directory
11
  from src.final_answer_tool import FinalAnswerTool
 
87
  prompt_templates=prompt_templates
88
  )
89
 
90
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetch all questions, run the agent on them, submit the answers, and
    report the result.

    Relies on the module-level ``agent`` and ``DEFAULT_API_URL`` names, which
    are expected to be defined elsewhere in this file.

    Args:
        profile: OAuth profile of the logged-in Hugging Face user, or None
            when nobody is logged in. Its ``username`` is sent with the
            submission.

    Returns:
        A ``(status_message, results)`` tuple. ``status_message`` is a
        human-readable string describing success or the failure that stopped
        the run. ``results`` is a ``pandas.DataFrame`` with one row per
        attempted question (Task ID, Question, Submitted Answer), or None
        when the run stops before any question is attempted.
    """
    # NOTE(review): when SPACE_ID is unset (e.g. a local run) the agent_code
    # URL below will contain the literal "None".
    space_id = os.getenv("SPACE_ID")

    if profile:
        username = f"{profile.username}"
        print(f"User logged in: {username}")
    else:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Use existing agent
    try:
        # agent is already instantiated globally
        if not agent:
            return "Error: Agent not initialized", None
    except Exception as e:
        print(f"Error accessing agent: {e}")
        return f"Error accessing agent: {e}", None

    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(agent_code)

    # 2. Fetch Questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        # The loop below iterates entries and calls .get() on each, so
        # anything other than a non-empty list is an invalid payload.
        if not isinstance(questions_data, list) or not questions_data:
            print("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    except requests.exceptions.JSONDecodeError as e:
        # Must come before RequestException: requests' JSONDecodeError is a
        # subclass of it, so the broader handler would otherwise shadow this.
        print(f"Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")
        return f"Error decoding server response for questions: {e}", None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None

    # 3. Run your Agent
    results_log = []
    answers_payload = []
    print(f"Running agent on {len(questions_data)} questions...")
    for item in questions_data:
        if not isinstance(item, dict):
            print(f"Skipping malformed question entry: {item}")
            continue

        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue

        try:
            submitted_answer = agent(question_text)
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
        except Exception as e:
            # A failing question is logged in the results table but is not
            # submitted; the remaining questions still run.
            print(f"Error running agent on task {task_id}: {e}")
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})

    if not answers_payload:
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # 4. Prepare Submission
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)

    # 5. Submit
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        status_message = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()
            if isinstance(error_json, dict):
                detail = error_json.get('detail', e.response.text)
            else:
                # Valid JSON that is not an object has no 'detail' key.
                detail = e.response.text
            error_detail += f" Detail: {detail}"
        except requests.exceptions.JSONDecodeError:
            error_detail += f" Response: {e.response.text[:500]}"
        status_message = f"Submission Failed: {error_detail}"
        print(status_message)
    except requests.exceptions.Timeout:
        status_message = "Submission Failed: The request timed out."
        print(status_message)
    except requests.exceptions.RequestException as e:
        status_message = f"Submission Failed: Network error - {e}"
        print(status_message)
    except Exception as e:
        status_message = f"An unexpected error occurred during submission: {e}"
        print(status_message)

    # Every submission outcome returns the same per-question table.
    return status_message, pd.DataFrame(results_log)
211
+
212
# Launch the Gradio UI
if __name__ == '__main__':
    # Startup banner: report the Space-related environment variables, then
    # build the evaluation UI and serve it.
    print("\n" + "-"*30 + " App Starting " + "-"*30)
    # Check for SPACE_HOST and SPACE_ID at startup for information
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")

    if space_host_startup:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        # NOTE(review): the Hugging Face Spaces docs describe SPACE_HOST as
        # the full hostname (already ending in ".hf.space"); append the
        # suffix only when it is missing so the printed URL is not doubled.
        if space_host_startup.endswith(".hf.space"):
            runtime_host = space_host_startup
        else:
            runtime_host = f"{space_host_startup}.hf.space"
        print(f" Runtime URL should be: https://{runtime_host}")
    else:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")

    if space_id_startup:
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
    else:
        print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")

    print("-"*(60 + len(" App Starting ")) + "\n")

    # Build Gradio Interface using Blocks
    with gr.Blocks() as demo:
        gr.Markdown("# Enhanced Agent Evaluation Runner")
        gr.Markdown(
            """
            **Instructions:**
            1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc...
            2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
            3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
            ---
            **Disclaimers:**
            Once clicking on the "submit button, it can take quite some time (this is the time for the agent to go through all the questions).
            """
        )

        gr.LoginButton()
        run_button = gr.Button("Run Evaluation & Submit All Answers")
        status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
        results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

        # The handler's two return values map onto the status box and the
        # results table, in that order.
        run_button.click(
            fn=run_and_submit_all,
            outputs=[status_output, results_table]
        )

    print("Launching Gradio Interface...")
    demo.launch(debug=True, share=False)