tatianija committed on
Commit
3164d5a
·
verified ·
1 Parent(s): ec0fdee

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +268 -113
app.py CHANGED
@@ -5,14 +5,19 @@ import inspect
5
  import time
6
  import pandas as pd
7
  from smolagents import DuckDuckGoSearchTool
8
- # (Keep Constants as is)
 
 
 
9
  # --- Constants ---
10
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
 
12
- # --- Basic Agent Definition ---
13
- # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
14
-
 
15
 
 
16
  class BasicAgent:
17
  def __init__(self, debug: bool = False):
18
  self.search = DuckDuckGoSearchTool()
@@ -32,7 +37,7 @@ class BasicAgent:
32
  time.sleep(1)
33
  results = self.search(question)
34
 
35
- # Use truthiness check and early return
36
  if not results:
37
  return "No results found for that query."
38
 
@@ -62,89 +67,180 @@ class BasicAgent:
62
 
63
  return answer
64
 
65
- def run_and_submit_all( profile: gr.OAuthProfile | None):
66
  """
67
- Fetches all questions, runs the BasicAgent on them, submits all answers,
68
- and displays the results.
69
  """
70
- # --- Determine HF Space Runtime URL and Repo URL ---
71
- space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
72
-
73
- if profile:
74
- username= f"{profile.username}"
75
- print(f"User logged in: {username}")
76
- else:
77
- print("User not logged in.")
78
- return "Please Login to Hugging Face with the button.", None
79
-
80
  api_url = DEFAULT_API_URL
81
  questions_url = f"{api_url}/questions"
82
- submit_url = f"{api_url}/submit"
83
-
84
- # 1. Instantiate Agent ( modify this part to create your agent)
85
- try:
86
- agent = BasicAgent()
87
- except Exception as e:
88
- print(f"Error instantiating agent: {e}")
89
- return f"Error initializing agent: {e}", None
90
- # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
91
- agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
92
- print(agent_code)
93
-
94
- # 2. Fetch Questions
95
  print(f"Fetching questions from: {questions_url}")
96
  try:
97
  response = requests.get(questions_url, timeout=15)
98
  response.raise_for_status()
99
  questions_data = response.json()
 
100
  if not questions_data:
101
- print("Fetched questions list is empty.")
102
- return "Fetched questions list is empty or invalid format.", None
103
- print(f"Fetched {len(questions_data)} questions.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  except requests.exceptions.RequestException as e:
105
- print(f"Error fetching questions: {e}")
106
  return f"Error fetching questions: {e}", None
107
- except requests.exceptions.JSONDecodeError as e:
108
- print(f"Error decoding JSON response from questions endpoint: {e}")
109
- print(f"Response text: {response.text[:500]}")
110
- return f"Error decoding server response for questions: {e}", None
111
  except Exception as e:
112
- print(f"An unexpected error occurred fetching questions: {e}")
113
- return f"An unexpected error occurred fetching questions: {e}", None
114
-
115
- # 3. Run your Agent
116
- results_log = []
117
- answers_payload = []
118
- print(f"Running agent on {len(questions_data)} questions...")
119
- for item in questions_data:
120
- task_id = item.get("task_id")
121
- question_text = item.get("question")
122
- if not task_id or question_text is None:
123
- print(f"Skipping item with missing task_id or question: {item}")
124
- continue
125
- try:
126
- submitted_answer = agent(question_text)
127
- answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
128
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
129
- except Exception as e:
130
- print(f"Error running agent on task {task_id}: {e}")
131
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
132
 
133
- if not answers_payload:
134
- print("Agent did not produce any answers to submit.")
135
- return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
 
137
- # 4. Prepare Submission
138
- submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
139
- status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
140
- print(status_update)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
 
142
- # 5. Submit
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
 
144
  try:
145
  response = requests.post(submit_url, json=submission_data, timeout=60)
146
  response.raise_for_status()
147
  result_data = response.json()
 
148
  final_status = (
149
  f"Submission Successful!\n"
150
  f"User: {result_data.get('username')}\n"
@@ -152,73 +248,132 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
152
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
153
  f"Message: {result_data.get('message', 'No message received.')}"
154
  )
155
- print("Submission successful.")
 
 
 
 
 
 
 
 
 
156
  results_df = pd.DataFrame(results_log)
157
  return final_status, results_df
 
158
  except requests.exceptions.HTTPError as e:
159
  error_detail = f"Server responded with status {e.response.status_code}."
160
  try:
161
  error_json = e.response.json()
162
  error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
163
- except requests.exceptions.JSONDecodeError:
164
  error_detail += f" Response: {e.response.text[:500]}"
165
- status_message = f"Submission Failed: {error_detail}"
166
- print(status_message)
167
- results_df = pd.DataFrame(results_log)
168
- return status_message, results_df
169
  except requests.exceptions.Timeout:
170
- status_message = "Submission Failed: The request timed out."
171
- print(status_message)
172
- results_df = pd.DataFrame(results_log)
173
- return status_message, results_df
174
- except requests.exceptions.RequestException as e:
175
- status_message = f"Submission Failed: Network error - {e}"
176
- print(status_message)
177
- results_df = pd.DataFrame(results_log)
178
- return status_message, results_df
179
  except Exception as e:
180
- status_message = f"An unexpected error occurred during submission: {e}"
181
- print(status_message)
182
- results_df = pd.DataFrame(results_log)
183
- return status_message, results_df
184
-
185
 
186
- # --- Build Gradio Interface using Blocks ---
187
- with gr.Blocks() as demo:
188
- gr.Markdown("# Basic Agent Evaluation Runner")
 
 
 
 
 
 
 
 
 
 
189
  gr.Markdown(
190
  """
191
- **Instructions:**
192
-
193
- 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
194
- 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
195
- 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
196
-
 
 
 
 
 
 
 
 
 
197
  ---
198
- **Disclaimers:**
199
- Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
200
- This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
201
  """
202
  )
203
 
204
- gr.LoginButton()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
205
 
206
- run_button = gr.Button("Run Evaluation & Submit All Answers")
 
 
 
 
 
 
 
 
 
207
 
208
- status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
209
- # Removed max_rows=10 from DataFrame constructor
210
- results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
 
 
211
 
212
- run_button.click(
213
- fn=run_and_submit_all,
214
- outputs=[status_output, results_table]
 
 
215
  )
216
 
217
  if __name__ == "__main__":
218
- print("\n" + "-"*30 + " App Starting " + "-"*30)
219
- # Check for SPACE_HOST and SPACE_ID at startup for information
220
  space_host_startup = os.getenv("SPACE_HOST")
221
- space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
222
 
223
  if space_host_startup:
224
  print(f"✅ SPACE_HOST found: {space_host_startup}")
@@ -226,14 +381,14 @@ if __name__ == "__main__":
226
  else:
227
  print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
228
 
229
- if space_id_startup: # Print repo URLs if SPACE_ID is found
230
  print(f"✅ SPACE_ID found: {space_id_startup}")
231
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
232
  print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
233
  else:
234
  print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
235
 
236
- print("-"*(60 + len(" App Starting ")) + "\n")
237
 
238
- print("Launching Gradio Interface for Basic Agent Evaluation...")
239
  demo.launch(debug=True, share=False)
 
5
  import time
6
  import pandas as pd
7
  from smolagents import DuckDuckGoSearchTool
8
+ import threading
9
+ from typing import Dict, List, Optional, Tuple
10
+ import json
11
+
12
  # --- Constants ---
13
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
14
 
15
+ # --- Global Cache for Answers ---
16
+ cached_answers = {}
17
+ cached_questions = []
18
+ processing_status = {"is_processing": False, "progress": 0, "total": 0}
19
 
20
+ # --- Basic Agent Definition ---
21
  class BasicAgent:
22
  def __init__(self, debug: bool = False):
23
  self.search = DuckDuckGoSearchTool()
 
37
  time.sleep(1)
38
  results = self.search(question)
39
 
40
+ # Use truthiness check and early return
41
  if not results:
42
  return "No results found for that query."
43
 
 
67
 
68
  return answer
69
 
70
def fetch_questions() -> Tuple[str, Optional[pd.DataFrame]]:
    """Fetch the evaluation questions from the scoring API and cache them.

    Sends a GET request to ``{DEFAULT_API_URL}/questions``. When the response
    is a non-empty list of JSON objects, it replaces the module-level
    ``cached_questions`` and a two-column table (``Task ID``, ``Question``)
    is built for display.

    Returns:
        A ``(status_message, dataframe)`` pair. ``dataframe`` is ``None``
        when the request fails or the payload is empty or malformed; in all
        of those cases ``cached_questions`` is left as it was.
    """
    global cached_questions

    questions_url = f"{DEFAULT_API_URL}/questions"
    print(f"Fetching questions from: {questions_url}")

    # Keep the try body limited to the calls that can actually fail on I/O
    # or decoding, so unrelated bugs are not reported as fetch errors.
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    except requests.exceptions.RequestException as e:
        return f"Error fetching questions: {e}", None
    except Exception as e:
        return f"An unexpected error occurred: {e}", None

    if not questions_data:
        return "Fetched questions list is empty.", None

    # Validate BEFORE caching: the generation step calls ``item.get`` on
    # every cached entry, so a malformed payload must never reach the cache.
    if not isinstance(questions_data, list) or not all(
        isinstance(item, dict) for item in questions_data
    ):
        return (
            "Unexpected response format: expected a list of question objects.",
            None,
        )

    display_rows = [
        {
            "Task ID": item.get("task_id", "Unknown"),
            "Question": item.get("question", ""),
        }
        for item in questions_data
    ]
    df = pd.DataFrame(display_rows)

    cached_questions = questions_data
    status_msg = (
        f"Successfully fetched {len(questions_data)} questions. "
        "Ready to generate answers."
    )
    return status_msg, df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
 
108
def generate_answers_async(progress_callback=None):
    """Run the agent over every cached question and store its answers.

    Despite the name this is a plain blocking function; it is meant to be
    used as the target of a background thread. Progress is published through
    the module-level ``processing_status`` dict and results through the
    module-level ``cached_answers`` dict (task id -> ``{"question", "answer"}``).

    The question list, the status dict and the answer dict are all captured
    once at start-up. If another part of the app rebinds those globals while
    this worker is still running (for example by clearing the cache), the
    worker notices, stops, and no longer touches the replacement objects.

    Args:
        progress_callback: Optional callable invoked as
            ``progress_callback(done, total)`` after each answered question.

    Returns:
        An explanatory string when there are no cached questions; otherwise
        ``None`` once the loop finishes, is cancelled, or fails.
    """
    global cached_answers

    # Snapshot so the iteration and the reported total stay consistent even
    # if ``cached_questions`` is rebound mid-run.
    questions = list(cached_questions)
    if not questions:
        return "No questions available. Please fetch questions first."

    total = len(questions)
    status = processing_status  # the dict this run owns
    status["is_processing"] = True
    status["progress"] = 0
    status["total"] = total

    def cancelled() -> bool:
        # Cancelled when the flag was cleared, or when the global status
        # dict was replaced altogether (cache cleared, possibly restarted).
        return processing_status is not status or not status["is_processing"]

    try:
        agent = BasicAgent()
        answers = {}
        cached_answers = answers

        for done, item in enumerate(questions, start=1):
            if cancelled():
                break

            task_id = item.get("task_id")
            question_text = item.get("question")
            if not task_id or question_text is None:
                continue

            try:
                answer = agent(question_text)
            except Exception as e:
                # One failing question must not abort the whole batch.
                answer = f"AGENT ERROR: {e}"

            # The agent call can take a long time; re-check so a run that
            # was cancelled meanwhile does not publish a stale answer.
            if cancelled():
                break

            answers[task_id] = {"question": question_text, "answer": answer}
            status["progress"] = done
            if progress_callback:
                progress_callback(done, total)

    except Exception as e:
        print(f"Error in generate_answers_async: {e}")
    finally:
        # Only ever reset the flag on the dict this run owns, never on a
        # replacement that may belong to a newer run.
        status["is_processing"] = False
155
 
156
# Serializes the "is a run already active?" check with claiming the flag.
_generation_start_lock = threading.Lock()


def start_answer_generation():
    """Start answer generation in a background daemon thread.

    The ``is_processing`` flag is claimed here, under a lock, before the
    worker thread is started. Leaving that to the worker would let two
    near-simultaneous calls both pass the guard and launch two workers.

    Returns:
        A ``(status_message, None)`` pair; the second element is ``None`` on
        every path.

    Raises:
        RuntimeError: If the worker thread cannot be started. The
            ``is_processing`` flag is released before re-raising.
    """
    with _generation_start_lock:
        if processing_status["is_processing"]:
            return "Answer generation is already in progress.", None

        if not cached_questions:
            return "No questions available. Please fetch questions first.", None

        processing_status["is_processing"] = True
        processing_status["progress"] = 0
        processing_status["total"] = len(cached_questions)

    worker = threading.Thread(target=generate_answers_async, daemon=True)
    try:
        worker.start()
    except RuntimeError:
        # The worker never ran, so nothing else will clear the flag.
        processing_status["is_processing"] = False
        raise

    return "Answer generation started. Check progress below.", None
172
+
173
def get_generation_progress():
    """Summarize the state of answer generation for the UI.

    Reads the module-level ``processing_status`` and ``cached_answers``.

    Returns:
        A ``(status_message, dataframe)`` pair. The dataframe is only
        produced once generation is no longer running and at least one
        answer is cached; questions are shortened to 100 characters and
        answers to 200 for the preview. In every other state it is ``None``.
    """
    if processing_status["is_processing"]:
        done = processing_status["progress"]
        total = processing_status["total"]
        return f"Generating answers... {done}/{total} completed", None

    if processing_status["progress"] == 0:
        return "Not started", None

    # Not running and progress was made: the run has ended.
    if not cached_answers:
        return "Answer generation completed but no answers were generated.", None

    def clip(text, limit):
        # Shorten long text for the preview table, marking the cut.
        if len(text) > limit:
            return text[:limit] + "..."
        return text

    preview_rows = [
        {
            "Task ID": task_id,
            "Question": clip(entry["question"], 100),
            "Generated Answer": clip(entry["answer"], 200),
        }
        for task_id, entry in cached_answers.items()
    ]
    preview = pd.DataFrame(preview_rows)
    summary = f"Answer generation completed! {len(cached_answers)} answers ready for submission."
    return summary, preview
202
 
203
+ def submit_cached_answers(profile: gr.OAuthProfile | None):
204
+ """
205
+ Submit the cached answers to the evaluation API.
206
+ """
207
+ global cached_answers
208
+
209
+ if not profile:
210
+ return "Please log in to Hugging Face first.", None
211
+
212
+ if not cached_answers:
213
+ return "No cached answers available. Please generate answers first.", None
214
+
215
+ username = profile.username
216
+ space_id = os.getenv("SPACE_ID")
217
+ agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Unknown"
218
+
219
+ # Prepare submission payload
220
+ answers_payload = []
221
+ for task_id, data in cached_answers.items():
222
+ answers_payload.append({
223
+ "task_id": task_id,
224
+ "submitted_answer": data["answer"]
225
+ })
226
+
227
+ submission_data = {
228
+ "username": username.strip(),
229
+ "agent_code": agent_code,
230
+ "answers": answers_payload
231
+ }
232
+
233
+ # Submit to API
234
+ api_url = DEFAULT_API_URL
235
+ submit_url = f"{api_url}/submit"
236
+
237
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
238
+
239
  try:
240
  response = requests.post(submit_url, json=submission_data, timeout=60)
241
  response.raise_for_status()
242
  result_data = response.json()
243
+
244
  final_status = (
245
  f"Submission Successful!\n"
246
  f"User: {result_data.get('username')}\n"
 
248
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
249
  f"Message: {result_data.get('message', 'No message received.')}"
250
  )
251
+
252
+ # Create results DataFrame
253
+ results_log = []
254
+ for task_id, data in cached_answers.items():
255
+ results_log.append({
256
+ "Task ID": task_id,
257
+ "Question": data["question"],
258
+ "Submitted Answer": data["answer"]
259
+ })
260
+
261
  results_df = pd.DataFrame(results_log)
262
  return final_status, results_df
263
+
264
  except requests.exceptions.HTTPError as e:
265
  error_detail = f"Server responded with status {e.response.status_code}."
266
  try:
267
  error_json = e.response.json()
268
  error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
269
+ except:
270
  error_detail += f" Response: {e.response.text[:500]}"
271
+ return f"Submission Failed: {error_detail}", None
272
+
 
 
273
  except requests.exceptions.Timeout:
274
+ return "Submission Failed: The request timed out.", None
275
+
 
 
 
 
 
 
 
276
  except Exception as e:
277
+ return f"Submission Failed: {e}", None
 
 
 
 
278
 
279
def clear_cache():
    """Reset the module-level caches to their initial, empty state.

    Rebinds ``cached_answers`` to an empty dict, ``cached_questions`` to an
    empty list, and ``processing_status`` to a fresh idle status dict.

    Returns:
        A ``(status_message, None)`` pair.
    """
    global cached_answers, cached_questions, processing_status

    cached_answers, cached_questions = {}, []
    processing_status = dict(is_processing=False, progress=0, total=0)

    return "Cache cleared successfully.", None
288
+
289
+ # --- Enhanced Gradio Interface ---
290
+ with gr.Blocks(title="Enhanced Agent Evaluation Runner") as demo:
291
+ gr.Markdown("# Enhanced Agent Evaluation Runner with Answer Caching")
292
  gr.Markdown(
293
  """
294
+ **Enhanced Instructions:**
295
+
296
+ 1. **Clone and Modify**: Clone this space and modify the agent logic as needed.
297
+ 2. **Login**: Log in to your Hugging Face account.
298
+ 3. **Fetch Questions**: Load all questions from the evaluation API.
299
+ 4. **Generate Answers**: Create answers for all questions (runs in background).
300
+ 5. **Review Results**: Check the generated answers before submission.
301
+ 6. **Submit**: Submit your answers when ready.
302
+
303
+ **Benefits of this approach:**
304
+ - ✅ Faster user feedback (separate steps)
305
+ - ✅ Ability to review answers before submission
306
+ - ✅ Progress tracking during answer generation
307
+ - ✅ Cache management for multiple runs
308
+
309
  ---
 
 
 
310
  """
311
  )
312
 
313
+ with gr.Row():
314
+ gr.LoginButton()
315
+ clear_btn = gr.Button("Clear Cache", variant="secondary")
316
+
317
+ with gr.Tab("Step 1: Fetch Questions"):
318
+ gr.Markdown("### Fetch Questions from API")
319
+ fetch_btn = gr.Button("Fetch Questions", variant="primary")
320
+ fetch_status = gr.Textbox(label="Fetch Status", lines=2, interactive=False)
321
+ questions_table = gr.DataFrame(label="Available Questions", wrap=True)
322
+
323
+ fetch_btn.click(
324
+ fn=fetch_questions,
325
+ outputs=[fetch_status, questions_table]
326
+ )
327
+
328
+ with gr.Tab("Step 2: Generate Answers"):
329
+ gr.Markdown("### Generate Answers (Background Processing)")
330
+
331
+ with gr.Row():
332
+ generate_btn = gr.Button("Start Answer Generation", variant="primary")
333
+ refresh_btn = gr.Button("Refresh Progress", variant="secondary")
334
+
335
+ generation_status = gr.Textbox(label="Generation Status", lines=2, interactive=False)
336
+ answers_preview = gr.DataFrame(label="Generated Answers Preview", wrap=True)
337
+
338
+ generate_btn.click(
339
+ fn=start_answer_generation,
340
+ outputs=[generation_status, answers_preview]
341
+ )
342
+
343
+ refresh_btn.click(
344
+ fn=get_generation_progress,
345
+ outputs=[generation_status, answers_preview]
346
+ )
347
 
348
+ with gr.Tab("Step 3: Submit Results"):
349
+ gr.Markdown("### Submit Generated Answers")
350
+ submit_btn = gr.Button("Submit Cached Answers", variant="primary")
351
+ submission_status = gr.Textbox(label="Submission Status", lines=5, interactive=False)
352
+ final_results = gr.DataFrame(label="Final Submission Results", wrap=True)
353
+
354
+ submit_btn.click(
355
+ fn=submit_cached_answers,
356
+ outputs=[submission_status, final_results]
357
+ )
358
 
359
+ # Clear cache functionality
360
+ clear_btn.click(
361
+ fn=clear_cache,
362
+ outputs=[fetch_status, questions_table]
363
+ )
364
 
365
+ # Auto-refresh progress every 5 seconds when generation is active
366
+ demo.load(
367
+ fn=get_generation_progress,
368
+ outputs=[generation_status, answers_preview],
369
+ every=5
370
  )
371
 
372
  if __name__ == "__main__":
373
+ print("\n" + "-"*30 + " Enhanced App Starting " + "-"*30)
374
+
375
  space_host_startup = os.getenv("SPACE_HOST")
376
+ space_id_startup = os.getenv("SPACE_ID")
377
 
378
  if space_host_startup:
379
  print(f"✅ SPACE_HOST found: {space_host_startup}")
 
381
  else:
382
  print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
383
 
384
+ if space_id_startup:
385
  print(f"✅ SPACE_ID found: {space_id_startup}")
386
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
387
  print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
388
  else:
389
  print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
390
 
391
+ print("-"*(60 + len(" Enhanced App Starting ")) + "\n")
392
 
393
+ print("Launching Enhanced Gradio Interface...")
394
  demo.launch(debug=True, share=False)