CanerCoban commited on
Commit
e4d5e70
·
verified ·
1 Parent(s): af0c0fa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +164 -3
app.py CHANGED
@@ -177,19 +177,180 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
177
  results_log = [] # Used to make a DataFrame for UI display (question + answer)
178
  answers_payload = [] # sent to grading API in the final submission
179
 
 
180
  for item in questions_data:
 
181
  task_id = item.get("task_id")
 
182
  question_text = item.get("question")
183
  if not task_id or question_text is None:
184
  print(f"Skipping item with missing task_id or question: {item}")
185
  continue
 
 
186
  try:
187
  submitted_answer = agent(question_text)
188
  answers_payload.append({"task_id": task_id, "submmitted_answer": submitted_answer})
189
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
 
190
  except Exception as e:
191
  print(f"Erron running agent on task {task_id}: {e}")
192
- results_log.append
193
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
 
195
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
  results_log = [] # Used to make a DataFrame for UI display (question + answer)
178
  answers_payload = [] # sent to grading API in the final submission
179
 
180
+ # Loops through each question:
181
  for item in questions_data:
182
+ # Extracts task_id
183
  task_id = item.get("task_id")
184
+ # Extracts the question
185
  question_text = item.get("question")
186
  if not task_id or question_text is None:
187
  print(f"Skipping item with missing task_id or question: {item}")
188
  continue
189
+ # Use your agent (__call__) to answer the question
190
+ # Logs both result and metadata
191
  try:
192
  submitted_answer = agent(question_text)
193
  answers_payload.append({"task_id": task_id, "submmitted_answer": submitted_answer})
194
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
195
+ # On failure (bad formatting, model error, etc), logs an error message in the results.
196
  except Exception as e:
197
  print(f"Erron running agent on task {task_id}: {e}")
198
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
199
+ if not answers_payload:
200
+ print("Agent did not produce any asnwer to submit.")
201
+ return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
202
+ # 4. Prepare submission
203
+ # A JSON-safe dict with everything the backend expects: username (from login), code link (for peer review or reproducibility), and all answers in the required format.
204
+ submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
205
+ status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
206
+ print(status_update)
207
+
208
+ # 5. Submit
209
+ print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
210
+ try:
211
+ # submits the payload to the grading server.
212
+ response = requests.post(submit_url, json=submission_data, timeout=60)
213
+
214
+ response.raise_for_status()
215
+ # If successful, decode the JSON body of the response:
216
+ result_data = response.json()
217
+ # Parse final score
218
+ final_status = (
219
+ f"Submission Succesful!\n"
220
+ f"User: {result_data.get('username')}\n"
221
+ # Final score
222
+ f"Overall Score: {result_data.get('score','N/A')}%"
223
+ # Number of correct answers
224
+ f"({result_data.get('correct_count','?')}/{result_data.get('total_attempted','?')} correct)\n"
225
+ # Backend message
226
+ f"Message: {result_data.get('message','No message received.')}"
227
+ )
228
+ print("Submission succesful.")
229
+ results_df = pd.DataFrame(results_log)
230
+ # Return a user-friendly summary string and a Pandas Dataframe to display in Gradio
231
+ return final_status, results_df
232
+ # Handles possible errors
233
+ # Catches and logs:
234
+ # - HTTP errors
235
+ except requests.exceptions.HTTPError as e:
236
+ error_detail = f"Server responded with status {e.response.status_code}."
237
+ try:
238
+ error_json = e.response.json()
239
+ error_detail += f"Detail: {error_json.get('detail',e.response.text)}"
240
+ # Unexpected server responses
241
+ except requests.exceptions.JSONDecodeError:
242
+ error_detail += f" Response: {e.response.text[:500]}"
243
+ status_message = f"Submission Failed: {error_detail}"
244
+ print(status_message)
245
+ results_df = pd.DataFrame(results_log)
246
+ return status_message, results_df
247
+ # - Timeout error
248
+ except requests.exceptions.Timeout:
249
+ status_message = "Submission Failed: The request timed out."
250
+ print(status_message)
251
+ results_df = pd.DataFrame(results_log)
252
+ return status_message, results_df
253
+ # - Any other unexpected error (e.g. network issues)
254
+ except Exception as e:
255
+ status_message = f"An unexpected error occurred during submission: {e}"
256
+ print(status_message)
257
+ results_df = pd.DataFrame(results_log)
258
+ # Ensure the return is still clean, with a Dataframe of what happened so far.
259
+ return status_message, results_df
260
 
261
+ # --- Build the Gradio interface using Blocks ---
262
+ # gr.Blocks is the layout-based API; components created inside the `with` body belong to `demo`.
263
+ with gr.Blocks() as demo:
264
+ # Display the title
265
+ gr.Markdown("# Basic Agent Evaluation Runner")
266
+ # Display the usage instructions and disclaimers
267
+ gr.Markdown(
268
+ """
269
+ **Instructions:**
270
+
271
+ 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
272
+ 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
273
+ 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
274
+
275
+ ---
276
+ **Disclaimers:**
277
+ Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
278
+ This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
279
+
280
+ """
281
+ )
282
+ # Hugging Face login button - lets users authenticate with Hugging Face OAuth.
283
+ # The instructions above state that the HF username is used for the submission.
284
+ # NOTE(review): the button's return value is not stored; the profile presumably reaches run_and_submit_all via Gradio's OAuth handling - confirm.
285
+ gr.LoginButton()
286
+
287
+ # Button that triggers the agent run.
288
+ # Clicking it calls run_and_submit_all (wired up below), which runs the agent on each question, submits the answers, and returns the results.
289
+ run_button = gr.Button("Run Evaluation & Submit All Answers")
290
+ # Output display components
291
+ # Read-only textbox that shows the status string returned by run_and_submit_all (success summary or error message).
292
+ status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
293
+
294
+ # Removed max_rows=10 from DataFrame constructor
295
+ # Displays the log of questions and submitted answers in tabular form.
296
+ # Useful for transparency or debugging agent behavior.
297
+ results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
298
+
299
+ # Connect the logic to the button
300
+ # This is where everything connects together.
301
+ # When the button is clicked:
302
+ # 1. run_and_submit_all(profile) is called.
303
+ # 2. No `inputs` are listed; the gr.OAuthProfile | None argument is presumably injected by Gradio from the login state - confirm.
304
+ # 3. The returned (status, DataFrame) pair is sent to the Textbox and the DataFrame component, in that order.
305
+ run_button.click(
306
+ fn=run_and_submit_all,
307
+ outputs=[status_output, results_table]
308
+ )
309
+
310
+ # Entry point for the Python app.
311
+ # Controls what happens when the script is run directly (e.g. on an HF Space or locally).
312
+ # Purpose: main execution trigger
313
+ # * Checks for environment setup (SPACE_HOST, SPACE_ID)
314
+ # * Prints useful diagnostics (like URLs)
315
+ # * Finally, launches the Gradio app interface.
316
+ # Standard Python idiom: the block only runs when the file is executed directly (not when imported as a module).
317
+ # Since app.py is the main file, this block is the app's entry point.
318
+ if __name__ == "__main__":
319
+ # Log app startup
320
+ # Pretty formatting to indicate that the app is initializing.
321
+ # Outputs a visible header
322
+ print("\n" + "-"*30 + " App Starting " + "-"*30) # ------------------------------ App Starting ------------------------------
323
+ # Check for SPACE_HOST and SPACE_ID at startup for information
324
+ # Check for HF environment variables
325
+ # NOTE: These are automatically set when the app is deployed on Hugging Face Spaces.
326
+ space_host_startup = os.getenv("SPACE_HOST") # host of the Space; NOTE(review): HF docs suggest this value may already end in ".hf.space" - confirm before appending it again below
327
+ space_id_startup = os.getenv("SPACE_ID") # repo path (e.g., username/space-name)
328
+
329
+ # Print SPACE_HOST info
330
+ # If found, log the expected public URL of the Space.
331
+ if space_host_startup:
332
+ print(f"✅ SPACE_HOST found: {space_host_startup}")
333
+ print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
334
+ else:
335
+ # If not found, the app might be running locally or in a non-Space environment.
336
+ print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
337
+ # Print SPACE_ID info; if found, log the ID and the URLs derived from it:
338
+ if space_id_startup:
339
+ # The raw SPACE_ID value
340
+ print(f"✅ SPACE_ID found: {space_id_startup}")
341
+ # The repo homepage (good for credit/visibility)
342
+ print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
343
+ # The repo tree (code browser); these links are often included in the final submission for review.
344
+ print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
345
+ else:
346
+ print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
347
+ # Final log and UI launch
348
+ # Closes the startup banner (same width as the header above) and logs that the UI is about to launch.
349
+ print("-"*(60 + len(" App Starting ")) + "\n")
350
+ print("Launching Gradio Interface for Basic Agent Evaluation...")
351
+
352
+ # LAUNCH THE APP
353
+ # debug=True: Gradio will print extra logs (useful during development).
354
+ # share=False: disables Gradio's external share-link feature (not needed on Hugging Face Spaces).
355
+ demo.launch(debug=True, share=False) # starts the Gradio interface.
356
+