|
from typing import Dict, List, Tuple |
|
|
|
import pandas as pd |
|
import requests |
|
from gaia_agent import GaiaAgent |
|
from pandas import DataFrame |
|
|
|
|
|
# Base URL of the scoring service; the endpoint URLs below are derived from it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# Endpoint queried with GET by fetch_all_questions().
QUESTIONS_URL = DEFAULT_API_URL + "/questions"

# Endpoint that submit_answers() POSTs the submission payload to.
SUBMIT_URL = DEFAULT_API_URL + "/submit"
|
|
|
|
|
|
|
def fetch_all_questions() -> Dict:
    """Fetch all questions from the questions endpoint.

    Issues a GET request to ``QUESTIONS_URL`` (15 second timeout), checks the
    HTTP status, decodes the JSON body and rejects an empty payload. Every
    failure mode is reported to the caller as a ``UserWarning`` so callers
    only need to handle a single exception type.

    Returns:
        Dict: The decoded JSON body of the questions endpoint. The annotation
        is kept for backward compatibility; the payload is presumably a list
        of question dictionaries (``run_agent`` consumes ``List[Dict]``) --
        confirm against the API.

    Raises:
        UserWarning: If the request fails (connection error, timeout, HTTP
            error status), the response body is not valid JSON, the decoded
            payload is empty, or any other unexpected error occurs. The
            original exception, when there is one, is chained as
            ``__cause__``.
    """
    print(f"Fetching questions from: {QUESTIONS_URL}")
    try:
        # The request itself lives inside the try block so that connection
        # errors and timeouts are converted to UserWarning as documented.
        response = requests.get(QUESTIONS_URL, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            raise UserWarning("Fetched questions list is empty or invalid format.")
        print(f"Fetched {len(questions_data)} questions.")
        return questions_data
    except UserWarning:
        # Our own "empty payload" signal: propagate it unchanged instead of
        # letting the generic handler relabel it as an unexpected error.
        raise
    except requests.exceptions.JSONDecodeError as e:
        # Must precede RequestException: requests' JSONDecodeError is a
        # subclass of it, so the reverse order makes this branch unreachable.
        # It can only be raised by response.json(), so `response` is bound.
        print(f"Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")
        raise UserWarning(f"Error decoding server response for questions: {e}") from e
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        raise UserWarning(f"Error fetching questions: {e}") from e
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        raise UserWarning(f"An unexpected error occurred fetching questions: {e}") from e
|
|
|
|
|
def submit_answers(submission_data: dict, results_log: list) -> Tuple[str, DataFrame]:
    """Submit answers to the scoring API and report the outcome.

    POSTs ``submission_data`` as JSON to ``SUBMIT_URL`` (60 second timeout).
    Request failures are not raised to the caller; each one is converted into
    a human-readable status message, which is also printed.

    Args:
        submission_data (dict): The payload to submit. It is sent as the JSON
            body of the request; its schema is defined by the scoring API.
        results_log (list): Rows of the results log. It is not sent to the
            API; it is only converted to a Pandas DataFrame and returned.

    Returns:
        Tuple[str, DataFrame]: A tuple containing:
            - A status message describing the successful submission (user,
              score, counts, server message) or the reason it failed.
            - A DataFrame built from ``results_log``.
    """
    try:
        response = requests.post(SUBMIT_URL, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        status_message = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/"
            f"{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
    except requests.exceptions.HTTPError as e:
        failed_response = e.response
        error_detail = f"Server responded with status {failed_response.status_code}."
        try:
            error_json = failed_response.json()
        except ValueError:
            # ValueError covers requests' JSONDecodeError as well as the plain
            # ValueError raised by older requests releases.
            error_detail += f" Response: {failed_response.text[:500]}"
        else:
            if isinstance(error_json, dict):
                error_detail += (
                    f" Detail: {error_json.get('detail', failed_response.text)}"
                )
            else:
                # Valid JSON that is not an object (list, string, ...) has no
                # 'detail' field; fall back to the raw text instead of letting
                # an AttributeError escape from this handler.
                error_detail += f" Response: {failed_response.text[:500]}"
        status_message = f"Submission Failed: {error_detail}"
        print(status_message)
    except requests.exceptions.Timeout:
        status_message = "Submission Failed: The request timed out."
        print(status_message)
    except requests.exceptions.RequestException as e:
        status_message = f"Submission Failed: Network error - {e}"
        print(status_message)
    except Exception as e:
        status_message = f"An unexpected error occurred during submission: {e}"
        print(status_message)

    # Every outcome returns the same results table, so build it once here,
    # after the request, rather than in each branch.
    return status_message, pd.DataFrame(results_log)
|
|
|
|
|
def run_agent(agent: GaiaAgent,
              questions_data: List[Dict]) -> Tuple[List[Dict], List[Dict]]:
    """Run the agent over every question and collect its answers.

    Items lacking a truthy 'task_id' or whose 'question' is None are reported
    and skipped. When the agent raises for a question, the error is printed
    and recorded in the results log as an "AGENT ERROR: ..." entry, but no
    entry is added to the answers payload for that task.

    Args:
        agent (GaiaAgent): Callable invoked with the question text; its return
            value is used as the submitted answer.
        questions_data (List[Dict]): Question dictionaries, each read through
            its 'task_id' and 'question' keys.

    Returns:
        Tuple[List[Dict], List[Dict]]: A tuple containing:
            - The results log: one dictionary per processed question with the
              keys 'Task ID', 'Question' and 'Submitted Answer'.
            - The answers payload: one dictionary per successfully answered
              question with the keys 'task_id' and 'submitted_answer'.
    """
    log_rows = []
    payload = []

    print(f"🚀 Running agent on {len(questions_data)} questions...")
    for entry in questions_data:
        tid = entry.get("task_id")
        prompt = entry.get("question")

        is_usable = bool(tid) and prompt is not None
        if not is_usable:
            print(f"⚠️ Skipping invalid item (missing task_id or question): {entry}")
            continue

        try:
            answer = agent(prompt)
        except Exception as err:
            print(f"❌ Error running agent on task {tid}: {err}")
            answer = f"AGENT ERROR: {err}"
        else:
            # Only answers the agent actually produced go into the payload.
            payload.append({"task_id": tid, "submitted_answer": answer})

        log_rows.append(
            {"Task ID": tid, "Question": prompt, "Submitted Answer": answer}
        )

    return log_rows, payload
|
|