|
from typing import Dict, List, Tuple |
|
import re |
|
import tempfile |
|
from pathlib import Path |
|
import pandas as pd |
|
import requests |
|
from agent import GaiaAgent |
|
from pandas import DataFrame |
|
|
|
|
|
# Base URL of the scoring service; every endpoint below is derived from it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# GET endpoint queried by fetch_all_questions().
QUESTIONS_URL = DEFAULT_API_URL + "/questions"

# POST endpoint used by submit_answers().
SUBMIT_URL = DEFAULT_API_URL + "/submit"

# URL prefix (not a filesystem path) to which process_file() appends a task id.
FILE_PATH = DEFAULT_API_URL + "/files/"
|
|
|
|
|
|
|
def fetch_all_questions() -> Dict:
    """Fetch the full question set from the scoring API.

    Sends a GET request to ``QUESTIONS_URL`` (15 second timeout), decodes the
    body as JSON and returns the decoded payload unchanged.

    Returns:
        Dict: The decoded JSON payload.
            NOTE(review): the annotation says ``Dict``, but the payload is
            reported as a count of questions and ``run_agent`` takes a list
            of question dicts -- confirm the real shape against the API.

    Raises:
        UserWarning: If the request fails (connection error, timeout, HTTP
            error status), the body is not valid JSON, the decoded payload is
            empty, or any other unexpected error occurs. When another
            exception triggered the failure it is chained as ``__cause__``.
    """
    print(f"Fetching questions from: {QUESTIONS_URL}")
    try:
        # The request itself is inside the ``try`` so that connection errors
        # and timeouts are converted to ``UserWarning`` like every other
        # failure mode.
        response = requests.get(QUESTIONS_URL, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            raise UserWarning("Fetched questions list is empty or invalid format.")
        print(f"Fetched {len(questions_data)} questions.")
        return questions_data
    except UserWarning:
        # Our own "empty payload" signal: let it through untouched instead of
        # re-wrapping it as an "unexpected error" in the generic handler.
        raise
    except requests.exceptions.JSONDecodeError as e:
        # Must come before the RequestException handler: requests'
        # JSONDecodeError derives from RequestException, so the broader
        # handler would otherwise shadow this one.
        print(f"Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")
        raise UserWarning(f"Error decoding server response for questions: {e}") from e
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        raise UserWarning(f"Error fetching questions: {e}") from e
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        raise UserWarning(f"An unexpected error occurred fetching questions: {e}") from e
|
|
|
|
|
def submit_answers(submission_data: dict, results_log: list) -> Tuple[str, DataFrame]:
    """Submit answers to the scoring API and report the outcome.

    POSTs ``submission_data`` as JSON to ``SUBMIT_URL`` (60 second timeout).
    Request and response errors are never raised to the caller; they are
    turned into a human-readable status message instead.

    Args:
        submission_data (dict): Payload sent as the JSON body of the request.
            Its structure must match what the scoring API expects.
        results_log (list): Per-task result records; converted to a
            DataFrame and returned alongside the status message.

    Returns:
        Tuple[str, DataFrame]: A tuple containing:
            - A status message. On success it lists the ``username``,
              ``score``, ``correct_count``, ``total_attempted`` and
              ``message`` fields of the server response; on failure it
              describes what went wrong.
            - A DataFrame built from ``results_log``.
    """
    try:
        response = requests.post(SUBMIT_URL, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        status_message = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/"
            f"{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()
        except requests.exceptions.JSONDecodeError:
            error_json = None
        if isinstance(error_json, dict):
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        else:
            # Body is not JSON, or is JSON but not an object (e.g. a list or
            # a bare string): fall back to a truncated raw body rather than
            # calling ``.get`` on it.
            error_detail += f" Response: {e.response.text[:500]}"
        status_message = f"Submission Failed: {error_detail}"
        print(status_message)
    except requests.exceptions.Timeout:
        status_message = "Submission Failed: The request timed out."
        print(status_message)
    except requests.exceptions.JSONDecodeError as e:
        # Listed before RequestException (its base class) so that an
        # undecodable 2xx body is not misreported as a network error.
        status_message = f"Submission Failed: Invalid JSON in server response - {e}"
        print(status_message)
    except requests.exceptions.RequestException as e:
        status_message = f"Submission Failed: Network error - {e}"
        print(status_message)
    except Exception as e:
        status_message = f"An unexpected error occurred during submission: {e}"
        print(status_message)

    # Every outcome returns the same results table, so build it once here.
    return status_message, pd.DataFrame(results_log)
|
|
|
|
|
def run_agent(
    gaia_agent: GaiaAgent, questions_data: List[Dict]
) -> Tuple[List[Dict], List[Dict]]:
    """Run the agent on every valid question and collect its answers.

    For each item the question text is first passed through ``process_file``
    (which may append a note about a downloaded attachment) and then handed
    to the agent together with the task id.

    Args:
        gaia_agent (GaiaAgent): Callable invoked as
            ``gaia_agent(task_id, question_text)``; its return value is used
            as the submitted answer.
        questions_data (List[Dict]): Question records. Items with a falsy
            ``'task_id'`` or without a ``'question'`` are skipped.

    Returns:
        Tuple[List[Dict], List[Dict]]: A tuple containing:
            - The results log: one dict per processed item with the keys
              ``'Task ID'``, ``'Question'`` and ``'Submitted Answer'``. If
              the agent raised, the answer is ``"AGENT ERROR: <exception>"``.
            - The answers payload: one dict per successfully answered item
              with the keys ``'task_id'`` and ``'submitted_answer'``. Items
              on which the agent raised are not included.
    """
    results_log = []
    answers_payload = []

    print(f"🚀 Running agent on {len(questions_data)} questions...")
    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question")

        # Validate before touching the file endpoint: otherwise an item
        # without a task id triggers a pointless download attempt, and a
        # missing question can be turned into a non-None string by
        # process_file, slipping past this check.
        if not task_id or question_text is None:
            print(f"⚠️ Skipping invalid item (missing task_id or question): {item}")
            continue

        question_text = process_file(task_id, question_text)

        try:
            submitted_answer = gaia_agent(task_id, question_text)
            answers_payload.append(
                {"task_id": task_id, "submitted_answer": submitted_answer}
            )
        except Exception as e:
            # One failing task must not abort the whole run; record the
            # error in the log only (nothing is submitted for this task).
            print(f"❌ Error running agent on task {task_id}: {e}")
            submitted_answer = f"AGENT ERROR: {e}"

        results_log.append(
            {
                "Task ID": task_id,
                "Question": question_text,
                "Submitted Answer": submitted_answer,
            }
        )
    return results_log, answers_payload
|
|
|
|
|
def process_file(task_id: str, question_text: str) -> str:
    """Download the file attached to a task, if any, and mention it in the question.

    Requests ``FILE_PATH + task_id`` (30 second timeout). On success the body
    is written to ``<system temp dir>/gaia_cached_files/<filename>`` and a
    note containing that local path is appended to the question text.

    Args:
        task_id (str): Identifier of the task whose file should be fetched.
        question_text (str): The question to augment.

    Returns:
        str: ``question_text`` unchanged when ``task_id`` is empty or the
            download fails for any request-level reason (connection error,
            timeout, or HTTP error status); otherwise ``question_text``
            followed by a block stating where the file was saved.
    """
    # Nothing to look up without a task id; skip the network round-trip.
    if not task_id:
        return question_text

    file_url = f"{FILE_PATH}{task_id}"
    try:
        response = requests.get(file_url, timeout=30)
        response.raise_for_status()
    except requests.exceptions.RequestException as exc:
        # Best effort: a failed download leaves the question as it was.
        print(f"Exception in process_file>> {exc}")
        return question_text

    # Prefer the server-advertised file name, falling back to the task id.
    content_disposition = response.headers.get("content-disposition", "")
    match = re.search(r'filename="([^"]+)"', content_disposition)
    advertised_name = match.group(1) if match else ""
    # The header is server-controlled input: strip any directory components
    # so the file cannot be written outside the cache directory.
    filename = _safe_filename(advertised_name, str(task_id))

    temp_storage_dir = Path(tempfile.gettempdir()) / "gaia_cached_files"
    temp_storage_dir.mkdir(parents=True, exist_ok=True)

    file_path = temp_storage_dir / filename
    file_path.write_bytes(response.content)
    return (
        f"{question_text}\n\n"
        f"---\n"
        f"A file was downloaded for this task and saved locally at:\n"
        f"{str(file_path)}\n"
        f"---\n\n"
    )


def _safe_filename(candidate: str, fallback: str) -> str:
    """Reduce *candidate* to a bare file name, using *fallback* if nothing usable remains."""
    for raw in (candidate, fallback):
        # Treat backslashes as separators too, so "..\\x" is handled like "../x".
        name = Path(raw.replace("\\", "/")).name
        if name and name not in {".", ".."}:
            return name
    return "downloaded_file"