File size: 3,792 Bytes
6ca25ff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import os
from typing import Any, Dict, List, Tuple
import pandas as pd
import requests

def fetch_all_questions() -> List[Dict[str, Any]]:
    """Fetch all questions from the GAIA benchmark API.

    The base URL is read from the ``GAIA_API_URL`` environment variable and
    the questions are requested from ``<base>/questions``.

    Returns:
        The decoded JSON body of the response.

    Raises:
        Exception: If ``GAIA_API_URL`` is unset or empty, the request fails
            or times out, the server answers with an error status, or the
            body cannot be decoded as JSON. The underlying error is chained
            as ``__cause__``.
    """
    try:
        api_url = os.getenv("GAIA_API_URL", "")
        if not api_url:
            raise ValueError("GAIA_API_URL environment variable not set")

        # Tolerate a trailing slash in the configured base URL, and bound the
        # wait so an unresponsive server cannot hang the caller indefinitely.
        response = requests.get(f"{api_url.rstrip('/')}/questions", timeout=30)
        response.raise_for_status()

        return response.json()
    except Exception as e:
        # Keep the historical message/type for callers, but chain the cause
        # explicitly so the original traceback is preserved.
        raise Exception(f"Failed to fetch questions: {e}") from e

def run_agent(agent: Any, questions: List[Dict[str, Any]]) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
    """Run the agent on every question and collect the results.

    Each question is passed to ``agent.get_answer``. An exception raised for
    one question is recorded in the log and does not stop the remaining
    questions from being processed.

    Args:
        agent: Object exposing ``get_answer(question)``.
        questions: Question dicts; ``"id"`` and ``"question"`` are read from
            each one (defaulting to ``"unknown"`` and ``""``).

    Returns:
        Tuple containing:
        - List of result-log rows (``"Question ID"``, ``"Question"``,
          ``"Answer"``, ``"Status"``) for display, one per question.
        - List of answer payloads (``"question_id"``, ``"answer"``) for
          submission, one per question that produced an answer.
    """
    results_log = []
    answers_payload = []

    for question in questions:
        question_id = question.get("id", "unknown")
        question_text = question.get("question", "")

        try:
            answer = agent.get_answer(question)

            # A numeric or boolean answer such as 0 or False is a real answer
            # even though it is falsy; only None / "" / empty containers are
            # treated as "no answer".
            has_answer = bool(answer) or isinstance(answer, (int, float))

            results_log.append({
                "Question ID": question_id,
                "Question": question_text,
                "Answer": answer if has_answer else "No answer provided",
                "Status": "Success" if has_answer else "Failed",
            })

            # Only answered questions are included in the submission.
            if has_answer:
                answers_payload.append({
                    "question_id": question_id,
                    "answer": answer,
                })

        except Exception as e:
            # Record the failure and carry on with the next question.
            results_log.append({
                "Question ID": question_id,
                "Question": question_text,
                "Answer": f"Error: {str(e)}",
                "Status": "Failed",
            })

    return results_log, answers_payload

def submit_answers(submission_data: Dict[str, Any], results_log: List[Dict[str, Any]]) -> Tuple[str, pd.DataFrame]:
    """Submit answers to the GAIA benchmark API.

    The base URL is read from the ``GAIA_API_URL`` environment variable and
    ``submission_data`` is POSTed as JSON to ``<base>/submit``. Submission
    failures are reported through the returned status message rather than
    raised, so the results can still be displayed.

    Args:
        submission_data: Dictionary containing submission details.
        results_log: List of result logs for display.

    Returns:
        Tuple containing:
        - Status message string (success, or ``"Error submitting answers: ..."``).
        - DataFrame built from ``results_log`` for display.
    """
    # The results table is returned on both the success and failure paths,
    # so build it once up front.
    results_df = pd.DataFrame(results_log)

    try:
        api_url = os.getenv("GAIA_API_URL", "")
        if not api_url:
            raise ValueError("GAIA_API_URL environment variable not set")

        # Tolerate a trailing slash in the configured base URL, and bound the
        # wait so an unresponsive server cannot hang the caller indefinitely.
        response = requests.post(
            f"{api_url.rstrip('/')}/submit",
            json=submission_data,
            timeout=60,
        )
        response.raise_for_status()

        return "Answers submitted successfully!", results_df

    except Exception as e:
        # If submission fails, still show results but with error message
        return f"Error submitting answers: {str(e)}", results_df