File size: 2,935 Bytes
0a778e2
c216f4b
 
70b0f89
c216f4b
 
 
70b0f89
c216f4b
70b0f89
 
 
89e6d63
 
70b0f89
4b51ad9
 
 
 
70b0f89
c216f4b
70b0f89
 
 
89e6d63
4b51ad9
70b0f89
4b51ad9
 
 
 
70b0f89
4b51ad9
70b0f89
4b51ad9
70b0f89
89e6d63
179d569
 
 
89e6d63
 
179d569
70b0f89
6d36bae
4b51ad9
 
 
 
70b0f89
c216f4b
0a778e2
 
4b51ad9
c216f4b
0a778e2
4b51ad9
0a778e2
70b0f89
89e6d63
6d36bae
c216f4b
0a778e2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c216f4b
0a778e2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89

import os
import requests
from smolagents import CodeAgent, tool, OpenAIServerModel

# Base URL of the scoring service; the tool functions below append their
# endpoint paths (/questions, /random-question, /submit) to it.
API_URL = "https://agents-course-unit4-scoring.hf.space"

@tool
def fetch_questions() -> list:
    """
    Fetch the full list of GAIA evaluation questions.

    Returns:
        list: A list of question dicts, each with 'task_id' and 'question'.
    """
    endpoint = f"{API_URL}/questions"
    response = requests.get(endpoint, timeout=15)
    # Turn 4xx/5xx replies into exceptions before attempting to decode a body.
    response.raise_for_status()
    questions = response.json()
    return questions

@tool
def fetch_random_question() -> dict:
    """
    Fetch a single random GAIA question.

    Returns:
        dict: A dict with keys 'task_id' and 'question'.
    """
    endpoint = f"{API_URL}/random-question"
    response = requests.get(endpoint, timeout=15)
    # Fail loudly on an HTTP error status instead of returning an error payload.
    response.raise_for_status()
    question = response.json()
    return question

@tool
def submit_answers(username: str, agent_code: str, answers: list) -> dict:
    """
    Submit the agent's answers to GAIA and get the scoring.

    Args:
        username (str): HF username for the submission.
        agent_code (str): URL of your Space repo for verification.
        answers (list): List of dicts, each with 'task_id' and 'submitted_answer'.

    Returns:
        dict: Contains 'score', 'correct_count', 'total_attempted', 'message', etc.
    """
    # All three fields travel together as one JSON document.
    submission = dict(username=username, agent_code=agent_code, answers=answers)
    # Submissions get a 60-second timeout, longer than the 15 seconds used for
    # the question-fetching requests.
    response = requests.post(f"{API_URL}/submit", json=submission, timeout=60)
    # Raise on 4xx/5xx so a rejected submission is not mistaken for a score.
    response.raise_for_status()
    return response.json()

def create_agent() -> CodeAgent:
    """
    Build and return a configured CodeAgent using OpenAI GPT-3.5 Turbo.

    The agent is given the three GAIA tools defined in this module
    (fetch_questions, fetch_random_question, submit_answers), and the GAIA
    Level-1 exact-match answer rules plus two few-shot examples are added to
    its system prompt.
    Requires OPENAI_API_KEY in the environment.

    Returns:
        CodeAgent: The configured agent.
    """
    # GAIA answer-format rules and few-shot examples. There is deliberately no
    # "Question: {task}" line: nothing ever formats the system prompt with the
    # task, so the placeholder would be sent to the model literally. The task
    # itself is supplied when the agent is run.
    gaia_instructions = """
You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template:
FINAL ANSWER: [YOUR FINAL ANSWER].

YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
- If you are asked for a number, don't use commas, symbols or units (e.g. %, $, km) unless explicitly asked.
- If you are asked for a string, don't use articles ("a", "the"), abbreviations (e.g. "NYC"), or extra words; write digits in plain text unless specified otherwise.
- If you are asked for a comma separated list, apply the above rules to each element.

Example 1:
Question: What is 2 + 2?
Thought: simple arithmetic
FINAL ANSWER: 4

Example 2:
Question: What is the capital of France?
Thought: common geography
FINAL ANSWER: Paris

Now it’s your turn.
"""

    # 1. Instantiate model
    model = OpenAIServerModel(model_id="gpt-3.5-turbo")

    # 2. Create the agent with its default prompt templates
    agent = CodeAgent(
        tools=[fetch_questions, fetch_random_question, submit_answers],
        model=model,
    )

    # 3. Add the GAIA instructions to the system prompt.
    templates = getattr(agent, "prompt_templates", None)
    if templates is not None and "system_prompt" in templates:
        # smolagents renders the system prompt from
        # prompt_templates["system_prompt"]; a `system_prompt_template`
        # attribute is not consulted there, so assigning it has no effect.
        # Append rather than replace so the stock CodeAgent instructions
        # (tool descriptions, expected code-block format) remain in place.
        templates["system_prompt"] = (
            templates["system_prompt"] + "\n" + gaia_instructions
        )
    else:
        # No prompt_templates mapping on this agent (presumably an older
        # smolagents release - TODO confirm): keep the original assignment.
        agent.system_prompt_template = gaia_instructions

    return agent