File size: 4,503 Bytes
10e9b7d
6a38a35
eccf8e4
6a38a35
bee5328
0e6b913
 
bee5328
e0cc1b7
6a38a35
bee5328
e0cc1b7
 
0e6b913
e0cc1b7
0e6b913
 
 
 
 
e0cc1b7
 
 
0e6b913
e0cc1b7
 
de8170e
0e6b913
 
 
 
 
 
 
 
c396a92
0e6b913
c396a92
0e6b913
 
6a38a35
0e6b913
e0cc1b7
 
6a38a35
 
bee5328
0e6b913
e0cc1b7
0e6b913
c396a92
0e6b913
 
 
de8170e
6a38a35
e0cc1b7
6a38a35
fd5a08b
0e6b913
 
bd7cd5b
 
c396a92
41085c3
0e6b913
 
bd7cd5b
0e6b913
 
 
c396a92
6a38a35
0e6b913
e0cc1b7
 
6a38a35
 
de8170e
 
 
0e6388c
6a38a35
 
 
 
 
de8170e
0e6b913
 
 
 
 
 
e0cc1b7
 
0e6b913
 
 
e0cc1b7
 
6a38a35
0e6b913
6a38a35
e0cc1b7
de8170e
 
 
0e6b913
 
 
de8170e
 
21325a3
0e6b913
bd7cd5b
 
21325a3
bd7cd5b
 
 
 
6a38a35
0e6b913
 
 
6a38a35
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import os
import gradio as gr
import requests
import pandas as pd

from tools import AnswerTool
from smolagents import CodeAgent

# --- Constants ---
# Base URL of the scoring service. The "/questions", "/random-question" and
# "/submit" endpoint URLs used in this file are built from it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

class BasicAgent:
    """Callable wrapper around a ``CodeAgent`` whose only tool is ``AnswerTool``."""

    def __init__(self):
        # The agent is limited to one step and gets no base tools, so the
        # custom AnswerTool is the only tool it can use.
        # NOTE(review): model=None is passed through unchanged -- confirm that
        # CodeAgent can actually run without a model.
        answer_tools = [AnswerTool()]
        agent_options = {
            "model": None,
            "add_base_tools": False,
            "max_steps": 1,
            "verbosity_level": 0,
        }
        self.agent = CodeAgent(tools=answer_tools, **agent_options)

    def __call__(self, question: str) -> str:
        """Run the wrapped agent on ``question`` and return whatever it produces."""
        outcome = self.agent.run(question)
        return outcome


def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetch all questions, run the BasicAgent on each, submit the answers, and report the result.

    Args:
        profile: OAuth profile of the logged-in user, or None when nobody is
            logged in.

    Returns:
        A ``(status, results)`` tuple. ``status`` is a human-readable message.
        ``results`` is a DataFrame with the columns "Task ID", "Question" and
        "Answer" (one row per processed question), or None when the run stops
        before any question is processed.
    """
    if not profile:
        return "Please login to Hugging Face with the login button.", None
    username = getattr(profile, "username", None) or getattr(profile, "name", None)
    if not username:
        return "Login error: username not found.", None

    # 1. Fetch questions
    questions_url = f"{DEFAULT_API_URL}/questions"
    try:
        resp = requests.get(questions_url, timeout=15)
        resp.raise_for_status()
        questions = resp.json()
    except Exception as e:
        # UI boundary: report any network/HTTP/JSON failure as a status message.
        return f"Error fetching questions: {e}", None

    # A JSON body that is not a list (e.g. an error object) cannot be iterated
    # as questions; report it instead of crashing further down.
    if not isinstance(questions, list):
        return (
            f"Error fetching questions: expected a list, got {type(questions).__name__}",
            None,
        )

    # 2. Run agent on each question
    agent = BasicAgent()
    results, payload = [], []
    for q in questions:
        # Skip malformed entries rather than failing on ``q.get``.
        if not isinstance(q, dict):
            continue
        task_id = q.get("task_id")
        text = q.get("question")
        if not task_id or not text:
            continue
        try:
            ans = agent(text)
        except Exception as e:
            # Best effort: one failing question must not abort the whole run.
            ans = f"ERROR: {e}"
        results.append({"Task ID": task_id, "Question": text, "Answer": ans})
        payload.append({"task_id": task_id, "submitted_answer": ans})

    if not payload:
        return "Agent returned no answers.", pd.DataFrame(results)

    # 3. Submit answers
    # NOTE(review): SPACE_ID may be unset (e.g. outside a Space), in which case
    # the agent_code link contains "None" -- confirm whether that is acceptable.
    space_id = os.getenv("SPACE_ID")
    submit_url = f"{DEFAULT_API_URL}/submit"
    submission = {
        "username": username.strip(),
        "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main",
        "answers": payload,
    }
    try:
        sub_resp = requests.post(submit_url, json=submission, timeout=60)
        sub_resp.raise_for_status()
        data = sub_resp.json()
        status = (
            f"Submission Successful!\n"
            f"User: {data.get('username')}\n"
            f"Score: {data.get('score')}% ({data.get('correct_count')}/{data.get('total_attempted')})\n"
            f"Message: {data.get('message')}"
        )
    except Exception as e:
        # The results table is still returned so the answers are not lost.
        status = f"Submission Failed: {e}"

    return status, pd.DataFrame(results)


def test_random_question(profile: gr.OAuthProfile | None):
    """
    Fetch a single random question and return it together with the agent's answer.

    Args:
        profile: OAuth profile of the logged-in user, or None when nobody is
            logged in.

    Returns:
        A ``(question, answer)`` tuple. When the user is not logged in or
        anything fails, the first element carries the message and the second
        is an empty string.
    """
    if not profile:
        return "Please login to Hugging Face with the login button.", ""
    try:
        resp = requests.get(f"{DEFAULT_API_URL}/random-question", timeout=15)
        # Surface HTTP errors instead of parsing an error body as a question.
        resp.raise_for_status()
        q = resp.json()
        question = q.get("question", "") if isinstance(q, dict) else ""
        if not question:
            # Do not run the agent on an empty prompt.
            return "Error during test: no question returned by the API.", ""
        ans = BasicAgent()(question)
        return question, ans
    except Exception as e:
        # UI boundary: network, JSON and agent failures are all shown to the user.
        return f"Error during test: {e}", ""

# --- Gradio Interface ---
with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions:**
        1. Clone this space and define your agent logic in `tools.py`.
        2. Log in with your Hugging Face account using the login button below.
        3. Use **Run Evaluation & Submit All Answers** or **Test Random Question**.
        """
    )

    login = gr.LoginButton()
    run_btn = gr.Button("Run Evaluation & Submit All Answers")
    test_btn = gr.Button("Test Random Question")

    status_out = gr.Textbox(label="Status / Result", lines=5, interactive=False)
    table_out = gr.DataFrame(label="Full Results Table", wrap=True)
    question_out = gr.Textbox(label="Random Question", lines=3, interactive=False)
    answer_out = gr.Textbox(label="Agent Answer", lines=3, interactive=False)

    # Wire buttons to callbacks. Gradio fills the `gr.OAuthProfile | None`
    # parameter from the callback's type hint, so the LoginButton must not be
    # listed in `inputs`; doing so would hand the callback an extra argument.
    run_btn.click(fn=run_and_submit_all, outputs=[status_out, table_out])
    test_btn.click(fn=test_random_question, outputs=[question_out, answer_out])

if __name__ == "__main__":
    demo.launch(debug=True, share=False)