"""Gradio app that runs a smolagents CodeAgent over the questions served by
the scoring API and submits the collected answers."""

import os
import gradio as gr
import requests
import pandas as pd
from smolagents import CodeAgent, DuckDuckGoSearchTool
from smolagents.models import OpenAIServerModel

# Constants
# Base URL of the scoring service. run_and_submit_all appends "/questions"
# (to fetch the tasks) and "/submit" (to post the answers) to this value.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# === Define the Smol Agent ===
class MyAgent:
    """Callable question-answering agent built on a smolagents ``CodeAgent``.

    The agent uses an ``OpenAIServerModel`` ("gpt-4") and a single
    ``DuckDuckGoSearchTool``. Calling the instance with a question returns
    the answer as a plain string with any ``FINAL ANSWER:`` prefix removed.
    """

    # Answer-format instructions sent to the model together with every question.
    ANSWER_INSTRUCTIONS = """You are a general AI assistant. I will ask you a question.
Report your thoughts, and finish your answer with the following template:
FINAL ANSWER: [YOUR FINAL ANSWER].
YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list 
of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."""

    # Marker that ANSWER_INSTRUCTIONS asks the model to put before its answer.
    FINAL_ANSWER_MARKER = "FINAL ANSWER:"

    def __init__(self):
        """Create the model and the search-enabled agent."""
        self.model = OpenAIServerModel(model_id="gpt-4")  # or "gpt-3.5-turbo"
        # NOTE(review): the instructions are prepended to each task in
        # __call__ instead of being passed as a `system_message=` keyword.
        # The smolagents agent constructors do not appear to define that
        # keyword, so passing it would raise TypeError at construction time
        # -- confirm against the installed smolagents version.
        self.agent = CodeAgent(
            tools=[DuckDuckGoSearchTool()],
            model=self.model,
        )

    def __call__(self, question: str) -> str:
        """Answer *question* and return only the final answer text.

        Args:
            question: The question to hand to the agent.

        Returns:
            The agent's result converted to ``str``; when the text contains
            ``FINAL ANSWER:``, only the part after the last marker is kept.
        """
        task = f"{self.ANSWER_INSTRUCTIONS}\n\nQuestion: {question}"
        return self._extract_final_answer(self.agent.run(task))

    @staticmethod
    def _extract_final_answer(result: object) -> str:
        """Convert an agent result to text and drop the answer-template prefix."""
        # The agent may hand back a non-string object (e.g. a number), so
        # coerce first to honour the ``-> str`` contract of __call__.
        text = str(result).strip()
        _, marker, tail = text.rpartition(MyAgent.FINAL_ANSWER_MARKER)
        return tail.strip() if marker else text

# === Submission Logic ===
def run_and_submit_all(profile: gr.OAuthProfile | None) -> tuple[str, pd.DataFrame | None]:
    """Run the agent on every fetched question and submit the answers.

    Args:
        profile: The logged-in user's profile, or ``None`` when nobody is
            logged in (presumably supplied by Gradio from the login button
            session -- confirm against the Gradio OAuth docs).

    Returns:
        A ``(status, results)`` pair. ``status`` is a human-readable message.
        ``results`` is a DataFrame with one row per attempted question
        ("Task ID", "Question", "Submitted Answer"), or ``None`` when the
        run stopped before any question was attempted.
    """
    if not profile:
        return "Please Login to Hugging Face with the button.", None
    username = profile.username
    print(f"User logged in: {username}")

    space_id = os.getenv("SPACE_ID")
    if not space_id:
        # The link is still sent as before; this only makes the problem visible.
        print("SPACE_ID is not set; the submitted agent_code link will not point to a real Space.")

    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    questions_url = f"{DEFAULT_API_URL}/questions"
    submit_url = f"{DEFAULT_API_URL}/submit"

    # Broad catches below are deliberate: this is a UI callback, and any
    # failure should surface as a status message rather than a traceback.
    try:
        agent = MyAgent()
    except Exception as e:
        return f"Error initializing agent: {e}", None

    # Fetch Questions
    try:
        res = requests.get(questions_url, timeout=15)
        res.raise_for_status()
        questions_data = res.json()
    except Exception as e:
        return f"Failed to fetch questions: {e}", None

    # Anything other than a non-empty list cannot be iterated as questions;
    # report it instead of failing later on ``item.get``.
    if not isinstance(questions_data, list) or not questions_data:
        return "Fetched questions list is empty or invalid format.", None

    results_log = []
    answers_payload = []

    for item in questions_data:
        if not isinstance(item, dict):
            continue
        task_id = item.get("task_id")
        question = item.get("question")
        if not task_id or question is None:
            continue
        try:
            answer = agent(question)
        except Exception as e:
            # A failed question is shown in the table but not submitted.
            results_log.append({"Task ID": task_id, "Question": question, "Submitted Answer": f"ERROR: {e}"})
            continue
        results_log.append({"Task ID": task_id, "Question": question, "Submitted Answer": answer})
        answers_payload.append({"task_id": task_id, "submitted_answer": answer})

    if not answers_payload:
        return "No answers generated.", pd.DataFrame(results_log)

    submission_data = {
        "username": username,
        "agent_code": agent_code,
        "answers": answers_payload,
    }

    try:
        res = requests.post(submit_url, json=submission_data, timeout=60)
        res.raise_for_status()
        result_data = res.json()
        summary = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Score: {result_data.get('score', '?')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')})\n"
            f"Message: {result_data.get('message', '')}"
        )
        return summary, pd.DataFrame(results_log)
    except Exception as e:
        return f"Submission failed: {e}", pd.DataFrame(results_log)

# === Gradio UI ===
# Page layout: heading, instructions, login, trigger button, then outputs.
with gr.Blocks() as demo:
    gr.Markdown("# Agent Evaluation Runner (SmolAgents)")
    gr.Markdown(
        "\n"
        "**Instructions:**\n"
        "1. Clone this space and customize your agent.\n"
        "2. Log in with Hugging Face.\n"
        "3. Click 'Run Evaluation' to answer and submit.\n"
    )

    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(interactive=False, lines=4, label="Status")
    results_table = gr.DataFrame(wrap=True, label="Results")

    # The callback takes no component inputs; its two return values fill
    # the status box and the results table, in that order.
    run_button.click(
        fn=run_and_submit_all,
        inputs=None,
        outputs=[status_output, results_table],
    )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    print("Launching...")
    demo.launch(debug=True)