File size: 3,533 Bytes
10e9b7d
6e92f6f
10e9b7d
eccf8e4
3c4371f
4f957d6
6576efa
759cedb
4f957d6
10e9b7d
6576efa
 
4f957d6
 
 
 
 
 
 
 
 
 
 
91cad6f
31243f4
c8bf6ed
 
6576efa
c8bf6ed
2d924bf
4f957d6
 
f86bd24
 
91cad6f
f86bd24
4f957d6
f86bd24
c8bf6ed
f86bd24
4021bf3
4f957d6
6576efa
c8bf6ed
 
4f957d6
e80aab9
31243f4
6576efa
31243f4
 
6576efa
4f957d6
 
 
 
 
 
 
c8bf6ed
759cedb
4f957d6
 
c8bf6ed
 
759cedb
 
 
4f957d6
f8e24f8
4f957d6
 
 
 
 
 
 
c8bf6ed
 
f8e24f8
c8bf6ed
 
31243f4
759cedb
 
 
 
 
 
4f957d6
 
 
 
 
 
 
 
 
 
 
c8bf6ed
31243f4
c8bf6ed
e80aab9
c8bf6ed
7e4a06b
c8bf6ed
4f957d6
c8bf6ed
 
e80aab9
 
3c4371f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import os
import time
import gradio as gr
import requests
import pandas as pd
import tiktoken  # For token length

from smolagents import CodeAgent, OpenAIServerModel
from smolagents.tools import code_tools  # Includes WebSearchTool and more

# Constants
# Base URL of the scoring service; the "/questions" and "/submit" paths are
# appended to it when fetching tasks and posting answers.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# Questions whose token count exceeds this value are skipped instead of being
# sent to the agent.
MAX_TOKENS = 3000  # Limit to avoid GPT-4 token overflow

# Token counting helper
def token_length(text: str, model: str = "gpt-4") -> int:
    """Return how many tokens *text* occupies for *model*.

    The count comes from the tiktoken encoding registered for *model*. If
    that lookup or the encoding step fails for any reason, the length is
    estimated as one token per four characters instead.
    """
    try:
        tokens = tiktoken.encoding_for_model(model).encode(text)
    except Exception:
        # Best-effort fallback: rough estimate of four characters per token.
        return len(text) // 4
    return len(tokens)

# Main agent class
class SmartGAIAAgent:
    """Callable wrapper around a smolagents ``CodeAgent`` using the gpt-4 model.

    Calling an instance with a question returns the agent's answer as a
    whitespace-stripped string, or the literal ``"error"`` when the run fails
    or produces no answer.
    """

    def __init__(self):
        """Build the underlying agent.

        Raises:
            ValueError: If the ``OPENAI_API_KEY`` environment variable is
                unset or empty.
        """
        key = os.getenv("OPENAI_API_KEY")
        if not key:
            raise ValueError("Missing OPENAI_API_KEY")
        model = OpenAIServerModel(model_id="gpt-4", api_key=key)
        self.agent = CodeAgent(
            # NOTE(review): code_tools() is expected to supply the web-search
            # and code-execution tools — confirm against the installed
            # smolagents version.
            tools=code_tools(),
            model=model
        )

    def __call__(self, question: str) -> str:
        """Run the agent on *question* and return its answer as text.

        Args:
            question: The task prompt handed to the agent.

        Returns:
            The stripped string form of the agent's answer, or ``"error"`` if
            the agent raised or returned ``None``.
        """
        try:
            result = self.agent.run(question)
        except Exception as e:
            print("Agent error:", e)
            return "error"

        if result is None:
            print("Agent error:", "agent returned no answer")
            return "error"

        # The agent's final answer is not guaranteed to be a string (it may be
        # a number, list, ...); convert before stripping so a valid non-string
        # answer is not discarded as an error.
        return str(result).strip()

# Run agent + submit answers to API
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Answer every eligible question with the agent and submit the batch.

    Questions are fetched from ``DEFAULT_API_URL``; entries that are malformed,
    longer than ``MAX_TOKENS`` tokens, or that mention audio/image/video
    keywords are skipped. The remaining answers are posted to the ``/submit``
    endpoint.

    Args:
        profile: OAuth profile of the logged-in user, or ``None`` when nobody
            is logged in.

    Returns:
        A ``(status, table)`` tuple. ``status`` is a human-readable message.
        ``table`` is a ``pandas.DataFrame`` with one row per answered question,
        or ``None`` when the run stopped before the question loop (no login,
        agent construction failure, or question fetch failure).
    """
    username = profile.username if profile else None
    if not username:
        return "Please login to Hugging Face", None

    try:
        agent = SmartGAIAAgent()
    except Exception as e:
        return f"Error initializing agent: {e}", None

    # Fetch questions
    try:
        resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
        resp.raise_for_status()
        questions = resp.json()
    except Exception as e:
        return f"Failed to fetch questions: {e}", None

    # A JSON body that is not a list (e.g. an error object) would otherwise be
    # iterated key by key and crash on ``item.get`` below.
    if not isinstance(questions, list):
        return (
            "Failed to fetch questions: unexpected response type "
            f"{type(questions).__name__}",
            None,
        )

    skip_kw = ('.mp3', '.wav', '.png', '.jpg', 'youtube', 'video', 'watch', 'listen')
    payload, logs = [], []

    for item in questions:
        if not isinstance(item, dict):
            continue
        tid = item.get("task_id")
        q = item.get("question", "")
        # Non-string questions cannot be tokenized or lower-cased; skip them
        # together with missing ids and empty questions.
        if not tid or not isinstance(q, str) or not q:
            continue
        if token_length(q) > MAX_TOKENS:
            continue
        lowered = q.lower()
        if any(k in lowered for k in skip_kw):
            continue

        try:
            ans = agent(q)
        except Exception as e:
            print(f"[Error Task {tid}] {e}")
            ans = "error"

        payload.append({"task_id": tid, "submitted_answer": ans})
        logs.append({"Task ID": tid, "Question": q, "Submitted Answer": ans})

    if not payload:
        return "No valid questions to submit.", pd.DataFrame(logs)

    sub = {
        "username": username,
        # NOTE: when SPACE_ID is unset the URL contains the literal "None".
        "agent_code": f"https://huggingface.co/spaces/{os.getenv('SPACE_ID')}/tree/main",
        "answers": payload
    }

    try:
        resp = requests.post(f"{DEFAULT_API_URL}/submit", json=sub, timeout=60)
        resp.raise_for_status()
        result = resp.json()
        score = result.get("score")
        correct = result.get("correct_count")
        attempted = result.get("total_attempted")
        status = f"Score: {score}% ({correct}/{attempted})"
    except Exception as e:
        status = f"Submission failed: {e}"

    return status, pd.DataFrame(logs)

# Gradio UI
# Components are created in this order: a title, a login button, the run
# button, a textbox for the status message, and a table for the per-question
# log returned by run_and_submit_all.
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Agent")
    gr.LoginButton()
    run_btn = gr.Button("Run & Submit")
    status = gr.Textbox(lines=4, label="Result")
    table = gr.DataFrame()
    # The handler's two return values fill `status` and `table`. No inputs are
    # wired explicitly; presumably Gradio supplies the gr.OAuthProfile argument
    # from the handler's type annotation — confirm against the Gradio docs.
    run_btn.click(run_and_submit_all, outputs=[status, table])

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(debug=True, share=False)