Spaces:
Runtime error
Runtime error
File size: 4,503 Bytes
10e9b7d 6a38a35 eccf8e4 6a38a35 bee5328 0e6b913 bee5328 e0cc1b7 6a38a35 bee5328 e0cc1b7 0e6b913 e0cc1b7 0e6b913 e0cc1b7 0e6b913 e0cc1b7 de8170e 0e6b913 c396a92 0e6b913 c396a92 0e6b913 6a38a35 0e6b913 e0cc1b7 6a38a35 bee5328 0e6b913 e0cc1b7 0e6b913 c396a92 0e6b913 de8170e 6a38a35 e0cc1b7 6a38a35 fd5a08b 0e6b913 bd7cd5b c396a92 41085c3 0e6b913 bd7cd5b 0e6b913 c396a92 6a38a35 0e6b913 e0cc1b7 6a38a35 de8170e 0e6388c 6a38a35 de8170e 0e6b913 e0cc1b7 0e6b913 e0cc1b7 6a38a35 0e6b913 6a38a35 e0cc1b7 de8170e 0e6b913 de8170e 21325a3 0e6b913 bd7cd5b 21325a3 bd7cd5b 6a38a35 0e6b913 6a38a35 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
import os
import gradio as gr
import requests
import pandas as pd
from tools import AnswerTool
from smolagents import CodeAgent
# --- Constants ---
# Base URL of the remote API used by the callbacks below; the `/questions`,
# `/submit`, and `/random-question` endpoints are built from it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
class BasicAgent:
    """Callable wrapper around a ``CodeAgent`` whose only tool is ``AnswerTool``."""

    def __init__(self):
        # Single custom AnswerTool, no base tools, one step, quiet output.
        # NOTE(review): the agent is created with model=None -- confirm that
        # AnswerTool alone is enough for the agent to run without a model.
        agent_options = {
            "model": None,
            "tools": [AnswerTool()],
            "add_base_tools": False,
            "max_steps": 1,
            "verbosity_level": 0,
        }
        self.agent = CodeAgent(**agent_options)

    def __call__(self, question: str) -> str:
        """Run the wrapped agent on *question* and return whatever it produces."""
        answer = self.agent.run(question)
        return answer
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetch all GAIA Level 1 questions, run the BasicAgent, submit answers, and display results.

    Args:
        profile: OAuth profile of the logged-in user, or ``None`` when nobody
            is logged in.

    Returns:
        A ``(status, table)`` tuple. ``status`` is a human-readable message.
        ``table`` is a ``pandas.DataFrame`` with one row per answered question
        (columns "Task ID", "Question", "Answer"), or ``None`` when the run
        stopped before any question was processed.
    """
    if not profile:
        return "Please login to Hugging Face with the login button.", None
    username = getattr(profile, "username", None) or getattr(profile, "name", None)
    if not username:
        return "Login error: username not found.", None

    # Only used to build the link to this app's code in the submission; when
    # the variable is unset the link contains the literal text "None".
    space_id = os.getenv("SPACE_ID")

    # 1. Fetch questions
    questions_url = f"{DEFAULT_API_URL}/questions"
    try:
        resp = requests.get(questions_url, timeout=15)
        resp.raise_for_status()
        questions = resp.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None
    # A JSON object or null here would otherwise crash the loop below with an
    # uncaught TypeError/AttributeError.
    if not isinstance(questions, list):
        return "Error fetching questions: unexpected response format.", None

    # 2. Run agent on each question
    agent = BasicAgent()
    results, payload = [], []
    for q in questions:
        if not isinstance(q, dict):
            continue  # skip malformed entries instead of failing the whole run
        task_id = q.get("task_id")
        text = q.get("question")
        if not task_id or not text:
            continue
        try:
            ans = agent(text)
        except Exception as e:
            # Keep going: a failure on one question is recorded as its answer.
            ans = f"ERROR: {e}"
        results.append({"Task ID": task_id, "Question": text, "Answer": ans})
        payload.append({"task_id": task_id, "submitted_answer": ans})
    if not payload:
        return "Agent returned no answers.", pd.DataFrame(results)

    # 3. Submit answers
    submit_url = f"{DEFAULT_API_URL}/submit"
    submission = {
        "username": username.strip(),
        "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main",
        "answers": payload,
    }
    try:
        sub_resp = requests.post(submit_url, json=submission, timeout=60)
        sub_resp.raise_for_status()
        data = sub_resp.json()
        status = (
            f"Submission Successful!\n"
            f"User: {data.get('username')}\n"
            f"Score: {data.get('score')}% ({data.get('correct_count')}/{data.get('total_attempted')})\n"
            f"Message: {data.get('message')}"
        )
    except Exception as e:
        status = f"Submission Failed: {e}"
    return status, pd.DataFrame(results)
def test_random_question(profile: gr.OAuthProfile | None):
    """
    Fetch a single random GAIA question and return the agent's answer.

    Args:
        profile: OAuth profile of the logged-in user, or ``None`` when nobody
            is logged in.

    Returns:
        A ``(question, answer)`` tuple. On any failure the first element
        carries the error message and the second is an empty string.
    """
    if not profile:
        return "Please login to Hugging Face with the login button.", ""
    try:
        resp = requests.get(f"{DEFAULT_API_URL}/random-question", timeout=15)
        # Surface HTTP errors instead of parsing an error body as a question.
        resp.raise_for_status()
        q = resp.json()
        question = q.get("question", "")
        if not question:
            # Nothing to ask: do not run the agent on an empty prompt.
            return "Error during test: no question returned.", ""
        ans = BasicAgent()(question)
        return question, ans
    except Exception as e:
        return f"Error during test: {e}", ""
# --- Gradio Interface ---
with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown(
        """
**Instructions:**
1. Clone this space and define your agent logic in `tools.py`.
2. Log in with your Hugging Face account using the login button below.
3. Use **Run Evaluation & Submit All Answers** or **Test Random Question**.
"""
    )
    login = gr.LoginButton()
    run_btn = gr.Button("Run Evaluation & Submit All Answers")
    test_btn = gr.Button("Test Random Question")
    status_out = gr.Textbox(label="Status / Result", lines=5, interactive=False)
    table_out = gr.DataFrame(label="Full Results Table", wrap=True)
    question_out = gr.Textbox(label="Random Question", lines=3, interactive=False)
    answer_out = gr.Textbox(label="Agent Answer", lines=3, interactive=False)
    # Wire buttons to callbacks. Gradio injects the OAuth profile based on the
    # callbacks' `gr.OAuthProfile` type hint, so the LoginButton must NOT be
    # listed in `inputs` -- doing so would pass the button's value as an extra
    # positional argument to these one-parameter functions.
    run_btn.click(fn=run_and_submit_all, outputs=[status_out, table_out])
    test_btn.click(fn=test_random_question, outputs=[question_out, answer_out])

if __name__ == "__main__":
    demo.launch(debug=True, share=False)
|