File size: 4,470 Bytes
5fa4369
05b8101
10e9b7d
61c2ff2
4097d7c
6a52f23
61c2ff2
 
 
1381703
61c2ff2
 
 
 
 
3635d36
abf0257
61c2ff2
8fd0023
61c2ff2
 
 
a54e373
61c2ff2
 
 
6a52f23
61c2ff2
 
 
 
 
 
 
 
 
 
 
 
 
6a52f23
61c2ff2
 
 
 
 
 
 
6a52f23
 
 
61c2ff2
 
 
6a52f23
61c2ff2
bc758d9
61c2ff2
 
ef65c0f
61c2ff2
 
6a52f23
61c2ff2
6a52f23
61c2ff2
6a52f23
61c2ff2
 
 
 
 
 
 
9e16e60
61c2ff2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9e16e60
 
61c2ff2
9e16e60
94feb70
9e16e60
 
 
61c2ff2
 
 
 
9e16e60
 
 
61c2ff2
 
 
 
 
 
9e16e60
 
61c2ff2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6a52f23
36b55d3
c2f416b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137


import os
from typing import Optional

import gradio as gr
import openai
import pandas as pd
import requests
from smolagents import CodeAgent, DuckDuckGoSearchTool
from smolagents.models import OpenAIServerModel

# --- Setup ---
# Module-level configuration: everything below runs once at import time and
# is shared by every evaluation run started from the UI.

# The OpenAI key is mandatory; fail at import with an actionable message
# rather than on the first agent call.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    raise RuntimeError("Please set OPENAI_API_KEY in your Space secrets.")
# Also set the key on the openai package itself, in addition to passing it to
# the model wrapper below.
openai.api_key = OPENAI_API_KEY

# Base URL of the scoring service; "/questions" and "/submit" are appended to
# it in run_and_submit_all.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# Model id can be overridden through the environment; defaults to "gpt-4o".
OPENAI_MODEL_ID = os.getenv("OPENAI_MODEL_ID", "gpt-4o")

# A single agent instance (model + web search tool) reused for all questions.
model = OpenAIServerModel(model_id=OPENAI_MODEL_ID, api_key=OPENAI_API_KEY)
search_tool = DuckDuckGoSearchTool()
agent = CodeAgent(tools=[search_tool], model=model)

# Instruction preamble placed in front of every question before it is handed
# to the agent (see run_and_submit_all). It asks for a terse answer introduced
# by a "FINAL ANSWER:" marker.
answer_formatting_prompt = """
You are a smart assistant with access to tools like DuckDuckGoSearchTool(query: str). 
Think step-by-step, then output your response.

IMPORTANT: 
FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers/strings. 
Do NOT include commas, $ or % unless asked.
Write digits plainly (e.g., '10', not 'ten').

Use format:
FINAL ANSWER: <your_answer>
"""

def show_profile(profile):
    """Return a one-line login status message for the given login payload.

    Args:
        profile: Whatever the UI supplies as the current login state. Accepted
            shapes are ``None`` / any falsy value (nobody logged in), an object
            exposing a ``username`` attribute, or a dict with a ``"username"``
            key. Any other value (for example a plain string) is treated as
            "not logged in" instead of raising.

    Returns:
        The "not logged in" warning when no username can be determined,
        otherwise a confirmation message naming the user.
    """
    if not profile:
        return "⚠️ Not logged in."

    # Prefer the attribute form, then fall back to the mapping key. Indexing
    # blindly with ['username'] raised TypeError for strings and KeyError for
    # mappings that lack that key.
    username = getattr(profile, "username", None)
    if username is None and isinstance(profile, dict):
        username = profile.get("username")

    if not username:
        return "⚠️ Not logged in."
    return f"✅ Logged in as: {username}"

def _username_from_login(login_info):
    """Return the username carried by a login payload, or None if there is none."""
    username = getattr(login_info, "username", None)
    if username is None and isinstance(login_info, dict):
        username = login_info.get("username")
    return username or None


def _clean_final_answer(raw_answer):
    """Return the agent output as plain text without the "FINAL ANSWER:" marker."""
    import re  # local import: keeps this block independent of the file header

    text = "" if raw_answer is None else str(raw_answer)
    # Keep only what follows the last marker; text without a marker is
    # returned unchanged apart from surrounding whitespace.
    return re.split(r"final answer\s*:", text, flags=re.IGNORECASE)[-1].strip()


def run_and_submit_all(login_info):
    """Fetch all questions, answer each with the agent, and submit the answers.

    Args:
        login_info: Login payload from the UI. ``None`` / falsy means nobody is
            logged in. Otherwise it must expose the username either as a
            ``username`` attribute or under a ``"username"`` key; any payload
            without a username is handled like "not logged in".

    Returns:
        A ``(status_message, results_dataframe)`` tuple. The dataframe has one
        row per attempted question with the columns "Task ID", "Question" and
        "Submitted Answer"; it is empty when the run stops before any question
        is attempted.

    Errors while fetching, answering, or submitting are reported in the status
    message rather than raised, so the UI always receives a displayable result.
    """
    username = _username_from_login(login_info)
    if not username:
        return "⚠️ Please log in with your Hugging Face account.", pd.DataFrame()

    space_id = os.getenv("SPACE_ID", "")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    try:
        resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
        resp.raise_for_status()
        questions = resp.json()
    except Exception as e:
        return f"❌ Error fetching questions: {e}", pd.DataFrame()

    # The loop below needs a list of question records; anything else (an error
    # object, null, ...) is reported instead of crashing mid-iteration.
    if not isinstance(questions, list):
        return "❌ Error fetching questions: unexpected response format", pd.DataFrame()

    prompt_prefix = answer_formatting_prompt.strip()
    results, payload = [], []
    for item in questions:
        if not isinstance(item, dict):
            continue
        task_id = item.get("task_id")
        question = item.get("question")
        if not task_id or not question:
            continue
        prompt = prompt_prefix + f"\n\nQUESTION: {str(question).strip()}"
        try:
            # The prompt asks the model to prefix its reply with
            # "FINAL ANSWER:"; strip that marker so only the answer itself is
            # submitted, and coerce to str so the payload is JSON-serialisable.
            # NOTE(review): the scoring service is understood to compare
            # answers by exact match — confirm against its documentation.
            answer = _clean_final_answer(agent.run(prompt))
        except Exception as e:
            # Best effort: one failing question must not abort the whole run.
            answer = f"AGENT ERROR: {e}"
        results.append({"Task ID": task_id, "Question": question, "Submitted Answer": answer})
        payload.append({"task_id": task_id, "submitted_answer": answer})

    if not payload:
        return "⚠️ Agent returned no answers.", pd.DataFrame(results)

    try:
        post = requests.post(
            f"{DEFAULT_API_URL}/submit",
            json={"username": username, "agent_code": agent_code, "answers": payload},
            timeout=60,
        )
        post.raise_for_status()
        result = post.json()
    except Exception as e:
        return f"❌ Submission failed: {e}", pd.DataFrame(results)

    score = result.get("score", "N/A")
    correct = result.get("correct_count", "?")
    attempted = result.get("total_attempted", "?")
    message = result.get("message", "")
    return (
        f"✅ Submission Successful!\nUser: {username}\nScore: {score}% "
        f"({correct}/{attempted})\nMessage: {message}",
        pd.DataFrame(results),
    )


# --- UI ---
# Gradio hands the logged-in user's profile to a callback through a parameter
# annotated with gr.OAuthProfile; it is not the value of the LoginButton
# component. Listing the button under `inputs` only passes its label text, so
# the adapters below receive the injected profile and forward it to the
# handlers as the {"username": ...} mapping they read.
# NOTE(review): profile injection presumably requires OAuth to be enabled for
# the Space (hf_oauth metadata) — confirm in the Space configuration.


def _login_payload(profile):
    """Convert an injected OAuth profile into the mapping the handlers expect."""
    if profile is None:
        return None
    return {"username": profile.username}


def _show_login_status(profile: Optional[gr.OAuthProfile]):
    """Adapter: report the login status of the injected OAuth profile."""
    return show_profile(_login_payload(profile))


def _run_for_logged_in_user(profile: Optional[gr.OAuthProfile]):
    """Adapter: run the evaluation on behalf of the injected OAuth profile."""
    return run_and_submit_all(_login_payload(profile))


with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner")

    login_button = gr.LoginButton()
    login_status = gr.Textbox(label="Login Status")

    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    # Show the login status as soon as the page loads (the OAuth flow returns
    # to this page after sign-in), and refresh it when the button is clicked.
    demo.load(fn=_show_login_status, inputs=None, outputs=[login_status])
    login_button.click(fn=_show_login_status, inputs=None, outputs=[login_status])

    # Run the evaluation for whoever is currently logged in.
    run_button.click(fn=_run_for_logged_in_user, inputs=None, outputs=[status_output, results_table])


if __name__ == "__main__":
    demo.launch()


#import gradio as gr

#def show_profile(profile):
 #   if not profile:
   #     return "⚠️ Not logged in."
  #  return f"✅ Logged in as: {profile['username']}"

 # with gr.Blocks() as demo:
   #   gr.Markdown("## 🔐 Hugging Face OAuth Login")

   #   login_button = gr.LoginButton()
   #   output = gr.Textbox(label="Login Status")

    #  login_button.click(fn=show_profile, inputs=[login_button], outputs=[output])

 # demo.launch()