File size: 6,722 Bytes
10e9b7d
 
eccf8e4
3c4371f
10e9b7d
936e8f7
 
 
e80aab9
3db6293
936e8f7
e80aab9
936e8f7
31243f4
936e8f7
 
31243f4
7d65c66
936e8f7
 
 
 
 
 
 
 
 
 
 
 
 
7e4a06b
31243f4
 
e80aab9
936e8f7
 
 
31243f4
936e8f7
31243f4
936e8f7
 
 
 
36ed51a
936e8f7
3c4371f
7d65c66
31243f4
eccf8e4
936e8f7
7d65c66
31243f4
 
3c4371f
 
31243f4
e80aab9
936e8f7
 
 
3c4371f
936e8f7
 
7d65c66
936e8f7
e80aab9
b177367
7d65c66
 
3c4371f
31243f4
 
 
 
 
 
 
7d65c66
 
 
31243f4
 
7d65c66
31243f4
 
 
 
b177367
7d65c66
3c4371f
31243f4
e80aab9
7d65c66
31243f4
e80aab9
7d65c66
e80aab9
 
31243f4
e80aab9
 
3c4371f
 
 
e80aab9
 
31243f4
 
e80aab9
3c4371f
e80aab9
 
3c4371f
e80aab9
7d65c66
3c4371f
31243f4
7d65c66
31243f4
e80aab9
31243f4
 
 
 
e80aab9
 
936e8f7
e80aab9
936e8f7
0ee0419
e514fd7
 
936e8f7
 
 
e514fd7
936e8f7
e514fd7
e80aab9
936e8f7
7e4a06b
e80aab9
31243f4
e80aab9
9088b99
7d65c66
e80aab9
31243f4
 
936e8f7
31243f4
e80aab9
 
 
3c4371f
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
import os
import gradio as gr
import requests
import pandas as pd

# --- Import your new agent ---
from agent import GeminiAgent

# --- Constants ---
# Base URL of the scoring service; run_and_submit_all appends "/questions"
# and "/submit" to it to build the endpoints it calls.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# The only Hugging Face username run_and_submit_all accepts; any other
# logged-in user gets an access-denied message instead of a run.
MY_HF_USERNAME = "benjipeng"

def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetch all questions, run the GeminiAgent on them, submit the answers,
    and report the outcome. Restricted to the user named in MY_HF_USERNAME.

    Args:
        profile: OAuth profile of the logged-in Hugging Face user, or None
            when nobody is logged in.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a pandas
        DataFrame with the columns "Task ID", "Question" and
        "Submitted Answer" (one row per question the agent was run on), or
        None when the run stopped before the agent was invoked.

    Expected failures (not logged in, wrong user, agent initialisation error,
    network/HTTP/JSON errors) are reported through the status message rather
    than raised.
    """
    if not profile:
        return "Please Login to Hugging Face with the button to run the evaluation.", None

    username = profile.username
    print(f"User logged in: {username}")

    # --- Restrict submission to a specific user ---
    if username != MY_HF_USERNAME:
        print(f"Access denied for user: {username}. Allowed user is {MY_HF_USERNAME}.")
        return f"Error: This Space is configured for a specific user. Access denied for '{username}'.", None

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate the agent.
    # Any constructor failure (e.g. missing credentials) is surfaced to the UI
    # instead of crashing the handler.
    print("Instantiating agent...")
    try:
        agent = GeminiAgent()
    except Exception as e:
        error_msg = f"Error initializing agent: {e}"
        print(error_msg)
        return error_msg, None

    # The code link is derived from the SPACE_ID environment variable; when it
    # is unset the link contains "None", so make that visible in the logs.
    space_id = os.getenv("SPACE_ID")
    if not space_id:
        print("Warning: SPACE_ID is not set; the submitted code link will not point to a real Space.")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(f"Code link for submission: {agent_code}")

    # 2. Fetch Questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=20)
        response.raise_for_status()
        questions_data = response.json()
    except requests.exceptions.JSONDecodeError as e:
        # Must precede the RequestException handler: requests' JSONDecodeError
        # is itself a RequestException, so the reverse order would make this
        # branch unreachable.
        error_msg = f"Error decoding server response for questions: {e}"
        print(error_msg)
        print(f"Response text: {response.text[:500]}")
        return error_msg, None
    except requests.exceptions.RequestException as e:
        error_msg = f"Error fetching questions: {e}"
        print(error_msg)
        return error_msg, None

    # The loop below needs a non-empty list of dict items.
    if not isinstance(questions_data, list) or not questions_data:
        print("Fetched questions list is empty or not a list.")
        return "Fetched questions list is empty or invalid format.", None
    print(f"Fetched {len(questions_data)} questions.")

    # 3. Run the agent on every well-formed question.
    results_log = []
    answers_payload = []
    print(f"Running agent on {len(questions_data)} questions...")
    for item in questions_data:
        task_id = item.get("task_id") if isinstance(item, dict) else None
        question_text = item.get("question") if isinstance(item, dict) else None
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue
        try:
            submitted_answer = agent(question_text)
        except Exception as e:
            # A failure on one question is logged in the table but must not
            # abort the remaining questions; it is not submitted as an answer.
            print(f"Error running agent on task {task_id}: {e}")
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
            continue
        answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
        results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})

    results_df = pd.DataFrame(results_log)
    if not answers_payload:
        return "Agent did not produce any answers to submit.", results_df

    # 4. Prepare Submission
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)

    # 5. Submit
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()
        except requests.exceptions.JSONDecodeError:
            error_detail += f" Response: {e.response.text[:500]}"
        else:
            # The error body may be valid JSON without being an object.
            if isinstance(error_json, dict):
                detail = error_json.get('detail', e.response.text)
            else:
                detail = e.response.text
            error_detail += f" Detail: {detail}"
        status_message = f"Submission Failed: {error_detail}"
        print(status_message)
        return status_message, results_df
    except requests.exceptions.JSONDecodeError as e:
        # The POST succeeded (2xx) but the reply was not JSON; this is not a
        # network error, so report it separately.
        status_message = f"Submission status unknown: server reply was not valid JSON - {e}"
        print(status_message)
        return status_message, results_df
    except requests.exceptions.RequestException as e:
        status_message = f"Submission Failed: Network error - {e}"
        print(status_message)
        return status_message, results_df

    final_status = (
        f"Submission Successful!\n"
        f"User: {result_data.get('username')}\n"
        f"Overall Score: {result_data.get('score', 'N/A')}% "
        f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
        f"Message: {result_data.get('message', 'No message received.')}"
    )
    print("Submission successful.")
    return final_status, results_df


# --- Build the Gradio interface using Blocks ---
# Layout, top to bottom: title, instructions, login button, run button,
# status textbox, results table. The run button is wired to
# run_and_submit_all, whose two return values fill the textbox and the table.
with gr.Blocks() as demo:
    gr.Markdown("# Gemini Agent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions:**
        1.  This Space is configured to run a Gemini-1.5-Pro based agent.
        2.  Log in to your Hugging Face account using the button below. Submission is restricted to the Space owner.
        3.  Click 'Run Evaluation & Submit All Answers' to fetch questions, run the agent, submit answers, and see the score.
        ---
        **Note:** The process can take several minutes as the agent answers each question individually.
        """
    )
    # Hugging Face login button. Its return value is not wired to the handler;
    # the handler receives the profile through its gr.OAuthProfile annotation
    # (see the click() call below).
    gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit All Answers")

    # Read-only output widgets filled by run_and_submit_all's return tuple.
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    run_button.click(
        fn=run_and_submit_all,
        # No `inputs=` is given: per Gradio's OAuth support, the `profile`
        # parameter is supplied because it is annotated `gr.OAuthProfile | None`
        # (None when the user is not logged in), not because of its position.
        outputs=[status_output, results_table]
    )

if __name__ == "__main__":
    # Script entry point: print a start-up banner, then launch the Blocks app
    # with debug=True and share=False.
    _rule = "-" * 30
    print(f"\n{_rule} App Starting {_rule}")
    demo.launch(debug=True, share=False)