Update app.py
Browse files
app.py
CHANGED
@@ -1,20 +1,17 @@
|
|
1 |
-
|
2 |
# This code was generated by generate_space_code.py
|
3 |
# Add this to your Hugging Face Space to use your agent's answers
|
4 |
|
5 |
# Import necessary libraries
|
6 |
-
import gradio as gr
|
7 |
-
import json
|
8 |
import os
|
|
|
9 |
import requests
|
10 |
-
from huggingface_hub import HfApi
|
11 |
import pandas as pd
|
12 |
|
13 |
# Your agent's answers
|
14 |
ANSWERS = [
|
15 |
{
|
16 |
"task_id": "8e867cd7-cff9-4e6c-867a-ff5ddc2550be",
|
17 |
-
"submitted_answer": "According to Mercedes Sosa's discography on her English Wikipedia page, she published three studio albums between 2000 and 2009: \n1.
|
18 |
},
|
19 |
{
|
20 |
"task_id": "a1e91b78-d3d8-4675-bb8d-62741b4b68a6",
|
@@ -54,7 +51,7 @@ ANSWERS = [
|
|
54 |
},
|
55 |
{
|
56 |
"task_id": "305ac316-eef6-4446-960a-92d80d542f82",
|
57 |
-
"submitted_answer": "Based on the limited information I was able to find from the provided search results, I do not have enough reliable evidence to determine if
|
58 |
},
|
59 |
{
|
60 |
"task_id": "f918266a-b3e0-4914-865d-4faa564f1aef",
|
@@ -82,7 +79,7 @@ ANSWERS = [
|
|
82 |
},
|
83 |
{
|
84 |
"task_id": "a0c07678-e491-4bbc-8f0b-07405144218f",
|
85 |
-
"submitted_answer": "Based on the information found in the provided search results, I do not have enough evidence to conclusively determine which specific pitchers have the uniform numbers before and after
|
86 |
},
|
87 |
{
|
88 |
"task_id": "7bd855d8-463d-4ed5-93ca-5fe35145f733",
|
@@ -94,124 +91,111 @@ ANSWERS = [
|
|
94 |
}
|
95 |
]
|
96 |
|
97 |
-
|
98 |
-
|
99 |
-
try:
|
100 |
-
# Try multiple methods to get the username
|
101 |
-
username = os.environ.get("SPACE_AUTHOR")
|
102 |
-
|
103 |
-
# If that doesn't work, try using the HfApi
|
104 |
-
if not username:
|
105 |
-
try:
|
106 |
-
from huggingface_hub import HfApi
|
107 |
-
api = HfApi()
|
108 |
-
user_info = api.whoami()
|
109 |
-
username = user_info.get("name", None)
|
110 |
-
except:
|
111 |
-
pass
|
112 |
-
|
113 |
-
return username
|
114 |
-
except:
|
115 |
-
return None
|
116 |
|
117 |
-
def
|
118 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
119 |
try:
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
user_info = api.whoami()
|
124 |
-
username = user_info.get("name", None)
|
125 |
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
def run_and_submit_all():
|
134 |
-
"""Run the agent on all questions and submit the answers"""
|
135 |
-
try:
|
136 |
-
# Get username directly from HfApi
|
137 |
-
from huggingface_hub import HfApi
|
138 |
-
api = HfApi()
|
139 |
-
user_info = api.whoami()
|
140 |
-
username = user_info.get("name", None)
|
141 |
|
142 |
-
|
143 |
-
return "Please log in to submit your answers.", None
|
144 |
|
145 |
-
#
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
|
|
|
|
|
|
|
|
150 |
|
151 |
-
|
152 |
-
|
153 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
154 |
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
"agent_code": code_url,
|
160 |
-
"answers": ANSWERS
|
161 |
-
}
|
162 |
|
163 |
-
|
|
|
|
|
|
|
164 |
|
165 |
-
if response.status_code == 200:
|
166 |
-
result = response.json()
|
167 |
-
|
168 |
-
# Create a DataFrame for display
|
169 |
-
data = []
|
170 |
-
for item in result.get("data", []):
|
171 |
-
data.append({
|
172 |
-
"Task ID": item.get("task_id", ""),
|
173 |
-
"Question": item.get("question", ""),
|
174 |
-
"Your Answer": item.get("submitted_answer", ""),
|
175 |
-
"Correct": item.get("is_correct", False)
|
176 |
-
})
|
177 |
-
|
178 |
-
df = pd.DataFrame(data)
|
179 |
-
|
180 |
-
# Calculate score
|
181 |
-
correct_count = sum(1 for item in result.get("data", []) if item.get("is_correct", False))
|
182 |
-
total_count = len(result.get("data", []))
|
183 |
-
score = (correct_count / total_count) * 100 if total_count > 0 else 0
|
184 |
-
|
185 |
-
submission_result = f"Score: {score:.2f}% ({correct_count}/{total_count} correct)"
|
186 |
-
|
187 |
-
return submission_result, df
|
188 |
-
else:
|
189 |
-
return f"Error: {response.status_code} - {response.text}", None
|
190 |
-
|
191 |
except Exception as e:
|
192 |
-
|
|
|
|
|
193 |
|
194 |
# Create the Gradio interface
|
195 |
with gr.Blocks() as demo:
|
196 |
gr.Markdown("# GAIA Benchmark Submission")
|
197 |
gr.Markdown("This Space submits your agent's answers to the GAIA benchmark leaderboard.")
|
198 |
|
199 |
-
|
200 |
-
login_status = gr.Textbox(label="Login Status", value=check_login_status())
|
201 |
-
refresh_btn = gr.Button("Refresh Login Status")
|
202 |
|
203 |
-
|
204 |
-
submit_btn = gr.Button("Run and Submit All")
|
205 |
|
206 |
-
|
207 |
-
result_text = gr.Textbox(label="Run Status / Submission Result")
|
208 |
|
209 |
-
|
210 |
-
result_df = gr.Dataframe(label="Questions and Agent Answers")
|
211 |
|
212 |
# Set up event handlers
|
213 |
-
|
214 |
-
|
|
|
|
|
215 |
|
216 |
# Launch the app
|
217 |
if __name__ == "__main__":
|
|
|
|
|
1 |
# This code was generated by generate_space_code.py
|
2 |
# Add this to your Hugging Face Space to use your agent's answers
|
3 |
|
4 |
# Import necessary libraries
|
|
|
|
|
5 |
import os
|
6 |
+
import gradio as gr
|
7 |
import requests
|
|
|
8 |
import pandas as pd
|
9 |
|
10 |
# Your agent's answers
|
11 |
ANSWERS = [
|
12 |
{
|
13 |
"task_id": "8e867cd7-cff9-4e6c-867a-ff5ddc2550be",
|
14 |
+
"submitted_answer": "According to Mercedes Sosa's discography on her English Wikipedia page, she published three studio albums between 2000 and 2009: \n1. Acústico (2002)\n2. Corazón Libre (2005) \n3. Cantora 1 (2009)"
|
15 |
},
|
16 |
{
|
17 |
"task_id": "a1e91b78-d3d8-4675-bb8d-62741b4b68a6",
|
|
|
51 |
},
|
52 |
{
|
53 |
"task_id": "305ac316-eef6-4446-960a-92d80d542f82",
|
54 |
+
"submitted_answer": "Based on the limited information I was able to find from the provided search results, I do not have enough reliable evidence to determine if Bartłomiej Kasprzykowski, the actor who played Roman in the Polish version of Everybody Loves Raymond, had a role in the series Magda M. I was unable to find any credible sources confirming his involvement in Magda M. or the name of a character he may have played. It's possible he did not appear in Magda M. at all. Without more definitive information, I do not have an answer to the question. I would need to do additional research in Polish-language sources or industry databases to have a chance of finding a reliable answer."
|
55 |
},
|
56 |
{
|
57 |
"task_id": "f918266a-b3e0-4914-865d-4faa564f1aef",
|
|
|
79 |
},
|
80 |
{
|
81 |
"task_id": "a0c07678-e491-4bbc-8f0b-07405144218f",
|
82 |
+
"submitted_answer": "Based on the information found in the provided search results, I do not have enough evidence to conclusively determine which specific pitchers have the uniform numbers before and after Taishō Tamai as of July 2023. The search results confirm Tamai currently plays for the Hokkaido Nippon-Ham Fighters but do not specify his uniform number or the surrounding pitchers. Without this key piece of information, I cannot provide the requested before and after pitcher names. I would need to do additional research beyond these search results to find Tamai's 2023 uniform number in order to answer the question."
|
83 |
},
|
84 |
{
|
85 |
"task_id": "7bd855d8-463d-4ed5-93ca-5fe35145f733",
|
|
|
91 |
}
|
92 |
]
|
93 |
|
94 |
+
# Constants
|
95 |
+
# Base URL of the scoring service; run_and_submit_all appends "/submit" to it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
|
97 |
+
def _describe_http_error(error: "requests.exceptions.HTTPError") -> str:
    """Return a readable description of a non-2xx response from the server."""
    response = error.response
    detail = f"Server responded with status {response.status_code}."
    try:
        payload = response.json()
    except ValueError:
        # ValueError covers the JSON decode errors raised by every requests
        # release; fall back to a bounded slice of the raw body.
        return f"{detail} Response: {response.text[:500]}"
    if isinstance(payload, dict):
        return f"{detail} Detail: {payload.get('detail', response.text)}"
    # Valid JSON that is not an object (list, string, ...) has no "detail" key.
    return f"{detail} Detail: {response.text}"


def _results_frame(result_data: dict) -> pd.DataFrame:
    """Build the display table from the optional "data" list of the reply."""
    rows = [
        {
            "Task ID": item.get("task_id", ""),
            "Question": item.get("question", ""),
            "Your Answer": item.get("submitted_answer", ""),
            "Correct": item.get("is_correct", False),
        }
        # `or []` also tolerates an explicit null for "data".
        for item in result_data.get("data") or []
    ]
    return pd.DataFrame(rows)


def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Submit the precomputed ANSWERS for the logged-in user and report the result.

    No questions are fetched and no agent is run here: the module-level
    ANSWERS list is posted as-is to ``{DEFAULT_API_URL}/submit``.

    Args:
        profile: OAuth profile of the logged-in user, or None when nobody
            is logged in.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a DataFrame built
        from the "data" entries of the server reply on success, and None when
        the user is not logged in or the submission failed.
    """
    if not profile:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    # Normalize once so the payload and the status messages agree.
    username = str(profile.username).strip()
    print(f"User logged in: {username}")

    # SPACE_ID is only used to build the link to the code sent with the answers.
    space_id = os.getenv("SPACE_ID")
    if not space_id:
        print("Warning: SPACE_ID is not set; the agent code link will not point to a real Space.")

    submit_url = f"{DEFAULT_API_URL}/submit"

    # In the case of an app running as a Hugging Face space, this link points toward your codebase
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(agent_code)

    # Prepare Submission
    submission_data = {"username": username, "agent_code": agent_code, "answers": ANSWERS}
    print(f"Submitting {len(ANSWERS)} answers for user '{username}'...")

    # Submit
    print(f"Submitting {len(ANSWERS)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()

        try:
            result_data = response.json()
        except ValueError:
            # A 2xx reply with a non-JSON body is not a network failure;
            # report it as what it is instead of letting it fall through.
            status_message = (
                "Submission sent, but the server response was not valid JSON: "
                f"{response.text[:500]}"
            )
            print(status_message)
            return status_message, None

        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")

        return final_status, _results_frame(result_data)

    except requests.exceptions.HTTPError as e:
        status_message = f"Submission Failed: {_describe_http_error(e)}"
        print(status_message)
        return status_message, None

    except requests.exceptions.Timeout:
        status_message = "Submission Failed: The request timed out."
        print(status_message)
        return status_message, None

    except requests.exceptions.RequestException as e:
        status_message = f"Submission Failed: Network error - {e}"
        print(status_message)
        return status_message, None

    except Exception as e:
        # Last-resort boundary so the UI callback always returns a message.
        status_message = f"An unexpected error occurred during submission: {e}"
        print(status_message)
        return status_message, None
|
180 |
|
181 |
# Create the Gradio interface
|
182 |
with gr.Blocks() as demo:
    # Page header.
    gr.Markdown("# GAIA Benchmark Submission")
    gr.Markdown("This Space submits your agent's answers to the GAIA benchmark leaderboard.")

    # Hugging Face login; the submit handler refuses to run without a profile.
    gr.LoginButton()

    # Trigger and the two output widgets the handler fills in.
    submit_btn = gr.Button("Run and Submit All")
    result_text = gr.Textbox(label="Run Status / Submission Result", lines=5)
    result_df = gr.Dataframe(label="Questions and Agent Answers")

    # No `inputs` are wired here; the handler's `profile` argument is expected
    # to be supplied by Gradio's OAuth integration from its type annotation.
    submit_btn.click(fn=run_and_submit_all, outputs=[result_text, result_df])
|
199 |
|
200 |
# Launch the app
|
201 |
if __name__ == "__main__":
|