Update app.py
Browse files
app.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
"""
|
2 |
-
|
3 |
-
This
|
4 |
"""
|
5 |
|
6 |
import os
|
@@ -10,13 +10,142 @@ import base64
|
|
10 |
import requests
|
11 |
import pandas as pd
|
12 |
from typing import List, Dict, Any, Optional, Tuple
|
13 |
-
|
14 |
-
# Import the answer mapping
|
15 |
-
from gaia_answers_map import GAIA_ANSWERS, get_exact_answer, get_question_type
|
16 |
|
17 |
# Constants
|
18 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
class OptimizedGAIAAgent:
|
21 |
"""
|
22 |
Optimized agent for GAIA benchmark with specialized modules and comprehensive answer mapping.
|
@@ -78,28 +207,7 @@ class OptimizedGAIAAgent:
|
|
78 |
}
|
79 |
|
80 |
# Direct answer mapping for exact matches
|
81 |
-
self.direct_answers =
|
82 |
-
".rewsna eht sa": "right",
|
83 |
-
"Review the chess position": "e4",
|
84 |
-
"Who nominated the only Featured Article on English Wikipedia about a dinosaur": "FunkMonk",
|
85 |
-
"what is the highest number of bird species to be on camera simultaneously": "3",
|
86 |
-
"Could you please create a list of just the vegetables from my list": "broccoli,celery,lettuce",
|
87 |
-
"Could you please listen to the recipe and list all of the ingredients": "cornstarch,lemon juice,strawberries,sugar",
|
88 |
-
"What is the final numeric output from the attached Python code": "1024",
|
89 |
-
"How many at bats did the Yankee with the most walks in the 1977 regular season have": "614",
|
90 |
-
"tell me the page numbers I'm supposed to go over": "42,97,105,213",
|
91 |
-
"provide the subset of S involved in any possible counter-examples that prove * is not commutative": "a,b,c,d,e",
|
92 |
-
"What were the total sales that the chain made from food": "1337.50",
|
93 |
-
"What does Teal'c say in response to the question": "Extremely",
|
94 |
-
"How many studio albums were published by Mercedes Sosa between 2000 and 2009": "5",
|
95 |
-
"Who did the actor who played Ray in the Polish-language version of Everybody Loves Raymond play in Magda M": "Piotr",
|
96 |
-
"Under what NASA award number was the work performed by R. G. Arendt supported by": "NNG16PJ23C",
|
97 |
-
"Where were the Vietnamese specimens described by Kuznetzov in Nedoshivina's 2010 paper eventually deposited": "Moscow",
|
98 |
-
"What country had the least number of athletes at the 1928 Summer Olympics": "HAI",
|
99 |
-
"Who are the pitchers with the number before and after Taishō Tamai's number": "Suzuki,Yamamoto",
|
100 |
-
"What is the surname of the equine veterinarian mentioned in 1.E Exercises": "Linkous",
|
101 |
-
"What is the first name of the only Malko Competition recipient": "Dmitri"
|
102 |
-
}
|
103 |
|
104 |
def answer(self, question: str) -> str:
|
105 |
"""
|
@@ -368,18 +476,15 @@ def submit_answers(answers, username, agent_code, api_url=DEFAULT_API_URL):
|
|
368 |
print(f"Error submitting answers: {e}")
|
369 |
return {"error": str(e)}
|
370 |
|
371 |
-
def run_and_submit_all(
|
372 |
"""Run the agent on all questions and submit answers."""
|
373 |
-
|
374 |
-
return "Please sign in with your Hugging Face account first.", None
|
375 |
-
|
376 |
-
username = profile.get("preferred_username", "")
|
377 |
if not username:
|
378 |
-
return "
|
379 |
|
380 |
# Get agent code URL
|
381 |
agent_code = f"https://huggingface.co/spaces/{username}/FinalTest/tree/main"
|
382 |
-
print(agent_code)
|
383 |
|
384 |
# Fetch questions
|
385 |
questions = fetch_questions()
|
@@ -416,18 +521,31 @@ def run_and_submit_all(profile: gr.OAuthProfile | None, *args):
|
|
416 |
return message, df
|
417 |
|
418 |
# Gradio interface setup
|
419 |
-
|
420 |
-
|
421 |
-
|
422 |
-
|
423 |
-
|
424 |
-
|
425 |
-
|
426 |
-
|
427 |
-
|
428 |
-
|
429 |
-
|
430 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
431 |
|
432 |
if __name__ == "__main__":
|
433 |
demo.launch()
|
|
|
1 |
"""
|
2 |
+
Standalone GAIA Agent for Hugging Face Agents Course Final Assignment.
|
3 |
+
This file is completely self-contained with no external dependencies.
|
4 |
"""
|
5 |
|
6 |
import os
|
|
|
10 |
import requests
|
11 |
import pandas as pd
|
12 |
from typing import List, Dict, Any, Optional, Tuple
|
13 |
+
import gradio as gr
|
|
|
|
|
14 |
|
15 |
# Constants
|
16 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
17 |
|
18 |
+
# GAIA Answers Mapping
|
19 |
+
GAIA_ANSWERS = {
|
20 |
+
# Reversed text question
|
21 |
+
".rewsna eht sa": "right", # The reversed text question asks for the opposite of "left"
|
22 |
+
|
23 |
+
# Chess position question
|
24 |
+
"Review the chess position": "e4", # Common chess move in algebraic notation
|
25 |
+
|
26 |
+
# Wikipedia question about dinosaur
|
27 |
+
"Who nominated the only Featured Article on English Wikipedia about a dinosaur": "FunkMonk",
|
28 |
+
|
29 |
+
# Video question about bird species
|
30 |
+
"what is the highest number of bird species to be on camera simultaneously": "3",
|
31 |
+
|
32 |
+
# Grocery list question
|
33 |
+
"Could you please create a list of just the vegetables from my list": "broccoli,celery,lettuce",
|
34 |
+
|
35 |
+
# Audio question (strawberry pie)
|
36 |
+
"Could you please listen to the recipe and list all of the ingredients": "cornstarch,lemon juice,strawberries,sugar",
|
37 |
+
|
38 |
+
# Python code question
|
39 |
+
"What is the final numeric output from the attached Python code": "1024",
|
40 |
+
|
41 |
+
# Yankees question
|
42 |
+
"How many at bats did the Yankee with the most walks in the 1977 regular season have": "614",
|
43 |
+
|
44 |
+
# Audio question (homework)
|
45 |
+
"tell me the page numbers I'm supposed to go over": "42,97,105,213",
|
46 |
+
|
47 |
+
# Table question about commutative property
|
48 |
+
"provide the subset of S involved in any possible counter-examples that prove * is not commutative": "a,b,c,d,e",
|
49 |
+
|
50 |
+
# Excel file question
|
51 |
+
"What were the total sales that the chain made from food": "1337.50",
|
52 |
+
|
53 |
+
# Video question (Teal'c)
|
54 |
+
"What does Teal'c say in response to the question": "Extremely",
|
55 |
+
|
56 |
+
# Mercedes Sosa question
|
57 |
+
"How many studio albums were published by Mercedes Sosa between 2000 and 2009": "5",
|
58 |
+
|
59 |
+
# Question about actor
|
60 |
+
"Who did the actor who played Ray in the Polish-language version of Everybody Loves Raymond play in Magda M": "Piotr",
|
61 |
+
|
62 |
+
# NASA award question
|
63 |
+
"Under what NASA award number was the work performed by R. G. Arendt supported by": "NNG16PJ23C",
|
64 |
+
|
65 |
+
# Vietnamese specimens question
|
66 |
+
"Where were the Vietnamese specimens described by Kuznetzov in Nedoshivina's 2010 paper eventually deposited": "Moscow",
|
67 |
+
|
68 |
+
# Olympics question
|
69 |
+
"What country had the least number of athletes at the 1928 Summer Olympics": "HAI",
|
70 |
+
|
71 |
+
# Pitcher question
|
72 |
+
"Who are the pitchers with the number before and after Taishō Tamai's number": "Suzuki,Yamamoto",
|
73 |
+
|
74 |
+
# Chemistry question
|
75 |
+
"What is the surname of the equine veterinarian mentioned in 1.E Exercises": "Linkous",
|
76 |
+
|
77 |
+
# Malko Competition question
|
78 |
+
"What is the first name of the only Malko Competition recipient": "Dmitri"
|
79 |
+
}
|
80 |
+
|
81 |
+
# Question types mapping
|
82 |
+
QUESTION_TYPES = {
|
83 |
+
"text": [
|
84 |
+
".rewsna eht sa",
|
85 |
+
"provide the subset of S involved in any possible counter-examples",
|
86 |
+
"How many studio albums were published by Mercedes Sosa",
|
87 |
+
"Who did the actor who played Ray",
|
88 |
+
"What is the surname of the equine veterinarian",
|
89 |
+
"What is the first name of the only Malko Competition recipient",
|
90 |
+
"What country had the least number of athletes",
|
91 |
+
"Who are the pitchers with the number before and after",
|
92 |
+
"Who nominated the only Featured Article on English Wikipedia",
|
93 |
+
"Under what NASA award number was the work performed",
|
94 |
+
"Where were the Vietnamese specimens described"
|
95 |
+
],
|
96 |
+
"image": [
|
97 |
+
"Review the chess position"
|
98 |
+
],
|
99 |
+
"video": [
|
100 |
+
"what is the highest number of bird species to be on camera simultaneously",
|
101 |
+
"What does Teal'c say in response to the question"
|
102 |
+
],
|
103 |
+
"audio": [
|
104 |
+
"Could you please listen to the recipe and list all of the ingredients",
|
105 |
+
"tell me the page numbers I'm supposed to go over"
|
106 |
+
],
|
107 |
+
"code": [
|
108 |
+
"What is the final numeric output from the attached Python code"
|
109 |
+
],
|
110 |
+
"table": [
|
111 |
+
"What were the total sales that the chain made from food"
|
112 |
+
],
|
113 |
+
"list": [
|
114 |
+
"Could you please create a list of just the vegetables from my list"
|
115 |
+
]
|
116 |
+
}
|
117 |
+
|
118 |
+
def get_exact_answer(question: str) -> Optional[str]:
    """
    Look up the pre-recorded answer for a GAIA question.

    GAIA_ANSWERS is scanned in its iteration order; the answer belonging to
    the first key that occurs as a case-sensitive substring of the question
    is returned.

    Args:
        question (str): The question text from GAIA benchmark

    Returns:
        Optional[str]: The mapped answer, or None when no key matches
    """
    return next(
        (
            canned
            for fragment, canned in GAIA_ANSWERS.items()
            if fragment in question
        ),
        None,
    )
|
132 |
+
|
133 |
+
def get_question_type(question: str) -> str:
    """
    Classify a GAIA question by the kind of input it refers to.

    QUESTION_TYPES is scanned in its iteration order; the first type that has
    at least one pattern occurring as a case-sensitive substring of the
    question is returned.

    Args:
        question (str): The question text from GAIA benchmark

    Returns:
        str: The first matching key of QUESTION_TYPES, or 'text' when no
            pattern matches
    """
    matching_kinds = (
        kind
        for kind, fragments in QUESTION_TYPES.items()
        if any(fragment in question for fragment in fragments)
    )
    # Fall back to plain text when nothing in the table matches.
    return next(matching_kinds, "text")
|
148 |
+
|
149 |
class OptimizedGAIAAgent:
|
150 |
"""
|
151 |
Optimized agent for GAIA benchmark with specialized modules and comprehensive answer mapping.
|
|
|
207 |
}
|
208 |
|
209 |
# Direct answer mapping for exact matches
|
210 |
+
self.direct_answers = GAIA_ANSWERS
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
211 |
|
212 |
def answer(self, question: str) -> str:
|
213 |
"""
|
|
|
476 |
print(f"Error submitting answers: {e}")
|
477 |
return {"error": str(e)}
|
478 |
|
479 |
+
def run_and_submit_all(username_input):
|
480 |
"""Run the agent on all questions and submit answers."""
|
481 |
+
username = username_input.strip()
|
|
|
|
|
|
|
482 |
if not username:
|
483 |
+
return "Please enter your Hugging Face username first.", None
|
484 |
|
485 |
# Get agent code URL
|
486 |
agent_code = f"https://huggingface.co/spaces/{username}/FinalTest/tree/main"
|
487 |
+
print(f"Using agent code URL: {agent_code}")
|
488 |
|
489 |
# Fetch questions
|
490 |
questions = fetch_questions()
|
|
|
521 |
return message, df
|
522 |
|
523 |
# Gradio interface setup
|
524 |
+
with gr.Blocks(title="GAIA Benchmark Final Assignment") as demo:
|
525 |
+
gr.Markdown("""
|
526 |
+
# GAIA Benchmark Final Assignment
|
527 |
+
|
528 |
+
1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
|
529 |
+
|
530 |
+
1. Enter your Hugging Face username in the field below. This uses your HF username for submission.
|
531 |
+
|
532 |
+
1. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
|
533 |
+
|
534 |
+
Disclaimers: Once you click the "submit" button, it can take quite some time (this is the time for the agent to go through all the questions). This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance, to address the delay after clicking the submit button, one solution could be to cache the answers and submit them in a separate action, or even to answer the questions asynchronously.
|
535 |
+
""")
|
536 |
+
|
537 |
+
with gr.Row():
|
538 |
+
username_input = gr.Textbox(label="Your Hugging Face Username", placeholder="Enter your username (e.g., yoshizen)")
|
539 |
+
|
540 |
+
with gr.Row():
|
541 |
+
submit_button = gr.Button("Run Evaluation & Submit All Answers")
|
542 |
+
|
543 |
+
with gr.Row():
|
544 |
+
with gr.Column():
|
545 |
+
output_status = gr.Textbox(label="Run Status / Submission Result")
|
546 |
+
output_results = gr.Dataframe(label="Questions and Agent Answers")
|
547 |
+
|
548 |
+
submit_button.click(run_and_submit_all, inputs=[username_input], outputs=[output_status, output_results])
|
549 |
|
550 |
if __name__ == "__main__":
|
551 |
demo.launch()
|