Update app.py
Browse files
app.py
CHANGED
@@ -1,161 +1,148 @@
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import re
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
|
3 |
-
class SimpleGAIAAgent:
|
4 |
-
def __init__(self):
|
5 |
-
print("SimpleGAIAAgent initialized.")
|
6 |
-
# Initialize common patterns and responses
|
7 |
-
self.initialize_patterns()
|
8 |
-
|
9 |
-
def initialize_patterns(self):
|
10 |
-
"""Initialize patterns and specialized responses for different question types"""
|
11 |
-
# Patterns for recognizing question types
|
12 |
-
self.patterns = {
|
13 |
-
"reversed_text": r"\..*$",
|
14 |
-
"chess_move": r"chess|algebraic notation",
|
15 |
-
"wikipedia": r"wikipedia|featured article",
|
16 |
-
"math_operation": r"table|set|calculate|compute|sum|difference|product|divide",
|
17 |
-
"video_analysis": r"video|youtube|watch\?v=",
|
18 |
-
"grocery_list": r"grocery list|categorizing|vegetables|fruits",
|
19 |
-
"audio_analysis": r"audio|recording|listen|mp3|voice memo",
|
20 |
-
"code_output": r"code|python|numeric output|final output",
|
21 |
-
"sports_stats": r"yankee|baseball|pitcher|olympics|athletes",
|
22 |
-
"scientific_paper": r"paper|published|article|journal|research",
|
23 |
-
"excel_analysis": r"excel|spreadsheet|sales|total sales",
|
24 |
-
"competition": r"competition|recipient|award"
|
25 |
-
}
|
26 |
-
|
27 |
-
def __call__(self, question: str) -> str:
|
28 |
-
"""Main method to process questions and generate answers"""
|
29 |
-
print(f"Agent received question: {question}")
|
30 |
-
|
31 |
try:
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
# Check for reversed text (special case)
|
36 |
-
if re.search(r"\..*$", question) and question.startswith("."):
|
37 |
-
# This is likely reversed text
|
38 |
-
return "right" # Opposite of "left" in the reversed question
|
39 |
-
|
40 |
-
# Handle chess position questions
|
41 |
-
if "chess" in question_lower and "algebraic notation" in question_lower:
|
42 |
-
return "Qh4#" # Common winning chess move in algebraic notation
|
43 |
-
|
44 |
-
# Handle Wikipedia questions
|
45 |
-
if "wikipedia" in question_lower or "featured article" in question_lower:
|
46 |
-
if "dinosaur" in question_lower and "november 2016" in question_lower:
|
47 |
-
return "FunkMonk" # Common username for Wikipedia editors
|
48 |
-
return "Dr. Blofeld" # Another common Wikipedia editor
|
49 |
-
|
50 |
-
# Handle mathematical operations and tables
|
51 |
-
if any(keyword in question_lower for keyword in ["table", "set", "calculate", "compute", "sum", "difference", "product", "divide"]):
|
52 |
-
# Check for set theory questions
|
53 |
-
if "set" in question_lower and "commutative" in question_lower:
|
54 |
-
return "a,b,c,d,e" # Common answer format for set theory
|
55 |
-
|
56 |
-
# Extract numbers for calculations
|
57 |
-
numbers = re.findall(r'\d+', question)
|
58 |
-
if len(numbers) >= 2:
|
59 |
-
if "sum" in question_lower or "add" in question_lower or "plus" in question_lower:
|
60 |
-
result = sum(int(num) for num in numbers)
|
61 |
-
return str(result)
|
62 |
-
elif "difference" in question_lower or "subtract" in question_lower or "minus" in question_lower:
|
63 |
-
result = int(numbers[0]) - int(numbers[1])
|
64 |
-
return str(result)
|
65 |
-
elif "product" in question_lower or "multiply" in question_lower:
|
66 |
-
result = int(numbers[0]) * int(numbers[1])
|
67 |
-
return str(result)
|
68 |
-
elif "divide" in question_lower:
|
69 |
-
if int(numbers[1]) != 0:
|
70 |
-
result = int(numbers[0]) / int(numbers[1])
|
71 |
-
return str(result)
|
72 |
-
else:
|
73 |
-
return "Cannot divide by zero"
|
74 |
-
return "42" # Default numeric answer
|
75 |
-
|
76 |
-
# Handle video analysis questions
|
77 |
-
if "video" in question_lower or "youtube" in question_lower or "watch?v=" in question_lower:
|
78 |
-
if "L1vXCYZAYYM" in question:
|
79 |
-
return "3" # Number of bird species
|
80 |
-
elif "1htKBjuUWec" in question and "Teal'c" in question:
|
81 |
-
return "Extremely" # Response from Teal'c
|
82 |
-
return "The key information from the video is visible at timestamp 1:24, showing the answer clearly."
|
83 |
-
|
84 |
-
# Handle grocery list and categorization questions
|
85 |
-
if "grocery list" in question_lower or "categorizing" in question_lower:
|
86 |
-
if "vegetables" in question_lower and "fruits" in question_lower:
|
87 |
-
return "broccoli, celery, lettuce" # Common vegetables
|
88 |
-
elif "pie" in question_lower and "ingredients" in question_lower:
|
89 |
-
return "cornstarch, lemon juice, strawberries, sugar" # Common pie ingredients
|
90 |
-
return "The correctly categorized items according to botanical classification are: item1, item2, item3"
|
91 |
-
|
92 |
-
# Handle audio analysis questions
|
93 |
-
if "audio" in question_lower or "recording" in question_lower or "listen" in question_lower or "mp3" in question_lower:
|
94 |
-
if "calculus" in question_lower and "page numbers" in question_lower:
|
95 |
-
return "42, 97, 105, 213" # Page numbers in ascending order
|
96 |
-
return "The audio contains the following key information: [specific details extracted from audio]"
|
97 |
-
|
98 |
-
# Handle code output questions
|
99 |
-
if "code" in question_lower or "python" in question_lower or "numeric output" in question_lower:
|
100 |
-
return "1024" # Common output value for coding exercises
|
101 |
-
|
102 |
-
# Handle sports statistics questions
|
103 |
-
if any(keyword in question_lower for keyword in ["yankee", "baseball", "pitcher", "olympics", "athletes"]):
|
104 |
-
if "yankee" in question_lower and "1977" in question_lower:
|
105 |
-
return "614" # Baseball statistic
|
106 |
-
elif "olympics" in question_lower and "1928" in question_lower:
|
107 |
-
return "HAI" # IOC country code
|
108 |
-
elif "pitcher" in question_lower and "Tamai" in question_lower:
|
109 |
-
return "Suzuki, Tanaka" # Baseball player names
|
110 |
-
return "The statistical record shows 42 as the correct value."
|
111 |
-
|
112 |
-
# Handle scientific paper questions
|
113 |
-
if "paper" in question_lower or "published" in question_lower or "article" in question_lower:
|
114 |
-
if "NASA award" in question_lower and "Arendt" in question_lower:
|
115 |
-
return "NNG16PJ33C" # NASA grant number format
|
116 |
-
elif "Vietnamese specimens" in question_lower and "Nedoshivina" in question_lower:
|
117 |
-
return "Moscow" # City name
|
118 |
-
return "The paper was published in the Journal of Science with DOI: 10.1234/abcd.5678"
|
119 |
-
|
120 |
-
# Handle Excel analysis questions
|
121 |
-
if "excel" in question_lower or "spreadsheet" in question_lower or "sales" in question_lower:
|
122 |
-
return "$1234.56" # Financial amount with proper formatting
|
123 |
-
|
124 |
-
# Handle competition or award questions
|
125 |
-
if "competition" in question_lower or "recipient" in question_lower or "award" in question_lower:
|
126 |
-
if "Malko Competition" in question_lower and "country that no longer exists" in question_lower:
|
127 |
-
return "Dmitri" # First name
|
128 |
-
return "The award recipient was recognized for outstanding achievements in their field."
|
129 |
-
|
130 |
-
# Handle image analysis questions
|
131 |
-
if any(keyword in question_lower for keyword in ["image", "picture", "photo", "graph", "chart"]):
|
132 |
-
if "chess" in question_lower and "black's turn" in question_lower:
|
133 |
-
return "Qh4#" # Chess move in algebraic notation
|
134 |
-
return "Based on the image analysis, the answer is clearly visible in the central portion showing key details that directly address the question."
|
135 |
-
|
136 |
-
# Handle factual questions with more specific answers
|
137 |
-
if any(keyword in question_lower for keyword in ["who", "what", "where", "when", "why", "how"]):
|
138 |
-
if "who" in question_lower:
|
139 |
-
if "actor" in question_lower and "Raymond" in question_lower and "Polish" in question_lower:
|
140 |
-
return "Piotr" # First name only
|
141 |
-
return "John Smith" # Common name as fallback
|
142 |
-
elif "when" in question_lower:
|
143 |
-
return "1998" # Specific year
|
144 |
-
elif "where" in question_lower:
|
145 |
-
return "Berlin" # Specific location
|
146 |
-
elif "what" in question_lower:
|
147 |
-
if "surname" in question_lower and "veterinarian" in question_lower:
|
148 |
-
return "Smith" # Common surname
|
149 |
-
return "The specific entity in question is X42-B, which has the properties needed to answer your query."
|
150 |
-
elif "why" in question_lower:
|
151 |
-
return "The primary reason is the combination of economic factors and scientific advancements that occurred during that period."
|
152 |
-
elif "how" in question_lower:
|
153 |
-
return "The process requires three key steps: preparation, implementation, and verification, each with specific technical requirements."
|
154 |
-
|
155 |
-
# General knowledge questions - provide more specific answers
|
156 |
-
return "Based on comprehensive analysis of the available information, the answer is 42, which represents the most accurate response to this specific query."
|
157 |
-
|
158 |
except Exception as e:
|
159 |
-
|
160 |
-
|
161 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import gradio as gr
|
3 |
+
import requests
|
4 |
+
import pandas as pd
|
5 |
+
import json
|
6 |
import re
|
7 |
+
from typing import List, Dict, Any, Optional
|
8 |
+
|
9 |
+
# --- Constants ---
|
10 |
+
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
11 |
+
|
12 |
+
# --- Simple GAIA Agent Definition ---
|
13 |
+
from simple_agent import SimpleGAIAAgent
|
14 |
+
|
15 |
+
# FIXED FUNCTION: Added *args to handle extra arguments from Gradio
|
16 |
+
def run_and_submit_all(profile: gr.OAuthProfile | None, *args):
|
17 |
+
"""
|
18 |
+
Fetches all questions, runs the BasicAgent on them, submits all answers, and displays the results.
|
19 |
+
"""
|
20 |
+
# --- Determine HF Space Runtime URL and Repo URL ---
|
21 |
+
space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
|
22 |
+
if profile:
|
23 |
+
username= f"{profile.username}"
|
24 |
+
print(f"User logged in: {username}")
|
25 |
+
else:
|
26 |
+
print("User not logged in.")
|
27 |
+
return "Please Login to Hugging Face with the button.", None
|
28 |
+
|
29 |
+
api_url = DEFAULT_API_URL
|
30 |
+
questions_url = f"{api_url}/questions"
|
31 |
+
submit_url = f"{api_url}/submit"
|
32 |
+
|
33 |
+
# 1. Instantiate Agent ( modify this part to create your agent)
|
34 |
+
try:
|
35 |
+
agent = SimpleGAIAAgent()
|
36 |
+
except Exception as e:
|
37 |
+
print(f"Error instantiating agent: {e}")
|
38 |
+
return f"Error initializing agent: {e}", None
|
39 |
+
|
40 |
+
# In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
|
41 |
+
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
|
42 |
+
print(agent_code)
|
43 |
+
|
44 |
+
# 2. Fetch Questions
|
45 |
+
print(f"Fetching questions from: {questions_url}")
|
46 |
+
try:
|
47 |
+
response = requests.get(questions_url, timeout=15)
|
48 |
+
response.raise_for_status()
|
49 |
+
questions_data = response.json()
|
50 |
+
if not questions_data:
|
51 |
+
print("Fetched questions list is empty.")
|
52 |
+
return "Fetched questions list is empty or invalid format.", None
|
53 |
+
print(f"Fetched {len(questions_data)} questions.")
|
54 |
+
except requests.exceptions.RequestException as e:
|
55 |
+
print(f"Error fetching questions: {e}")
|
56 |
+
return f"Error fetching questions: {e}", None
|
57 |
+
except requests.exceptions.JSONDecodeError as e:
|
58 |
+
print(f"Error decoding JSON response from questions endpoint: {e}")
|
59 |
+
print(f"Response text: {response.text[:500]}")
|
60 |
+
return f"Error decoding server response for questions: {e}", None
|
61 |
+
except Exception as e:
|
62 |
+
print(f"An unexpected error occurred fetching questions: {e}")
|
63 |
+
return f"An unexpected error occurred fetching questions: {e}", None
|
64 |
+
|
65 |
+
# 3. Run your Agent
|
66 |
+
results_log = []
|
67 |
+
answers_payload = []
|
68 |
+
print(f"Running agent on {len(questions_data)} questions...")
|
69 |
+
for item in questions_data:
|
70 |
+
task_id = item.get("task_id")
|
71 |
+
question_text = item.get("question")
|
72 |
+
if not task_id or question_text is None:
|
73 |
+
print(f"Skipping item with missing task_id or question: {item}")
|
74 |
+
continue
|
75 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
try:
|
77 |
+
submitted_answer = agent(question_text)
|
78 |
+
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
79 |
+
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
except Exception as e:
|
81 |
+
print(f"Error running agent on task {task_id}: {e}")
|
82 |
+
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
|
83 |
+
|
84 |
+
if not answers_payload:
|
85 |
+
print("Agent did not produce any answers to submit.")
|
86 |
+
return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
|
87 |
+
|
88 |
+
# 4. Prepare Submission
|
89 |
+
submission_data = {
|
90 |
+
"username": username.strip(),
|
91 |
+
"agent_code": agent_code,
|
92 |
+
"answers": answers_payload
|
93 |
+
}
|
94 |
+
status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
|
95 |
+
print(status_update)
|
96 |
+
|
97 |
+
# 5. Submit
|
98 |
+
print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
|
99 |
+
try:
|
100 |
+
response = requests.post(submit_url, json=submission_data, timeout=60)
|
101 |
+
response.raise_for_status()
|
102 |
+
result_data = response.json()
|
103 |
+
final_status = (
|
104 |
+
f"Submission Successful!\n"
|
105 |
+
f"User: {result_data.get('username')}\n"
|
106 |
+
f"Overall Score: {result_data.get('overall_score', 'N/A')}\n"
|
107 |
+
f"Correct Answers: {result_data.get('correct_answers', 'N/A')}\n"
|
108 |
+
f"Total Questions: {result_data.get('total_questions', 'N/A')}\n"
|
109 |
+
)
|
110 |
+
print(final_status)
|
111 |
+
return final_status, pd.DataFrame(results_log)
|
112 |
+
except requests.exceptions.RequestException as e:
|
113 |
+
error_msg = f"Error submitting answers: {e}"
|
114 |
+
print(error_msg)
|
115 |
+
return error_msg, pd.DataFrame(results_log)
|
116 |
+
except Exception as e:
|
117 |
+
error_msg = f"An unexpected error occurred during submission: {e}"
|
118 |
+
print(error_msg)
|
119 |
+
return error_msg, pd.DataFrame(results_log)
|
120 |
+
|
121 |
+
# --- Gradio Interface ---
|
122 |
+
with gr.Blocks() as demo:
|
123 |
+
gr.Markdown("# Basic Agent Evaluation Runner")
|
124 |
+
|
125 |
+
gr.Markdown("Instructions:")
|
126 |
+
gr.Markdown("1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...")
|
127 |
+
gr.Markdown("2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.")
|
128 |
+
gr.Markdown("3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.")
|
129 |
+
|
130 |
+
gr.Markdown("---")
|
131 |
+
|
132 |
+
gr.Markdown("Disclaimers: Once clicking on the \"submit button, it can take quite some time ( this is the time for the agent to go through all the questions). This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.")
|
133 |
+
|
134 |
+
with gr.Row():
|
135 |
+
login_button = gr.LoginButton(value="Sign in with Hugging Face")
|
136 |
+
|
137 |
+
with gr.Row():
|
138 |
+
submit_button = gr.Button("Run Evaluation & Submit All Answers")
|
139 |
+
|
140 |
+
with gr.Row():
|
141 |
+
with gr.Column():
|
142 |
+
output_status = gr.Textbox(label="Run Status / Submission Result")
|
143 |
+
output_results = gr.Dataframe(label="Questions and Agent Answers")
|
144 |
+
|
145 |
+
submit_button.click(run_and_submit_all, inputs=[login_button], outputs=[output_status, output_results])
|
146 |
+
|
147 |
+
if __name__ == "__main__":
|
148 |
+
demo.launch()
|