Update app.py
Browse files
app.py
CHANGED
@@ -5,34 +5,34 @@ import requests
|
|
5 |
import pandas as pd
|
6 |
|
7 |
from smolagents import CodeAgent, OpenAIServerModel
|
8 |
-
from
|
9 |
|
10 |
# Constants
|
11 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
12 |
MAX_QUESTION_LENGTH = 4000
|
13 |
-
MAX_WEBPAGE_CONTENT = 3000
|
14 |
|
15 |
-
# --- Reliable DuckDuckGo Tool
|
16 |
-
class ReliableDuckDuckGoTool(
|
17 |
-
def
|
18 |
for attempt in range(3):
|
19 |
try:
|
20 |
-
return super().
|
21 |
except Exception as e:
|
22 |
-
if "
|
23 |
-
print(f"Rate
|
24 |
-
time.sleep(
|
25 |
else:
|
26 |
raise e
|
27 |
raise RuntimeError("DuckDuckGo search failed after retries")
|
28 |
|
29 |
-
# ---
|
30 |
class SmartGAIAAgent:
|
31 |
def __init__(self):
|
32 |
self.api_key = os.getenv("OPENAI_API_KEY")
|
33 |
if not self.api_key:
|
34 |
raise ValueError("Missing OPENAI_API_KEY")
|
35 |
self.model = OpenAIServerModel(model_id="gpt-4", api_key=self.api_key)
|
|
|
36 |
self.agent = CodeAgent(
|
37 |
tools=[ReliableDuckDuckGoTool()],
|
38 |
model=self.model,
|
@@ -44,14 +44,13 @@ class SmartGAIAAgent:
|
|
44 |
|
45 |
def __call__(self, question: str) -> str:
|
46 |
try:
|
47 |
-
|
48 |
-
|
49 |
-
return result.strip()
|
50 |
except Exception as e:
|
51 |
print(f"Agent error: {e}")
|
52 |
return "error"
|
53 |
|
54 |
-
# --- Evaluation
|
55 |
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
56 |
space_id = os.getenv("SPACE_ID")
|
57 |
if profile:
|
@@ -93,7 +92,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
93 |
'attached', '.mp3', '.wav', '.png', '.jpg', '.jpeg',
|
94 |
'youtube', '.mp4', 'video', 'listen', 'watch'
|
95 |
]):
|
96 |
-
print(f"Skipping unsupported
|
97 |
continue
|
98 |
|
99 |
try:
|
@@ -125,17 +124,17 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
125 |
response.raise_for_status()
|
126 |
result_data = response.json()
|
127 |
final_status = (
|
128 |
-
f"Submission Successful
|
129 |
-
f"User: {result_data.get('username')}
|
130 |
f"Score: {result_data.get('score')}% "
|
131 |
-
f"({result_data.get('correct_count')}/{result_data.get('total_attempted')})
|
132 |
f"Message: {result_data.get('message')}"
|
133 |
)
|
134 |
return final_status, pd.DataFrame(results_log)
|
135 |
except Exception as e:
|
136 |
return f"Submission failed: {e}", pd.DataFrame(results_log)
|
137 |
|
138 |
-
# --- Gradio
|
139 |
with gr.Blocks() as demo:
|
140 |
gr.Markdown("# 🧠 GAIA Agent Evaluation")
|
141 |
gr.Markdown("""
|
|
|
5 |
import pandas as pd
|
6 |
|
7 |
from smolagents import CodeAgent, OpenAIServerModel
|
8 |
+
from smolagents.tools import DuckDuckGoSearchTool
|
9 |
|
10 |
# Constants
|
11 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
12 |
MAX_QUESTION_LENGTH = 4000
|
|
|
13 |
|
14 |
+
# --- Reliable DuckDuckGo Tool using smolagents ---
|
15 |
+
class ReliableDuckDuckGoTool(DuckDuckGoSearchTool):
    """DuckDuckGo search tool that backs off and retries on rate-limit errors.

    The retry logic lives in ``forward`` because that is the method smolagents
    invokes when an agent calls a tool; a ``run`` override alone is never
    reached by the agent. ``run`` is kept as a thin alias for existing callers.
    """

    # NOTE(review): DuckDuckGoSearchTool is exported from the top-level
    # `smolagents` package; confirm that `smolagents.tools` re-exports it.

    def forward(self, query: str) -> str:
        """Search DuckDuckGo for *query*, retrying when rate limited.

        Args:
            query: The search query passed through to the parent tool.

        Returns:
            The search results produced by the parent tool.

        Raises:
            RuntimeError: If every attempt failed with a rate-limit error. The
                last underlying error is attached as ``__cause__``.
            Exception: Any error whose message does not mention "rate" is
                re-raised immediately, without retrying.
        """
        max_attempts = 3
        last_error = None
        for attempt in range(max_attempts):
            try:
                return super().forward(query)
            except Exception as e:
                # Only rate-limit failures are considered transient.
                if "rate" not in str(e).lower():
                    raise
                last_error = e
                # Do not announce a retry or sleep after the final attempt.
                if attempt + 1 < max_attempts:
                    print(f"[DuckDuckGo] Rate limit hit. Retrying ({attempt + 1}/3)...")
                    time.sleep(2 * (attempt + 1))
        raise RuntimeError("DuckDuckGo search failed after retries") from last_error

    def run(self, query: str) -> str:
        """Backward-compatible alias for :meth:`forward`."""
        return self.forward(query)
|
27 |
|
28 |
+
# --- Main Agent ---
|
29 |
class SmartGAIAAgent:
|
30 |
def __init__(self):
|
31 |
self.api_key = os.getenv("OPENAI_API_KEY")
|
32 |
if not self.api_key:
|
33 |
raise ValueError("Missing OPENAI_API_KEY")
|
34 |
self.model = OpenAIServerModel(model_id="gpt-4", api_key=self.api_key)
|
35 |
+
|
36 |
self.agent = CodeAgent(
|
37 |
tools=[ReliableDuckDuckGoTool()],
|
38 |
model=self.model,
|
|
|
44 |
|
45 |
def __call__(self, question: str) -> str:
    """Answer a single question with the underlying agent.

    Args:
        question: The question text; it is passed through
            ``self.truncate_question`` before being given to the agent.

    Returns:
        The agent's answer as a stripped string, or the literal ``"error"``
        if the agent raised or produced no answer.
    """
    try:
        question = self.truncate_question(question)
        result = self.agent.run(question)
        if result is None:
            # Preserve the original outcome for a missing answer ("error").
            raise ValueError("agent returned no answer")
        # The final answer is not guaranteed to be a str (numeric answers are
        # common); coerce first so a valid answer is not turned into "error"
        # by a failing .strip() call.
        return str(result).strip()
    except Exception as e:
        print(f"Agent error: {e}")
        return "error"
|
52 |
|
53 |
+
# --- Evaluation and Submission Logic ---
|
54 |
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
55 |
space_id = os.getenv("SPACE_ID")
|
56 |
if profile:
|
|
|
92 |
'attached', '.mp3', '.wav', '.png', '.jpg', '.jpeg',
|
93 |
'youtube', '.mp4', 'video', 'listen', 'watch'
|
94 |
]):
|
95 |
+
print(f"Skipping unsupported question: {task_id}")
|
96 |
continue
|
97 |
|
98 |
try:
|
|
|
124 |
response.raise_for_status()
|
125 |
result_data = response.json()
|
126 |
final_status = (
|
127 |
+
f"Submission Successful!\n"
|
128 |
+
f"User: {result_data.get('username')}\n"
|
129 |
f"Score: {result_data.get('score')}% "
|
130 |
+
f"({result_data.get('correct_count')}/{result_data.get('total_attempted')})\n"
|
131 |
f"Message: {result_data.get('message')}"
|
132 |
)
|
133 |
return final_status, pd.DataFrame(results_log)
|
134 |
except Exception as e:
|
135 |
return f"Submission failed: {e}", pd.DataFrame(results_log)
|
136 |
|
137 |
+
# --- Gradio UI ---
|
138 |
with gr.Blocks() as demo:
|
139 |
gr.Markdown("# 🧠 GAIA Agent Evaluation")
|
140 |
gr.Markdown("""
|