Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,16 +1,15 @@
|
|
1 |
-
# app.py
|
2 |
-
|
3 |
import os
|
4 |
import gradio as gr
|
5 |
import requests
|
6 |
import pandas as pd
|
7 |
-
|
8 |
from smolagents import (
|
9 |
CodeAgent,
|
10 |
DuckDuckGoSearchTool,
|
11 |
PythonREPLTool,
|
12 |
OpenAIServerModel,
|
13 |
)
|
|
|
|
|
14 |
|
15 |
# --- Constants ---
|
16 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
@@ -21,12 +20,12 @@ class GaiaAgent:
|
|
21 |
self.openai_key = openai_key
|
22 |
# 1) Initialize the LLM-backed model
|
23 |
self.model = OpenAIServerModel(
|
24 |
-
model_id="gpt-4",
|
25 |
api_key=self.openai_key,
|
26 |
)
|
27 |
# 2) Define the tools
|
28 |
self.search_tool = DuckDuckGoSearchTool()
|
29 |
-
self.python_tool = PythonREPLTool()
|
30 |
# 3) Create the CodeAgent
|
31 |
self.agent = CodeAgent(
|
32 |
model=self.model,
|
@@ -38,23 +37,31 @@ class GaiaAgent:
|
|
38 |
"Always think in Python code using the available tools. "
|
39 |
"Never answer without executing or checking with a tool. "
|
40 |
"Use DuckDuckGoSearchTool for lookups, PythonREPLTool for "
|
41 |
-
"calculations, string or list manipulations."
|
|
|
42 |
)
|
43 |
)
|
44 |
|
45 |
def __call__(self, question: str) -> str:
|
46 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
|
48 |
def run_and_submit_all(profile: gr.OAuthProfile | None, openai_key: str):
|
49 |
# --- Login & Setup ---
|
50 |
if not profile:
|
51 |
return "Please log in to Hugging Face to submit your score.", None
|
52 |
username = profile.username.strip()
|
53 |
-
|
54 |
# 1) Instantiate our improved agent
|
55 |
try:
|
56 |
agent = GaiaAgent(openai_key)
|
57 |
except Exception as e:
|
|
|
|
|
58 |
return f"Error initializing agent: {e}", None
|
59 |
|
60 |
# 2) Fetch the GAIA questions
|
@@ -64,6 +71,8 @@ def run_and_submit_all(profile: gr.OAuthProfile | None, openai_key: str):
|
|
64 |
resp.raise_for_status()
|
65 |
questions = resp.json()
|
66 |
except Exception as e:
|
|
|
|
|
67 |
return f"Error fetching questions: {e}", None
|
68 |
|
69 |
# 3) Run the agent on each question
|
@@ -75,6 +84,8 @@ def run_and_submit_all(profile: gr.OAuthProfile | None, openai_key: str):
|
|
75 |
try:
|
76 |
ans = agent(q)
|
77 |
except Exception as e:
|
|
|
|
|
78 |
ans = f"ERROR: {e}"
|
79 |
answers.append({"task_id": tid, "submitted_answer": ans})
|
80 |
log.append({"Task ID": tid, "Question": q, "Answer": ans})
|
@@ -94,13 +105,15 @@ def run_and_submit_all(profile: gr.OAuthProfile | None, openai_key: str):
|
|
94 |
f"✅ Submission Successful!\n"
|
95 |
f"User: {data['username']}\n"
|
96 |
f"Score: {data['score']}% ({data['correct_count']}/{data['total_attempted']})\n"
|
97 |
-
f"Message: {data.get('message','')}"
|
98 |
)
|
99 |
except Exception as e:
|
|
|
|
|
100 |
status = f"Submission failed: {e}"
|
101 |
-
|
102 |
return status, pd.DataFrame(log)
|
103 |
|
|
|
104 |
# --- Gradio UI ---
|
105 |
with gr.Blocks() as demo:
|
106 |
gr.Markdown("# GAIA Benchmark Runner")
|
@@ -115,7 +128,6 @@ with gr.Blocks() as demo:
|
|
115 |
run_btn = gr.Button("Run & Submit")
|
116 |
out_status = gr.Textbox(label="Status", lines=4)
|
117 |
out_table = gr.DataFrame(label="Questions & Answers")
|
118 |
-
|
119 |
run_btn.click(fn=run_and_submit_all, inputs=[login, key_in], outputs=[out_status, out_table])
|
120 |
|
121 |
if __name__ == "__main__":
|
|
|
|
|
|
|
1 |
import os
|
2 |
import gradio as gr
|
3 |
import requests
|
4 |
import pandas as pd
|
|
|
5 |
from smolagents import (
|
6 |
CodeAgent,
|
7 |
DuckDuckGoSearchTool,
|
8 |
PythonREPLTool,
|
9 |
OpenAIServerModel,
|
10 |
)
|
11 |
+
from smolagents.tools.python_repl import PythonREPL
|
12 |
+
import traceback # Import traceback for detailed error logging
|
13 |
|
14 |
# --- Constants ---
|
15 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
|
|
20 |
self.openai_key = openai_key
|
21 |
# 1) Initialize the LLM-backed model
|
22 |
self.model = OpenAIServerModel(
|
23 |
+
model_id="gpt-4", # or "gpt-3.5-turbo" if you prefer
|
24 |
api_key=self.openai_key,
|
25 |
)
|
26 |
# 2) Define the tools
|
27 |
self.search_tool = DuckDuckGoSearchTool()
|
28 |
+
self.python_tool = PythonREPLTool(timeout=10) # Initialize PythonREPLTool
|
29 |
# 3) Create the CodeAgent
|
30 |
self.agent = CodeAgent(
|
31 |
model=self.model,
|
|
|
37 |
"Always think in Python code using the available tools. "
|
38 |
"Never answer without executing or checking with a tool. "
|
39 |
"Use DuckDuckGoSearchTool for lookups, PythonREPLTool for "
|
40 |
+
"calculations, string or list manipulations. "
|
41 |
+
"Respond with the final answer only. Do not include any extra explanation. "
|
42 |
)
|
43 |
)
|
44 |
|
45 |
def __call__(self, question: str) -> str:
|
46 |
+
try:
|
47 |
+
return self.agent.run(question)
|
48 |
+
except Exception as e:
|
49 |
+
error_message = f"Agent execution failed: {e}\n{traceback.format_exc()}"
|
50 |
+
print(error_message) # Log the error for debugging
|
51 |
+
return "ERROR: Agent failed to answer." # Return a string, not an exception
|
52 |
+
|
53 |
|
54 |
def run_and_submit_all(profile: gr.OAuthProfile | None, openai_key: str):
|
55 |
# --- Login & Setup ---
|
56 |
if not profile:
|
57 |
return "Please log in to Hugging Face to submit your score.", None
|
58 |
username = profile.username.strip()
|
|
|
59 |
# 1) Instantiate our improved agent
|
60 |
try:
|
61 |
agent = GaiaAgent(openai_key)
|
62 |
except Exception as e:
|
63 |
+
error_message = f"Error initializing agent: {e}\n{traceback.format_exc()}"
|
64 |
+
print(error_message)
|
65 |
return f"Error initializing agent: {e}", None
|
66 |
|
67 |
# 2) Fetch the GAIA questions
|
|
|
71 |
resp.raise_for_status()
|
72 |
questions = resp.json()
|
73 |
except Exception as e:
|
74 |
+
error_message = f"Error fetching questions: {e}\n{traceback.format_exc()}"
|
75 |
+
print(error_message)
|
76 |
return f"Error fetching questions: {e}", None
|
77 |
|
78 |
# 3) Run the agent on each question
|
|
|
84 |
try:
|
85 |
ans = agent(q)
|
86 |
except Exception as e:
|
87 |
+
error_message = f"Error processing question {tid}: {e}\n{traceback.format_exc()}"
|
88 |
+
print(error_message) # Print full traceback
|
89 |
ans = f"ERROR: {e}"
|
90 |
answers.append({"task_id": tid, "submitted_answer": ans})
|
91 |
log.append({"Task ID": tid, "Question": q, "Answer": ans})
|
|
|
105 |
f"✅ Submission Successful!\n"
|
106 |
f"User: {data['username']}\n"
|
107 |
f"Score: {data['score']}% ({data['correct_count']}/{data['total_attempted']})\n"
|
108 |
+
f"Message: {data.get('message', '')}"
|
109 |
)
|
110 |
except Exception as e:
|
111 |
+
error_message = f"Submission failed: {e}\n{traceback.format_exc()}"
|
112 |
+
print(error_message)
|
113 |
status = f"Submission failed: {e}"
|
|
|
114 |
return status, pd.DataFrame(log)
|
115 |
|
116 |
+
|
117 |
# --- Gradio UI ---
|
118 |
with gr.Blocks() as demo:
|
119 |
gr.Markdown("# GAIA Benchmark Runner")
|
|
|
128 |
run_btn = gr.Button("Run & Submit")
|
129 |
out_status = gr.Textbox(label="Status", lines=4)
|
130 |
out_table = gr.DataFrame(label="Questions & Answers")
|
|
|
131 |
run_btn.click(fn=run_and_submit_all, inputs=[login, key_in], outputs=[out_status, out_table])
|
132 |
|
133 |
if __name__ == "__main__":
|