Spaces:
Sleeping
Sleeping
Updates to mitigate simulation error
Browse files- agent.py +3 -13
- app.py +100 -60
- requirements.txt +5 -1
agent.py
CHANGED
@@ -134,18 +134,8 @@ def get_tools() -> List[BaseTool]:
|
|
134 |
web_tool
|
135 |
]
|
136 |
|
137 |
-
#
|
138 |
-
#
|
139 |
-
from app import BasicAgent
|
140 |
-
|
141 |
-
def build_agent():
|
142 |
-
"""Build and return a BasicAgent instance."""
|
143 |
-
return BasicAgent()
|
144 |
|
145 |
if __name__ == "__main__":
|
146 |
-
|
147 |
-
agent = build_agent()
|
148 |
-
test_question = "What is the capital of France?"
|
149 |
-
answer = agent(test_question)
|
150 |
-
print(f"Question: {test_question}")
|
151 |
-
print(f"Answer: {answer}")
|
|
|
134 |
web_tool
|
135 |
]
|
136 |
|
137 |
+
# REMOVED circular import from app.py
|
138 |
+
# This file now just defines tools and doesn't attempt to build the agent
|
|
|
|
|
|
|
|
|
|
|
139 |
|
140 |
if __name__ == "__main__":
|
141 |
+
print("This module defines tools for the agent. Run app.py or standalone_debug.py to test the agent.")
|
|
|
|
|
|
|
|
|
|
app.py
CHANGED
@@ -5,6 +5,7 @@ import pandas as pd
|
|
5 |
from typing import List, Dict, Any
|
6 |
from dotenv import load_dotenv
|
7 |
import json
|
|
|
8 |
|
9 |
# LlamaIndex Imports
|
10 |
from llama_index.core.llms import LLM
|
@@ -20,27 +21,24 @@ load_dotenv()
|
|
20 |
# (Keep Constants as is)
|
21 |
# --- Constants ---
|
22 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
|
|
23 |
|
24 |
# --- Basic Agent Definition ---
|
25 |
class BasicAgent:
|
26 |
"""A LlamaIndex-based agent."""
|
27 |
def __init__(self):
|
28 |
print("BasicAgent initialized.")
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
print("Agent setup complete.")
|
41 |
-
except Exception as e:
|
42 |
-
print(f"Warning: Error during agent initialization: {e}")
|
43 |
-
# Continue despite error - we'll handle this in the __call__ method
|
44 |
|
45 |
def _initialize_llm(self) -> LLM:
|
46 |
"""Initialize the LLM based on configuration."""
|
@@ -75,15 +73,13 @@ class BasicAgent:
|
|
75 |
|
76 |
def _build_agent(self) -> ReActAgent:
|
77 |
"""Build and return the agent."""
|
78 |
-
# Load system prompt from file
|
79 |
try:
|
80 |
with open("system_prompt.txt", "r", encoding="utf-8") as f:
|
81 |
system_prompt = f.read()
|
82 |
-
# Append output format to system prompt
|
83 |
-
system_prompt = f"{system_prompt}\n\nIMPORTANT OUTPUT FORMAT:\n{OUTPUT_FORMAT}"
|
84 |
except Exception as e:
|
85 |
print(f"Error loading system prompt: {e}")
|
86 |
-
system_prompt =
|
87 |
|
88 |
return ReActAgent.from_tools(
|
89 |
tools=self.tools,
|
@@ -95,13 +91,7 @@ class BasicAgent:
|
|
95 |
|
96 |
def __call__(self, question: str) -> str:
|
97 |
print(f"Agent received question (first 50 chars): {question[:50]}...")
|
98 |
-
|
99 |
try:
|
100 |
-
# Check if agent was properly initialized
|
101 |
-
if not hasattr(self, 'agent') or self.agent is None:
|
102 |
-
# Fallback to a simple response if agent initialization failed
|
103 |
-
return "I'm unable to process your request due to initialization errors."
|
104 |
-
|
105 |
# Process the question
|
106 |
response = self.agent.query(question)
|
107 |
answer_text = str(response)
|
@@ -111,16 +101,24 @@ class BasicAgent:
|
|
111 |
reasoning_trace = answer_text.split("FINAL ANSWER:")[0].strip()
|
112 |
model_answer = answer_text.split("FINAL ANSWER:")[1].strip()
|
113 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
114 |
print(f"Agent generated answer: {model_answer[:50]}..." if len(model_answer) > 50 else f"Agent generated answer: {model_answer}")
|
115 |
-
return
|
116 |
else:
|
117 |
# If no FINAL ANSWER pattern, return the whole response
|
118 |
print(f"No 'FINAL ANSWER' found in response. Returning full response.")
|
119 |
-
return answer_text
|
120 |
|
121 |
except Exception as e:
|
122 |
print(f"Error generating answer: {e}")
|
123 |
-
|
|
|
124 |
|
125 |
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
126 |
"""
|
@@ -186,8 +184,15 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
186 |
print(f"Skipping item with missing task_id or question: {item}")
|
187 |
continue
|
188 |
try:
|
189 |
-
# Get agent response
|
190 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
191 |
|
192 |
# Add to answers payload
|
193 |
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
@@ -196,14 +201,15 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
196 |
results_log.append({
|
197 |
"Task ID": task_id,
|
198 |
"Question": question_text,
|
199 |
-
"Submitted Answer": submitted_answer
|
|
|
200 |
})
|
201 |
|
202 |
-
# Add to JSONL output
|
203 |
jsonl_output.append({
|
204 |
"task_id": task_id,
|
205 |
-
"model_answer":
|
206 |
-
"reasoning_trace":
|
207 |
})
|
208 |
|
209 |
except Exception as e:
|
@@ -278,37 +284,72 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
278 |
results_df = pd.DataFrame(results_log)
|
279 |
return status_message, results_df
|
280 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
281 |
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
gr.Markdown(
|
286 |
-
"""
|
287 |
-
**Instructions:**
|
288 |
|
289 |
-
|
290 |
-
|
291 |
-
|
292 |
-
|
293 |
-
|
294 |
-
|
295 |
-
Once you click the "Submit" button, it can take quite some time (this is the time for the agent to go through all the questions).
|
296 |
-
This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance, to address the long delay after pressing the submit button, a solution could be to cache the answers and submit them in a separate action, or even to answer the questions asynchronously.
|
297 |
-
"""
|
298 |
-
)
|
299 |
|
300 |
-
|
301 |
|
302 |
-
|
303 |
|
304 |
-
|
305 |
-
|
306 |
-
results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
|
307 |
|
308 |
-
|
309 |
-
|
310 |
-
|
311 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
312 |
|
313 |
if __name__ == "__main__":
|
314 |
print("\n" + "-"*30 + " App Starting " + "-"*30)
|
@@ -332,4 +373,3 @@ if __name__ == "__main__":
|
|
332 |
print("-"*(60 + len(" App Starting ")) + "\n")
|
333 |
|
334 |
print("Launching Gradio Interface for Basic Agent Evaluation...")
|
335 |
-
demo.launch(debug=True, share=False)
|
|
|
5 |
from typing import List, Dict, Any
|
6 |
from dotenv import load_dotenv
|
7 |
import json
|
8 |
+
import traceback
|
9 |
|
10 |
# LlamaIndex Imports
|
11 |
from llama_index.core.llms import LLM
|
|
|
21 |
# (Keep Constants as is)
|
22 |
# --- Constants ---
|
23 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
24 |
+
OUTPUT_FORMAT = """You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."""
|
25 |
|
26 |
# --- Basic Agent Definition ---
|
27 |
class BasicAgent:
|
28 |
"""A LlamaIndex-based agent."""
|
29 |
def __init__(self):
|
30 |
print("BasicAgent initialized.")
|
31 |
+
# Initialize the core components
|
32 |
+
self.llm = self._initialize_llm()
|
33 |
+
|
34 |
+
# Import get_tools from agent.py here to avoid circular imports
|
35 |
+
from agent import get_tools
|
36 |
+
self.tools = get_tools()
|
37 |
+
|
38 |
+
self.memory = ChatMemoryBuffer.from_defaults(token_limit=3900)
|
39 |
+
# Build the agent
|
40 |
+
self.agent = self._build_agent()
|
41 |
+
print("Agent setup complete.")
|
|
|
|
|
|
|
|
|
42 |
|
43 |
def _initialize_llm(self) -> LLM:
|
44 |
"""Initialize the LLM based on configuration."""
|
|
|
73 |
|
74 |
def _build_agent(self) -> ReActAgent:
|
75 |
"""Build and return the agent."""
|
76 |
+
# Load system prompt from file
|
77 |
try:
|
78 |
with open("system_prompt.txt", "r", encoding="utf-8") as f:
|
79 |
system_prompt = f.read()
|
|
|
|
|
80 |
except Exception as e:
|
81 |
print(f"Error loading system prompt: {e}")
|
82 |
+
system_prompt = "You are an intelligent agent designed to answer a wide variety of questions."
|
83 |
|
84 |
return ReActAgent.from_tools(
|
85 |
tools=self.tools,
|
|
|
91 |
|
92 |
def __call__(self, question: str) -> str:
|
93 |
print(f"Agent received question (first 50 chars): {question[:50]}...")
|
|
|
94 |
try:
|
|
|
|
|
|
|
|
|
|
|
95 |
# Process the question
|
96 |
response = self.agent.query(question)
|
97 |
answer_text = str(response)
|
|
|
101 |
reasoning_trace = answer_text.split("FINAL ANSWER:")[0].strip()
|
102 |
model_answer = answer_text.split("FINAL ANSWER:")[1].strip()
|
103 |
|
104 |
+
# Include the reasoning trace in the response but formatted for JSON
|
105 |
+
result = {
|
106 |
+
"model_answer": model_answer,
|
107 |
+
"reasoning_trace": reasoning_trace
|
108 |
+
}
|
109 |
+
|
110 |
+
# Return just the answer part for direct evaluation
|
111 |
print(f"Agent generated answer: {model_answer[:50]}..." if len(model_answer) > 50 else f"Agent generated answer: {model_answer}")
|
112 |
+
return json.dumps(result)
|
113 |
else:
|
114 |
# If no FINAL ANSWER pattern, return the whole response
|
115 |
print(f"No 'FINAL ANSWER' found in response. Returning full response.")
|
116 |
+
return json.dumps({"model_answer": answer_text, "reasoning_trace": ""})
|
117 |
|
118 |
except Exception as e:
|
119 |
print(f"Error generating answer: {e}")
|
120 |
+
error_msg = f"I encountered an error while answering your question: {str(e)}"
|
121 |
+
return json.dumps({"model_answer": error_msg, "reasoning_trace": ""})
|
122 |
|
123 |
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
124 |
"""
|
|
|
184 |
print(f"Skipping item with missing task_id or question: {item}")
|
185 |
continue
|
186 |
try:
|
187 |
+
# Get agent response which is now a JSON string
|
188 |
+
agent_response_json = agent(question_text)
|
189 |
+
agent_response = json.loads(agent_response_json)
|
190 |
+
|
191 |
+
model_answer = agent_response.get("model_answer", "")
|
192 |
+
reasoning_trace = agent_response.get("reasoning_trace", "")
|
193 |
+
|
194 |
+
# Format for submission payload
|
195 |
+
submitted_answer = model_answer
|
196 |
|
197 |
# Add to answers payload
|
198 |
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
|
|
201 |
results_log.append({
|
202 |
"Task ID": task_id,
|
203 |
"Question": question_text,
|
204 |
+
"Submitted Answer": submitted_answer,
|
205 |
+
"Reasoning": reasoning_trace[:100] + "..." if len(reasoning_trace) > 100 else reasoning_trace
|
206 |
})
|
207 |
|
208 |
+
# Add to JSONL output
|
209 |
jsonl_output.append({
|
210 |
"task_id": task_id,
|
211 |
+
"model_answer": model_answer,
|
212 |
+
"reasoning_trace": reasoning_trace
|
213 |
})
|
214 |
|
215 |
except Exception as e:
|
|
|
284 |
results_df = pd.DataFrame(results_log)
|
285 |
return status_message, results_df
|
286 |
|
287 |
+
# Try to load Gradio components, handling potential OAuth errors
|
288 |
+
try:
|
289 |
+
# --- Build Gradio Interface using Blocks ---
|
290 |
+
with gr.Blocks() as demo:
|
291 |
+
gr.Markdown("# Basic Agent Evaluation Runner")
|
292 |
+
gr.Markdown(
|
293 |
+
"""
|
294 |
+
**Instructions:**
|
295 |
|
296 |
+
1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
|
297 |
+
2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
|
298 |
+
3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
|
|
|
|
|
|
|
299 |
|
300 |
+
---
|
301 |
+
**Disclaimers:**
|
302 |
+
Once you click the "Submit" button, it can take quite some time (this is the time for the agent to go through all the questions).
|
303 |
+
This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance, to address the long delay after pressing the submit button, a solution could be to cache the answers and submit them in a separate action, or even to answer the questions asynchronously.
|
304 |
+
"""
|
305 |
+
)
|
|
|
|
|
|
|
|
|
306 |
|
307 |
+
gr.LoginButton()
|
308 |
|
309 |
+
run_button = gr.Button("Run Evaluation & Submit All Answers")
|
310 |
|
311 |
+
status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
|
312 |
+
results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
|
|
|
313 |
|
314 |
+
run_button.click(
|
315 |
+
fn=run_and_submit_all,
|
316 |
+
outputs=[status_output, results_table]
|
317 |
+
)
|
318 |
+
except ImportError as e:
|
319 |
+
print(f"Error initializing Gradio OAuth components: {e}")
|
320 |
+
print("This error is expected when running locally without OAuth dependencies.")
|
321 |
+
print("You can test the agent using standalone_debug.py or mini_test.py instead.")
|
322 |
+
# Create a minimal demo without OAuth if running locally
|
323 |
+
try:
|
324 |
+
import gradio as gr
|
325 |
+
with gr.Blocks() as demo:
|
326 |
+
gr.Markdown("# Agent Test Environment (Local Mode)")
|
327 |
+
gr.Markdown("OAuth dependencies not found. Running in local test mode.")
|
328 |
+
|
329 |
+
with gr.Row():
|
330 |
+
with gr.Column():
|
331 |
+
question_input = gr.Textbox(label="Enter your question", lines=2)
|
332 |
+
test_button = gr.Button("Test Agent")
|
333 |
+
|
334 |
+
with gr.Column():
|
335 |
+
answer_output = gr.Textbox(label="Agent Answer", lines=10)
|
336 |
+
|
337 |
+
def test_agent_locally(question):
|
338 |
+
try:
|
339 |
+
agent = BasicAgent()
|
340 |
+
result = agent(question)
|
341 |
+
return result
|
342 |
+
except Exception as e:
|
343 |
+
return f"Error: {str(e)}\n\n{traceback.format_exc()}"
|
344 |
+
|
345 |
+
test_button.click(
|
346 |
+
fn=test_agent_locally,
|
347 |
+
inputs=[question_input],
|
348 |
+
outputs=[answer_output]
|
349 |
+
)
|
350 |
+
except Exception as e:
|
351 |
+
print(f"Failed to create even minimal Gradio interface: {e}")
|
352 |
+
demo = None
|
353 |
|
354 |
if __name__ == "__main__":
|
355 |
print("\n" + "-"*30 + " App Starting " + "-"*30)
|
|
|
373 |
print("-"*(60 + len(" App Starting ")) + "\n")
|
374 |
|
375 |
print("Launching Gradio Interface for Basic Agent Evaluation...")
|
|
requirements.txt
CHANGED
@@ -1,7 +1,8 @@
|
|
1 |
-
gradio>=4.0.0
|
2 |
requests>=2.31.0
|
3 |
pandas>=2.0.0
|
4 |
python-dotenv>=1.0.0
|
|
|
5 |
|
6 |
# LlamaIndex packages
|
7 |
llama-index>=0.10.0
|
@@ -10,3 +11,6 @@ llama-index-llms-huggingface>=0.1.0
|
|
10 |
llama-index-llms-huggingface-api>=0.1.0
|
11 |
llama-index-readers-web>=0.1.0
|
12 |
llama-index-readers-wikipedia>=0.1.0
|
|
|
|
|
|
|
|
1 |
+
gradio[oauth]>=4.0.0
|
2 |
requests>=2.31.0
|
3 |
pandas>=2.0.0
|
4 |
python-dotenv>=1.0.0
|
5 |
+
itsdangerous>=2.0.0
|
6 |
|
7 |
# LlamaIndex packages
|
8 |
llama-index>=0.10.0
|
|
|
11 |
llama-index-llms-huggingface-api>=0.1.0
|
12 |
llama-index-readers-web>=0.1.0
|
13 |
llama-index-readers-wikipedia>=0.1.0
|
14 |
+
|
15 |
+
# For Google's newer Gemini API (recommended over the deprecated version)
|
16 |
+
google-generativeai>=0.3.0
|