riokorb commited on
Commit
97858fd
·
verified ·
1 Parent(s): 9fb6b7c

Updates to mitigate simulation error

Browse files
Files changed (3) hide show
  1. agent.py +3 -13
  2. app.py +100 -60
  3. requirements.txt +5 -1
agent.py CHANGED
@@ -134,18 +134,8 @@ def get_tools() -> List[BaseTool]:
134
  web_tool
135
  ]
136
 
137
- # Import BasicAgent class from app.py
138
- # The build_agent function is still exposed for compatibility
139
- from app import BasicAgent
140
-
141
- def build_agent():
142
- """Build and return a BasicAgent instance."""
143
- return BasicAgent()
144
 
145
  if __name__ == "__main__":
146
- # Test the agent with a simple question
147
- agent = build_agent()
148
- test_question = "What is the capital of France?"
149
- answer = agent(test_question)
150
- print(f"Question: {test_question}")
151
- print(f"Answer: {answer}")
 
134
  web_tool
135
  ]
136
 
137
+ # REMOVED circular import from app.py
138
+ # This file now just defines tools and doesn't attempt to build the agent
 
 
 
 
 
139
 
140
  if __name__ == "__main__":
141
+ print("This module defines tools for the agent. Run app.py or standalone_debug.py to test the agent.")
 
 
 
 
 
app.py CHANGED
@@ -5,6 +5,7 @@ import pandas as pd
5
  from typing import List, Dict, Any
6
  from dotenv import load_dotenv
7
  import json
 
8
 
9
  # LlamaIndex Imports
10
  from llama_index.core.llms import LLM
@@ -20,27 +21,24 @@ load_dotenv()
20
  # (Keep Constants as is)
21
  # --- Constants ---
22
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
23
 
24
  # --- Basic Agent Definition ---
25
  class BasicAgent:
26
  """A LlamaIndex-based agent."""
27
  def __init__(self):
28
  print("BasicAgent initialized.")
29
- try:
30
- # Initialize the core components
31
- self.llm = self._initialize_llm()
32
-
33
- # Import get_tools from agent.py here to avoid circular imports
34
- from agent import get_tools
35
- self.tools = get_tools()
36
-
37
- self.memory = ChatMemoryBuffer.from_defaults(token_limit=3900)
38
- # Build the agent
39
- self.agent = self._build_agent()
40
- print("Agent setup complete.")
41
- except Exception as e:
42
- print(f"Warning: Error during agent initialization: {e}")
43
- # Continue despite error - we'll handle this in the __call__ method
44
 
45
  def _initialize_llm(self) -> LLM:
46
  """Initialize the LLM based on configuration."""
@@ -75,15 +73,13 @@ class BasicAgent:
75
 
76
  def _build_agent(self) -> ReActAgent:
77
  """Build and return the agent."""
78
- # Load system prompt from file and append output format requirements
79
  try:
80
  with open("system_prompt.txt", "r", encoding="utf-8") as f:
81
  system_prompt = f.read()
82
- # Append output format to system prompt
83
- system_prompt = f"{system_prompt}\n\nIMPORTANT OUTPUT FORMAT:\n{OUTPUT_FORMAT}"
84
  except Exception as e:
85
  print(f"Error loading system prompt: {e}")
86
- system_prompt = f"You are an intelligent agent designed to answer a wide variety of questions.\n\nIMPORTANT OUTPUT FORMAT:\n{OUTPUT_FORMAT}"
87
 
88
  return ReActAgent.from_tools(
89
  tools=self.tools,
@@ -95,13 +91,7 @@ class BasicAgent:
95
 
96
  def __call__(self, question: str) -> str:
97
  print(f"Agent received question (first 50 chars): {question[:50]}...")
98
-
99
  try:
100
- # Check if agent was properly initialized
101
- if not hasattr(self, 'agent') or self.agent is None:
102
- # Fallback to a simple response if agent initialization failed
103
- return "I'm unable to process your request due to initialization errors."
104
-
105
  # Process the question
106
  response = self.agent.query(question)
107
  answer_text = str(response)
@@ -111,16 +101,24 @@ class BasicAgent:
111
  reasoning_trace = answer_text.split("FINAL ANSWER:")[0].strip()
112
  model_answer = answer_text.split("FINAL ANSWER:")[1].strip()
113
 
 
 
 
 
 
 
 
114
  print(f"Agent generated answer: {model_answer[:50]}..." if len(model_answer) > 50 else f"Agent generated answer: {model_answer}")
115
- return model_answer # Return just the answer part
116
  else:
117
  # If no FINAL ANSWER pattern, return the whole response
118
  print(f"No 'FINAL ANSWER' found in response. Returning full response.")
119
- return answer_text
120
 
121
  except Exception as e:
122
  print(f"Error generating answer: {e}")
123
- return f"I encountered an error while answering your question: {str(e)}"
 
124
 
125
  def run_and_submit_all(profile: gr.OAuthProfile | None):
126
  """
@@ -186,8 +184,15 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
186
  print(f"Skipping item with missing task_id or question: {item}")
187
  continue
188
  try:
189
- # Get agent response - now it's a direct string
190
- submitted_answer = agent(question_text)
 
 
 
 
 
 
 
191
 
192
  # Add to answers payload
193
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
@@ -196,14 +201,15 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
196
  results_log.append({
197
  "Task ID": task_id,
198
  "Question": question_text,
199
- "Submitted Answer": submitted_answer
 
200
  })
201
 
202
- # Add to JSONL output - save both the answer and reasoning
203
  jsonl_output.append({
204
  "task_id": task_id,
205
- "model_answer": submitted_answer,
206
- "reasoning_trace": "" # No separate reasoning trace now
207
  })
208
 
209
  except Exception as e:
@@ -278,37 +284,72 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
278
  results_df = pd.DataFrame(results_log)
279
  return status_message, results_df
280
 
 
 
 
 
 
 
 
 
281
 
282
- # --- Build Gradio Interface using Blocks ---
283
- with gr.Blocks() as demo:
284
- gr.Markdown("# Basic Agent Evaluation Runner")
285
- gr.Markdown(
286
- """
287
- **Instructions:**
288
 
289
- 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
290
- 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
291
- 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
292
-
293
- ---
294
- **Disclaimers:**
295
- Once you click the "Submit" button, it can take quite some time (this is the time for the agent to go through all the questions).
296
- This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a separate action or even to answer the questions asynchronously.
297
- """
298
- )
299
 
300
- gr.LoginButton()
301
 
302
- run_button = gr.Button("Run Evaluation & Submit All Answers")
303
 
304
- status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
305
- # Removed max_rows=10 from DataFrame constructor
306
- results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
307
 
308
- run_button.click(
309
- fn=run_and_submit_all,
310
- outputs=[status_output, results_table]
311
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
312
 
313
  if __name__ == "__main__":
314
  print("\n" + "-"*30 + " App Starting " + "-"*30)
@@ -332,4 +373,3 @@ if __name__ == "__main__":
332
  print("-"*(60 + len(" App Starting ")) + "\n")
333
 
334
  print("Launching Gradio Interface for Basic Agent Evaluation...")
335
- demo.launch(debug=True, share=False)
 
5
  from typing import List, Dict, Any
6
  from dotenv import load_dotenv
7
  import json
8
+ import traceback
9
 
10
  # LlamaIndex Imports
11
  from llama_index.core.llms import LLM
 
21
  # (Keep Constants as is)
22
  # --- Constants ---
23
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
24
+ OUTPUT_FORMAT = """You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."""
25
 
26
  # --- Basic Agent Definition ---
27
  class BasicAgent:
28
  """A LlamaIndex-based agent."""
29
  def __init__(self):
30
  print("BasicAgent initialized.")
31
+ # Initialize the core components
32
+ self.llm = self._initialize_llm()
33
+
34
+ # Import get_tools from agent.py here to avoid circular imports
35
+ from agent import get_tools
36
+ self.tools = get_tools()
37
+
38
+ self.memory = ChatMemoryBuffer.from_defaults(token_limit=3900)
39
+ # Build the agent
40
+ self.agent = self._build_agent()
41
+ print("Agent setup complete.")
 
 
 
 
42
 
43
  def _initialize_llm(self) -> LLM:
44
  """Initialize the LLM based on configuration."""
 
73
 
74
  def _build_agent(self) -> ReActAgent:
75
  """Build and return the agent."""
76
+ # Load system prompt from file
77
  try:
78
  with open("system_prompt.txt", "r", encoding="utf-8") as f:
79
  system_prompt = f.read()
 
 
80
  except Exception as e:
81
  print(f"Error loading system prompt: {e}")
82
+ system_prompt = "You are an intelligent agent designed to answer a wide variety of questions."
83
 
84
  return ReActAgent.from_tools(
85
  tools=self.tools,
 
91
 
92
  def __call__(self, question: str) -> str:
93
  print(f"Agent received question (first 50 chars): {question[:50]}...")
 
94
  try:
 
 
 
 
 
95
  # Process the question
96
  response = self.agent.query(question)
97
  answer_text = str(response)
 
101
  reasoning_trace = answer_text.split("FINAL ANSWER:")[0].strip()
102
  model_answer = answer_text.split("FINAL ANSWER:")[1].strip()
103
 
104
+ # Include the reasoning trace in the response but formatted for JSON
105
+ result = {
106
+ "model_answer": model_answer,
107
+ "reasoning_trace": reasoning_trace
108
+ }
109
+
110
+ # Return just the answer part for direct evaluation
111
  print(f"Agent generated answer: {model_answer[:50]}..." if len(model_answer) > 50 else f"Agent generated answer: {model_answer}")
112
+ return json.dumps(result)
113
  else:
114
  # If no FINAL ANSWER pattern, return the whole response
115
  print(f"No 'FINAL ANSWER' found in response. Returning full response.")
116
+ return json.dumps({"model_answer": answer_text, "reasoning_trace": ""})
117
 
118
  except Exception as e:
119
  print(f"Error generating answer: {e}")
120
+ error_msg = f"I encountered an error while answering your question: {str(e)}"
121
+ return json.dumps({"model_answer": error_msg, "reasoning_trace": ""})
122
 
123
  def run_and_submit_all(profile: gr.OAuthProfile | None):
124
  """
 
184
  print(f"Skipping item with missing task_id or question: {item}")
185
  continue
186
  try:
187
+ # Get agent response which is now a JSON string
188
+ agent_response_json = agent(question_text)
189
+ agent_response = json.loads(agent_response_json)
190
+
191
+ model_answer = agent_response.get("model_answer", "")
192
+ reasoning_trace = agent_response.get("reasoning_trace", "")
193
+
194
+ # Format for submission payload
195
+ submitted_answer = model_answer
196
 
197
  # Add to answers payload
198
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
 
201
  results_log.append({
202
  "Task ID": task_id,
203
  "Question": question_text,
204
+ "Submitted Answer": submitted_answer,
205
+ "Reasoning": reasoning_trace[:100] + "..." if len(reasoning_trace) > 100 else reasoning_trace
206
  })
207
 
208
+ # Add to JSONL output
209
  jsonl_output.append({
210
  "task_id": task_id,
211
+ "model_answer": model_answer,
212
+ "reasoning_trace": reasoning_trace
213
  })
214
 
215
  except Exception as e:
 
284
  results_df = pd.DataFrame(results_log)
285
  return status_message, results_df
286
 
287
+ # Try to load Gradio components, handling potential OAuth errors
288
+ try:
289
+ # --- Build Gradio Interface using Blocks ---
290
+ with gr.Blocks() as demo:
291
+ gr.Markdown("# Basic Agent Evaluation Runner")
292
+ gr.Markdown(
293
+ """
294
+ **Instructions:**
295
 
296
+ 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
297
+ 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
298
+ 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
 
 
 
299
 
300
+ ---
301
+ **Disclaimers:**
302
+ Once you click the "Submit" button, it can take quite some time (this is the time for the agent to go through all the questions).
303
+ This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a separate action or even to answer the questions asynchronously.
304
+ """
305
+ )
 
 
 
 
306
 
307
+ gr.LoginButton()
308
 
309
+ run_button = gr.Button("Run Evaluation & Submit All Answers")
310
 
311
+ status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
312
+ results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
 
313
 
314
+ run_button.click(
315
+ fn=run_and_submit_all,
316
+ outputs=[status_output, results_table]
317
+ )
318
+ except ImportError as e:
319
+ print(f"Error initializing Gradio OAuth components: {e}")
320
+ print("This error is expected when running locally without OAuth dependencies.")
321
+ print("You can test the agent using standalone_debug.py or mini_test.py instead.")
322
+ # Create a minimal demo without OAuth if running locally
323
+ try:
324
+ import gradio as gr
325
+ with gr.Blocks() as demo:
326
+ gr.Markdown("# Agent Test Environment (Local Mode)")
327
+ gr.Markdown("OAuth dependencies not found. Running in local test mode.")
328
+
329
+ with gr.Row():
330
+ with gr.Column():
331
+ question_input = gr.Textbox(label="Enter your question", lines=2)
332
+ test_button = gr.Button("Test Agent")
333
+
334
+ with gr.Column():
335
+ answer_output = gr.Textbox(label="Agent Answer", lines=10)
336
+
337
+ def test_agent_locally(question):
338
+ try:
339
+ agent = BasicAgent()
340
+ result = agent(question)
341
+ return result
342
+ except Exception as e:
343
+ return f"Error: {str(e)}\n\n{traceback.format_exc()}"
344
+
345
+ test_button.click(
346
+ fn=test_agent_locally,
347
+ inputs=[question_input],
348
+ outputs=[answer_output]
349
+ )
350
+ except Exception as e:
351
+ print(f"Failed to create even minimal Gradio interface: {e}")
352
+ demo = None
353
 
354
  if __name__ == "__main__":
355
  print("\n" + "-"*30 + " App Starting " + "-"*30)
 
373
  print("-"*(60 + len(" App Starting ")) + "\n")
374
 
375
  print("Launching Gradio Interface for Basic Agent Evaluation...")
 
requirements.txt CHANGED
@@ -1,7 +1,8 @@
1
- gradio>=4.0.0
2
  requests>=2.31.0
3
  pandas>=2.0.0
4
  python-dotenv>=1.0.0
 
5
 
6
  # LlamaIndex packages
7
  llama-index>=0.10.0
@@ -10,3 +11,6 @@ llama-index-llms-huggingface>=0.1.0
10
  llama-index-llms-huggingface-api>=0.1.0
11
  llama-index-readers-web>=0.1.0
12
  llama-index-readers-wikipedia>=0.1.0
 
 
 
 
1
+ gradio[oauth]>=4.0.0
2
  requests>=2.31.0
3
  pandas>=2.0.0
4
  python-dotenv>=1.0.0
5
+ itsdangerous>=2.0.0
6
 
7
  # LlamaIndex packages
8
  llama-index>=0.10.0
 
11
  llama-index-llms-huggingface-api>=0.1.0
12
  llama-index-readers-web>=0.1.0
13
  llama-index-readers-wikipedia>=0.1.0
14
+
15
+ # For Google's newer Gemini API (recommended over the deprecated version)
16
+ google-generativeai>=0.3.0