Yago Bolivar committed on
Commit
133fe7e
·
1 Parent(s): 1cfb30c

feat: implement configuration files and enhance tool definitions for GAIA Agent

Browse files
app.py CHANGED
@@ -1,24 +1,19 @@
1
  import os
2
  import gradio as gr
3
  import requests
4
- import inspect # Keep if you plan to use it for agent introspection later
5
  import pandas as pd
 
6
 
7
  from smolagents import ToolCallingAgent, CodeAgent, InferenceClientModel, Tool
8
- # Ensure that Tool has a .name attribute for ToolCallingAgent compatibility
9
  if not hasattr(Tool, "name"):
10
  Tool.name = property(lambda self: getattr(self, "tool_name", None))
11
  if not hasattr(Tool, "description"):
12
- # Proxy description to the internal attribute (e.g., tool_description)
13
  Tool.description = property(lambda self: getattr(self, "tool_description", None))
14
  if not hasattr(Tool, "inputs"):
15
- # Provide an empty inputs list to satisfy Jinja templates
16
  Tool.inputs = property(lambda self: [])
17
  if not hasattr(Tool, "outputs"):
18
- # Provide an empty outputs list to satisfy Jinja templates
19
  Tool.outputs = property(lambda self: [])
20
  if not hasattr(Tool, "output_type"):
21
- # Provide a default output_type for template rendering
22
  Tool.output_type = property(lambda self: None)
23
 
24
  from src.file_processing_tool import FileIdentifier
@@ -26,24 +21,48 @@ from src.speech_to_text import transcribe_audio
26
  from src.download_utils import download_file
27
  from src.spreadsheet_tool import SpreadsheetTool
28
  from src.web_browsing_tool import WebBrowser
29
- from src.text_reversal_tool import reverse_text
30
  from src.markdown_table_parser import parse_markdown_table
31
  from src.image_processing_tool import ImageProcessor
32
  from src.video_processing_tool import VideoProcessingTool
33
  from src.python_tool import CodeExecutionTool
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
- # --- Constants ---
36
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
37
- DOWNLOADED_FILES_DIR = "downloaded_task_files" # Directory to store downloaded files
38
  os.makedirs(DOWNLOADED_FILES_DIR, exist_ok=True)
39
 
40
  # --- Smol Agents Setup ---
41
- # TODO: Replace with your desired model ID
42
- MODEL_ID = "gpt4all/gpt4all-j-v1.3-groovy" # Placeholder
43
  try:
44
- model = InferenceClientModel(model_id=MODEL_ID)
 
 
 
 
45
  except Exception as e:
46
- print(f"Error initializing InferenceClientModel with {MODEL_ID}: {e}")
47
  print("Please ensure the model ID is correct and you have the necessary HF_TOKEN or access.")
48
  # Fallback or alternative model can be set here if needed
49
  # For now, we'll let the app fail later if the model is crucial and not loaded.
@@ -52,8 +71,7 @@ except Exception as e:
52
 
53
  # --- Tool Instantiation ---
54
  # These are instantiated once and passed to agents
55
- file_identifier_tool = FileIdentifier()
56
- spreadsheet_tool_instance = SpreadsheetTool()
57
  web_browser_tool = WebBrowser()
58
  image_processor_tool = ImageProcessor()
59
  video_processor_tool = VideoProcessingTool()
@@ -76,11 +94,7 @@ web_agent = ToolCallingAgent(
76
 
77
  file_processing_agent = ToolCallingAgent(
78
  tools=[
79
- Tool(
80
- name="identify_file_type",
81
- description="Identifies the type of a file and suggests an action. Input should be the file path.",
82
- func=file_identifier_tool.identify_file
83
- ),
84
  Tool(
85
  name="parse_spreadsheet",
86
  description="Parses data from a spreadsheet file (e.g., CSV, Excel). Input should be the file path.",
@@ -168,11 +182,7 @@ code_interpreter_agent = ToolCallingAgent(
168
 
169
  text_tool_agent = ToolCallingAgent(
170
  tools=[
171
- Tool(
172
- name="reverse_string_text",
173
- description="Reverses the order of characters in a given string. Input should be the string to reverse.",
174
- func=reverse_text
175
- )
176
  ],
177
  model=model,
178
  name="TextToolAgent",
@@ -267,8 +277,16 @@ Your final response should be the answer to the task.
267
  print(f"Error during ManagerAgent execution: {e}")
268
  return f"Error during agent execution: {str(e)}"
269
 
 
 
 
 
 
 
 
 
270
  # Initialize the Manager Agent
271
- if model: # Only initialize if the model loaded successfully
272
  manager_agent = GaiaManagerAgent(
273
  model=model,
274
  file_download_dir=DOWNLOADED_FILES_DIR,
@@ -280,12 +298,12 @@ if model: # Only initialize if the model loaded successfully
280
  code_interpreter_agent,
281
  text_tool_agent,
282
  ],
283
- # Ensure the manager agent can import modules used by the tools if necessary,
284
- # or that the tools are self-contained.
285
- # additional_authorized_imports=["pandas", "numpy", "re", "requests", "bs4", "cv2", "PIL", "chess"] # Add as needed
286
  )
287
  else:
288
- manager_agent = None
289
 
290
  # --- Main Application Logic ---
291
  def run_and_submit_all(profile: gr.OAuthProfile | None):
 
1
  import os
2
  import gradio as gr
3
  import requests
 
4
  import pandas as pd
5
+ import json
6
 
7
  from smolagents import ToolCallingAgent, CodeAgent, InferenceClientModel, Tool
 
8
  if not hasattr(Tool, "name"):
9
  Tool.name = property(lambda self: getattr(self, "tool_name", None))
10
  if not hasattr(Tool, "description"):
 
11
  Tool.description = property(lambda self: getattr(self, "tool_description", None))
12
  if not hasattr(Tool, "inputs"):
 
13
  Tool.inputs = property(lambda self: [])
14
  if not hasattr(Tool, "outputs"):
 
15
  Tool.outputs = property(lambda self: [])
16
  if not hasattr(Tool, "output_type"):
 
17
  Tool.output_type = property(lambda self: None)
18
 
19
  from src.file_processing_tool import FileIdentifier
 
21
  from src.download_utils import download_file
22
  from src.spreadsheet_tool import SpreadsheetTool
23
  from src.web_browsing_tool import WebBrowser
24
+ from src.text_reversal_tool import reverse_string_text # Changed import
25
  from src.markdown_table_parser import parse_markdown_table
26
  from src.image_processing_tool import ImageProcessor
27
  from src.video_processing_tool import VideoProcessingTool
28
  from src.python_tool import CodeExecutionTool
29
+ from src.final_answer_tool import final_answer # Import the final_answer tool
30
+
31
+ import yaml # for loading prompt templates
32
+
33
+ # --- Constants & Config Loading ---
34
+ # Load model configuration
35
# Fallback settings used whenever model_config.json is missing or malformed.
# Defined once so the handlers below cannot drift apart.
_DEFAULT_MODEL_CONFIG = {
    "model_id": "gpt4all/gpt4all-j-v1.3-groovy",  # Default model
    "max_tokens": 2048,
    "temperature": 0.7,
}

try:
    with open("model_config.json", "r", encoding="utf-8") as f:
        model_config = json.load(f)
except FileNotFoundError:
    print("Warning: model_config.json not found. Using default model settings.")
    model_config = dict(_DEFAULT_MODEL_CONFIG)
except json.JSONDecodeError:
    print("Warning: Error decoding model_config.json. Using default model settings.")
    model_config = dict(_DEFAULT_MODEL_CONFIG)

# The file may parse as valid JSON that is not an object (e.g. a list or a
# string); later code reads settings with .get(), so require a dict here.
if not isinstance(model_config, dict):
    print("Warning: model_config.json does not contain a JSON object. Using default model settings.")
    model_config = dict(_DEFAULT_MODEL_CONFIG)
52
 
 
53
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
54
+ DOWNLOADED_FILES_DIR = "downloaded_task_files" # Directory to store downloaded files
55
  os.makedirs(DOWNLOADED_FILES_DIR, exist_ok=True)
56
 
57
  # --- Smol Agents Setup ---
 
 
58
  try:
59
+ model = InferenceClientModel(
60
+ model_id=model_config.get("model_id"),
61
+ max_tokens=model_config.get("max_tokens"),
62
+ temperature=model_config.get("temperature")
63
+ )
64
  except Exception as e:
65
+ print(f"Error initializing InferenceClientModel with {model_config.get('model_id')}: {e}")
66
  print("Please ensure the model ID is correct and you have the necessary HF_TOKEN or access.")
67
  # Fallback or alternative model can be set here if needed
68
  # For now, we'll let the app fail later if the model is crucial and not loaded.
 
71
 
72
  # --- Tool Instantiation ---
73
  # These are instantiated once and passed to agents
74
+ spreadsheet_tool_instance = SpreadsheetTool() # Keep instance if methods need shared state
 
75
  web_browser_tool = WebBrowser()
76
  image_processor_tool = ImageProcessor()
77
  video_processor_tool = VideoProcessingTool()
 
94
 
95
  file_processing_agent = ToolCallingAgent(
96
  tools=[
97
+ FileIdentifier().identify_file, # Use the decorated method
 
 
 
 
98
  Tool(
99
  name="parse_spreadsheet",
100
  description="Parses data from a spreadsheet file (e.g., CSV, Excel). Input should be the file path.",
 
182
 
183
  text_tool_agent = ToolCallingAgent(
184
  tools=[
185
+ reverse_string_text # Use the decorated tool object directly
 
 
 
 
186
  ],
187
  model=model,
188
  name="TextToolAgent",
 
277
  print(f"Error during ManagerAgent execution: {e}")
278
  return f"Error during agent execution: {str(e)}"
279
 
280
+ ## Load prompt templates for manager agent
281
# Start from an empty mapping so the manager agent always receives a dict,
# even when prompts.yaml is absent, empty, or unusable.
prompt_templates = {}
try:
    with open("prompts.yaml", "r", encoding="utf-8") as f:
        loaded_templates = yaml.safe_load(f)
except Exception:
    # Best-effort: prompt templates are optional, so any failure keeps the {} default.
    print("Warning: Could not load prompts.yaml, proceeding without prompt templates.")
else:
    # safe_load returns None for an empty document and may return a non-mapping
    # (list, scalar); only a real dict replaces the default.
    if isinstance(loaded_templates, dict):
        prompt_templates = loaded_templates
287
+
288
  # Initialize the Manager Agent
289
+ if model: # Only initialize if the model loaded successfully
290
  manager_agent = GaiaManagerAgent(
291
  model=model,
292
  file_download_dir=DOWNLOADED_FILES_DIR,
 
298
  code_interpreter_agent,
299
  text_tool_agent,
300
  ],
301
+ tools=[final_answer], # Add final_answer to the manager agent's tools
302
+ # additional_authorized_imports=["pandas", "numpy", "re", "requests", "bs4", "cv2", "PIL", "chess"], # Add as needed
303
+ prompt_templates=prompt_templates
304
  )
305
  else:
306
+ manager_agent = None
307
 
308
  # --- Main Application Logic ---
309
  def run_and_submit_all(profile: gr.OAuthProfile | None):
config.yaml ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # Configuration for the GAIA Agent app
2
+ model_id: "gpt4all/gpt4all-j-v1.3-groovy"
model_config.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "model_id": "gpt4all/gpt4all-j-v1.3-groovy",
3
+ "max_tokens": 2048,
4
+ "temperature": 0.7
5
+ }
prompts.yaml ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Prompt templates for GaiaManagerAgent
2
+ manager_prompt: |
3
+ Task: {{task_description}}
4
+ {% if file_url %}
5
+ An associated file is available at: {{file_url}}
6
+ File type determined as: {{file_type}}, suggested action: {{suggested_action}}
7
+ {% endif %}
8
+ You are a manager agent for the GAIA benchmark. Your goal is to answer the given task.
9
+ You have the following specialized agents available to you as tools:
10
+ - WebSearchAgent: Call this agent for web browsing and fetching URL content.
11
+ - FileProcessorAgent: Call this agent for identifying file types, parsing spreadsheets, transcribing audio, and parsing markdown tables.
12
+ - VisionAgent: Call this agent for image processing, OCR, and chess image analysis.
13
+ - VideoAgent: Call this agent for video processing tasks.
14
+ - CodeInterpreterAgent: Call this agent to execute Python code.
15
+ - TextToolAgent: Call this agent for simple text manipulations like reversing text.
16
+
17
+ Based on the task and any provided file, devise a plan and call the appropriate agent(s) to gather information and formulate an answer.
18
+ Generate the Python code to call these agents and produce the final answer.
19
+ Your final response should be the answer to the task.
src/file_processing_tool.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import os
2
  import mimetypes
3
 
@@ -28,6 +29,16 @@ class FileIdentifier:
28
  for ext in details["extensions"]:
29
  self.extension_to_type[ext] = simple_type
30
 
 
 
 
 
 
 
 
 
 
 
31
  def identify_file(self, filepath):
32
  """
33
  Identifies the file type and suggests a processing action.
 
1
+ from smolagents import tool
2
  import os
3
  import mimetypes
4
 
 
29
  for ext in details["extensions"]:
30
  self.extension_to_type[ext] = simple_type
31
 
32
+ @tool(
33
+ name="identify_file_type",
34
+ description="Identifies the type of a file and suggests an action.",
35
+ inputs={"filepath": {"type": "str", "description": "Path to the file"}},
36
+ outputs={
37
+ "determined_type": {"type": "str", "description": "Determined file type"},
38
+ "suggested_action": {"type": "str", "description": "Suggested processing action"}
39
+ },
40
+ output_type="dict"
41
+ )
42
  def identify_file(self, filepath):
43
  """
44
  Identifies the file type and suggests a processing action.
src/final_answer_tool.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, Optional
2
+ from smolagents import tool
3
+
4
+ @tool(
5
+ name="final_answer",
6
+ description="Provides a final answer to the given problem.",
7
+ inputs={'answer': {'type': 'any', 'description': 'The final answer to the problem'}},
8
+ output_type="any"
9
+ )
10
+ def final_answer(answer: Any) -> Any:
11
+ return answer
src/text_reversal_tool.py CHANGED
@@ -1,4 +1,13 @@
1
- def reverse_text(text: str) -> str:
 
 
 
 
 
 
 
 
 
2
  """
3
  Reverses or processes reversed text, useful for decoding or analyzing reversed strings.
4
  """
 
1
+ from smolagents import tool
2
+
3
+ @tool(
4
+ name="reverse_string_text",
5
+ description="Reverses the order of characters in a given string.",
6
+ inputs={"text": {"type": "str", "description": "String to reverse"}},
7
+ outputs={"reversed_text": {"type": "str", "description": "Reversed string"}},
8
+ output_type="str"
9
+ )
10
+ def reverse_string_text(text: str) -> str:
11
  """
12
  Reverses or processes reversed text, useful for decoding or analyzing reversed strings.
13
  """