Yago Bolivar
committed on
Commit
·
133fe7e
1
Parent(s):
1cfb30c
feat: implement configuration files and enhance tool definitions for GAIA Agent
Browse files
- app.py +48 -30
- config.yaml +2 -0
- model_config.json +5 -0
- prompts.yaml +19 -0
- src/file_processing_tool.py +11 -0
- src/final_answer_tool.py +11 -0
- src/text_reversal_tool.py +10 -1
app.py
CHANGED
@@ -1,24 +1,19 @@
|
|
1 |
import os
|
2 |
import gradio as gr
|
3 |
import requests
|
4 |
-
import inspect # Keep if you plan to use it for agent introspection later
|
5 |
import pandas as pd
|
|
|
6 |
|
7 |
from smolagents import ToolCallingAgent, CodeAgent, InferenceClientModel, Tool
|
8 |
-
# Ensure that Tool has a .name attribute for ToolCallingAgent compatibility
|
9 |
if not hasattr(Tool, "name"):
|
10 |
Tool.name = property(lambda self: getattr(self, "tool_name", None))
|
11 |
if not hasattr(Tool, "description"):
|
12 |
-
# Proxy description to the internal attribute (e.g., tool_description)
|
13 |
Tool.description = property(lambda self: getattr(self, "tool_description", None))
|
14 |
if not hasattr(Tool, "inputs"):
|
15 |
-
# Provide an empty inputs list to satisfy Jinja templates
|
16 |
Tool.inputs = property(lambda self: [])
|
17 |
if not hasattr(Tool, "outputs"):
|
18 |
-
# Provide an empty outputs list to satisfy Jinja templates
|
19 |
Tool.outputs = property(lambda self: [])
|
20 |
if not hasattr(Tool, "output_type"):
|
21 |
-
# Provide a default output_type for template rendering
|
22 |
Tool.output_type = property(lambda self: None)
|
23 |
|
24 |
from src.file_processing_tool import FileIdentifier
|
@@ -26,24 +21,48 @@ from src.speech_to_text import transcribe_audio
|
|
26 |
from src.download_utils import download_file
|
27 |
from src.spreadsheet_tool import SpreadsheetTool
|
28 |
from src.web_browsing_tool import WebBrowser
|
29 |
-
from src.text_reversal_tool import
|
30 |
from src.markdown_table_parser import parse_markdown_table
|
31 |
from src.image_processing_tool import ImageProcessor
|
32 |
from src.video_processing_tool import VideoProcessingTool
|
33 |
from src.python_tool import CodeExecutionTool
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
|
35 |
-
# --- Constants ---
|
36 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
37 |
-
DOWNLOADED_FILES_DIR = "downloaded_task_files"
|
38 |
os.makedirs(DOWNLOADED_FILES_DIR, exist_ok=True)
|
39 |
|
40 |
# --- Smol Agents Setup ---
|
41 |
-
# TODO: Replace with your desired model ID
|
42 |
-
MODEL_ID = "gpt4all/gpt4all-j-v1.3-groovy" # Placeholder
|
43 |
try:
|
44 |
-
model = InferenceClientModel(
|
|
|
|
|
|
|
|
|
45 |
except Exception as e:
|
46 |
-
print(f"Error initializing InferenceClientModel with {
|
47 |
print("Please ensure the model ID is correct and you have the necessary HF_TOKEN or access.")
|
48 |
# Fallback or alternative model can be set here if needed
|
49 |
# For now, we'll let the app fail later if the model is crucial and not loaded.
|
@@ -52,8 +71,7 @@ except Exception as e:
|
|
52 |
|
53 |
# --- Tool Instantiation ---
|
54 |
# These are instantiated once and passed to agents
|
55 |
-
|
56 |
-
spreadsheet_tool_instance = SpreadsheetTool()
|
57 |
web_browser_tool = WebBrowser()
|
58 |
image_processor_tool = ImageProcessor()
|
59 |
video_processor_tool = VideoProcessingTool()
|
@@ -76,11 +94,7 @@ web_agent = ToolCallingAgent(
|
|
76 |
|
77 |
file_processing_agent = ToolCallingAgent(
|
78 |
tools=[
|
79 |
-
|
80 |
-
name="identify_file_type",
|
81 |
-
description="Identifies the type of a file and suggests an action. Input should be the file path.",
|
82 |
-
func=file_identifier_tool.identify_file
|
83 |
-
),
|
84 |
Tool(
|
85 |
name="parse_spreadsheet",
|
86 |
description="Parses data from a spreadsheet file (e.g., CSV, Excel). Input should be the file path.",
|
@@ -168,11 +182,7 @@ code_interpreter_agent = ToolCallingAgent(
|
|
168 |
|
169 |
text_tool_agent = ToolCallingAgent(
|
170 |
tools=[
|
171 |
-
|
172 |
-
name="reverse_string_text",
|
173 |
-
description="Reverses the order of characters in a given string. Input should be the string to reverse.",
|
174 |
-
func=reverse_text
|
175 |
-
)
|
176 |
],
|
177 |
model=model,
|
178 |
name="TextToolAgent",
|
@@ -267,8 +277,16 @@ Your final response should be the answer to the task.
|
|
267 |
print(f"Error during ManagerAgent execution: {e}")
|
268 |
return f"Error during agent execution: {str(e)}"
|
269 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
270 |
# Initialize the Manager Agent
|
271 |
-
if model:
|
272 |
manager_agent = GaiaManagerAgent(
|
273 |
model=model,
|
274 |
file_download_dir=DOWNLOADED_FILES_DIR,
|
@@ -280,12 +298,12 @@ if model: # Only initialize if the model loaded successfully
|
|
280 |
code_interpreter_agent,
|
281 |
text_tool_agent,
|
282 |
],
|
283 |
-
#
|
284 |
-
#
|
285 |
-
|
286 |
)
|
287 |
else:
|
288 |
-
manager_agent = None
|
289 |
|
290 |
# --- Main Application Logic ---
|
291 |
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
|
1 |
import os
|
2 |
import gradio as gr
|
3 |
import requests
|
|
|
4 |
import pandas as pd
|
5 |
+
import json
|
6 |
|
7 |
from smolagents import ToolCallingAgent, CodeAgent, InferenceClientModel, Tool
|
|
|
8 |
if not hasattr(Tool, "name"):
|
9 |
Tool.name = property(lambda self: getattr(self, "tool_name", None))
|
10 |
if not hasattr(Tool, "description"):
|
|
|
11 |
Tool.description = property(lambda self: getattr(self, "tool_description", None))
|
12 |
if not hasattr(Tool, "inputs"):
|
|
|
13 |
Tool.inputs = property(lambda self: [])
|
14 |
if not hasattr(Tool, "outputs"):
|
|
|
15 |
Tool.outputs = property(lambda self: [])
|
16 |
if not hasattr(Tool, "output_type"):
|
|
|
17 |
Tool.output_type = property(lambda self: None)
|
18 |
|
19 |
from src.file_processing_tool import FileIdentifier
|
|
|
21 |
from src.download_utils import download_file
|
22 |
from src.spreadsheet_tool import SpreadsheetTool
|
23 |
from src.web_browsing_tool import WebBrowser
|
24 |
+
from src.text_reversal_tool import reverse_string_text # Changed import
|
25 |
from src.markdown_table_parser import parse_markdown_table
|
26 |
from src.image_processing_tool import ImageProcessor
|
27 |
from src.video_processing_tool import VideoProcessingTool
|
28 |
from src.python_tool import CodeExecutionTool
|
29 |
+
from src.final_answer_tool import final_answer # Import the final_answer tool
|
30 |
+
|
31 |
+
import yaml # for loading prompt templates
|
32 |
+
|
33 |
+
# --- Constants & Config Loading ---
|
34 |
+
# Load model configuration
|
35 |
+
# Fallback settings used whenever model_config.json is missing or malformed.
# Kept in one place so the two failure paths below cannot drift apart.
_DEFAULT_MODEL_CONFIG = {
    "model_id": "gpt4all/gpt4all-j-v1.3-groovy",  # Default model
    "max_tokens": 2048,
    "temperature": 0.7,
}

try:
    # Explicit encoding so the result does not depend on the platform locale.
    with open("model_config.json", "r", encoding="utf-8") as f:
        model_config = json.load(f)
except FileNotFoundError:
    print("Warning: model_config.json not found. Using default model settings.")
    # Copy so later mutation of model_config never alters the defaults.
    model_config = dict(_DEFAULT_MODEL_CONFIG)
except json.JSONDecodeError:
    print("Warning: Error decoding model_config.json. Using default model settings.")
    model_config = dict(_DEFAULT_MODEL_CONFIG)
|
52 |
|
|
|
53 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
54 |
+
DOWNLOADED_FILES_DIR = "downloaded_task_files" # Directory to store downloaded files
|
55 |
os.makedirs(DOWNLOADED_FILES_DIR, exist_ok=True)
|
56 |
|
57 |
# --- Smol Agents Setup ---
|
|
|
|
|
58 |
try:
|
59 |
+
model = InferenceClientModel(
|
60 |
+
model_id=model_config.get("model_id"),
|
61 |
+
max_tokens=model_config.get("max_tokens"),
|
62 |
+
temperature=model_config.get("temperature")
|
63 |
+
)
|
64 |
except Exception as e:
|
65 |
+
print(f"Error initializing InferenceClientModel with {model_config.get('model_id')}: {e}")
|
66 |
print("Please ensure the model ID is correct and you have the necessary HF_TOKEN or access.")
|
67 |
# Fallback or alternative model can be set here if needed
|
68 |
# For now, we'll let the app fail later if the model is crucial and not loaded.
|
|
|
71 |
|
72 |
# --- Tool Instantiation ---
|
73 |
# These are instantiated once and passed to agents
|
74 |
+
spreadsheet_tool_instance = SpreadsheetTool() # Keep instance if methods need shared state
|
|
|
75 |
web_browser_tool = WebBrowser()
|
76 |
image_processor_tool = ImageProcessor()
|
77 |
video_processor_tool = VideoProcessingTool()
|
|
|
94 |
|
95 |
file_processing_agent = ToolCallingAgent(
|
96 |
tools=[
|
97 |
+
FileIdentifier().identify_file, # Use the decorated method
|
|
|
|
|
|
|
|
|
98 |
Tool(
|
99 |
name="parse_spreadsheet",
|
100 |
description="Parses data from a spreadsheet file (e.g., CSV, Excel). Input should be the file path.",
|
|
|
182 |
|
183 |
text_tool_agent = ToolCallingAgent(
|
184 |
tools=[
|
185 |
+
reverse_string_text # Use the decorated tool object directly
|
|
|
|
|
|
|
|
|
186 |
],
|
187 |
model=model,
|
188 |
name="TextToolAgent",
|
|
|
277 |
print(f"Error during ManagerAgent execution: {e}")
|
278 |
return f"Error during agent execution: {str(e)}"
|
279 |
|
280 |
+
# Load prompt templates for the manager agent.
# Loading is best-effort: on any failure the agent is built with an empty
# template mapping instead of aborting start-up.
prompt_templates = {}
try:
    with open("prompts.yaml", "r", encoding="utf-8") as f:
        _loaded_templates = yaml.safe_load(f)
    # safe_load returns None for an empty document (and may return a
    # non-mapping for unexpected content); keep the {} default in that case
    # so downstream code always receives a dict.
    if isinstance(_loaded_templates, dict):
        prompt_templates = _loaded_templates
    elif _loaded_templates is not None:
        print("Warning: prompts.yaml does not contain a mapping, proceeding without prompt templates.")
except Exception as e:
    print("Warning: Could not load prompts.yaml, proceeding without prompt templates.")
    # Surface the reason so a malformed file is distinguishable from a missing one.
    print(f"Reason: {e}")
|
287 |
+
|
288 |
# Initialize the Manager Agent
|
289 |
+
if model: # Only initialize if the model loaded successfully
|
290 |
manager_agent = GaiaManagerAgent(
|
291 |
model=model,
|
292 |
file_download_dir=DOWNLOADED_FILES_DIR,
|
|
|
298 |
code_interpreter_agent,
|
299 |
text_tool_agent,
|
300 |
],
|
301 |
+
tools=[final_answer], # Add final_answer to the manager agent's tools
|
302 |
+
# additional_authorized_imports=["pandas", "numpy", "re", "requests", "bs4", "cv2", "PIL", "chess"], # Add as needed
|
303 |
+
prompt_templates=prompt_templates
|
304 |
)
|
305 |
else:
|
306 |
+
manager_agent = None
|
307 |
|
308 |
# --- Main Application Logic ---
|
309 |
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
config.yaml
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
# Configuration for the GAIA Agent app
|
2 |
+
model_id: "gpt4all/gpt4all-j-v1.3-groovy"
|
model_config.json
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model_id": "gpt4all/gpt4all-j-v1.3-groovy",
|
3 |
+
"max_tokens": 2048,
|
4 |
+
"temperature": 0.7
|
5 |
+
}
|
prompts.yaml
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Prompt templates for GaiaManagerAgent
|
2 |
+
manager_prompt: |
|
3 |
+
Task: {{task_description}}
|
4 |
+
{% if file_url %}
|
5 |
+
An associated file is available at: {{file_url}}
|
6 |
+
File type determined as: {{file_type}}, suggested action: {{suggested_action}}
|
7 |
+
{% endif %}
|
8 |
+
You are a manager agent for the GAIA benchmark. Your goal is to answer the given task.
|
9 |
+
You have the following specialized agents available to you as tools:
|
10 |
+
- WebSearchAgent: Call this agent for web browsing and fetching URL content.
|
11 |
+
- FileProcessorAgent: Call this agent for identifying file types, parsing spreadsheets, transcribing audio, and parsing markdown tables.
|
12 |
+
- VisionAgent: Call this agent for image processing, OCR, and chess image analysis.
|
13 |
+
- VideoAgent: Call this agent for video processing tasks.
|
14 |
+
- CodeInterpreterAgent: Call this agent to execute Python code.
|
15 |
+
- TextToolAgent: Call this agent for simple text manipulations like reversing text.
|
16 |
+
|
17 |
+
Based on the task and any provided file, devise a plan and call the appropriate agent(s) to gather information and formulate an answer.
|
18 |
+
Generate the Python code to call these agents and produce the final answer.
|
19 |
+
Your final response should be the answer to the task.
|
src/file_processing_tool.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
import os
|
2 |
import mimetypes
|
3 |
|
@@ -28,6 +29,16 @@ class FileIdentifier:
|
|
28 |
for ext in details["extensions"]:
|
29 |
self.extension_to_type[ext] = simple_type
|
30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
def identify_file(self, filepath):
|
32 |
"""
|
33 |
Identifies the file type and suggests a processing action.
|
|
|
1 |
+
from smolagents import tool
|
2 |
import os
|
3 |
import mimetypes
|
4 |
|
|
|
29 |
for ext in details["extensions"]:
|
30 |
self.extension_to_type[ext] = simple_type
|
31 |
|
32 |
+
@tool(
|
33 |
+
name="identify_file_type",
|
34 |
+
description="Identifies the type of a file and suggests an action.",
|
35 |
+
inputs={"filepath": {"type": "str", "description": "Path to the file"}},
|
36 |
+
outputs={
|
37 |
+
"determined_type": {"type": "str", "description": "Determined file type"},
|
38 |
+
"suggested_action": {"type": "str", "description": "Suggested processing action"}
|
39 |
+
},
|
40 |
+
output_type="dict"
|
41 |
+
)
|
42 |
def identify_file(self, filepath):
|
43 |
"""
|
44 |
Identifies the file type and suggests a processing action.
|
src/final_answer_tool.py
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Any, Optional
|
2 |
+
from smolagents import tool
|
3 |
+
|
4 |
+
# NOTE: smolagents' ``tool`` decorator is applied directly to the function and
# accepts no keyword arguments; it derives the tool name from the function
# name and the description/inputs from the type hints and the docstring
# (including the ``Args:`` section). Calling it as ``@tool(name=..., ...)``
# raises TypeError at import time.
@tool
def final_answer(answer: Any) -> Any:
    """Provides a final answer to the given problem.

    Args:
        answer: The final answer to the problem.

    Returns:
        The answer, passed through unchanged.
    """
    return answer
|
src/text_reversal_tool.py
CHANGED
@@ -1,4 +1,13 @@
|
|
1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
"""
|
3 |
Reverses or processes reversed text, useful for decoding or analyzing reversed strings.
|
4 |
"""
|
|
|
1 |
+
from smolagents import tool
|
2 |
+
|
3 |
+
@tool(
|
4 |
+
name="reverse_string_text",
|
5 |
+
description="Reverses the order of characters in a given string.",
|
6 |
+
inputs={"text": {"type": "str", "description": "String to reverse"}},
|
7 |
+
outputs={"reversed_text": {"type": "str", "description": "Reversed string"}},
|
8 |
+
output_type="str"
|
9 |
+
)
|
10 |
+
def reverse_string_text(text: str) -> str:
|
11 |
"""
|
12 |
Reverses or processes reversed text, useful for decoding or analyzing reversed strings.
|
13 |
"""
|