Spaces:
Runtime error
Runtime error
Yago Bolivar
committed on
Commit
·
133fe7e
1
Parent(s):
1cfb30c
feat: implement configuration files and enhance tool definitions for GAIA Agent
Browse files- app.py +48 -30
- config.yaml +2 -0
- model_config.json +5 -0
- prompts.yaml +19 -0
- src/file_processing_tool.py +11 -0
- src/final_answer_tool.py +11 -0
- src/text_reversal_tool.py +10 -1
app.py
CHANGED
|
@@ -1,24 +1,19 @@
|
|
| 1 |
import os
|
| 2 |
import gradio as gr
|
| 3 |
import requests
|
| 4 |
-
import inspect # Keep if you plan to use it for agent introspection later
|
| 5 |
import pandas as pd
|
|
|
|
| 6 |
|
| 7 |
from smolagents import ToolCallingAgent, CodeAgent, InferenceClientModel, Tool
|
| 8 |
-
# Ensure that Tool has a .name attribute for ToolCallingAgent compatibility
|
| 9 |
if not hasattr(Tool, "name"):
|
| 10 |
Tool.name = property(lambda self: getattr(self, "tool_name", None))
|
| 11 |
if not hasattr(Tool, "description"):
|
| 12 |
-
# Proxy description to the internal attribute (e.g., tool_description)
|
| 13 |
Tool.description = property(lambda self: getattr(self, "tool_description", None))
|
| 14 |
if not hasattr(Tool, "inputs"):
|
| 15 |
-
# Provide an empty inputs list to satisfy Jinja templates
|
| 16 |
Tool.inputs = property(lambda self: [])
|
| 17 |
if not hasattr(Tool, "outputs"):
|
| 18 |
-
# Provide an empty outputs list to satisfy Jinja templates
|
| 19 |
Tool.outputs = property(lambda self: [])
|
| 20 |
if not hasattr(Tool, "output_type"):
|
| 21 |
-
# Provide a default output_type for template rendering
|
| 22 |
Tool.output_type = property(lambda self: None)
|
| 23 |
|
| 24 |
from src.file_processing_tool import FileIdentifier
|
|
@@ -26,24 +21,48 @@ from src.speech_to_text import transcribe_audio
|
|
| 26 |
from src.download_utils import download_file
|
| 27 |
from src.spreadsheet_tool import SpreadsheetTool
|
| 28 |
from src.web_browsing_tool import WebBrowser
|
| 29 |
-
from src.text_reversal_tool import
|
| 30 |
from src.markdown_table_parser import parse_markdown_table
|
| 31 |
from src.image_processing_tool import ImageProcessor
|
| 32 |
from src.video_processing_tool import VideoProcessingTool
|
| 33 |
from src.python_tool import CodeExecutionTool
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
|
| 35 |
-
# --- Constants ---
|
| 36 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 37 |
-
DOWNLOADED_FILES_DIR = "downloaded_task_files"
|
| 38 |
os.makedirs(DOWNLOADED_FILES_DIR, exist_ok=True)
|
| 39 |
|
| 40 |
# --- Smol Agents Setup ---
|
| 41 |
-
# TODO: Replace with your desired model ID
|
| 42 |
-
MODEL_ID = "gpt4all/gpt4all-j-v1.3-groovy" # Placeholder
|
| 43 |
try:
|
| 44 |
-
model = InferenceClientModel(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
except Exception as e:
|
| 46 |
-
print(f"Error initializing InferenceClientModel with {
|
| 47 |
print("Please ensure the model ID is correct and you have the necessary HF_TOKEN or access.")
|
| 48 |
# Fallback or alternative model can be set here if needed
|
| 49 |
# For now, we'll let the app fail later if the model is crucial and not loaded.
|
|
@@ -52,8 +71,7 @@ except Exception as e:
|
|
| 52 |
|
| 53 |
# --- Tool Instantiation ---
|
| 54 |
# These are instantiated once and passed to agents
|
| 55 |
-
|
| 56 |
-
spreadsheet_tool_instance = SpreadsheetTool()
|
| 57 |
web_browser_tool = WebBrowser()
|
| 58 |
image_processor_tool = ImageProcessor()
|
| 59 |
video_processor_tool = VideoProcessingTool()
|
|
@@ -76,11 +94,7 @@ web_agent = ToolCallingAgent(
|
|
| 76 |
|
| 77 |
file_processing_agent = ToolCallingAgent(
|
| 78 |
tools=[
|
| 79 |
-
|
| 80 |
-
name="identify_file_type",
|
| 81 |
-
description="Identifies the type of a file and suggests an action. Input should be the file path.",
|
| 82 |
-
func=file_identifier_tool.identify_file
|
| 83 |
-
),
|
| 84 |
Tool(
|
| 85 |
name="parse_spreadsheet",
|
| 86 |
description="Parses data from a spreadsheet file (e.g., CSV, Excel). Input should be the file path.",
|
|
@@ -168,11 +182,7 @@ code_interpreter_agent = ToolCallingAgent(
|
|
| 168 |
|
| 169 |
text_tool_agent = ToolCallingAgent(
|
| 170 |
tools=[
|
| 171 |
-
|
| 172 |
-
name="reverse_string_text",
|
| 173 |
-
description="Reverses the order of characters in a given string. Input should be the string to reverse.",
|
| 174 |
-
func=reverse_text
|
| 175 |
-
)
|
| 176 |
],
|
| 177 |
model=model,
|
| 178 |
name="TextToolAgent",
|
|
@@ -267,8 +277,16 @@ Your final response should be the answer to the task.
|
|
| 267 |
print(f"Error during ManagerAgent execution: {e}")
|
| 268 |
return f"Error during agent execution: {str(e)}"
|
| 269 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 270 |
# Initialize the Manager Agent
|
| 271 |
-
if model:
|
| 272 |
manager_agent = GaiaManagerAgent(
|
| 273 |
model=model,
|
| 274 |
file_download_dir=DOWNLOADED_FILES_DIR,
|
|
@@ -280,12 +298,12 @@ if model: # Only initialize if the model loaded successfully
|
|
| 280 |
code_interpreter_agent,
|
| 281 |
text_tool_agent,
|
| 282 |
],
|
| 283 |
-
#
|
| 284 |
-
#
|
| 285 |
-
|
| 286 |
)
|
| 287 |
else:
|
| 288 |
-
manager_agent = None
|
| 289 |
|
| 290 |
# --- Main Application Logic ---
|
| 291 |
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
|
|
| 1 |
import os
|
| 2 |
import gradio as gr
|
| 3 |
import requests
|
|
|
|
| 4 |
import pandas as pd
|
| 5 |
+
import json
|
| 6 |
|
| 7 |
from smolagents import ToolCallingAgent, CodeAgent, InferenceClientModel, Tool
|
|
|
|
| 8 |
if not hasattr(Tool, "name"):
|
| 9 |
Tool.name = property(lambda self: getattr(self, "tool_name", None))
|
| 10 |
if not hasattr(Tool, "description"):
|
|
|
|
| 11 |
Tool.description = property(lambda self: getattr(self, "tool_description", None))
|
| 12 |
if not hasattr(Tool, "inputs"):
|
|
|
|
| 13 |
Tool.inputs = property(lambda self: [])
|
| 14 |
if not hasattr(Tool, "outputs"):
|
|
|
|
| 15 |
Tool.outputs = property(lambda self: [])
|
| 16 |
if not hasattr(Tool, "output_type"):
|
|
|
|
| 17 |
Tool.output_type = property(lambda self: None)
|
| 18 |
|
| 19 |
from src.file_processing_tool import FileIdentifier
|
|
|
|
| 21 |
from src.download_utils import download_file
|
| 22 |
from src.spreadsheet_tool import SpreadsheetTool
|
| 23 |
from src.web_browsing_tool import WebBrowser
|
| 24 |
+
from src.text_reversal_tool import reverse_string_text # Changed import
|
| 25 |
from src.markdown_table_parser import parse_markdown_table
|
| 26 |
from src.image_processing_tool import ImageProcessor
|
| 27 |
from src.video_processing_tool import VideoProcessingTool
|
| 28 |
from src.python_tool import CodeExecutionTool
|
| 29 |
+
from src.final_answer_tool import final_answer # Import the final_answer tool
|
| 30 |
+
|
| 31 |
+
import yaml # for loading prompt templates
|
| 32 |
+
|
| 33 |
+
# --- Constants & Config Loading ---
|
| 34 |
+
# Load model configuration
|
| 35 |
+
try:
|
| 36 |
+
with open("model_config.json", 'r') as f:
|
| 37 |
+
model_config = json.load(f)
|
| 38 |
+
except FileNotFoundError:
|
| 39 |
+
print("Warning: model_config.json not found. Using default model settings.")
|
| 40 |
+
model_config = {
|
| 41 |
+
"model_id": "gpt4all/gpt4all-j-v1.3-groovy", # Default model
|
| 42 |
+
"max_tokens": 2048,
|
| 43 |
+
"temperature": 0.7
|
| 44 |
+
}
|
| 45 |
+
except json.JSONDecodeError:
|
| 46 |
+
print("Warning: Error decoding model_config.json. Using default model settings.")
|
| 47 |
+
model_config = {
|
| 48 |
+
"model_id": "gpt4all/gpt4all-j-v1.3-groovy",
|
| 49 |
+
"max_tokens": 2048,
|
| 50 |
+
"temperature": 0.7
|
| 51 |
+
}
|
| 52 |
|
|
|
|
| 53 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 54 |
+
DOWNLOADED_FILES_DIR = "downloaded_task_files" # Directory to store downloaded files
|
| 55 |
os.makedirs(DOWNLOADED_FILES_DIR, exist_ok=True)
|
| 56 |
|
| 57 |
# --- Smol Agents Setup ---
|
|
|
|
|
|
|
| 58 |
try:
|
| 59 |
+
model = InferenceClientModel(
|
| 60 |
+
model_id=model_config.get("model_id"),
|
| 61 |
+
max_tokens=model_config.get("max_tokens"),
|
| 62 |
+
temperature=model_config.get("temperature")
|
| 63 |
+
)
|
| 64 |
except Exception as e:
|
| 65 |
+
print(f"Error initializing InferenceClientModel with {model_config.get('model_id')}: {e}")
|
| 66 |
print("Please ensure the model ID is correct and you have the necessary HF_TOKEN or access.")
|
| 67 |
# Fallback or alternative model can be set here if needed
|
| 68 |
# For now, we'll let the app fail later if the model is crucial and not loaded.
|
|
|
|
| 71 |
|
| 72 |
# --- Tool Instantiation ---
|
| 73 |
# These are instantiated once and passed to agents
|
| 74 |
+
spreadsheet_tool_instance = SpreadsheetTool() # Keep instance if methods need shared state
|
|
|
|
| 75 |
web_browser_tool = WebBrowser()
|
| 76 |
image_processor_tool = ImageProcessor()
|
| 77 |
video_processor_tool = VideoProcessingTool()
|
|
|
|
| 94 |
|
| 95 |
file_processing_agent = ToolCallingAgent(
|
| 96 |
tools=[
|
| 97 |
+
FileIdentifier().identify_file, # Use the decorated method
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
Tool(
|
| 99 |
name="parse_spreadsheet",
|
| 100 |
description="Parses data from a spreadsheet file (e.g., CSV, Excel). Input should be the file path.",
|
|
|
|
| 182 |
|
| 183 |
text_tool_agent = ToolCallingAgent(
|
| 184 |
tools=[
|
| 185 |
+
reverse_string_text # Use the decorated tool object directly
|
|
|
|
|
|
|
|
|
|
|
|
|
| 186 |
],
|
| 187 |
model=model,
|
| 188 |
name="TextToolAgent",
|
|
|
|
| 277 |
print(f"Error during ManagerAgent execution: {e}")
|
| 278 |
return f"Error during agent execution: {str(e)}"
|
| 279 |
|
| 280 |
+
## Load prompt templates for manager agent
|
| 281 |
+
prompt_templates = {}
|
| 282 |
+
try:
|
| 283 |
+
with open("prompts.yaml", 'r') as f:
|
| 284 |
+
prompt_templates = yaml.safe_load(f)
|
| 285 |
+
except Exception:
|
| 286 |
+
print("Warning: Could not load prompts.yaml, proceeding without prompt templates.")
|
| 287 |
+
|
| 288 |
# Initialize the Manager Agent
|
| 289 |
+
if model: # Only initialize if the model loaded successfully
|
| 290 |
manager_agent = GaiaManagerAgent(
|
| 291 |
model=model,
|
| 292 |
file_download_dir=DOWNLOADED_FILES_DIR,
|
|
|
|
| 298 |
code_interpreter_agent,
|
| 299 |
text_tool_agent,
|
| 300 |
],
|
| 301 |
+
tools=[final_answer], # Add final_answer to the manager agent's tools
|
| 302 |
+
# additional_authorized_imports=["pandas", "numpy", "re", "requests", "bs4", "cv2", "PIL", "chess"], # Add as needed
|
| 303 |
+
prompt_templates=prompt_templates
|
| 304 |
)
|
| 305 |
else:
|
| 306 |
+
manager_agent = None
|
| 307 |
|
| 308 |
# --- Main Application Logic ---
|
| 309 |
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
config.yaml
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Configuration for the GAIA Agent app
|
| 2 |
+
model_id: "gpt4all/gpt4all-j-v1.3-groovy"
|
model_config.json
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_id": "gpt4all/gpt4all-j-v1.3-groovy",
|
| 3 |
+
"max_tokens": 2048,
|
| 4 |
+
"temperature": 0.7
|
| 5 |
+
}
|
prompts.yaml
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Prompt templates for GaiaManagerAgent
|
| 2 |
+
manager_prompt: |
|
| 3 |
+
Task: {{task_description}}
|
| 4 |
+
{% if file_url %}
|
| 5 |
+
An associated file is available at: {{file_url}}
|
| 6 |
+
File type determined as: {{file_type}}, suggested action: {{suggested_action}}
|
| 7 |
+
{% endif %}
|
| 8 |
+
You are a manager agent for the GAIA benchmark. Your goal is to answer the given task.
|
| 9 |
+
You have the following specialized agents available to you as tools:
|
| 10 |
+
- WebSearchAgent: Call this agent for web browsing and fetching URL content.
|
| 11 |
+
- FileProcessorAgent: Call this agent for identifying file types, parsing spreadsheets, transcribing audio, and parsing markdown tables.
|
| 12 |
+
- VisionAgent: Call this agent for image processing, OCR, and chess image analysis.
|
| 13 |
+
- VideoAgent: Call this agent for video processing tasks.
|
| 14 |
+
- CodeInterpreterAgent: Call this agent to execute Python code.
|
| 15 |
+
- TextToolAgent: Call this agent for simple text manipulations like reversing text.
|
| 16 |
+
|
| 17 |
+
Based on the task and any provided file, devise a plan and call the appropriate agent(s) to gather information and formulate an answer.
|
| 18 |
+
Generate the Python code to call these agents and produce the final answer.
|
| 19 |
+
Your final response should be the answer to the task.
|
src/file_processing_tool.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
import os
|
| 2 |
import mimetypes
|
| 3 |
|
|
@@ -28,6 +29,16 @@ class FileIdentifier:
|
|
| 28 |
for ext in details["extensions"]:
|
| 29 |
self.extension_to_type[ext] = simple_type
|
| 30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
def identify_file(self, filepath):
|
| 32 |
"""
|
| 33 |
Identifies the file type and suggests a processing action.
|
|
|
|
| 1 |
+
from smolagents import tool
|
| 2 |
import os
|
| 3 |
import mimetypes
|
| 4 |
|
|
|
|
| 29 |
for ext in details["extensions"]:
|
| 30 |
self.extension_to_type[ext] = simple_type
|
| 31 |
|
| 32 |
+
@tool(
|
| 33 |
+
name="identify_file_type",
|
| 34 |
+
description="Identifies the type of a file and suggests an action.",
|
| 35 |
+
inputs={"filepath": {"type": "str", "description": "Path to the file"}},
|
| 36 |
+
outputs={
|
| 37 |
+
"determined_type": {"type": "str", "description": "Determined file type"},
|
| 38 |
+
"suggested_action": {"type": "str", "description": "Suggested processing action"}
|
| 39 |
+
},
|
| 40 |
+
output_type="dict"
|
| 41 |
+
)
|
| 42 |
def identify_file(self, filepath):
|
| 43 |
"""
|
| 44 |
Identifies the file type and suggests a processing action.
|
src/final_answer_tool.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Any, Optional
|
| 2 |
+
from smolagents import tool
|
| 3 |
+
|
| 4 |
+
# NOTE: smolagents' ``@tool`` decorator accepts only the decorated function.
# It derives the tool name from the function name, the description from the
# docstring summary, and the input/output schema from the type hints plus the
# ``Args:`` section. Passing keyword arguments such as ``name=`` or ``inputs=``
# raises ``TypeError`` at import time, so the metadata lives in the signature
# and docstring instead.
@tool
def final_answer(answer: Any) -> Any:
    """Provides a final answer to the given problem.

    Args:
        answer: The final answer to the problem.

    Returns:
        The answer, passed through unchanged.
    """
    return answer
|
src/text_reversal_tool.py
CHANGED
|
@@ -1,4 +1,13 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
"""
|
| 3 |
Reverses or processes reversed text, useful for decoding or analyzing reversed strings.
|
| 4 |
"""
|
|
|
|
| 1 |
+
from smolagents import tool
|
| 2 |
+
|
| 3 |
+
@tool(
|
| 4 |
+
name="reverse_string_text",
|
| 5 |
+
description="Reverses the order of characters in a given string.",
|
| 6 |
+
inputs={"text": {"type": "str", "description": "String to reverse"}},
|
| 7 |
+
outputs={"reversed_text": {"type": "str", "description": "Reversed string"}},
|
| 8 |
+
output_type="str"
|
| 9 |
+
)
|
| 10 |
+
def reverse_string_text(text: str) -> str:
|
| 11 |
"""
|
| 12 |
Reverses or processes reversed text, useful for decoding or analyzing reversed strings.
|
| 13 |
"""
|