HF_Agents_Final_Project

Runtime error

App Files Files Community

Yago Bolivar committed on May 21

Commit

133fe7e

1 Parent(s): 1cfb30c

feat: implement configuration files and enhance tool definitions for GAIA Agent

Browse files

Files changed (7) hide show

app.py +48 -30
config.yaml +2 -0
model_config.json +5 -0
prompts.yaml +19 -0
src/file_processing_tool.py +11 -0
src/final_answer_tool.py +11 -0
src/text_reversal_tool.py +10 -1

app.py CHANGED Viewed

@@ -1,24 +1,19 @@
 import os
 import gradio as gr
 import requests
-import inspect # Keep if you plan to use it for agent introspection later
 import pandas as pd
 from smolagents import ToolCallingAgent, CodeAgent, InferenceClientModel, Tool
-# Ensure that Tool has a .name attribute for ToolCallingAgent compatibility
 if not hasattr(Tool, "name"):
     Tool.name = property(lambda self: getattr(self, "tool_name", None))
 if not hasattr(Tool, "description"):
-    # Proxy description to the internal attribute (e.g., tool_description)
     Tool.description = property(lambda self: getattr(self, "tool_description", None))
 if not hasattr(Tool, "inputs"):
-    # Provide an empty inputs list to satisfy Jinja templates
     Tool.inputs = property(lambda self: [])
 if not hasattr(Tool, "outputs"):
-    # Provide an empty outputs list to satisfy Jinja templates
     Tool.outputs = property(lambda self: [])
 if not hasattr(Tool, "output_type"):
-    # Provide a default output_type for template rendering
     Tool.output_type = property(lambda self: None)
 from src.file_processing_tool import FileIdentifier
@@ -26,24 +21,48 @@ from src.speech_to_text import transcribe_audio
 from src.download_utils import download_file
 from src.spreadsheet_tool import SpreadsheetTool
 from src.web_browsing_tool import WebBrowser
-from src.text_reversal_tool import reverse_text
 from src.markdown_table_parser import parse_markdown_table
 from src.image_processing_tool import ImageProcessor
 from src.video_processing_tool import VideoProcessingTool
 from src.python_tool import CodeExecutionTool
-# --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-DOWNLOADED_FILES_DIR = "downloaded_task_files" # Directory to store downloaded files
 os.makedirs(DOWNLOADED_FILES_DIR, exist_ok=True)
 # --- Smol Agents Setup ---
-# TODO: Replace with your desired model ID
-MODEL_ID = "gpt4all/gpt4all-j-v1.3-groovy" # Placeholder
 try:
-    model = InferenceClientModel(model_id=MODEL_ID)
 except Exception as e:
-    print(f"Error initializing InferenceClientModel with {MODEL_ID}: {e}")
     print("Please ensure the model ID is correct and you have the necessary HF_TOKEN or access.")
     # Fallback or alternative model can be set here if needed
     # For now, we'll let the app fail later if the model is crucial and not loaded.
@@ -52,8 +71,7 @@ except Exception as e:
 # --- Tool Instantiation ---
 # These are instantiated once and passed to agents
-file_identifier_tool = FileIdentifier()
-spreadsheet_tool_instance = SpreadsheetTool()
 web_browser_tool = WebBrowser()
 image_processor_tool = ImageProcessor()
 video_processor_tool = VideoProcessingTool()
@@ -76,11 +94,7 @@ web_agent = ToolCallingAgent(
 file_processing_agent = ToolCallingAgent(
     tools=[
-        Tool(
-            name="identify_file_type",
-            description="Identifies the type of a file and suggests an action. Input should be the file path.",
-            func=file_identifier_tool.identify_file
-        ),
         Tool(
             name="parse_spreadsheet",
             description="Parses data from a spreadsheet file (e.g., CSV, Excel). Input should be the file path.",
@@ -168,11 +182,7 @@ code_interpreter_agent = ToolCallingAgent(
 text_tool_agent = ToolCallingAgent(
     tools=[
-        Tool(
-            name="reverse_string_text",
-            description="Reverses the order of characters in a given string. Input should be the string to reverse.",
-            func=reverse_text
-        )
     ],
     model=model,
     name="TextToolAgent",
@@ -267,8 +277,16 @@ Your final response should be the answer to the task.
             print(f"Error during ManagerAgent execution: {e}")
             return f"Error during agent execution: {str(e)}"
 # Initialize the Manager Agent
-if model: # Only initialize if the model loaded successfully
     manager_agent = GaiaManagerAgent(
         model=model,
         file_download_dir=DOWNLOADED_FILES_DIR,
@@ -280,12 +298,12 @@ if model: # Only initialize if the model loaded successfully
             code_interpreter_agent,
             text_tool_agent,
         ],
-        # Ensure the manager agent can import modules used by the tools if necessary,
-        # or that the tools are self-contained.
-        # additional_authorized_imports=["pandas", "numpy", "re", "requests", "bs4", "cv2", "PIL", "chess"] # Add as needed
     )
 else:
-    manager_agent = None
 # --- Main Application Logic ---
 def run_and_submit_all(profile: gr.OAuthProfile | None):

 import os
 import gradio as gr
 import requests
 import pandas as pd
+import json
 from smolagents import ToolCallingAgent, CodeAgent, InferenceClientModel, Tool
 if not hasattr(Tool, "name"):
     Tool.name = property(lambda self: getattr(self, "tool_name", None))
 if not hasattr(Tool, "description"):
     Tool.description = property(lambda self: getattr(self, "tool_description", None))
 if not hasattr(Tool, "inputs"):
     Tool.inputs = property(lambda self: [])
 if not hasattr(Tool, "outputs"):
     Tool.outputs = property(lambda self: [])
 if not hasattr(Tool, "output_type"):
     Tool.output_type = property(lambda self: None)
 from src.file_processing_tool import FileIdentifier
 from src.download_utils import download_file
 from src.spreadsheet_tool import SpreadsheetTool
 from src.web_browsing_tool import WebBrowser
+from src.text_reversal_tool import reverse_string_text # Changed import
 from src.markdown_table_parser import parse_markdown_table
 from src.image_processing_tool import ImageProcessor
 from src.video_processing_tool import VideoProcessingTool
 from src.python_tool import CodeExecutionTool
+from src.final_answer_tool import final_answer # Import the final_answer tool
+import yaml  # for loading prompt templates
+# --- Constants & Config Loading ---
+# Load model configuration
+try:
+    with open("model_config.json", 'r') as f:
+        model_config = json.load(f)
+except FileNotFoundError:
+    print("Warning: model_config.json not found. Using default model settings.")
+    model_config = {
+        "model_id": "gpt4all/gpt4all-j-v1.3-groovy", # Default model
+        "max_tokens": 2048,
+        "temperature": 0.7
+    }
+except json.JSONDecodeError:
+    print("Warning: Error decoding model_config.json. Using default model settings.")
+    model_config = {
+        "model_id": "gpt4all/gpt4all-j-v1.3-groovy",
+        "max_tokens": 2048,
+        "temperature": 0.7
+    }
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+DOWNLOADED_FILES_DIR = "downloaded_task_files"  # Directory to store downloaded files
 os.makedirs(DOWNLOADED_FILES_DIR, exist_ok=True)
 # --- Smol Agents Setup ---
 try:
+    model = InferenceClientModel(
+        model_id=model_config.get("model_id"),
+        max_tokens=model_config.get("max_tokens"),
+        temperature=model_config.get("temperature")
+    )
 except Exception as e:
+    print(f"Error initializing InferenceClientModel with {model_config.get('model_id')}: {e}")
     print("Please ensure the model ID is correct and you have the necessary HF_TOKEN or access.")
     # Fallback or alternative model can be set here if needed
     # For now, we'll let the app fail later if the model is crucial and not loaded.
 # --- Tool Instantiation ---
 # These are instantiated once and passed to agents
+spreadsheet_tool_instance = SpreadsheetTool() # Keep instance if methods need shared state
 web_browser_tool = WebBrowser()
 image_processor_tool = ImageProcessor()
 video_processor_tool = VideoProcessingTool()
 file_processing_agent = ToolCallingAgent(
     tools=[
+        FileIdentifier().identify_file, # Use the decorated method
         Tool(
             name="parse_spreadsheet",
             description="Parses data from a spreadsheet file (e.g., CSV, Excel). Input should be the file path.",
 text_tool_agent = ToolCallingAgent(
     tools=[
+        reverse_string_text # Use the decorated tool object directly
     ],
     model=model,
     name="TextToolAgent",
             print(f"Error during ManagerAgent execution: {e}")
             return f"Error during agent execution: {str(e)}"
+# --- Load prompt templates for the manager agent ---
+prompt_templates = {}
+try:
+    with open("prompts.yaml", 'r') as f:
+        prompt_templates = yaml.safe_load(f)
+except Exception:
+    print("Warning: Could not load prompts.yaml, proceeding without prompt templates.")
 # Initialize the Manager Agent
+if model:  # Only initialize if the model loaded successfully
     manager_agent = GaiaManagerAgent(
         model=model,
         file_download_dir=DOWNLOADED_FILES_DIR,
             code_interpreter_agent,
             text_tool_agent,
         ],
+        tools=[final_answer], # Add final_answer to the manager agent's tools
+        # additional_authorized_imports=["pandas", "numpy", "re", "requests", "bs4", "cv2", "PIL", "chess"],  # Add as needed
+        prompt_templates=prompt_templates
     )
 else:
+    manager_agent = None
 # --- Main Application Logic ---
 def run_and_submit_all(profile: gr.OAuthProfile | None):

config.yaml ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ # Configuration for the GAIA Agent app
2	+ model_id: "gpt4all/gpt4all-j-v1.3-groovy"

model_config.json ADDED Viewed

	@@ -0,0 +1,5 @@

+{
+  "model_id": "gpt4all/gpt4all-j-v1.3-groovy",
+  "max_tokens": 2048,
+  "temperature": 0.7
+}

prompts.yaml ADDED Viewed

	@@ -0,0 +1,19 @@

+# Prompt templates for GaiaManagerAgent
+manager_prompt: |
+  Task: {{task_description}}
+  {% if file_url %}
+  An associated file is available at: {{file_url}}
+  File type determined as: {{file_type}}, suggested action: {{suggested_action}}
+  {% endif %}
+  You are a manager agent for the GAIA benchmark. Your goal is to answer the given task.
+  You have the following specialized agents available to you as tools:
+  - WebSearchAgent: Call this agent for web browsing and fetching URL content.
+  - FileProcessorAgent: Call this agent for identifying file types, parsing spreadsheets, transcribing audio, and parsing markdown tables.
+  - VisionAgent: Call this agent for image processing, OCR, and chess image analysis.
+  - VideoAgent: Call this agent for video processing tasks.
+  - CodeInterpreterAgent: Call this agent to execute Python code.
+  - TextToolAgent: Call this agent for simple text manipulations like reversing text.
+  Based on the task and any provided file, devise a plan and call the appropriate agent(s) to gather information and formulate an answer.
+  Generate the Python code to call these agents and produce the final answer.
+  Your final response should be the answer to the task.

src/file_processing_tool.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import os
 import mimetypes
@@ -28,6 +29,16 @@ class FileIdentifier:
             for ext in details["extensions"]:
                 self.extension_to_type[ext] = simple_type
     def identify_file(self, filepath):
         """
         Identifies the file type and suggests a processing action.

+from smolagents import tool
 import os
 import mimetypes
             for ext in details["extensions"]:
                 self.extension_to_type[ext] = simple_type
+    @tool(
+        name="identify_file_type",
+        description="Identifies the type of a file and suggests an action.",
+        inputs={"filepath": {"type": "str", "description": "Path to the file"}},
+        outputs={
+            "determined_type": {"type": "str", "description": "Determined file type"},
+            "suggested_action": {"type": "str", "description": "Suggested processing action"}
+        },
+        output_type="dict"
+    )
     def identify_file(self, filepath):
         """
         Identifies the file type and suggests a processing action.

src/final_answer_tool.py ADDED Viewed

	@@ -0,0 +1,11 @@

+from typing import Any, Optional
+from smolagents import tool
+@tool(
+    name="final_answer",
+    description="Provides a final answer to the given problem.",
+    inputs={'answer': {'type': 'any', 'description': 'The final answer to the problem'}},
+    output_type="any"
+)
+def final_answer(answer: Any) -> Any:
+    return answer

src/text_reversal_tool.py CHANGED Viewed

@@ -1,4 +1,13 @@
-def reverse_text(text: str) -> str:
     """
     Reverses or processes reversed text, useful for decoding or analyzing reversed strings.
     """

+from smolagents import tool
+@tool(
+    name="reverse_string_text",
+    description="Reverses the order of characters in a given string.",
+    inputs={"text": {"type": "str", "description": "String to reverse"}},
+    outputs={"reversed_text": {"type": "str", "description": "Reversed string"}},
+    output_type="str"
+)
+def reverse_string_text(text: str) -> str:
     """
     Reverses or processes reversed text, useful for decoding or analyzing reversed strings.
     """