Yago Bolivar committed
Commit 9bdf620 · 1 Parent(s): 556b9b5
feat: add tests for chess position analysis and prompt formatting scenarios
Browse files
- tests/test_chess_formatting.py +202 -0
- tests/test_formatting.sh +16 -0
- tests/test_prompt_formatting.py +108 -0
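All three tests exercise the same response contract that prompts.yaml is meant to enforce: every model reply should contain a Thought: line followed by a ```py code block terminated with ```<end_code>. As a rough illustration of that contract (the helper below is only a sketch and is not part of this commit):

import re

# Sketch only: check that a single model response follows the
# Thought + ```py ... ```<end_code> format the prompts require.
RESPONSE_FORMAT = re.compile(r"Thought:.*?```py\n.*?```<end_code>", re.DOTALL)

def follows_format(response: str) -> bool:
    return bool(RESPONSE_FORMAT.search(response))

assert follows_format('Thought: add two numbers.\n```py\nprint(2 + 2)\n```<end_code>')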
tests/test_chess_formatting.py
ADDED
@@ -0,0 +1,202 @@
import yaml
import os
import pytest
from unittest.mock import MagicMock

# Create mock classes for testing
class MockModel:
    def __init__(self):
        pass

    def __call__(self, prompt, **kwargs):
        return self.generate_text(prompt, **kwargs)

    def generate_text(self, prompt, **kwargs):
        # This method will be implemented in child classes
        pass

class CodeAgent:
    def __init__(self, model=None, tools=None, max_steps=None, verbosity_level=None,
                 name=None, description=None, prompt_templates=None):
        self.model = model
        self.tools = tools
        self.max_steps = max_steps
        self.verbosity_level = verbosity_level
        self.name = name
        self.description = description
        self.prompt_templates = prompt_templates
        self.step_counter = 0

    def run(self, query):
        """Simulate running the agent for testing purposes."""
        response = None
        for step in range(self.max_steps):
            response = self.model.generate_text("", step=step)
            if isinstance(response, dict) and "choices" in response:
                response = response["choices"][0]["message"]["content"]
            if "final_answer" in response:
                break
        return response

    def __call__(self, query):
        return self.run(query)

# Load your updated prompts.yaml
# Get the correct path relative to this script
try:
    script_dir = os.path.dirname(os.path.abspath(__file__))
    project_root = os.path.dirname(script_dir)  # Go up one level from tests/ to project root
    prompts_path = os.path.join(project_root, "prompts.yaml")

    print(f"Looking for prompts.yaml at: {prompts_path}")

    with open(prompts_path, 'r') as stream:
        prompt_templates = yaml.safe_load(stream)
    print("Successfully loaded prompts.yaml")
except FileNotFoundError:
    print(f"Warning: prompts.yaml not found at {prompts_path}. Tests may fail.")
    prompt_templates = {}
except yaml.YAMLError as e:
    print(f"Error parsing prompts.yaml: {e}")
    prompt_templates = {}

# Create a specialized model for testing chess position scenario
class ChessPositionTestModel(MockModel):
    def __init__(self):
        super().__init__()

    def generate_text(self, prompt, **kwargs):
        # For testing purposes, we'll simulate a series of responses
        # to see how the agent handles multi-step chess analysis
        step = kwargs.get("step", 0)

        responses = [
            # Step 0: Initial response asking for the chess image
            """Thought: I need to see the chess image to analyze the position.
```py
print("I need the chess image to analyze the position. Please provide the image.")
```<end_code>""",

            # Step 1: After receiving the image
            """Thought: Now I can see the chess position. I'll analyze it.
```py
from src.image_processing_tool import ImageProcessor

image_processor = ImageProcessor()
analysis = image_processor.analyze_chess_position(image_path="chess_image.png")
print(f"Chess position analysis: {analysis}")
```<end_code>""",

            # Step 2: Error handling when image analysis fails
            """Thought: There was an error analyzing the chess position. I'll try a different approach.
```py
print("The image analysis failed. Let me try a different method.")
# Alternative approach
```<end_code>""",

            # Step 3: Final answer
            """Thought: I've analyzed the chess position and determined the best move.
```py
final_answer("e4 to e5")
```<end_code>"""
        ]

        # Return the appropriate response for this step
        if step < len(responses):
            return {"choices": [{"message": {"content": responses[step]}}]}
        else:
            return {"choices": [{"message": {"content": "Test complete"}}]}

# Simulating a chess position analysis
def test_chess_position_scenario():
    print("\nTesting chess position analysis scenario\n")

    # Create a minimal version of your tools for testing
    class DummyImageProcessorTool:
        def __init__(self):
            self.name = "image_processor"
            self.description = "Analyze images including chess positions"
            self.inputs = {"image_path": "string"}
            self.output_type = "string"

        def analyze_chess_position(self, image_path):
            return "Position analyzed: white king on e1, black king on e8"

    class DummyFinalAnswerTool:
        def __init__(self):
            self.name = "final_answer"
            self.description = "Use this to provide the final answer"
            self.inputs = {"answer": "string"}
            self.output_type = "string"

        def __call__(self, answer):
            return f"Final answer submitted: {answer}"

    # Create the test model
    model = ChessPositionTestModel()

    # Create agent with your updated prompts
    tools = [DummyImageProcessorTool(), DummyFinalAnswerTool()]
    try:
        agent = CodeAgent(
            model=model,
            tools=tools,
            max_steps=4,  # Allow for 4 steps to see all responses
            verbosity_level=2,  # Increased verbosity to see more details
            name="ChessTestAgent",
            description="Testing chess position analysis formatting",
            prompt_templates=prompt_templates
        )
    except Exception as e:
        print(f"Error creating agent: {e}")
        return

    # Test with a chess position analysis task
    print("Starting chess position analysis test...")
    result = agent("Analyze this chess position and determine the best move for white.")

    print(f"Final result: {result}")
    print("-"*50)
    return result

def test_prompt_structure():
    """Test that the prompt structure includes proper formatting instructions."""
    print("\nTesting prompt structure for formatting instructions\n")

    # Check if prompts.yaml was loaded successfully
    if not prompt_templates:
        pytest.skip("No prompt templates available to test")

    # Get the system prompt from the templates
    system_prompt = prompt_templates.get("system_prompt", {}).get("main", "")

    # Check that the system prompt contains the necessary elements
    formatting_elements = [
        "IMPORTANT FORMATTING RULES for ALL responses:",  # Section header
        "EVERY response MUST follow the format",  # Format requirement
        "```py",  # Code block start
        "```<end_code>",  # Code block end
        "MUST include a code block",  # Code block requirement
        "Example of correct formatting:"  # Example section
    ]

    for element in formatting_elements:
        assert element in system_prompt, f"Missing required formatting element: {element}"

    # Check that the example shows proper formatting
    example_start = system_prompt.find("Example of correct formatting:")
    if example_start != -1:
        example_content = system_prompt[example_start:system_prompt.find("\n\n", example_start)]

        assert "Thought:" in example_content, "Example missing Thought: section"
        assert "```py" in example_content, "Example missing code block start"
        assert "```<end_code>" in example_content, "Example missing code block end"
    else:
        pytest.fail("No formatting example found in system prompt")

    print("✓ Prompt structure contains all required formatting elements")

# Run the tests if executed directly
if __name__ == "__main__":
    test_prompt_structure()
    test_chess_position_scenario()
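Note that test_prompt_structure reads the system prompt via prompt_templates.get("system_prompt", {}).get("main", ""), i.e. it assumes prompts.yaml nests the prompt under system_prompt.main rather than storing it as a flat string. prompts.yaml itself is not part of this commit, so the snippet below is only an illustrative sketch of the layout and markers the test looks for:

import yaml

# Illustrative only: the nested layout and the formatting markers that
# test_prompt_structure expects to find in prompts.yaml.
sample = yaml.safe_load("""
system_prompt:
  main: |
    IMPORTANT FORMATTING RULES for ALL responses:
    EVERY response MUST follow the format Thought, then a code block.
    Each step MUST include a code block opened with ```py and closed with ```<end_code>.
    Example of correct formatting:
    Thought: I will add two numbers.
    ```py
    print(2 + 2)
    ```<end_code>
""")

system_prompt = sample.get("system_prompt", {}).get("main", "")
assert "Example of correct formatting:" in system_prompt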
tests/test_formatting.sh
ADDED
@@ -0,0 +1,16 @@
#!/bin/bash
# Script to test the formatting in prompts.yaml

echo "Starting prompt formatting tests..."

# Set working directory to project root
cd "$(dirname "$0")/.."  # Go up one level to project root

echo "Current directory: $(pwd)"
echo "Checking if prompts.yaml exists: $([ -f 'prompts.yaml' ] && echo 'Yes' || echo 'No')"

# Run the chess formatting test
echo "Running chess formatting tests..."
python3 -m tests.test_chess_formatting

echo -e "\nTests completed."
tests/test_prompt_formatting.py
ADDED
@@ -0,0 +1,108 @@
import yaml
import os
import sys
from smolagents import CodeAgent, DummyModel

# Load your updated prompts.yaml
with open("prompts.yaml", 'r') as stream:
    prompt_templates = yaml.safe_load(stream)

# Create a simple dummy model that will help us test the formatting
class TestFormattingModel(DummyModel):
    def __init__(self):
        super().__init__()

    def __call__(self, prompt, **kwargs):
        # Print the prompt for inspection
        print("="*50)
        print("PROMPT:")
        print("="*50)
        print(prompt)
        print("="*50)

        # Return a response that simulates different scenarios
        scenario = kwargs.get("scenario", "normal")

        if scenario == "normal":
            return {
                "choices": [{
                    "message": {
                        "content": """Thought: I'll solve this task step by step.
```py
print("Starting to solve the task")
result = 2 + 2
print(f"The result is {result}")
```<end_code>"""
                    }
                }]
            }
        elif scenario == "error":
            return {
                "choices": [{
                    "message": {
                        "content": """Thought: I encountered an error.
```py
print("An error occurred: file not found")
```<end_code>"""
                    }
                }]
            }
        elif scenario == "chess":
            return {
                "choices": [{
                    "message": {
                        "content": """Thought: I need more information about the chess position.
```py
print("I need to see the chess image to analyze the position. Please provide the image.")
```<end_code>"""
                    }
                }]
            }

        return {"choices": [{"message": {"content": "Test failed"}}]}

# Create a minimal agent to test your prompts
def test_scenario(scenario_name):
    print(f"\nTesting scenario: {scenario_name}")
    model = TestFormattingModel()

    # Create a minimal version of your tools for testing
    class DummyFinalAnswerTool:
        def __init__(self):
            self.name = "final_answer"
            self.description = "Use this to provide the final answer"
            self.inputs = {"answer": "string"}
            self.output_type = "string"

        def __call__(self, answer):
            return f"Final answer submitted: {answer}"

    # Create agent with your updated prompts
    agent = CodeAgent(
        model=model,
        tools=[DummyFinalAnswerTool()],
        max_steps=2,
        verbosity_level=1,
        name="TestAgent",
        description="Testing prompt formatting",
        prompt_templates=prompt_templates
    )

    # Test with a simple task
    result = agent(f"This is a test task for the {scenario_name} scenario.", scenario=scenario_name)

    print(f"Result: {result}")
    print("-"*50)
    return result

if __name__ == "__main__":
    print("Testing prompt formatting with different scenarios\n")

    # Test normal scenario
    test_scenario("normal")

    # Test error scenario
    test_scenario("error")

    # Test chess scenario
    test_scenario("chess")
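Note that tests/test_prompt_formatting.py assumes smolagents exposes a DummyModel class and that CodeAgent forwards extra keyword arguments (scenario=...) through to the model; neither assumption is verified by this commit. If they do not hold for the installed smolagents version, the three scenarios can still be exercised against the model directly, for example:

# Sketch only: drive TestFormattingModel without going through CodeAgent.
model = TestFormattingModel()
for scenario in ("normal", "error", "chess"):
    reply = model("dummy prompt", scenario=scenario)
    content = reply["choices"][0]["message"]["content"]
    assert content.startswith("Thought:")
    assert "```<end_code>" in content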