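"""Tests for the prompt templates in prompts.yaml.

These tests use lightweight local stand-ins for the agent and model classes so
they can run without the real agent framework. They cover two things:

1. A simulated multi-step chess-analysis run that should terminate once the
   model emits a final_answer call.
2. The structure of the system prompt, which must contain the required
   formatting instructions (Thought section, py code block, end_code marker).
"""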
import yaml
import os
import pytest

# Create mock classes for testing
class MockModel:
    """Minimal stand-in for a text-generation model used by the agent."""

    def __call__(self, prompt, **kwargs):
        return self.generate_text(prompt, **kwargs)

    def generate_text(self, prompt, **kwargs):
        # Implemented by subclasses (see ChessPositionTestModel below).
        raise NotImplementedError("Subclasses must implement generate_text()")

class CodeAgent:
    def __init__(self, model=None, tools=None, max_steps=None, verbosity_level=None, 
                 name=None, description=None, prompt_templates=None):
        self.model = model
        self.tools = tools
        self.max_steps = max_steps
        self.verbosity_level = verbosity_level
        self.name = name
        self.description = description
        self.prompt_templates = prompt_templates
        self.step_counter = 0
    
    def run(self, query):
        """Simulate running the agent for testing purposes."""
        response = ""
        for step in range(self.max_steps or 0):
            # Pass the query through to the model; the test model keys its
            # canned responses off the step number.
            response = self.model.generate_text(query, step=step)
            # Unwrap OpenAI-style chat-completion dicts into plain text.
            if isinstance(response, dict) and "choices" in response:
                response = response["choices"][0]["message"]["content"]
            # Stop once the simulated agent emits a final answer.
            if "final_answer" in response:
                break
        return response
        
    def __call__(self, query):
        return self.run(query)
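
# NOTE: MockModel and CodeAgent are deliberately local stand-ins rather than the
# real agent classes, so these tests can run without importing the actual agent
# framework (for example, a CodeAgent implementation from a library such as
# smolagents, if that is what the project uses).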

# Load your updated prompts.yaml
# Get the correct path relative to this script
try:
    script_dir = os.path.dirname(os.path.abspath(__file__))
    project_root = os.path.dirname(script_dir)  # Go up one level from tests/ to project root
    prompts_path = os.path.join(project_root, "prompts.yaml")
    
    print(f"Looking for prompts.yaml at: {prompts_path}")
    
    with open(prompts_path, 'r') as stream:
        prompt_templates = yaml.safe_load(stream)
        print("Successfully loaded prompts.yaml")
except FileNotFoundError:
    print(f"Warning: prompts.yaml not found at {prompts_path}. Tests may fail.")
    prompt_templates = {}
except yaml.YAMLError as e:
    print(f"Error parsing prompts.yaml: {e}")
    prompt_templates = {}
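
# For reference, test_prompt_structure below expects the loaded mapping to contain
# a system prompt with the formatting rules it asserts on. A hypothetical minimal
# layout (the real prompts.yaml may nest this differently, e.g. under a "main" key):
#
#   system_prompt: |
#     IMPORTANT FORMATTING RULES for ALL responses:
#     EVERY response MUST follow the format Thought: ... followed by a code block.
#     Each response MUST include a code block.
#     Example of correct formatting:
#     Thought: ...
#     ```py
#     ...
#     ```<end_code>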

# Create a specialized model for testing chess position scenario
class ChessPositionTestModel(MockModel):
    def generate_text(self, prompt, **kwargs):
        # For testing purposes, simulate a fixed series of responses so we can
        # observe how the agent handles a multi-step chess analysis.
        step = kwargs.get("step", 0)
        
        responses = [
            # Step 0: Initial response asking for the chess image
            """Thought: I need to see the chess image to analyze the position.
```py
print("I need the chess image to analyze the position. Please provide the image.")
```<end_code>""",
            
            # Step 1: After receiving the image
            """Thought: Now I can see the chess position. I'll analyze it.
```py
from src.image_processing_tool import ImageProcessor

image_processor = ImageProcessor()
analysis = image_processor.analyze_chess_position(image_path="chess_image.png")
print(f"Chess position analysis: {analysis}")
```<end_code>""",
            
            # Step 2: Error handling when image analysis fails
            """Thought: There was an error analyzing the chess position. I'll try a different approach.
```py
print("The image analysis failed. Let me try a different method.")
# Alternative approach
```<end_code>""",
            
            # Step 3: Final answer
            """Thought: I've analyzed the chess position and determined the best move.
```py
final_answer("e4 to e5")
```<end_code>"""
        ]
        
        # Return the appropriate response for this step
        if step < len(responses):
            return {"choices": [{"message": {"content": responses[step]}}]}
        else:
            return {"choices": [{"message": {"content": "Test complete"}}]}

# Simulating a chess position analysis
def test_chess_position_scenario():
    print("\nTesting chess position analysis scenario\n")
    
    # Create a minimal version of your tools for testing
    class DummyImageProcessorTool:
        def __init__(self):
            self.name = "image_processor"
            self.description = "Analyze images including chess positions"
            self.inputs = {"image_path": "string"}
            self.output_type = "string"
        
        def analyze_chess_position(self, image_path):
            return "Position analyzed: white king on e1, black king on e8"
    
    class DummyFinalAnswerTool:
        def __init__(self):
            self.name = "final_answer"
            self.description = "Use this to provide the final answer"
            self.inputs = {"answer": "string"}
            self.output_type = "string"
        
        def __call__(self, answer):
            return f"Final answer submitted: {answer}"
    
    # Create the test model
    model = ChessPositionTestModel()
    
    # Create agent with your updated prompts
    tools = [DummyImageProcessorTool(), DummyFinalAnswerTool()]
    try:
        agent = CodeAgent(
            model=model,
            tools=tools,
            max_steps=4,  # Allow for 4 steps to see all responses
            verbosity_level=2,  # Increased verbosity to see more details
            name="ChessTestAgent",
            description="Testing chess position analysis formatting",
            prompt_templates=prompt_templates
        )
    except Exception as e:
        # Creating the agent should never fail with these stubs; treat it as a test failure.
        pytest.fail(f"Error creating agent: {e}")
    
    # Test with a chess position analysis task
    print("Starting chess position analysis test...")
    result = agent("Analyze this chess position and determine the best move for white.")
    
    print(f"Final result: {result}")
    print("-"*50)
    return result

def test_prompt_structure():
    """Test that the prompt structure includes proper formatting instructions."""
    print("\nTesting prompt structure for formatting instructions\n")
    
    # Check if prompts.yaml was loaded successfully
    if not prompt_templates:
        pytest.skip("No prompt templates available to test")
    
    # Get the system prompt from the templates; depending on how prompts.yaml is
    # structured, "system_prompt" may be a plain string or a mapping with a
    # "main" key, so handle both layouts.
    system_prompt = prompt_templates.get("system_prompt", "")
    if isinstance(system_prompt, dict):
        system_prompt = system_prompt.get("main", "")

    # Check that the system prompt contains the necessary elements
    formatting_elements = [
        "IMPORTANT FORMATTING RULES for ALL responses:",  # Section header
        "EVERY response MUST follow the format",  # Format requirement
        "```py",  # Code block start
        "```<end_code>",  # Code block end
        "MUST include a code block",  # Code block requirement
        "Example of correct formatting:"  # Example section
    ]
    
    for element in formatting_elements:
        assert element in system_prompt, f"Missing required formatting element: {element}"
    
    # Check that the example itself shows proper formatting
    example_start = system_prompt.find("Example of correct formatting:")
    if example_start != -1:
        # If no blank line follows the example, read through to the end of the prompt.
        example_end = system_prompt.find("\n\n", example_start)
        if example_end == -1:
            example_end = len(system_prompt)
        example_content = system_prompt[example_start:example_end]

        assert "Thought:" in example_content, "Example missing Thought: section"
        assert "```py" in example_content, "Example missing code block start"
        assert "```<end_code>" in example_content, "Example missing code block end"
    else:
        pytest.fail("No formatting example found in system prompt")
    
    print("✓ Prompt structure contains all required formatting elements")

# Run the tests if executed directly
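# (alternatively, collect them with pytest, e.g. "pytest tests/ -v -s"; the path
# is illustrative and depends on where this file lives in the repo)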
if __name__ == "__main__":
    test_prompt_structure()
    test_chess_position_scenario()