Yago Bolivar committed
Commit 9bdf620 · 1 Parent(s): 556b9b5

feat: add tests for chess position analysis and prompt formatting scenarios

tests/test_chess_formatting.py ADDED
@@ -0,0 +1,202 @@
+import yaml
+import os
+import pytest
+from unittest.mock import MagicMock
+
+# Create mock classes for testing
+class MockModel:
+    def __init__(self):
+        pass
+
+    def __call__(self, prompt, **kwargs):
+        return self.generate_text(prompt, **kwargs)
+
+    def generate_text(self, prompt, **kwargs):
+        # This method will be implemented in child classes
+        pass
+
+class CodeAgent:
+    def __init__(self, model=None, tools=None, max_steps=None, verbosity_level=None,
+                 name=None, description=None, prompt_templates=None):
+        self.model = model
+        self.tools = tools
+        self.max_steps = max_steps
+        self.verbosity_level = verbosity_level
+        self.name = name
+        self.description = description
+        self.prompt_templates = prompt_templates
+        self.step_counter = 0
+
+    def run(self, query):
+        """Simulate running the agent for testing purposes."""
+        response = None
+        for step in range(self.max_steps):
+            response = self.model.generate_text("", step=step)
+            if isinstance(response, dict) and "choices" in response:
+                response = response["choices"][0]["message"]["content"]
+            if "final_answer" in response:
+                break
+        return response
+
+    def __call__(self, query):
+        return self.run(query)
+
+# Load your updated prompts.yaml
+# Get the correct path relative to this script
+try:
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    project_root = os.path.dirname(script_dir)  # Go up one level from tests/ to project root
+    prompts_path = os.path.join(project_root, "prompts.yaml")
+
+    print(f"Looking for prompts.yaml at: {prompts_path}")
+
+    with open(prompts_path, 'r') as stream:
+        prompt_templates = yaml.safe_load(stream)
+    print("Successfully loaded prompts.yaml")
+except FileNotFoundError:
+    print(f"Warning: prompts.yaml not found at {prompts_path}. Tests may fail.")
+    prompt_templates = {}
+except yaml.YAMLError as e:
+    print(f"Error parsing prompts.yaml: {e}")
+    prompt_templates = {}
+
+# Create a specialized model for testing chess position scenario
+class ChessPositionTestModel(MockModel):
+    def __init__(self):
+        super().__init__()
+
+    def generate_text(self, prompt, **kwargs):
+        # For testing purposes, we'll simulate a series of responses
+        # to see how the agent handles multi-step chess analysis
+        step = kwargs.get("step", 0)
+
+        responses = [
+            # Step 0: Initial response asking for the chess image
+            """Thought: I need to see the chess image to analyze the position.
+```py
+print("I need the chess image to analyze the position. Please provide the image.")
+```<end_code>""",
+
+            # Step 1: After receiving the image
+            """Thought: Now I can see the chess position. I'll analyze it.
+```py
+from src.image_processing_tool import ImageProcessor
+
+image_processor = ImageProcessor()
+analysis = image_processor.analyze_chess_position(image_path="chess_image.png")
+print(f"Chess position analysis: {analysis}")
+```<end_code>""",
+
+            # Step 2: Error handling when image analysis fails
+            """Thought: There was an error analyzing the chess position. I'll try a different approach.
+```py
+print("The image analysis failed. Let me try a different method.")
+# Alternative approach
+```<end_code>""",
+
+            # Step 3: Final answer
+            """Thought: I've analyzed the chess position and determined the best move.
+```py
+final_answer("e4 to e5")
+```<end_code>"""
+        ]
+
+        # Return the appropriate response for this step
+        if step < len(responses):
+            return {"choices": [{"message": {"content": responses[step]}}]}
+        else:
+            return {"choices": [{"message": {"content": "Test complete"}}]}
+
+# Simulating a chess position analysis
+def test_chess_position_scenario():
+    print("\nTesting chess position analysis scenario\n")
+
+    # Create a minimal version of your tools for testing
+    class DummyImageProcessorTool:
+        def __init__(self):
+            self.name = "image_processor"
+            self.description = "Analyze images including chess positions"
+            self.inputs = {"image_path": "string"}
+            self.output_type = "string"
+
+        def analyze_chess_position(self, image_path):
+            return "Position analyzed: white king on e1, black king on e8"
+
+    class DummyFinalAnswerTool:
+        def __init__(self):
+            self.name = "final_answer"
+            self.description = "Use this to provide the final answer"
+            self.inputs = {"answer": "string"}
+            self.output_type = "string"
+
+        def __call__(self, answer):
+            return f"Final answer submitted: {answer}"
+
+    # Create the test model
+    model = ChessPositionTestModel()
+
+    # Create agent with your updated prompts
+    tools = [DummyImageProcessorTool(), DummyFinalAnswerTool()]
+    try:
+        agent = CodeAgent(
+            model=model,
+            tools=tools,
+            max_steps=4,  # Allow for 4 steps to see all responses
+            verbosity_level=2,  # Increased verbosity to see more details
+            name="ChessTestAgent",
+            description="Testing chess position analysis formatting",
+            prompt_templates=prompt_templates
+        )
+    except Exception as e:
+        print(f"Error creating agent: {e}")
+        return
+
+    # Test with a chess position analysis task
+    print("Starting chess position analysis test...")
+    result = agent("Analyze this chess position and determine the best move for white.")
+
+    print(f"Final result: {result}")
+    print("-"*50)
+    return result
+
+def test_prompt_structure():
+    """Test that the prompt structure includes proper formatting instructions."""
+    print("\nTesting prompt structure for formatting instructions\n")
+
+    # Check if prompts.yaml was loaded successfully
+    if not prompt_templates:
+        pytest.skip("No prompt templates available to test")
+
+    # Get the system prompt from the templates
+    system_prompt = prompt_templates.get("system_prompt", {}).get("main", "")
+
+    # Check that the system prompt contains the necessary elements
+    formatting_elements = [
+        "IMPORTANT FORMATTING RULES for ALL responses:",  # Section header
+        "EVERY response MUST follow the format",  # Format requirement
+        "```py",  # Code block start
+        "```<end_code>",  # Code block end
+        "MUST include a code block",  # Code block requirement
+        "Example of correct formatting:"  # Example section
+    ]
+
+    for element in formatting_elements:
+        assert element in system_prompt, f"Missing required formatting element: {element}"
+
+    # Check that the example shows proper formatting
+    example_start = system_prompt.find("Example of correct formatting:")
+    if example_start != -1:
+        example_content = system_prompt[example_start:system_prompt.find("\n\n", example_start)]
+
+        assert "Thought:" in example_content, "Example missing Thought: section"
+        assert "```py" in example_content, "Example missing code block start"
+        assert "```<end_code>" in example_content, "Example missing code block end"
+    else:
+        pytest.fail("No formatting example found in system prompt")
+
+    print("✓ Prompt structure contains all required formatting elements")
+
+# Run the tests if executed directly
+if __name__ == "__main__":
+    test_prompt_structure()
+    test_chess_position_scenario()
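
For reference, test_prompt_structure only asserts a handful of markers. A minimal prompt_templates value that would satisfy it looks like the sketch below; the actual prompts.yaml in this repo is longer, and this dict (including its wording) is purely illustrative:

# Hypothetical illustration: the smallest dict (as yaml.safe_load would
# return it) that passes test_prompt_structure. The real prompts.yaml is
# longer; only these markers are asserted by the test.
minimal_prompt_templates = {
    "system_prompt": {
        "main": (
            "IMPORTANT FORMATTING RULES for ALL responses:\n"
            "EVERY response MUST follow the format Thought: ... then code.\n"
            "Each step MUST include a code block opened with ```py and "
            "closed with ```<end_code>.\n"
            "Example of correct formatting:\n"
            "Thought: I will compute the sum.\n"
            "```py\n"
            "print(2 + 2)\n"
            "```<end_code>\n"
            "\n"  # the test slices the example up to the first blank line
        )
    }
}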
tests/test_formatting.sh ADDED
@@ -0,0 +1,16 @@
+#!/bin/bash
+# Script to test the formatting in prompts.yaml
+
+echo "Starting prompt formatting tests..."
+
+# Set working directory to project root
+cd "$(dirname "$0")/.."  # Go up one level to project root
+
+echo "Current directory: $(pwd)"
+echo "Checking if prompts.yaml exists: $([ -f 'prompts.yaml' ] && echo 'Yes' || echo 'No')"
+
+# Run the chess formatting test
+echo "Running chess formatting tests..."
+python3 -m tests.test_chess_formatting
+
+echo -e "\nTests completed."
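
The script drives the tests through python3 -m; an equivalent entry point, assuming pytest is installed, is to let pytest collect the same file. A minimal sketch:

# Sketch: run the same test functions under pytest instead of python3 -m.
# Assumes pytest is installed; the exit code is non-zero on any failure.
import sys

import pytest

sys.exit(pytest.main(["-v", "tests/test_chess_formatting.py"]))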
tests/test_prompt_formatting.py ADDED
@@ -0,0 +1,108 @@
+import yaml
+import os
+import sys
+from smolagents import CodeAgent, DummyModel
+
+# Load your updated prompts.yaml
+with open("prompts.yaml", 'r') as stream:
+    prompt_templates = yaml.safe_load(stream)
+
+# Create a simple dummy model that will help us test the formatting
+class TestFormattingModel(DummyModel):
+    def __init__(self):
+        super().__init__()
+
+    def __call__(self, prompt, **kwargs):
+        # Print the prompt for inspection
+        print("="*50)
+        print("PROMPT:")
+        print("="*50)
+        print(prompt)
+        print("="*50)
+
+        # Return a response that simulates different scenarios
+        scenario = kwargs.get("scenario", "normal")
+
+        if scenario == "normal":
+            return {
+                "choices": [{
+                    "message": {
+                        "content": """Thought: I'll solve this task step by step.
+```py
+print("Starting to solve the task")
+result = 2 + 2
+print(f"The result is {result}")
+```<end_code>"""
+                    }
+                }]
+            }
+        elif scenario == "error":
+            return {
+                "choices": [{
+                    "message": {
+                        "content": """Thought: I encountered an error.
+```py
+print("An error occurred: file not found")
+```<end_code>"""
+                    }
+                }]
+            }
+        elif scenario == "chess":
+            return {
+                "choices": [{
+                    "message": {
+                        "content": """Thought: I need more information about the chess position.
+```py
+print("I need to see the chess image to analyze the position. Please provide the image.")
+```<end_code>"""
+                    }
+                }]
+            }
+
+        return {"choices": [{"message": {"content": "Test failed"}}]}
+
+# Create a minimal agent to test your prompts
+def test_scenario(scenario_name):
+    print(f"\nTesting scenario: {scenario_name}")
+    model = TestFormattingModel()
+
+    # Create a minimal version of your tools for testing
+    class DummyFinalAnswerTool:
+        def __init__(self):
+            self.name = "final_answer"
+            self.description = "Use this to provide the final answer"
+            self.inputs = {"answer": "string"}
+            self.output_type = "string"
+
+        def __call__(self, answer):
+            return f"Final answer submitted: {answer}"
+
+    # Create agent with your updated prompts
+    agent = CodeAgent(
+        model=model,
+        tools=[DummyFinalAnswerTool()],
+        max_steps=2,
+        verbosity_level=1,
+        name="TestAgent",
+        description="Testing prompt formatting",
+        prompt_templates=prompt_templates
+    )
+
+    # Test with a simple task
+    result = agent(f"This is a test task for the {scenario_name} scenario.", scenario=scenario_name)
+
+    print(f"Result: {result}")
+    print("-"*50)
+    return result
+
+if __name__ == "__main__":
+    print("Testing prompt formatting with different scenarios\n")
+
+    # Test normal scenario
+    test_scenario("normal")
+
+    # Test error scenario
+    test_scenario("error")
+
+    # Test chess scenario
+    test_scenario("chess")
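
Both mock models return OpenAI-style chat-completion payloads, and the stub CodeAgent in test_chess_formatting.py unwraps them the same way. A minimal sketch of that unwrapping, mirroring the dict shape used throughout these tests:

# The mocks mimic an OpenAI-style chat-completion payload; the agent
# reads the text out of choices[0].message.content.
def extract_content(response: dict) -> str:
    return response["choices"][0]["message"]["content"]

demo = {"choices": [{"message": {"content": "Thought: done."}}]}
assert extract_content(demo) == "Thought: done."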