import os

import pytest
import yaml


# Create mock classes for testing.
class MockModel:
    def __init__(self):
        pass

    def __call__(self, prompt, **kwargs):
        return self.generate_text(prompt, **kwargs)

    def generate_text(self, prompt, **kwargs):
        # Implemented in child classes, which return canned responses.
        pass

class CodeAgent:
    def __init__(self, model=None, tools=None, max_steps=None, verbosity_level=None,
                 name=None, description=None, prompt_templates=None):
        self.model = model
        self.tools = tools
        self.max_steps = max_steps
        self.verbosity_level = verbosity_level
        self.name = name
        self.description = description
        self.prompt_templates = prompt_templates
        self.step_counter = 0

    def run(self, query):
        """Simulate running the agent for testing purposes."""
        response = None
        for step in range(self.max_steps):
            response = self.model.generate_text(query, step=step)
            # Unwrap OpenAI-style chat-completion payloads into plain text.
            if isinstance(response, dict) and "choices" in response:
                response = response["choices"][0]["message"]["content"]
            # Stop as soon as the model emits a final answer.
            if "final_answer" in response:
                break
        return response

    def __call__(self, query):
        return self.run(query)
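
# Contract between CodeAgent.run and the models above: the model may answer
# with a plain string or with an OpenAI-style dict such as
# {"choices": [{"message": {"content": "..."}}]}; run() unwraps the latter
# and stops at the first response that mentions final_answer.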

# Load the updated prompts.yaml, resolving its path relative to this script
# (one level up from tests/ to the project root).
script_dir = os.path.dirname(os.path.abspath(__file__))
project_root = os.path.dirname(script_dir)
prompts_path = os.path.join(project_root, "prompts.yaml")
try:
    print(f"Looking for prompts.yaml at: {prompts_path}")
    with open(prompts_path, "r") as stream:
        prompt_templates = yaml.safe_load(stream)
    print("Successfully loaded prompts.yaml")
except FileNotFoundError:
    print(f"Warning: prompts.yaml not found at {prompts_path}. Tests may fail.")
    prompt_templates = {}
except yaml.YAMLError as e:
    print(f"Error parsing prompts.yaml: {e}")
    prompt_templates = {}
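
# For reference, the layout these tests assume for prompts.yaml is roughly the
# following (a sketch inferred from the assertions in test_prompt_structure
# below; the real file may carry more keys):
#
#   system_prompt:
#     main: |
#       IMPORTANT FORMATTING RULES for ALL responses:
#       EVERY response MUST follow the format ... and MUST include a code block.
#       Example of correct formatting:
#       Thought: ...
#       ```py
#       ...
#       ```<end_code>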

# A specialized model for the chess-position scenario. It plays back a fixed
# sequence of responses, one per step, so we can observe how the agent handles
# a multi-step analysis that includes an error-recovery step.
class ChessPositionTestModel(MockModel):
    def generate_text(self, prompt, **kwargs):
        step = kwargs.get("step", 0)
        responses = [
            # Step 0: initial response asking for the chess image.
            """Thought: I need to see the chess image to analyze the position.
```py
print("I need the chess image to analyze the position. Please provide the image.")
```<end_code>""",
            # Step 1: after receiving the image.
            """Thought: Now I can see the chess position. I'll analyze it.
```py
from src.image_processing_tool import ImageProcessor
image_processor = ImageProcessor()
analysis = image_processor.analyze_chess_position(image_path="chess_image.png")
print(f"Chess position analysis: {analysis}")
```<end_code>""",
            # Step 2: error handling when image analysis fails.
            """Thought: There was an error analyzing the chess position. I'll try a different approach.
```py
print("The image analysis failed. Let me try a different method.")
# Alternative approach
```<end_code>""",
            # Step 3: final answer.
            """Thought: I've analyzed the chess position and determined the best move.
```py
final_answer("e4 to e5")
```<end_code>""",
        ]
        # Return the canned response for this step, wrapped in an
        # OpenAI-style chat-completion payload.
        if step < len(responses):
            return {"choices": [{"message": {"content": responses[step]}}]}
        return {"choices": [{"message": {"content": "Test complete"}}]}
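
# Illustrative payload shape (what CodeAgent.run unwraps):
#   ChessPositionTestModel().generate_text("", step=0)
#   -> {"choices": [{"message": {"content": "Thought: I need to see ..."}}]}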

# Simulate a full chess-position-analysis run end to end.
def test_chess_position_scenario():
    print("\nTesting chess position analysis scenario\n")

    # Minimal stand-ins for the real tools, exposing just enough of the tool
    # interface (name, description, inputs, output_type) for the agent.
    class DummyImageProcessorTool:
        def __init__(self):
            self.name = "image_processor"
            self.description = "Analyze images including chess positions"
            self.inputs = {"image_path": "string"}
            self.output_type = "string"

        def analyze_chess_position(self, image_path):
            return "Position analyzed: white king on e1, black king on e8"

    class DummyFinalAnswerTool:
        def __init__(self):
            self.name = "final_answer"
            self.description = "Use this to provide the final answer"
            self.inputs = {"answer": "string"}
            self.output_type = "string"

        def __call__(self, answer):
            return f"Final answer submitted: {answer}"

    # Create the scripted test model and the agent under test, using the
    # updated prompt templates.
    model = ChessPositionTestModel()
    tools = [DummyImageProcessorTool(), DummyFinalAnswerTool()]
    try:
        agent = CodeAgent(
            model=model,
            tools=tools,
            max_steps=4,  # Allow four steps, one per canned response
            verbosity_level=2,  # Increased verbosity to see more details
            name="ChessTestAgent",
            description="Testing chess position analysis formatting",
            prompt_templates=prompt_templates,
        )
    except Exception as e:
        pytest.fail(f"Error creating agent: {e}")

    # Run the multi-step task and check that the final answer surfaced.
    print("Starting chess position analysis test...")
    result = agent("Analyze this chess position and determine the best move for white.")
    print(f"Final result: {result}")
    print("-" * 50)
    assert result is not None, "Agent did not produce a result"
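
# A finer-grained variant (sketch): drive the scripted model step by step and
# check that only the last canned response carries the final_answer call.
def test_scripted_responses_order():
    model = ChessPositionTestModel()
    contents = [
        model.generate_text("", step=i)["choices"][0]["message"]["content"]
        for i in range(4)
    ]
    assert all(c.startswith("Thought:") for c in contents)
    assert all("final_answer" not in c for c in contents[:3])
    assert "final_answer" in contents[3]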

def test_prompt_structure():
    """Test that the prompt structure includes proper formatting instructions."""
    print("\nTesting prompt structure for formatting instructions\n")

    # Skip if prompts.yaml could not be loaded.
    if not prompt_templates:
        pytest.skip("No prompt templates available to test")

    # Pull the system prompt out of the templates (see the assumed layout above).
    system_prompt = prompt_templates.get("system_prompt", {})
    if isinstance(system_prompt, dict):
        system_prompt = system_prompt.get("main", "")

    # The system prompt must spell out the response format the agent relies on.
    formatting_elements = [
        "IMPORTANT FORMATTING RULES for ALL responses:",  # Section header
        "EVERY response MUST follow the format",  # Format requirement
        "```py",  # Code block start
        "```<end_code>",  # Code block end
        "MUST include a code block",  # Code block requirement
        "Example of correct formatting:",  # Example section
    ]
    for element in formatting_elements:
        assert element in system_prompt, f"Missing required formatting element: {element}"

    # Check that the embedded example itself is well formed.
    example_start = system_prompt.find("Example of correct formatting:")
    if example_start != -1:
        example_end = system_prompt.find("\n\n", example_start)
        if example_end == -1:
            example_end = len(system_prompt)  # Example runs to the end of the prompt
        example_content = system_prompt[example_start:example_end]
        assert "Thought:" in example_content, "Example missing Thought: section"
        assert "```py" in example_content, "Example missing code block start"
        assert "```<end_code>" in example_content, "Example missing code block end"
    else:
        pytest.fail("No formatting example found in system prompt")
    print("✓ Prompt structure contains all required formatting elements")

# Run the tests when this file is executed directly.
if __name__ == "__main__":
    test_prompt_structure()
    test_chess_position_scenario()
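
# These tests also run under pytest, which will additionally pick up
# test_scripted_responses_order above, e.g.:
#   pytest -v tests/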