diff --git "a/task_generators/bug_fixing.py" "b/task_generators/bug_fixing.py" new file mode 100644--- /dev/null +++ "b/task_generators/bug_fixing.py" @@ -0,0 +1,3986 @@ +# recursive_swe_bench/task_generators/bug_fixing.py + +from typing import Any, Dict, List, Optional, Tuple, Set, Union +import uuid +import json +import re +import random +import ast +import copy +from pathlib import Path +import tempfile +import subprocess +import shutil +import os + +from recursive_swe_bench.core.recursive_task import ( + RecursiveTask, ProblemState, EvaluationResult, Feedback, TaskStatus +) + +class BugCategory: + """Categories of bugs for classification and evolution.""" + SYNTAX = "syntax" + LOGICAL = "logical" + PERFORMANCE = "performance" + SECURITY = "security" + CONCURRENCY = "concurrency" + EXCEPTION_HANDLING = "exception_handling" + API_USAGE = "api_usage" + MEMORY_MANAGEMENT = "memory_management" + TYPE_ERROR = "type_error" + EDGE_CASE = "edge_case" + DATA_HANDLING = "data_handling" + DEPENDENCY = "dependency" + + +class BugFixingTask(RecursiveTask): + """ + A recursive task for evaluating how models fix bugs in code. + + The task presents a piece of code with one or more bugs, and evolves + based on the model's fix attempts. As the model addresses issues, + the task may introduce more subtle bugs, change requirements, or + increase complexity to test adaptive problem-solving. + """ + + def __init__( + self, + initial_state: ProblemState, + config: Dict[str, Any] = None, + test_runner: Any = None + ): + """ + Initialize the bug fixing task. + + Args: + initial_state: The initial problem state + config: Configuration options + test_runner: Custom test runner (optional) + """ + super().__init__(initial_state, config) + self.test_runner = test_runner or DefaultTestRunner() + self.bug_categories: Set[str] = set( + self.config.get("bug_categories", [BugCategory.LOGICAL, BugCategory.SYNTAX]) + ) + self.difficulty_progression = self.config.get( + "difficulty_progression", [0.0, 0.15, 0.3, 0.5, 0.7] + ) + self.evolution_strategies = self.config.get( + "evolution_strategies", ["add_subtle_bug", "change_requirements", "increase_complexity"] + ) + + def _run_evaluation(self, solution: str) -> EvaluationResult: + """ + Run tests to evaluate the solution. + + Args: + solution: The solution code + + Returns: + Evaluation results + """ + # Create a temporary directory to run tests + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + + # Write solution code to file + solution_file = temp_path / "solution.py" + with open(solution_file, "w") as f: + f.write(solution) + + # Create test files + test_files = self._create_test_files(temp_path) + + # Run tests + results = self.test_runner.run_tests( + solution_file=solution_file, + test_files=test_files, + code_context=self.state.code_context + ) + + # Calculate score based on test results + score = self._calculate_score(results) + + return EvaluationResult( + success=results["all_passed"], + score=score, + execution_results=results["execution"], + error_details=results.get("errors"), + test_results=results["tests"], + metrics={ + "passed_tests": results["passed_tests"], + "total_tests": results["total_tests"], + "execution_time": results["execution_time"], + "memory_usage": results.get("memory_usage", 0), + "code_complexity": self._calculate_complexity(solution) + } + ) + + def _generate_feedback(self, solution: str, result: EvaluationResult) -> Feedback: + """ + Generate structured feedback based on evaluation results. 
+ + Args: + solution: The solution code + result: The evaluation results + + Returns: + Structured feedback + """ + issues = [] + suggestions = [] + focus_areas = [] + + # Add issues for failing tests + if result.test_results: + for test_name, test_result in result.test_results.items(): + if not test_result["passed"]: + issues.append({ + "type": "test_failure", + "test": test_name, + "message": test_result.get("message", "Test failed"), + "expected": test_result.get("expected"), + "actual": test_result.get("actual") + }) + + # Add issues for errors + if result.error_details: + for error_type, error_info in result.error_details.items(): + issues.append({ + "type": "error", + "error_type": error_type, + "message": error_info.get("message", "An error occurred"), + "location": error_info.get("location") + }) + + # Generate suggestions based on issues + for issue in issues: + if issue["type"] == "test_failure": + suggestion = self._generate_suggestion_for_test_failure( + issue, solution, result.test_results + ) + if suggestion: + suggestions.append(suggestion) + elif issue["type"] == "error": + suggestion = self._generate_suggestion_for_error( + issue, solution + ) + if suggestion: + suggestions.append(suggestion) + + # Determine focus areas based on issues and task state + focus_areas = self._determine_focus_areas(issues, solution, result) + + # Generate adaptation hints based on the current state and results + adaptation_hints = self._generate_adaptation_hints(solution, result) + + # Create summary + if result.success: + summary = ( + f"Your solution passes all tests with a score of {result.score:.2f}. " + f"The code successfully addresses the bugs in the original implementation." + ) + else: + passed = result.metrics.get("passed_tests", 0) + total = result.metrics.get("total_tests", 0) + summary = ( + f"Your solution passes {passed}/{total} tests with a score of {result.score:.2f}. " + f"There are still issues that need to be addressed." + ) + + return Feedback( + summary=summary, + issues=issues, + suggestions=suggestions, + focus_areas=focus_areas, + adaptation_hints=adaptation_hints + ) + + def _evolve_state(self, solution: str, result: EvaluationResult, feedback: Feedback) -> ProblemState: + """ + Evolve the problem state based on the solution and feedback. + + This method implements the recursive nature of the benchmark by + adapting the problem to challenge the model's understanding. + + Args: + solution: The attempted solution + result: The evaluation results + feedback: The feedback provided + + Returns: + The evolved problem state + """ + # If the solution perfectly solved the problem, make it more challenging + if result.success and result.score > 0.95: + return self._increase_difficulty(solution, result, feedback) + + # If the solution was close but not perfect, focus on the remaining issues + elif result.score > 0.7: + return self._focus_remaining_issues(solution, result, feedback) + + # If the solution was not very good, provide more guidance + else: + return self._provide_more_guidance(solution, result, feedback) + + def _increase_difficulty(self, solution: str, result: EvaluationResult, feedback: Feedback) -> ProblemState: + """ + Increase the difficulty of the problem for models that solved it well. 
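+
+        Worked example (assuming the default difficulty_progression of
+        [0.0, 0.15, 0.3, 0.5, 0.7]): a task solved at evolution_stage 0 with
+        score > 0.95 advances to stage 1, its difficulty becomes 0.15, and one
+        of the configured evolution strategies (e.g. "add_subtle_bug") is
+        applied to the passing solution before the description and adaptation
+        vector are regenerated.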
+ + Args: + solution: The successful solution + result: The evaluation results + feedback: The feedback provided + + Returns: + The evolved problem state with increased difficulty + """ + # Create a new state based on the current state + new_state = copy.deepcopy(self.state) + + # Increment evolution stage + new_state.evolution_stage += 1 + + # Increase difficulty based on progression schedule + current_difficulty_idx = min(new_state.evolution_stage, + len(self.difficulty_progression) - 1) + new_state.difficulty = self.difficulty_progression[current_difficulty_idx] + + # Select an evolution strategy based on the current state + strategy = self._select_evolution_strategy(solution, result, feedback) + + # Apply the selected strategy + if strategy == "add_subtle_bug": + self._add_subtle_bug(new_state, solution) + elif strategy == "change_requirements": + self._change_requirements(new_state, solution) + elif strategy == "increase_complexity": + self._increase_complexity(new_state, solution) + + # Update the description to reflect the changes + new_state.description = self._generate_description(new_state) + + # Update adaptation vector to guide future evolution + new_state.adaptation_vector = self._calculate_adaptation_vector( + solution, result, feedback + ) + + return new_state + + def _focus_remaining_issues(self, solution: str, result: EvaluationResult, feedback: Feedback) -> ProblemState: + """ + Evolve the state to focus on remaining issues when the solution is close but not perfect. + + Args: + solution: The nearly-successful solution + result: The evaluation results + feedback: The feedback provided + + Returns: + The evolved problem state focusing on remaining issues + """ + # Create a new state based on the current state + new_state = copy.deepcopy(self.state) + + # Increment evolution stage + new_state.evolution_stage += 1 + + # Maintain the same difficulty level + current_difficulty_idx = min(new_state.evolution_stage - 1, + len(self.difficulty_progression) - 1) + new_state.difficulty = self.difficulty_progression[current_difficulty_idx] + + # Update the code context to focus on remaining issues + new_state.code_context["focus_areas"] = feedback.focus_areas + + # Highlight failing tests in the code context + if result.test_results: + failing_tests = [ + test_name for test_name, test_result in result.test_results.items() + if not test_result["passed"] + ] + new_state.code_context["failing_tests"] = failing_tests + + # Update the description to be more specific about remaining issues + new_state.description = self._generate_focused_description( + new_state, feedback.issues + ) + + # Update adaptation vector to guide future evolution + new_state.adaptation_vector = self._calculate_adaptation_vector( + solution, result, feedback + ) + + return new_state + + def _provide_more_guidance(self, solution: str, result: EvaluationResult, feedback: Feedback) -> ProblemState: + """ + Evolve the state to provide more guidance when the solution was not very good. 
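+
+        Worked example of the difficulty step-down (default progression
+        assumed): an incoming state at evolution_stage 1 (difficulty 0.15)
+        advances to stage 2, but the index max(0, min(2 - 1, 4) - 1) = 0 is
+        used, so the evolved task is presented at difficulty 0.0 together with
+        extra hints and detailed failing-test information.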
+ + Args: + solution: The unsuccessful solution + result: The evaluation results + feedback: The feedback provided + + Returns: + The evolved problem state with more guidance + """ + # Create a new state based on the current state + new_state = copy.deepcopy(self.state) + + # Increment evolution stage + new_state.evolution_stage += 1 + + # Maintain or slightly decrease difficulty + current_difficulty_idx = max(0, min(new_state.evolution_stage - 1, + len(self.difficulty_progression) - 1) - 1) + new_state.difficulty = self.difficulty_progression[current_difficulty_idx] + + # Add more hints to the code context + new_state.code_context["hints"] = self._generate_hints( + solution, result, feedback + ) + + # Add more detailed information about failing tests + if result.test_results: + detailed_test_results = {} + for test_name, test_result in result.test_results.items(): + if not test_result["passed"]: + detailed_test_results[test_name] = { + "message": test_result.get("message", "Test failed"), + "expected": test_result.get("expected"), + "actual": test_result.get("actual"), + "hint": self._generate_test_hint(test_name, test_result) + } + new_state.code_context["detailed_test_results"] = detailed_test_results + + # Update the description to include more guidance + new_state.description = self._generate_guided_description( + new_state, feedback.issues, feedback.suggestions + ) + + # Update adaptation vector to guide future evolution + new_state.adaptation_vector = self._calculate_adaptation_vector( + solution, result, feedback + ) + + return new_state + + def _select_evolution_strategy(self, solution: str, result: EvaluationResult, feedback: Feedback) -> str: + """ + Select an evolution strategy based on the current state and solution. + + Args: + solution: The current solution + result: The evaluation results + feedback: The feedback provided + + Returns: + The selected evolution strategy + """ + available_strategies = self.evolution_strategies.copy() + + # Weight the strategies based on the current state + weights = {} + + # Prefer adding subtle bugs if the solution is very good + if result.score > 0.95: + weights["add_subtle_bug"] = 0.6 + weights["change_requirements"] = 0.3 + weights["increase_complexity"] = 0.1 + + # Prefer changing requirements if we've already added several bugs + elif self.state.evolution_stage >= 2 and "bug_count" in self.state.code_context and self.state.code_context["bug_count"] >= 3: + weights["add_subtle_bug"] = 0.1 + weights["change_requirements"] = 0.7 + weights["increase_complexity"] = 0.2 + + # Prefer increasing complexity if the solution is good but not perfect + elif result.score > 0.85: + weights["add_subtle_bug"] = 0.2 + weights["change_requirements"] = 0.2 + weights["increase_complexity"] = 0.6 + + # Default to equal weights + else: + weights = {strategy: 1.0 / len(available_strategies) + for strategy in available_strategies} + + # Normalize weights for available strategies + total_weight = sum(weights.get(strategy, 0) for strategy in available_strategies) + normalized_weights = [weights.get(strategy, 0) / total_weight + for strategy in available_strategies] + + # Select a strategy based on weights + return random.choices(available_strategies, weights=normalized_weights)[0] + + def _add_subtle_bug(self, state: ProblemState, solution: str) -> None: + """ + Add a subtle bug to the solution code. 
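+
+        Example of the bookkeeping this method performs (counts hypothetical):
+        after inserting a logical bug it appends "logical" to
+        code_context["bug_categories"] and increments
+        code_context["bug_count"], so a state that already carried two bugs
+        ends up with bug_count == 3.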
+ + Args: + state: The problem state to modify + solution: The current solution + """ + # Parse the solution to find potential bug insertion points + try: + parsed_solution = ast.parse(solution) + except SyntaxError: + # If we can't parse the solution, just add a syntax error + self._add_syntax_error(state, solution) + return + + # Choose a bug category based on available categories + available_categories = list(self.bug_categories) + if available_categories: + bug_category = random.choice(available_categories) + else: + bug_category = BugCategory.LOGICAL + + # Add a bug based on the selected category + if bug_category == BugCategory.SYNTAX: + self._add_syntax_error(state, solution) + elif bug_category == BugCategory.LOGICAL: + self._add_logical_error(state, solution, parsed_solution) + elif bug_category == BugCategory.PERFORMANCE: + self._add_performance_issue(state, solution, parsed_solution) + elif bug_category == BugCategory.EDGE_CASE: + self._add_edge_case_issue(state, solution, parsed_solution) + else: + # Default to logical error + self._add_logical_error(state, solution, parsed_solution) + + # Update bug count in code context + if "bug_count" not in state.code_context: + state.code_context["bug_count"] = 0 + state.code_context["bug_count"] += 1 + + # Add the bug category to the context + if "bug_categories" not in state.code_context: + state.code_context["bug_categories"] = [] + state.code_context["bug_categories"].append(bug_category) + + def _change_requirements(self, state: ProblemState, solution: str) -> None: + """ + Change the requirements to challenge the current solution. + + Args: + state: The problem state to modify + solution: The current solution + """ + # Get the current requirements + requirements = state.requirements + + # Add a new requirement + new_requirement = self._generate_new_requirement(state, solution) + if new_requirement: + requirements.append(new_requirement) + + # Modify an existing requirement if possible + if requirements and random.random() < 0.5: + idx = random.randint(0, len(requirements) - 1) + requirements[idx] = self._modify_requirement(requirements[idx], state, solution) + + def _increase_complexity(self, state: ProblemState, solution: str) -> None: + """ + Increase the complexity of the task. + + Args: + state: The problem state to modify + solution: The current solution + """ + # Parse the solution if possible + try: + parsed_solution = ast.parse(solution) + except SyntaxError: + # If we can't parse the solution, make a simpler change + self._add_edge_case_requirement(state) + return + + # Choose a complexity increase strategy + strategies = [ + "add_edge_cases", + "increase_data_volume", + "add_performance_constraint", + "expand_functionality" + ] + + strategy = random.choice(strategies) + + if strategy == "add_edge_cases": + self._add_edge_case_requirement(state) + elif strategy == "increase_data_volume": + self._increase_data_volume(state, solution) + elif strategy == "add_performance_constraint": + self._add_performance_constraint(state, solution) + elif strategy == "expand_functionality": + self._expand_functionality(state, solution) + + def _create_test_files(self, temp_path: Path) -> List[Path]: + """ + Create test files based on the current problem state. 
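+
+        Each entry in state.code_context["tests"] is expected to be a mapping
+        with at least a "content" key (and typically "name" and "description"),
+        for example (hypothetical): {"name": "test_0", "content": "import
+        unittest...", "description": "Basic behaviour"}. When no tests are
+        present, a single default unittest file is generated instead.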
+ + Args: + temp_path: The temporary directory path + + Returns: + List of test file paths + """ + test_files = [] + + # Create test files from the code context + if "tests" in self.state.code_context: + for i, test in enumerate(self.state.code_context["tests"]): + test_file = temp_path / f"test_{i}.py" + with open(test_file, "w") as f: + f.write(test["content"]) + test_files.append(test_file) + + # Create a default test file if no tests are specified + if not test_files: + test_file = temp_path / "test_default.py" + with open(test_file, "w") as f: + f.write(self._generate_default_test()) + test_files.append(test_file) + + return test_files + + def _calculate_score(self, results: Dict[str, Any]) -> float: + """ + Calculate a score based on test results. + + Args: + results: The test results + + Returns: + A score between 0 and 1 + """ + # Base score on test results + if results["total_tests"] == 0: + test_score = 0.0 + else: + test_score = results["passed_tests"] / results["total_tests"] + + # Adjust for execution success + execution_score = 1.0 if results["execution"]["success"] else 0.0 + + # Combine scores with weights + weights = self.config.get("score_weights", {"test": 0.7, "execution": 0.3}) + score = (test_score * weights["test"] + execution_score * weights["execution"]) + + # Apply difficulty modifier + difficulty_modifier = 1.0 + (self.state.difficulty * 0.2) + score = score / difficulty_modifier + + return max(0.0, min(1.0, score)) + + def _calculate_complexity(self, code: str) -> float: + """ + Calculate the complexity of code. + + Args: + code: The code to analyze + + Returns: + A complexity score + """ + # Simple cyclomatic complexity estimation + complexity = 1 + + # Count control flow statements + for pattern in ["if", "for", "while", "and", "or"]: + complexity += code.count(f" {pattern} ") + + # Count function definitions + complexity += code.count("def ") + + # Normalize to 0-1 range + normalized = min(1.0, complexity / 50.0) + + return normalized + + def _generate_suggestion_for_test_failure( + self, + issue: Dict[str, Any], + solution: str, + test_results: Dict[str, Any] + ) -> Dict[str, Any]: + """ + Generate a suggestion for a test failure. 
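+
+        Illustrative return value (function name hypothetical): when the
+        failing test defines a function the solution does not implement, the
+        suggestion looks like {"type": "missing_function", "message":
+        "Implement the missing function(s): parse_input", "functions":
+        ["parse_input"]}; otherwise a "fix_assertion_failure",
+        "fix_test_failure", or "general_fix" suggestion is produced.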
+ + Args: + issue: The issue data + solution: The solution code + test_results: The test results + + Returns: + A suggestion dictionary + """ + test_name = issue["test"] + test_result = test_results[test_name] + + # Extract relevant parts of the test + test_content = None + for test in self.state.code_context.get("tests", []): + if test.get("name") == test_name: + test_content = test.get("content") + break + + if test_content: + # Try to extract the assertion that failed + assertion_match = re.search(r"assert.*", test_content) + assertion = assertion_match.group(0) if assertion_match else None + + # Look for function names in both test and solution + test_funcs = re.findall(r"def\s+(\w+)", test_content) + solution_funcs = re.findall(r"def\s+(\w+)", solution) + + # Find functions in test that aren't in solution + missing_funcs = [f for f in test_funcs if f not in solution_funcs] + + if missing_funcs: + return { + "type": "missing_function", + "message": f"Implement the missing function(s): {', '.join(missing_funcs)}", + "functions": missing_funcs + } + elif assertion: + return { + "type": "fix_assertion_failure", + "message": f"Fix the code to pass the assertion: {assertion}", + "assertion": assertion, + "expected": test_result.get("expected"), + "actual": test_result.get("actual") + } + else: + return { + "type": "fix_test_failure", + "message": f"Fix the code to pass the test: {test_name}", + "test_name": test_name + } + else: + return { + "type": "general_fix", + "message": f"Fix the code to pass the failing test: {test_name}" + } + + def _generate_suggestion_for_error( + self, + issue: Dict[str, Any], + solution: str + ) -> Dict[str, Any]: + """ + Generate a suggestion for an error. + + Args: + issue: The issue data + solution: The solution code + + Returns: + A suggestion dictionary + """ + error_type = issue["error_type"] + message = issue["message"] + location = issue.get("location") + + if error_type == "syntax": + return { + "type": "fix_syntax", + "message": f"Fix the syntax error: {message}", + "location": location + } + elif error_type == "runtime": + return { + "type": "fix_runtime_error", + "message": f"Fix the runtime error: {message}", + "location": location + } + else: + return { + "type": "fix_error", + "message": f"Fix the error: {message}", + "error_type": error_type, + "location": location + } + + def _determine_focus_areas( + self, + issues: List[Dict[str, Any]], + solution: str, + result: EvaluationResult + ) -> List[str]: + """ + Determine focus areas based on issues and results. 
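+
+        The returned list draws from a small fixed vocabulary: "syntax",
+        "logic", "functionality", "performance", "complexity", with "general"
+        used as the fallback when nothing else applies. For example, a run
+        with one assertion mismatch and a slow execution time would yield
+        ["logic", "performance"].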
+ + Args: + issues: The identified issues + solution: The solution code + result: The evaluation results + + Returns: + List of focus areas + """ + focus_areas = [] + + # Check for syntax issues + syntax_issues = [i for i in issues if i.get("error_type") == "syntax"] + if syntax_issues: + focus_areas.append("syntax") + + # Check for failing tests + test_issues = [i for i in issues if i["type"] == "test_failure"] + if test_issues: + if any("expected" in i and "actual" in i for i in test_issues): + focus_areas.append("logic") + else: + focus_areas.append("functionality") + + # Check for performance issues + if result.metrics and "execution_time" in result.metrics: + if result.metrics["execution_time"] > self.config.get("performance_threshold", 1.0): + focus_areas.append("performance") + + # Check for complexity issues + if result.metrics and "code_complexity" in result.metrics: + if result.metrics["code_complexity"] > self.config.get("complexity_threshold", 0.7): + focus_areas.append("complexity") + + # Default focus area if none were identified + if not focus_areas: + focus_areas.append("general") + + return focus_areas + + def _generate_adaptation_hints( + self, + solution: str, + result: EvaluationResult + ) -> List[Dict[str, Any]]: + """ + Generate hints about how the problem might adapt in the next iteration. + + Args: + solution: The solution code + result: The evaluation results + + Returns: + List of adaptation hints + """ + hints = [] + + # Hint about potential complexity increases + if result.score > 0.8: + hints.append({ + "type": "complexity_increase", + "message": "The problem may become more complex in the next iteration." + }) + + # Hint about potential requirement changes + if result.score > 0.9 and self.state.evolution_stage >= 1: + hints.append({ + "type": "requirement_change", + "message": "The requirements may change in the next iteration." + }) + + # Hint about potential bug additions + if result.score > 0.95: + hints.append({ + "type": "new_bugs", + "message": "New, more subtle bugs may be introduced in the next iteration." + }) + + # Hint about focus on specific areas + if result.score > 0.7 and result.score < 0.95: + focus_areas = result.metrics.get("focus_areas", []) + if focus_areas: + hints.append({ + "type": "focus_shift", + "message": f"The next iteration may focus more on: {', '.join(focus_areas)}", + "areas": focus_areas + }) + + return hints + + def _generate_description(self, state: ProblemState) -> str: + """ + Generate a description for the current problem state. + + Args: + state: The problem state + + Returns: + A descriptive prompt for the problem + """ + # Base description + base_desc = ( + f"Fix the bug(s) in the following code. " + f"This is iteration {state.evolution_stage + 1} of the task." + ) + + # Add information about known bug categories + if "bug_categories" in state.code_context: + categories = state.code_context["bug_categories"] + if categories: + base_desc += f"\n\nThe code contains the following types of issues: {', '.join(categories)}." + + # Add requirements + if state.requirements: + base_desc += "\n\nRequirements:" + for i, req in enumerate(state.requirements): + base_desc += f"\n{i+1}. 
{req['description']}" + + # Add information about difficulty + difficulty_desc = "easy" + if state.difficulty > 0.3 and state.difficulty <= 0.6: + difficulty_desc = "moderate" + elif state.difficulty > 0.6 and state.difficulty <= 0.8: + difficulty_desc = "challenging" + elif state.difficulty > 0.8: + difficulty_desc = "very challenging" + + base_desc += f"\n\nThis is a {difficulty_desc} bug fixing task." + + return base_desc + + def _generate_focused_description(self, state: ProblemState, issues: List[Dict[str, Any]]) -> str: + """ + Generate a description focused on remaining issues. + + Args: + state: The problem state + issues: The identified issues + + Returns: + A descriptive prompt focused on remaining issues + """ + base_desc = self._generate_description(state) + + # Add focus on remaining issues + if issues: + base_desc += "\n\nFocus on the following issues:" + for i, issue in enumerate(issues): + if issue["type"] == "test_failure": + base_desc += f"\n{i+1}. Test failure in '{issue['test']}': {issue['message']}" + else: + base_desc += f"\n{i+1}. {issue['error_type']} error: {issue['message']}" + + # Add focus areas if present + if "focus_areas" in state.code_context: + areas = state.code_context["focus_areas"] + if areas: + base_desc += f"\n\nPay particular attention to: {', '.join(areas)}." + + return base_desc + + def _generate_guided_description( + self, + state: ProblemState, + issues: List[Dict[str, Any]], + suggestions: List[Dict[str, Any]] + ) -> str: + """ + Generate a description with added guidance. + + Args: + state: The problem state + issues: The identified issues + suggestions: The suggested fixes + + Returns: + A descriptive prompt with added guidance + """ + base_desc = self._generate_description(state) + + # Add detailed information about issues + if issues: + base_desc += "\n\nThe following issues were identified in your previous solution:" + for i, issue in enumerate(issues): + if issue["type"] == "test_failure": + base_desc += f"\n{i+1}. Test failure in '{issue['test']}': {issue['message']}" + if "expected" in issue and "actual" in issue: + base_desc += f"\n Expected: {issue['expected']}" + base_desc += f"\n Actual: {issue['actual']}" + else: + base_desc += f"\n{i+1}. {issue['error_type']} error: {issue['message']}" + if "location" in issue: + base_desc += f"\n Location: {issue['location']}" + + # Add suggestions + if suggestions: + base_desc += "\n\nConsider the following suggestions:" + for i, suggestion in enumerate(suggestions): + base_desc += f"\n{i+1}. {suggestion['message']}" + + # Add hints if present + if "hints" in state.code_context: + hints = state.code_context["hints"] + if hints: + base_desc += "\n\nHints:" + for i, hint in enumerate(hints): + base_desc += f"\n{i+1}. {hint}" + + return base_desc + + def _generate_hints( + self, + solution: str, + result: EvaluationResult, + feedback: Feedback + ) -> List[str]: + """ + Generate hints based on the solution and feedback. 
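+
+        Example of the returned hints (values hypothetical, format as built
+        below): ["Focus on fixing the failing tests. For test 'test_sum',
+        expected '10' but got '9'.", "Review the logic of your solution,
+        especially conditional statements and loop conditions."]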
+ + Args: + solution: The solution code + result: The evaluation results + feedback: The feedback provided + + Returns: + List of hints + """ + hints = [] + + # Add hints based on failing tests + if result.test_results: + failing_tests = [ + test_name for test_name, test_result in result.test_results.items() + if not test_result["passed"] + ] + + if failing_tests: + test_hint = "Focus on fixing the failing tests" + + # Add specific information about test expectations if available + for test_name in failing_tests[:2]: # Limit to first two tests + test_result = result.test_results[test_name] + if "expected" in test_result and "actual" in test_result: + test_hint += f". For test '{test_name}', expected '{test_result['expected']}' but got '{test_result['actual']}'" + + hints.append(test_hint + ".") + + # Add hints based on errors + if result.error_details: + for error_type, error_info in result.error_details.items(): + hints.append(f"Fix the {error_type} error: {error_info.get('message', 'Unknown error')}.") + + # Add hints based on focus areas + for area in feedback.focus_areas: + if area == "syntax": + hints.append("Check your syntax carefully, especially parentheses, indentation, and function definitions.") + elif area == "logic": + hints.append("Review the logic of your solution, especially conditional statements and loop conditions.") + elif area == "functionality": + hints.append("Ensure your solution implements all required functionality specified in the tests.") + elif area == "performance": + hints.append("Consider optimizing your solution for better performance, avoid unnecessary operations.") + elif area == "complexity": + hints.append("Try to simplify your solution, it may be more complex than necessary.") + + return hints + + def _generate_test_hint(self, test_name: str, test_result: Dict[str, Any]) -> str: + """ + Generate a hint for a specific failing test. + + Args: + test_name: The name of the test + test_result: The test result + + Returns: + A hint for the test + """ + if "expected" in test_result and "actual" in test_result: + return f"The test expected '{test_result['expected']}' but got '{test_result['actual']}'" + elif "message" in test_result: + return test_result["message"] + else: + return "The test failed, but no detailed information is available." + + def _add_syntax_error(self, state: ProblemState, solution: str) -> None: + """ + Add a syntax error to the solution code. 
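+
+        Illustrative effect (line content hypothetical): the "swap_characters"
+        modification could turn "return result" into "retrun result", and a
+        record such as {"type": "syntax", "line": 7, "description": "Syntax
+        error introduced in line 7"} is appended to code_context["bugs"].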
+ + Args: + state: The problem state to modify + solution: The current solution + """ + lines = solution.split('\n') + if not lines: + return + + # Choose a line to modify + idx = random.randint(0, len(lines) - 1) + line = lines[idx] + + # Skip empty lines or comment lines + while not line.strip() or line.strip().startswith('#'): + idx = random.randint(0, len(lines) - 1) + line = lines[idx] + + # Choose a modification type + mod_type = random.choice([ + "remove_character", + "add_character", + "swap_characters", + "change_indent" + ]) + + if mod_type == "remove_character" and line: + char_idx = random.randint(0, len(line) - 1) + lines[idx] = line[:char_idx] + line[char_idx+1:] + + elif mod_type == "add_character": + char_idx = random.randint(0, len(line)) + char = random.choice(["(", ")", "{", "}", "[", "]", ":", ";", ",", "."]) + lines[idx] = line[:char_idx] + char + line[char_idx:] + + elif mod_type == "swap_characters" and len(line) >= 2: + char_idx = random.randint(0, len(line) - 2) + lines[idx] = (line[:char_idx] + line[char_idx+1] + + line[char_idx] + line[char_idx+2:]) + + elif mod_type == "change_indent": + # Either add or remove indentation + if line.startswith(" "): + lines[idx] = line[2:] # Remove some indent + else: + lines[idx] = " " + line # Add inconsistent indent + + # Update the code + modified_code = '\n'.join(lines) + state.code_context["code"] = modified_code + + # Add information about the modification + if "bugs" not in state.code_context: + state.code_context["bugs"] = [] + + state.code_context["bugs"].append({ + "type": "syntax", + "line": idx + 1, + "description": f"Syntax error introduced in line {idx + 1}" + }) + + def _add_logical_error(self, state: ProblemState, solution: str, parsed_solution: ast.Module) -> None: + """ + Add a logical error to the solution code. 
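+
+        Illustrative effect (line content hypothetical): with mod_type
+        "change_comparison", a condition such as "if count == limit:" becomes
+        "if count != limit:", and a record like {"type": "logical", "line": 12,
+        "description": "Logical error introduced in line 12:
+        change_comparison"} is appended to code_context["bugs"].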
+ + Args: + state: The problem state to modify + solution: The current solution + parsed_solution: The parsed AST of the solution + """ + modification_types = [ + "change_comparison", + "invert_condition", + "off_by_one", + "change_operator", + "reverse_logic" + ] + + mod_type = random.choice(modification_types) + lines = solution.split('\n') + + # Find all if statements and loops + if_statements = [] + for i, line in enumerate(lines): + if re.search(r'\bif\b|\bwhile\b|\bfor\b', line): + if_statements.append((i, line)) + + if if_statements: + # Choose an if statement to modify + idx, line = random.choice(if_statements) + + if mod_type == "change_comparison": + # Change comparison operators + comparisons = {"==": "!=", "!=": "==", ">": "<", "<": ">", ">=": "<=", "<=": ">="} + for op, new_op in comparisons.items(): + if op in line: + lines[idx] = line.replace(op, new_op, 1) + break + + elif mod_type == "invert_condition": + # Add or remove a "not" to invert the condition + if "not" in line: + lines[idx] = line.replace("not ", "", 1) + else: + match = re.search(r'(if|while)\s+([^:]+):', line) + if match: + condition = match.group(2) + lines[idx] = line.replace(condition, f"not ({condition})", 1) + + elif mod_type == "off_by_one": + # Introduce an off-by-one error + for op in ["+", "-"]: + if op in line: + # If there's a number after the operator, change it + match = re.search(f'\\{op}\\s*(\\d+)', line) + if match: + num = int(match.group(1)) + new_num = num + 1 if op == "+" else max(0, num - 1) + lines[idx] = line.replace(f"{op} {num}", f"{op} {new_num}", 1) + break + + elif mod_type == "change_operator": + # Change arithmetic or logical operators + operators = {"+": "-", "-": "+", "*": "/", "/": "*", "and": "or", "or": "and"} + for op, new_op in operators.items(): + if f" {op} " in line: + lines[idx] = line.replace(f" {op} ", f" {new_op} ", 1) + break + + elif mod_type == "reverse_logic": + # Reverse the logic of a compound condition + if " and " in line: + parts = line.split(" and ") + lines[idx] = line.replace(" and ".join(parts), " or ".join(parts), 1) + elif " or " in line: + parts = line.split(" or ") + lines[idx] = line.replace(" or ".join(parts), " and ".join(parts), 1) + + else: + # If no if statements found, introduce a different kind of logical error + # Find variable assignments + assignments = [] + for i, line in enumerate(lines): + if "=" in line and "==" not in line and "!=" not in line: + assignments.append((i, line)) + + if assignments: + # Choose an assignment to modify + idx, line = random.choice(assignments) + + # Modify the assignment + if "+" in line: + lines[idx] = line.replace("+", "-", 1) + elif "-" in line: + lines[idx] = line.replace("-", "+", 1) + elif "*" in line: + lines[idx] = line.replace("*", "/", 1) + elif "/" in line: + lines[idx] = line.replace("/", "*", 1) + else: + # If no arithmetic operator, change the value + match = re.search(r'=\s*(\d+)', line) + if match: + num = int(match.group(1)) + new_num = num + random.choice([-1, 1]) * random.randint(1, 3) + lines[idx] = line.replace(f"= {num}", f"= {new_num}", 1) + + # Update the code + modified_code = '\n'.join(lines) + state.code_context["code"] = modified_code + + # Add information about the modification + if "bugs" not in state.code_context: + state.code_context["bugs"] = [] + + state.code_context["bugs"].append({ + "type": "logical", + "line": idx +
1, + "description": f"Logical error introduced in line {idx + 1}: {mod_type}" + }) + + def _add_performance_issue(self, state: ProblemState, solution: str, parsed_solution: ast.Module) -> None: + """ + Add a performance issue to the solution code. + + Args: + state: The problem state to modify + solution: The current solution + parsed_solution: The parsed AST of the solution + """ + lines = solution.split('\n') + + # Find loops in the code + loops = [] + for i, line in enumerate(lines): + if re.search(r'\bfor\b|\bwhile\b', line): + loops.append((i, line)) + + if loops: + # Choose a loop to modify + idx, line = random.choice(loops) + + # Choose a modification type + mod_type = random.choice([ + "add_nested_loop", + "replace_efficient_operation", + "add_redundant_computation" + ]) + + if mod_type == "add_nested_loop": + # Add a nested loop + indent = len(line) - len(line.lstrip()) + indent_str = ' ' * indent + loop_body_indent = indent_str + ' ' + + # Find the next line with the same indentation or less + end_idx = idx + 1 + while end_idx < len(lines) and (not lines[end_idx].strip() or len(lines[end_idx]) - len(lines[end_idx].lstrip()) > indent): + end_idx += 1 + + # Insert a nested loop before the end of the current loop + insert_pos = end_idx + lines.insert(insert_pos, f"{loop_body_indent}for _ in range(100): # Unnecessary loop") + lines.insert(insert_pos + 1, f"{loop_body_indent} pass") + + elif mod_type == "replace_efficient_operation": + # Replace an efficient operation with a less efficient one + # Look for list comprehensions or efficient operations + for i in range(idx + 1, min(idx + 10, len(lines))): + if "append" in lines[i] or "extend" in lines[i]: + indent = len(lines[i]) - len(lines[i].lstrip()) + indent_str = ' ' * indent + match = re.search(r'(\w+)\.(append|extend)', lines[i]) + if match: + list_name = match.group(1) + operation = match.group(2) + item = lines[i].split(f"{list_name}.{operation}(")[1].split(")")[0] + + if operation == "append": + # Replace append with concatenation + lines[i] = f"{indent_str}{list_name} = {list_name} + [{item}] # Less efficient than append" + elif operation == "extend": + # Replace extend with concatenation + lines[i] = f"{indent_str}{list_name} = {list_name} + {item} # Less efficient than extend" + break + + elif mod_type == "add_redundant_computation": + # Add redundant computation inside the loop + # Find the indentation level of the loop body + if idx + 1 < len(lines): + body_indent = len(lines[idx + 1]) - len(lines[idx + 1].lstrip()) + body_indent_str = ' ' * body_indent + + # Add redundant computation + lines.insert(idx + 1, f"{body_indent_str}temp = [] # Redundant computation") + lines.insert(idx + 2, f"{body_indent_str}for i in range(1000):") + lines.insert(idx + 3, f"{body_indent_str} temp.append(i)") + lines.insert(idx + 4, f"{body_indent_str} temp.sort() # Unnecessary sort in each iteration") + + else: + # If no loops found, introduce inefficient data structure or algorithm + function_defs = [] + for i, line in enumerate(lines): + if line.strip().startswith("def "): + function_defs.append((i, line)) + + if function_defs: + # Choose a function to modify + idx, line = random.choice(function_defs) + + # Find the indentation level of the function body + if idx + 1 < len(lines): + body_indent = len(lines[idx + 1]) - len(lines[idx + 1].lstrip()) + body_indent_str = ' ' * body_indent + + # Add inefficient code at the beginning of the function + lines.insert(idx + 1, f"{body_indent_str}# Inefficient data structure usage") + lines.insert(idx 
+ 2, f"{body_indent_str}data = []") + lines.insert(idx + 3, f"{body_indent_str}for i in range(1000):") + lines.insert(idx + 4, f"{body_indent_str} data.append(i)") + lines.insert(idx + 5, f"{body_indent_str} # Inefficient search operation") + lines.insert(idx + 6, f"{body_indent_str} if i in data: # Linear search instead of using a set") + lines.insert(idx + 7, f"{body_indent_str} pass") + + # Update the code + modified_code = '\n'.join(lines) + state.code_context["code"] = modified_code + + # Add information about the modification + if "bugs" not in state.code_context: + state.code_context["bugs"] = [] + + state.code_context["bugs"].append({ + "type": "performance", + "line": idx + 1, + "description": f"Performance issue introduced around line {idx + 1}" + }) + + def _add_edge_case_issue(self, state: ProblemState, solution: str, parsed_solution: ast.Module) -> None: + """ + Add an edge case issue to the solution code. + + Args: + state: The problem state to modify + solution: The current solution + parsed_solution: The parsed AST of the solution + """ + lines = solution.split('\n') + + # Find functions in the code + functions = [] + current_func = None + func_start = None + for i, line in enumerate(lines): + if line.strip().startswith("def "): + if current_func: + functions.append((func_start, i - 1, current_func)) + current_func = line.strip()[4:].split("(")[0] + func_start = i + elif i == len(lines) - 1 and current_func: + functions.append((func_start, i, current_func)) + + if functions: + # Choose a function to modify + start_idx, end_idx, func_name = random.choice(functions) + + # Choose a modification type + mod_type = random.choice([ + "remove_boundary_check", + "introduce_zero_division", + "handling_empty_input", + "type_assumption" + ]) + + if mod_type == "remove_boundary_check": + # Find and remove or modify boundary checks + for i in range(start_idx, end_idx + 1): + if re.search(r'if\s+.*(?:len|count|size|length|empty|<=|>=|<|>|\!=)', lines[i]): + # Comment out the boundary check + lines[i] = f"# {lines[i]} # Boundary check removed" + # Skip the body of the if statement + j = i + 1 + indent = len(lines[i]) - len(lines[i].lstrip()) + body_indent = indent + 4 + while j <= end_idx and (not lines[j].strip() or len(lines[j]) - len(lines[j].lstrip()) >= body_indent): + lines[j] = f"# {lines[j]}" + j += 1 + break + + elif mod_type == "introduce_zero_division": + # Find division operations and modify them + for i in range(start_idx, end_idx + 1): + if "/" in lines[i] and "try" not in lines[i] and "except" not in lines[i]: + # Remove denominator check if it exists + if re.search(r'if\s+.*(?:!=\s*0|>\s*0)', lines[i]): + lines[i] = f"# {lines[i]} # Denominator check removed" + else: + # Or modify a division to potentially cause zero division + match = re.search(r'(\w+)\s*/\s*(\w+)', lines[i]) + if match: + denominator = match.group(2) + # Add a potential zero value for the denominator + indent = len(lines[i]) - len(lines[i].lstrip()) + indent_str = ' ' * indent + lines.insert(i, f"{indent_str}if random.random() < 0.1: # Introduce potential zero division") + lines.insert(i + 1, f"{indent_str} {denominator} = 0") + break + + elif mod_type == "handling_empty_input": + # Modify parameter handling to not handle empty inputs correctly + params = re.search(r'def\s+\w+\s*\((.*?)\)', lines[start_idx]) + if params and params.group(1): + param_list = [p.strip() for p in params.group(1).split(",")] + if param_list: + param = param_list[0].split("=")[0].strip() + # Find checks for the parameter + for i 
in range(start_idx + 1, end_idx + 1): + if re.search(rf'if\s+.*(?:not\s+{param}|len\s*\(\s*{param}\s*\)\s*==\s*0)', lines[i]): + # Comment out the empty check + lines[i] = f"# {lines[i]} # Empty input check removed" + # Skip the body of the if statement + j = i + 1 + indent = len(lines[i]) - len(lines[i].lstrip()) + body_indent = indent + 4 + while j <= end_idx and (not lines[j].strip() or len(lines[j]) - len(lines[j].lstrip()) >= body_indent): + lines[j] = f"# {lines[j]}" + j += 1 + break + + elif mod_type == "type_assumption": + # Introduce assumptions about parameter types + params = re.search(r'def\s+\w+\s*\((.*?)\)', lines[start_idx]) + if params and params.group(1): + param_list = [p.strip() for p in params.group(1).split(",")] + if param_list: + param = param_list[0].split("=")[0].strip() + # Find type checks for the parameter + type_check_found = False + for i in range(start_idx + 1, end_idx + 1): + if re.search(rf'(?:isinstance|type)\s*\(\s*{param}\s*,', lines[i]): + # Comment out the type check + lines[i] = f"# {lines[i]} # Type check removed" + type_check_found = True + break + + if not type_check_found: + # Add a problematic type assumption + indent = 4 # Assume basic indentation + for i in range(start_idx + 1, min(start_idx + 5, end_idx + 1)): + if lines[i].strip(): + indent = len(lines[i]) - len(lines[i].lstrip()) + break + + indent_str = ' ' * indent + # Add code that assumes a specific type + lines.insert(start_idx + 1, f"{indent_str}# Assuming {param} is a specific type without checking") + lines.insert(start_idx + 2, f"{indent_str}{param}_length = len({param}) # Will fail if {param} doesn't support len()") + + # Update the code + modified_code = '\n'.join(lines) + state.code_context["code"] = modified_code + + # Add information about the modification + if "bugs" not in state.code_context: + state.code_context["bugs"] = [] + + state.code_context["bugs"].append({ + "type": "edge_case", + "line": start_idx + 1, + "description": f"Edge case issue introduced in function '{func_name}': {mod_type}" + }) + + def _generate_new_requirement(self, state: ProblemState, solution: str) -> Dict[str, Any]: + """ + Generate a new requirement based on the current state and solution. 
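+
+        Illustrative return value (function name hypothetical): a requirement
+        of type "edge_case_handling" looks like {"type": "edge_case_handling",
+        "description": "The function 'process_data' should handle empty input
+        correctly.", "difficulty": 0.5}; the fallback is a generic
+        "general_improvement" requirement.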
+ + Args: + state: The current problem state + solution: The current solution + + Returns: + A new requirement dictionary + """ + # Parse the solution to find functions and variables + function_names = re.findall(r'def\s+(\w+)', solution) + variable_names = re.findall(r'(\w+)\s*=', solution) + + # Choose a requirement type + req_type = random.choice([ + "edge_case_handling", + "performance_improvement", + "error_handling", + "type_checking", + "feature_addition" + ]) + + if req_type == "edge_case_handling": + if function_names: + func_name = random.choice(function_names) + edge_cases = [ + "empty input", + "negative values", + "zero values", + "extremely large values", + "special characters", + "duplicate values" + ] + edge_case = random.choice(edge_cases) + return { + "type": "edge_case_handling", + "description": f"The function '{func_name}' should handle {edge_case} correctly.", + "difficulty": random.uniform(0.3, 0.7) + } + + elif req_type == "performance_improvement": + return { + "type": "performance_improvement", + "description": "The solution should be optimized to run in O(n) time or better.", + "difficulty": random.uniform(0.4, 0.8) + } + + elif req_type == "error_handling": + error_types = [ + "invalid input", + "division by zero", + "file not found", + "network timeout", + "permission denied" + ] + error_type = random.choice(error_types) + return { + "type": "error_handling", + "description": f"The code should handle {error_type} errors gracefully.", + "difficulty": random.uniform(0.2, 0.6) + } + + elif req_type == "type_checking": + if function_names: + func_name = random.choice(function_names) + return { + "type": "type_checking", + "description": f"The function '{func_name}' should validate input types before processing.", + "difficulty": random.uniform(0.1, 0.5) + } + + elif req_type == "feature_addition": + features = [ + "logging capability", + "progress tracking", + "caching for repeated operations", + "parameter validation", + "configuration options" + ] + feature = random.choice(features) + return { + "type": "feature_addition", + "description": f"Add {feature} to the solution.", + "difficulty": random.uniform(0.3, 0.7) + } + + # Default requirement if none of the above were applicable + return { + "type": "general_improvement", + "description": "Improve the overall code quality and readability.", + "difficulty": random.uniform(0.1, 0.4) + } + + def _modify_requirement(self, requirement: Dict[str, Any], state: ProblemState, solution: str) -> Dict[str, Any]: + """ + Modify an existing requirement to make it more challenging. + + Args: + requirement: The requirement to modify + state: The current problem state + solution: The current solution + + Returns: + The modified requirement + """ + # Make a copy of the requirement + modified_req = copy.deepcopy(requirement) + + # Increase the difficulty + modified_req["difficulty"] = min(1.0, requirement.get("difficulty", 0.3) + random.uniform(0.1, 0.3)) + + # Modify the description based on the requirement type + if requirement["type"] == "edge_case_handling": + modified_req["description"] += " Additionally, it should handle very large inputs efficiently." + + elif requirement["type"] == "performance_improvement": + modified_req["description"] = modified_req["description"].replace("O(n)", "O(log n)") + + elif requirement["type"] == "error_handling": + modified_req["description"] += " And provide detailed error messages for debugging." 
+ + elif requirement["type"] == "type_checking": + modified_req["description"] += " And automatically convert types when possible." + + elif requirement["type"] == "feature_addition": + modified_req["description"] += " Ensure this feature is configurable via parameters." + + else: + modified_req["description"] += " The code should also be well-documented with comments." + + return modified_req + + def _add_edge_case_requirement(self, state: ProblemState) -> None: + """ + Add a requirement for handling edge cases. + + Args: + state: The problem state to modify + """ + edge_cases = [ + "empty collections", + "null/None values", + "boundary values (min/max)", + "negative numbers", + "special characters", + "Unicode characters", + "very large inputs", + "malformed input" + ] + + edge_case = random.choice(edge_cases) + + # Add a new requirement + state.requirements.append({ + "type": "edge_case_handling", + "description": f"The solution must correctly handle {edge_case}.", + "difficulty": random.uniform(0.3, 0.7) + }) + + # Add test cases for the edge case if tests exist + if "tests" in state.code_context: + # Create a new test for the edge case + test_template = self._generate_edge_case_test(edge_case, state.code_context) + if test_template: + state.code_context["tests"].append({ + "name": f"test_edge_case_{len(state.code_context['tests'])}", + "content": test_template, + "description": f"Test handling of {edge_case}" + }) + + def _increase_data_volume(self, state: ProblemState, solution: str) -> None: + """ + Modify the problem to require handling larger data volumes. + + Args: + state: The problem state to modify + solution: The current solution + """ + # Add a requirement for handling large data + state.requirements.append({ + "type": "scalability", + "description": "The solution must efficiently handle large datasets (10,000+ items).", + "difficulty": random.uniform(0.5, 0.8) + }) + + # Modify existing tests to use larger data if tests exist + if "tests" in state.code_context: + for i, test in enumerate(state.code_context["tests"]): + content = test["content"] + + # Look for small lists or arrays in tests + for pattern, replacement in [ + (r'\[[^\]]{0,50}\]', '[random.randint(0, 1000) for _ in range(10000)]'), + (r'range\(\d+\)', 'range(10000)'), + (r'"[^"]{0,20}"', '"' + 'a' * 10000 + '"') + ]: + match = re.search(pattern, content) + if match and random.random() < 0.3: # Only replace some instances + content = content.replace(match.group(0), replacement, 1) + break + + state.code_context["tests"][i]["content"] = content + state.code_context["tests"][i]["description"] = f"{test.get('description', 'Test')} (with large data)" + + def _add_performance_constraint(self, state: ProblemState, solution: str) -> None: + """ + Add a performance constraint to the problem. 
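+
+        Example of the appended requirement (difficulty value hypothetical):
+        {"type": "performance", "description": "The solution must achieve
+        linear time complexity (O(n)).", "difficulty": 0.75}; a matching
+        performance test is also appended when code_context already contains
+        tests.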
+ + Args: + state: The problem state to modify + solution: The current solution + """ + # Choose a performance constraint + constraints = [ + "linear time complexity (O(n))", + "logarithmic time complexity (O(log n))", + "constant memory usage (O(1) space)", + "execution time under 100ms for large inputs", + "minimal function calls" + ] + + constraint = random.choice(constraints) + + # Add a new requirement + state.requirements.append({ + "type": "performance", + "description": f"The solution must achieve {constraint}.", + "difficulty": random.uniform(0.6, 0.9) + }) + + # Add performance testing code if tests exist + if "tests" in state.code_context: + # Add a performance test + perf_test = self._generate_performance_test(constraint, state.code_context) + if perf_test: + state.code_context["tests"].append({ + "name": f"test_performance_{len(state.code_context['tests'])}", + "content": perf_test, + "description": f"Test {constraint}" + }) + + def _expand_functionality(self, state: ProblemState, solution: str) -> None: + """ + Expand the required functionality of the solution. + + Args: + state: The problem state to modify + solution: The current solution + """ + # Choose a functionality expansion + expansions = [ + "support for different input types", + "parameterized behavior", + "additional output formats", + "flexible error handling", + "integration with external systems" + ] + + expansion = random.choice(expansions) + + # Add a new requirement + state.requirements.append({ + "type": "functionality", + "description": f"Expand the solution to include {expansion}.", + "difficulty": random.uniform(0.4, 0.8) + }) + + # Add test cases for the new functionality if tests exist + if "tests" in state.code_context: + # Create a new test for the expanded functionality + test_template = self._generate_functionality_test(expansion, state.code_context) + if test_template: + state.code_context["tests"].append({ + "name": f"test_expanded_functionality_{len(state.code_context['tests'])}", + "content": test_template, + "description": f"Test {expansion}" + }) + + def _generate_default_test(self) -> str: + """ + Generate a default test based on the current problem state. + + Returns: + A default test script + """ + # Generate a basic test script + return """ +import unittest +import sys +import os + +# Add the directory containing the solution to the path +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +# Import the solution +from solution import * + +class DefaultTest(unittest.TestCase): + def test_basic_functionality(self): + # A basic test that should pass if the solution is correct + self.assertTrue(True, "Basic assertion failed") + + def test_expected_output(self): + # Test expected output of main functions + # This will need to be updated based on the specific problem + pass + +if __name__ == '__main__': + unittest.main() +""" + + def _generate_edge_case_test(self, edge_case: str, code_context: Dict[str, Any]) -> str: + """ + Generate a test for an edge case. 
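+
+        Note: the generated tests call the chosen function with a single
+        positional argument of varying type (e.g. [], "", {}, None), so they
+        implicitly assume such a call signature; None is returned when no
+        function definitions can be found in code_context["code"].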
+ + Args: + edge_case: The edge case to test + code_context: The code context containing information about the problem + + Returns: + A test script for the edge case + """ + # Extract function names from the code context + function_names = [] + if "code" in code_context: + function_names = re.findall(r'def\s+(\w+)', code_context["code"]) + + if not function_names: + return None + + # Choose a function to test + function_name = random.choice(function_names) + + # Generate test code based on the edge case + if edge_case == "empty collections": + return f""" +import unittest +import sys +import os + +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from solution import {function_name} + +class EmptyCollectionTest(unittest.TestCase): + def test_empty_input(self): + # Test with empty list + result = {function_name}([]) + self.assertIsNotNone(result, "Function should handle empty list") + + # Test with empty string + result = {function_name}("") + self.assertIsNotNone(result, "Function should handle empty string") + + # Test with empty dict + result = {function_name}({{}}) + self.assertIsNotNone(result, "Function should handle empty dict") + +if __name__ == '__main__': + unittest.main() +""" + elif edge_case == "null/None values": + return f""" +import unittest +import sys +import os + +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from solution import {function_name} + +class NoneValueTest(unittest.TestCase): + def test_none_input(self): + # Test with None as input + result = {function_name}(None) + self.assertIsNotNone(result, "Function should handle None input") + + # Test with list containing None + result = {function_name}([1, None, 3]) + self.assertIsNotNone(result, "Function should handle list with None values") + +if __name__ == '__main__': + unittest.main() +""" + elif edge_case == "boundary values (min/max)": + return f""" +# recursive_swe_bench/task_generators/bug_fixing.py (completion) + +import unittest +import sys +import os +import sys + +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from solution import {function_name} + +class BoundaryValueTest(unittest.TestCase): + def test_min_max_values(self): + # Test with minimum integer + min_int = -sys.maxsize - 1 + result = {function_name}(min_int) + self.assertIsNotNone(result, "Function should handle minimum integer") + + # Test with maximum integer + max_int = sys.maxsize + result = {function_name}(max_int) + self.assertIsNotNone(result, "Function should handle maximum integer") + + # Test with very large list + large_list = list(range(10000)) + result = {function_name}(large_list) + self.assertIsNotNone(result, "Function should handle very large inputs") + +if __name__ == '__main__': + unittest.main() +""" + elif edge_case == "negative numbers": + return f""" +import unittest +import sys +import os + +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from solution import {function_name} + +class NegativeNumberTest(unittest.TestCase): + def test_negative_numbers(self): + # Test with negative number + result = {function_name}(-1) + self.assertIsNotNone(result, "Function should handle negative numbers") + + # Test with list of negative numbers + result = {function_name}([-1, -2, -3]) + self.assertIsNotNone(result, "Function should handle lists of negative numbers") + + # Test with mixed positive and negative + result = {function_name}([-1, 0, 1]) + self.assertIsNotNone(result, "Function should handle mixed positive 
and negative") + +if __name__ == '__main__': + unittest.main() +""" + else: + # Generic edge case test + return f""" +import unittest +import sys +import os + +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from solution import {function_name} + +class EdgeCaseTest(unittest.TestCase): + def test_edge_case_{edge_case.replace(' ', '_')}(self): + # Test edge case: {edge_case} + # This is a placeholder test that needs to be customized for the specific edge case + self.assertTrue(True, "Edge case test not implemented") + +if __name__ == '__main__': + unittest.main() +""" + + def _generate_performance_test(self, constraint: str, code_context: Dict[str, Any]) -> str: + """ + Generate a performance test based on a constraint. + + Args: + constraint: The performance constraint + code_context: The code context containing information about the problem + + Returns: + A test script for the performance constraint + """ + # Extract function names from the code context + function_names = [] + if "code" in code_context: + function_names = re.findall(r'def\s+(\w+)', code_context["code"]) + + if not function_names: + return None + + # Choose a function to test + function_name = random.choice(function_names) + + if "time complexity" in constraint: + return f""" +import unittest +import sys +import os +import time +import random + +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from solution import {function_name} + +class PerformanceTest(unittest.TestCase): + def test_time_complexity(self): + # Test for {constraint} + sizes = [100, 1000, 10000] + times = [] + + for size in sizes: + # Generate input of the given size + input_data = [random.randint(0, 1000) for _ in range(size)] + + # Measure execution time + start_time = time.time() + {function_name}(input_data) + end_time = time.time() + + times.append(end_time - start_time) + + # Check if time grows appropriately + # For O(n), time should grow linearly with input size + # For O(log n), time should grow logarithmically + # This is a simplified check and might need adjustment + if "log n" in "{constraint}": + # For logarithmic time, the ratio of times should decrease + ratio1 = times[1] / times[0] + ratio2 = times[2] / times[1] + self.assertLess(ratio2, ratio1 * 1.5, + f"Growth rate appears super-logarithmic: {times}") + else: # Assume linear or better + # For linear time, the ratio of times should be roughly equal to ratio of sizes + ratio1 = times[1] / times[0] + size_ratio1 = sizes[1] / sizes[0] + + ratio2 = times[2] / times[1] + size_ratio2 = sizes[2] / sizes[1] + + self.assertLess(ratio1, size_ratio1 * 1.5, + f"First growth rate appears super-linear: {times}") + self.assertLess(ratio2, size_ratio2 * 1.5, + f"Second growth rate appears super-linear: {times}") + +if __name__ == '__main__': + unittest.main() +""" + elif "execution time" in constraint: + return f""" +import unittest +import sys +import os +import time +import random + +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from solution import {function_name} + +class PerformanceTest(unittest.TestCase): + def test_execution_time(self): + # Test for {constraint} + # Generate a large input + input_data = [random.randint(0, 1000) for _ in range(10000)] + + # Measure execution time + start_time = time.time() + {function_name}(input_data) + end_time = time.time() + + execution_time = (end_time - start_time) * 1000 # Convert to ms + + self.assertLess(execution_time, 100, + f"Execution time exceeded 100ms: 
{execution_time:.2f}ms") + +if __name__ == '__main__': + unittest.main() +""" + elif "memory usage" in constraint: + return f""" +import unittest +import sys +import os +import psutil +import random + +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from solution import {function_name} + +class MemoryUsageTest(unittest.TestCase): + def test_memory_usage(self): + # Test for {constraint} + # Note: This is an approximate test and may not be accurate in all environments + + # Get current process + process = psutil.Process(os.getpid()) + + # Measure memory before + memory_before = process.memory_info().rss / 1024 / 1024 # MB + + # Generate a large input + input_data = [random.randint(0, 1000) for _ in range(100000)] + + # Run function + {function_name}(input_data) + + # Measure memory after + memory_after = process.memory_info().rss / 1024 / 1024 # MB + + # Calculate memory usage + memory_used = memory_after - memory_before + + # A crude approximation, adjust as needed + self.assertLess(memory_used, 10, + f"Memory usage seems high: {memory_used:.2f}MB") + +if __name__ == '__main__': + unittest.main() +""" + else: + # Generic performance test + return f""" +import unittest +import sys +import os +import time +import random + +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from solution import {function_name} + +class PerformanceTest(unittest.TestCase): + def test_performance(self): + # Test for {constraint} + # This is a placeholder test that needs to be customized for the specific constraint + + # Generate a large input + input_data = [random.randint(0, 1000) for _ in range(10000)] + + # Measure execution time + start_time = time.time() + {function_name}(input_data) + end_time = time.time() + + execution_time = end_time - start_time + + # Just log the time for now + print(f"Execution time: {execution_time:.4f} seconds") + self.assertTrue(True, "Performance test completed") + +if __name__ == '__main__': + unittest.main() +""" + + def _generate_functionality_test(self, expansion: str, code_context: Dict[str, Any]) -> str: + """ + Generate a test for expanded functionality. 
+ + Args: + expansion: The functionality expansion + code_context: The code context containing information about the problem + + Returns: + A test script for the expanded functionality + """ + # Extract function names from the code context + function_names = [] + if "code" in code_context: + function_names = re.findall(r'def\s+(\w+)', code_context["code"]) + + if not function_names: + return None + + # Choose a function to test + function_name = random.choice(function_names) + + if "different input types" in expansion: + return f""" +import unittest +import sys +import os +import json +from collections import namedtuple + +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from solution import {function_name} + +class InputTypesTest(unittest.TestCase): + def test_different_input_types(self): + # Test with different types of inputs + + # Test with list + list_input = [1, 2, 3] + list_result = {function_name}(list_input) + self.assertIsNotNone(list_result, "Function should handle list input") + + # Test with tuple + tuple_input = (1, 2, 3) + tuple_result = {function_name}(tuple_input) + self.assertIsNotNone(tuple_result, "Function should handle tuple input") + + # Test with set + set_input = {{1, 2, 3}} + set_result = {function_name}(set_input) + self.assertIsNotNone(set_result, "Function should handle set input") + + # Test with dictionary + dict_input = {{"a": 1, "b": 2, "c": 3}} + dict_result = {function_name}(dict_input) + self.assertIsNotNone(dict_result, "Function should handle dictionary input") + + # Test with JSON string + json_input = '{{"data": [1, 2, 3]}}' + json_result = {function_name}(json_input) + self.assertIsNotNone(json_result, "Function should handle JSON string") + + # Test with custom object + Point = namedtuple('Point', ['x', 'y']) + obj_input = Point(1, 2) + obj_result = {function_name}(obj_input) + self.assertIsNotNone(obj_result, "Function should handle custom object") + +if __name__ == '__main__': + unittest.main() +""" + elif "parameterized behavior" in expansion: + return f""" +import unittest +import sys +import os + +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from solution import {function_name} + +class ParameterizedTest(unittest.TestCase): + def test_parameterized_behavior(self): + # Test function with different parameters + + # Base case with default parameters + base_input = [1, 2, 3] + base_result = {function_name}(base_input) + + # The function should now accept additional parameters + # These are example parameters, adjust based on the specific function + + # With sorting parameter + try: + sorted_result = {function_name}(base_input, sort=True) + self.assertIsNotNone(sorted_result, "Function should handle sort parameter") + except TypeError as e: + self.fail(f"Function does not support sort parameter: {{e}}") + + # With filtering parameter + try: + filtered_result = {function_name}(base_input, filter_fn=lambda x: x > 1) + self.assertIsNotNone(filtered_result, "Function should handle filter_fn parameter") + except TypeError as e: + self.fail(f"Function does not support filter_fn parameter: {{e}}") + + # With formatting parameter + try: + formatted_result = {function_name}(base_input, format="json") + self.assertIsNotNone(formatted_result, "Function should handle format parameter") + except TypeError as e: + self.fail(f"Function does not support format parameter: {{e}}") + +if __name__ == '__main__': + unittest.main() +""" + elif "additional output formats" in expansion: + return f""" +import 
unittest +import sys +import os +import json + +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from solution import {function_name} + +class OutputFormatsTest(unittest.TestCase): + def test_output_formats(self): + # Test function with different output formats + input_data = [1, 2, 3] + + # Original format + original_result = {function_name}(input_data) + + # The function should now support different output formats + # These are example formats, adjust based on the specific function + + # JSON format + try: + json_result = {function_name}(input_data, format="json") + # Check if it's valid JSON + try: + json_obj = json.loads(json_result) if isinstance(json_result, str) else json_result + self.assertIsNotNone(json_obj, "JSON result should be valid") + except json.JSONDecodeError: + self.fail("JSON result is not valid") + except TypeError as e: + self.fail(f"Function does not support JSON format: {{e}}") + + # CSV format + try: + csv_result = {function_name}(input_data, format="csv") + self.assertIsNotNone(csv_result, "CSV result should not be None") + if isinstance(csv_result, str): + self.assertIn(",", csv_result, "CSV result should contain commas") + except TypeError as e: + self.fail(f"Function does not support CSV format: {{e}}") + + # XML format + try: + xml_result = {function_name}(input_data, format="xml") + self.assertIsNotNone(xml_result, "XML result should not be None") + if isinstance(xml_result, str): + self.assertIn("<", xml_result, "XML result should contain tags") + self.assertIn(">", xml_result, "XML result should contain tags") + except TypeError as e: + self.fail(f"Function does not support XML format: {{e}}") + +if __name__ == '__main__': + unittest.main() +""" + else: + # Generic functionality expansion test + return f""" +import unittest +import sys +import os + +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from solution import {function_name} + +class ExpandedFunctionalityTest(unittest.TestCase): + def test_expanded_functionality(self): + # Test for {expansion} + # This is a placeholder test that needs to be customized for the specific expansion + + # Basic test to verify the function exists + input_data = [1, 2, 3] + result = {function_name}(input_data) + self.assertIsNotNone(result, "Function should return a result") + + # You need to add specific tests for the expanded functionality + +if __name__ == '__main__': + unittest.main() +""" + + def _calculate_adaptation_vector(self, solution: str, result: EvaluationResult, feedback: Feedback) -> List[float]: + """ + Calculate an adaptation vector based on the solution, result, and feedback. + + The adaptation vector encodes how the problem should evolve in future iterations, + capturing dimensions like difficulty, bug type emphasis, and feedback focus. 
+ + Args: + solution: The current solution + result: The evaluation results + feedback: The feedback provided + + Returns: + An adaptation vector (list of floats) + """ + # Initialize adaptation vector with zeros + # Dimensions: + # [0] - difficulty adjustment + # [1] - syntax vs logical bug emphasis + # [2] - performance focus + # [3] - edge case focus + # [4] - requirement expansion + adaptation_vector = [0.0] * 5 + + # Adjust difficulty based on score + if result.score > 0.95: + adaptation_vector[0] = 0.2 # Increase difficulty significantly + elif result.score > 0.8: + adaptation_vector[0] = 0.1 # Increase difficulty moderately + elif result.score > 0.6: + adaptation_vector[0] = 0.0 # Maintain current difficulty + elif result.score > 0.4: + adaptation_vector[0] = -0.1 # Decrease difficulty moderately + else: + adaptation_vector[0] = -0.2 # Decrease difficulty significantly + + # Adjust bug type emphasis based on error types + syntax_issues = sum(1 for issue in feedback.issues if issue.get("error_type") == "syntax") + logical_issues = sum(1 for issue in feedback.issues if issue.get("type") == "test_failure") + + if syntax_issues > logical_issues: + adaptation_vector[1] = -0.1 # Move toward more logical bugs + elif logical_issues > syntax_issues: + adaptation_vector[1] = 0.1 # Move toward more syntax bugs + + # Adjust performance focus based on execution time and metrics + if result.metrics and "execution_time" in result.metrics: + if result.metrics["execution_time"] > self.config.get("performance_threshold", 1.0): + adaptation_vector[2] = 0.2 # Increase performance focus + else: + adaptation_vector[2] = -0.1 # Decrease performance focus + + # Adjust edge case focus based on test failures + if result.test_results: + edge_case_failures = sum(1 for test_name, test_result in result.test_results.items() + if not test_result["passed"] and "edge" in test_name.lower()) + if edge_case_failures > 0: + adaptation_vector[3] = 0.2 # Increase edge case focus + else: + adaptation_vector[3] = 0.0 # Maintain current edge case focus + + # Adjust requirement expansion based on current state + current_requirements = len(self.state.requirements) + if current_requirements < 3: + adaptation_vector[4] = 0.1 # Increase likelihood of adding requirements + elif current_requirements >= 5: + adaptation_vector[4] = -0.1 # Decrease likelihood of adding requirements + + return adaptation_vector + + +class DefaultTestRunner: + """Default test runner for evaluating bug fixes.""" + + def run_tests(self, solution_file: Path, test_files: List[Path], code_context: Dict[str, Any]) -> Dict[str, Any]: + """ + Run tests against a solution file. 
+ + Args: + solution_file: Path to the solution file + test_files: List of test file paths + code_context: Context information about the code + + Returns: + Dictionary of test results + """ + # Initialize results + results = { + "all_passed": True, + "passed_tests": 0, + "total_tests": 0, + "tests": {}, + "execution": { + "success": True, + "error": None, + "stdout": None, + "stderr": None + }, + "execution_time": 0.0 + } + + # Import the solution to check for syntax errors + try: + # Check if the solution file exists + if not solution_file.exists(): + results["execution"]["success"] = False + results["execution"]["error"] = "Solution file not found" + results["all_passed"] = False + return results + + # Try to import the module to test for syntax errors + sys.path.insert(0, str(solution_file.parent)) + import importlib.util + spec = importlib.util.spec_from_file_location("solution", solution_file) + solution_module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(solution_module) + + # Check for required functions + if "required_functions" in code_context: + for func_name in code_context["required_functions"]: + if not hasattr(solution_module, func_name): + results["execution"]["success"] = False + results["execution"]["error"] = f"Required function '{func_name}' not found" + results["all_passed"] = False + return results + + except Exception as e: + results["execution"]["success"] = False + results["execution"]["error"] = str(e) + results["all_passed"] = False + return results + + # Run each test file + for test_file in test_files: + # Skip if the test file doesn't exist + if not test_file.exists(): + continue + + # Run the test file + import unittest + import io + from contextlib import redirect_stdout, redirect_stderr + + # Create a test loader and find tests in the file + loader = unittest.TestLoader() + try: + tests = loader.discover(str(test_file.parent), pattern=test_file.name) + + # Count the number of test cases + test_cases = 0 + for suite in tests: + for test_case in suite: + test_cases += test_case.countTestCases() + + results["total_tests"] += test_cases + + # Run the tests + runner = unittest.TextTestRunner(verbosity=2) + + # Capture stdout and stderr + stdout_buffer = io.StringIO() + stderr_buffer = io.StringIO() + + with redirect_stdout(stdout_buffer), redirect_stderr(stderr_buffer): + test_result = runner.run(tests) + + stdout = stdout_buffer.getvalue() + stderr = stderr_buffer.getvalue() + + # Check if all tests passed + if not test_result.wasSuccessful(): + results["all_passed"] = False + + # Count passed tests + passed_tests = test_cases - len(test_result.failures) - len(test_result.errors) + results["passed_tests"] += passed_tests + + # Store individual test results + test_name = test_file.stem + results["tests"][test_name] = { + "passed": test_result.wasSuccessful(), + "failures": len(test_result.failures), + "errors": len(test_result.errors), + "skipped": len(test_result.skipped), + "total": test_cases, + "passed_count": passed_tests, + "stdout": stdout, + "stderr": stderr + } + + # Extract more detailed information about failures + for failure in test_result.failures: + test_id = failure[0].id() + failure_message = failure[1] + + # Extract expected and actual values if available + import re + expected_match = re.search(r'Expected\s*:(.+)', failure_message) + actual_match = re.search(r'Actual\s*:(.+)', failure_message) + + expected = expected_match.group(1).strip() if expected_match else None + actual = actual_match.group(1).strip() if actual_match 
else None + + if test_id not in results["tests"]: + results["tests"][test_id] = {} + + results["tests"][test_id].update({ + "passed": False, + "message": failure_message, + "expected": expected, + "actual": actual + }) + + except Exception as e: + # If the test file itself has errors + results["all_passed"] = False + results["tests"][test_file.stem] = { + "passed": False, + "error": str(e), + "failures": 1, + "errors": 1, + "skipped": 0, + "total": 1, + "passed_count": 0 + } + results["total_tests"] += 1 + + return results + + +class BugFixingTaskGenerator: + """Generator for bug fixing tasks.""" + + def __init__(self, config: Dict[str, Any] = None): + """ + Initialize the bug fixing task generator. + + Args: + config: Configuration options + """ + self.config = config or {} + self.difficulty_levels = self.config.get( + "difficulty_levels", + ["easy", "medium", "hard", "expert"] + ) + self.bug_categories = self.config.get( + "bug_categories", + [ + BugCategory.SYNTAX, + BugCategory.LOGICAL, + BugCategory.EDGE_CASE, + BugCategory.PERFORMANCE + ] + ) + self.test_templates = self._load_test_templates() + + def generate_task(self, difficulty: str = None, bug_categories: List[str] = None) -> BugFixingTask: + """ + Generate a new bug fixing task. + + Args: + difficulty: The difficulty level (easy, medium, hard, expert) + bug_categories: List of bug categories to include + + Returns: + A new bug fixing task + """ + # Choose difficulty if not specified + if difficulty is None: + difficulty = random.choice(self.difficulty_levels) + + # Choose bug categories if not specified + if bug_categories is None: + num_categories = random.randint(1, 3) + bug_categories = random.sample(self.bug_categories, num_categories) + + # Generate a problem based on difficulty and bug categories + problem_state = self._generate_problem_state(difficulty, bug_categories) + + # Create config for the task + task_config = { + "difficulty": difficulty, + "bug_categories": bug_categories, + "convergence_criteria": { + "score_threshold": 0.95, + "min_iterations": 1, + "max_iterations": self.config.get("max_iterations", 5), + "score_delta_threshold": 0.05, + "consecutive_plateau_limit": 2 + }, + "score_weights": { + "test": 0.7, + "execution": 0.3 + }, + "performance_threshold": 1.0, + "complexity_threshold": 0.7 + } + + # Create and return the task + return BugFixingTask(problem_state, task_config) + + def _generate_problem_state(self, difficulty: str, bug_categories: List[str]) -> ProblemState: + """ + Generate a problem state for the given difficulty and bug categories. 
+ + Args: + difficulty: The difficulty level + bug_categories: List of bug categories + + Returns: + A problem state for the task + """ + # Choose a template based on difficulty and bug categories + template = self._choose_template(difficulty, bug_categories) + + # Create a copy of the template + problem_state = copy.deepcopy(template) + + # Generate a unique ID + problem_state.problem_id = str(uuid.uuid4()) + + # Initialize evolution stage and adaptation vector + problem_state.evolution_stage = 0 + problem_state.adaptation_vector = [0.0] * 5 + + # Adjust difficulty value based on level + difficulty_values = { + "easy": 0.25, + "medium": 0.5, + "hard": 0.75, + "expert": 0.9 + } + problem_state.difficulty = difficulty_values.get(difficulty, 0.5) + + # Insert bugs based on categories + for category in bug_categories: + self._insert_bug(problem_state, category) + + # Update description to reflect the current state + problem_state.description = self._generate_description(problem_state) + + return problem_state + + def _choose_template(self, difficulty: str, bug_categories: List[str]) -> ProblemState: + """ + Choose a template that matches the difficulty and bug categories. + + Args: + difficulty: The difficulty level + bug_categories: List of bug categories + + Returns: + A template problem state + """ + # In a real implementation, this would load from a database of templates + # For now, we'll generate a simple template + + # Generate code context with a sample function + code = self._generate_template_code(difficulty, bug_categories) + tests = self._generate_template_tests(code) + + # Create a basic problem state + return ProblemState( + problem_id="template", + description="Fix the bugs in the given code.", + code_context={ + "code": code, + "tests": tests, + "bug_count": 0, + "bug_categories": [] + }, + requirements=[ + { + "type": "functional", + "description": "The code should pass all the provided tests.", + "difficulty": 0.3 + } + ], + difficulty=0.5, # Will be overridden + evolution_stage=0, + adaptation_vector=[0.0] * 5 + ) + + def _generate_template_code(self, difficulty: str, bug_categories: List[str]) -> str: + """ + Generate template code based on difficulty and bug categories. 
+
+        Args:
+            difficulty: The difficulty level
+            bug_categories: List of bug categories
+            
+        Returns:
+            Template code
+        """
+        # For demonstration, we'll use a few predefined templates
+        templates = {
+            "easy": """
+def calculate_sum(numbers):
+    \"\"\"Calculate the sum of a list of numbers.\"\"\"
+    total = 0
+    for num in numbers:
+        total += num
+    return total
+
+def calculate_average(numbers):
+    \"\"\"Calculate the average of a list of numbers.\"\"\"
+    if not numbers:
+        return 0
+    return calculate_sum(numbers) / len(numbers)
+""",
+            "medium": """
+def find_most_frequent(items):
+    \"\"\"Find the most frequently occurring item in a list.\"\"\"
+    if not items:
+        return None
+        
+    counts = {}
+    for item in items:
+        if item in counts:
+            counts[item] += 1
+        else:
+            counts[item] = 1
+            
+    max_count = 0
+    max_item = None
+    for item, count in counts.items():
+        if count > max_count:
+            max_count = count
+            max_item = item
+            
+    return max_item
+
+def binary_search(sorted_list, target):
+    \"\"\"Perform binary search on a sorted list.\"\"\"
+    left = 0
+    right = len(sorted_list) - 1
+    
+    while left <= right:
+        mid = (left + right) // 2
+        if sorted_list[mid] == target:
+            return mid
+        elif sorted_list[mid] < target:
+            left = mid + 1
+        else:
+            right = mid - 1
+            
+    return -1 # Target not found
+""",
+            "hard": """
+def merge_sort(arr):
+    \"\"\"Sort an array using the merge sort algorithm.\"\"\"
+    if len(arr) <= 1:
+        return arr
+        
+    # Split the array into two halves
+    mid = len(arr) // 2
+    left_half = arr[:mid]
+    right_half = arr[mid:]
+    
+    # Recursively sort both halves
+    left_half = merge_sort(left_half)
+    right_half = merge_sort(right_half)
+    
+    # Merge the sorted halves
+    return merge(left_half, right_half)
+
+def merge(left, right):
+    \"\"\"Merge two sorted arrays.\"\"\"
+    result = []
+    i = j = 0
+    
+    # Compare elements from both arrays and add the smaller one to the result
+    while i < len(left) and j < len(right):
+        if left[i] <= right[j]:
+            result.append(left[i])
+            i += 1
+        else:
+            result.append(right[j])
+            j += 1
+            
+    # Add any remaining elements
+    result.extend(left[i:])
+    result.extend(right[j:])
+    
+    return result
+
+def quicksort(arr):
+    \"\"\"Sort an array using the quicksort algorithm.\"\"\"
+    if len(arr) <= 1:
+        return arr
+        
+    # Choose the pivot (using the first element for simplicity)
+    pivot = arr[0]
+    
+    # Partition the array
+    less = [x for x in arr[1:] if x <= pivot]
+    greater = [x for x in arr[1:] if x > pivot]
+    
+    # Recursively sort the partitions and combine
+    return quicksort(less) + [pivot] + quicksort(greater)
+""",
+            "expert": """
+class Node:
+    \"\"\"Node in a binary tree.\"\"\"
+    def __init__(self, value):
+        self.value = value
+        self.left = None
+        self.right = None
+
+def build_binary_tree(values):
+    \"\"\"Build a binary tree from a list of values.\"\"\"
+    if not values:
+        return None
+        
+    root = Node(values[0])
+    queue = [root]
+    i = 1
+    
+    while queue and i < len(values):
+        node = queue.pop(0)
+        
+        # Add left child
+        if i < len(values) and values[i] is not None:
+            node.left = Node(values[i])
+            queue.append(node.left)
+        i += 1
+        
+        # Add right child
+        if i < len(values) and values[i] is not None:
+            node.right = Node(values[i])
+            queue.append(node.right)
+        i += 1
+        
+    return root
+
+def is_balanced(root):
+    \"\"\"Check if a binary tree is balanced.\"\"\"
+    def height(node):
+        if not node:
+            return 0
+        return max(height(node.left), height(node.right)) + 1
+        
+    def is_balanced_helper(node):
+        if not node:
+            return True
+            
+        left_height = height(node.left)
+        right_height = height(node.right)
+        
+        if abs(left_height - right_height) > 1:
+            return False
+            
+        return is_balanced_helper(node.left) and is_balanced_helper(node.right)
+        
+    return is_balanced_helper(root)
+
+def find_lca(root, p, q):
+    \"\"\"Find the lowest common ancestor of two nodes in a binary tree.\"\"\"
+    if not root:
+        return None
+        
+    if root.value == p or root.value == q:
+        return root
+        
+    left_lca = find_lca(root.left, p, q)
+    right_lca = find_lca(root.right, p, q)
+    
+    if left_lca and right_lca:
+        return root
+        
+    return left_lca if left_lca else right_lca
+"""
+        }
+        
+        # Choose a template based on difficulty
+        if difficulty in templates:
+            return templates[difficulty]
+        else:
+            return templates["medium"] # Default to medium if difficulty not found
+
+    def _generate_template_tests(self, code: str) -> List[Dict[str, Any]]:
+        """
+        Generate template tests based on the code.
+        
+        Args:
+            code: The template code
+            
+        Returns:
+            List of test dictionaries
+        """
+        # Extract function names from the code
+        function_names = re.findall(r'def\s+(\w+)', code)
+        
+        # Generate tests for each function
+        tests = []
+        for func_name in function_names:
+            test_content = self._generate_test_for_function(func_name)
+            if test_content:
+                tests.append({
+                    "name": f"test_{func_name}",
+                    "content": test_content,
+                    "description": f"Test for {func_name} function"
+                })
+        
+        return tests
+    
+    def _generate_test_for_function(self, func_name: str) -> str:
+        """
+        Generate a test for a specific function.
+        
+        Args:
+            func_name: The name of the function to test
+            
+        Returns:
+            Test content
+        """
+        # Check if we have a template for this function
+        if func_name in self.test_templates:
+            return self.test_templates[func_name]
+        
+        # Generate a basic test based on the function name
+        if "sum" in func_name.lower():
+            return """
+import unittest
+import sys
+import os
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from solution import calculate_sum
+
+class TestCalculateSum(unittest.TestCase):
+    def test_calculate_sum(self):
+        self.assertEqual(calculate_sum([1, 2, 3, 4, 5]), 15)
+        self.assertEqual(calculate_sum([]), 0)
+        self.assertEqual(calculate_sum([-1, -2, -3]), -6)
+
+if __name__ == '__main__':
+    unittest.main()
+"""
+        elif "average" in func_name.lower():
+            return """
+import unittest
+import sys
+import os
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from solution import calculate_average
+
+class TestCalculateAverage(unittest.TestCase):
+    def test_calculate_average(self):
+        self.assertEqual(calculate_average([1, 2, 3, 4, 5]), 3)
+        self.assertEqual(calculate_average([]), 0)
+        self.assertEqual(calculate_average([10]), 10)
+
+if __name__ == '__main__':
+    unittest.main()
+"""
+        elif "frequent" in func_name.lower():
+            return """
+import unittest
+import sys
+import os
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from solution import find_most_frequent
+
+class TestFindMostFrequent(unittest.TestCase):
+    def test_find_most_frequent(self):
+        self.assertEqual(find_most_frequent([1, 2, 2, 3, 3, 3, 4]), 3)
+        self.assertEqual(find_most_frequent(['a', 'b', 'a', 'c', 'a']), 'a')
+        self.assertIsNone(find_most_frequent([]))
+        self.assertEqual(find_most_frequent([5]), 5)
+
+if __name__ == '__main__':
+    unittest.main()
+"""
+        elif "search" in func_name.lower():
+            return """
+import unittest
+import sys
+import os
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from solution import binary_search + +class TestBinarySearch(unittest.TestCase): + def test_binary_search(self): + self.assertEqual(binary_search([1, 2, 3, 4, 5], 3), 2) + self.assertEqual(binary_search([1, 2, 3, 4, 5], 1), 0) + self.assertEqual(binary_search([1, 2, 3, 4, 5], 5), 4) + self.assertEqual(binary_search([1, 2, 3, 4, 5], 6), -1) + self.assertEqual(binary_search([], 5), -1) + +if __name__ == '__main__': + unittest.main() +""" + elif "sort" in func_name.lower(): + return """ +import unittest +import sys +import os + +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from solution import {0} + +class Test{1}(unittest.TestCase): + def test_sorting(self): + self.assertEqual({0}([]), []) + self.assertEqual({0}([1]), [1]) + self.assertEqual({0}([3, 1, 4, 1, 5, 9, 2, 6, 5]), [1, 1, 2, 3, 4, 5, 5, 6, 9]) + self.assertEqual({0}([9, 8, 7, 6, 5, 4, 3, 2, 1]), [1, 2, 3, 4, 5, 6, 7, 8, 9]) + self.assertEqual({0}([1, 1, 1, 1]), [1, 1, 1, 1]) + +if __name__ == '__main__': + unittest.main() +""".format(func_name, func_name.title()) + elif "balanced" in func_name.lower(): + return """ +import unittest +import sys +import os + +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from solution import Node, is_balanced + +class TestIsBalanced(unittest.TestCase): + def test_is_balanced(self): + # Create a balanced tree + # 1 + # / \\ + # 2 3 + # / \\ / \\ + # 4 5 6 7 + root = Node(1) + root.left = Node(2) + root.right = Node(3) + root.left.left = Node(4) + root.left.right = Node(5) + root.right.left = Node(6) + root.right.right = Node(7) + self.assertTrue(is_balanced(root)) + + # Create an unbalanced tree + # 1 + # / \\ + # 2 3 + # / \\ + # 4 5 + #/ + #6 + root = Node(1) + root.left = Node(2) + root.right = Node(3) + root.left.left = Node(4) + root.left.right = Node(5) + root.left.left.left = Node(6) + self.assertFalse(is_balanced(root)) + + # Empty tree is balanced + self.assertTrue(is_balanced(None)) + +if __name__ == '__main__': + unittest.main() +""" + elif "lca" in func_name.lower(): + return """ +import unittest +import sys +import os + +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from solution import Node, find_lca + +class TestFindLCA(unittest.TestCase): + def test_find_lca(self): + # Create a tree + # 1 + # / \\ + # 2 3 + # / \\ / \\ + # 4 5 6 7 + root = Node(1) + root.left = Node(2) + root.right = Node(3) + root.left.left = Node(4) + root.left.right = Node(5) + root.right.left = Node(6) + root.right.right = Node(7) + + # Test cases + self.assertEqual(find_lca(root, 4, 5).value, 2) # LCA of 4 and 5 is 2 + self.assertEqual(find_lca(root, 4, 6).value, 1) # LCA of 4 and 6 is 1 + self.assertEqual(find_lca(root, 3, 7).value, 3) # LCA of 3 and 7 is 3 + self.assertEqual(find_lca(root, 2, 7).value, 1) # LCA of 2 and 7 is 1 + +if __name__ == '__main__': + unittest.main() +""" + elif "tree" in func_name.lower(): + return """ +import unittest +import sys +import os + +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from solution import Node, build_binary_tree + +class TestBuildBinaryTree(unittest.TestCase): + def test_build_binary_tree(self): + # Test empty list + self.assertIsNone(build_binary_tree([])) + + # Test single node + root = build_binary_tree([1]) + self.assertEqual(root.value, 1) + self.assertIsNone(root.left) + self.assertIsNone(root.right) + + # Test complete tree + # 1 + # / \\ + # 2 3 + # / \\ 
/ \\ + # 4 5 6 7 + values = [1, 2, 3, 4, 5, 6, 7] + root = build_binary_tree(values) + self.assertEqual(root.value, 1) + self.assertEqual(root.left.value, 2) + self.assertEqual(root.right.value, 3) + self.assertEqual(root.left.left.value, 4) + self.assertEqual(root.left.right.value, 5) + self.assertEqual(root.right.left.value, 6) + self.assertEqual(root.right.right.value, 7) + + # Test tree with None values + # 1 + # / \\ + # 2 3 + # / / + # 4 6 + values = [1, 2, 3, 4, None, 6, None] + root = build_binary_tree(values) + self.assertEqual(root.value, 1) + self.assertEqual(root.left.value, 2) + self.assertEqual(root.right.value, 3) + self.assertEqual(root.left.left.value, 4) + self.assertIsNone(root.left.right) + self.assertEqual(root.right.left.value, 6) + self.assertIsNone(root.right.right) + +if __name__ == '__main__': + unittest.main() +""" + else: + # Generic test template + return """ +import unittest +import sys +import os + +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from solution import {0} + +class Test{1}(unittest.TestCase): + def test_{0}(self): + # TODO: Add specific test cases for {0} + # This is a placeholder test + self.assertTrue(True) + +if __name__ == '__main__': + unittest.main() +""".format(func_name, func_name.title()) + + def _load_test_templates(self) -> Dict[str, str]: + """ + Load test templates for common functions. + + Returns: + Dictionary of test templates + """ + # In a real implementation, these would be loaded from files + return { + "calculate_sum": """ +import unittest +import sys +import os + +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from solution import calculate_sum + +class TestCalculateSum(unittest.TestCase): + def test_calculate_sum(self): + self.assertEqual(calculate_sum([1, 2, 3, 4, 5]), 15) + self.assertEqual(calculate_sum([]), 0) + self.assertEqual(calculate_sum([-1, -2, -3]), -6) + +if __name__ == '__main__': + unittest.main() +""", + "calculate_average": """ +import unittest +import sys +import os + +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from solution import calculate_average + +class TestCalculateAverage(unittest.TestCase): + def test_calculate_average(self): + self.assertEqual(calculate_average([1, 2, 3, 4, 5]), 3) + self.assertEqual(calculate_average([]), 0) + self.assertEqual(calculate_average([10]), 10) + +if __name__ == '__main__': + unittest.main() +""" + } + + def _insert_bug(self, problem_state: ProblemState, bug_category: str) -> None: + """ + Insert a bug of the specified category into the problem state. 
+ + Args: + problem_state: The problem state to modify + bug_category: The category of bug to insert + """ + if "code" not in problem_state.code_context: + return + + # Parse the code to find potential bug insertion points + code = problem_state.code_context["code"] + try: + parsed_code = ast.parse(code) + except SyntaxError: + # If the code already has syntax errors, don't add more bugs + return + + # Insert different types of bugs based on the category + if bug_category == BugCategory.SYNTAX: + self._insert_syntax_bug(problem_state) + elif bug_category == BugCategory.LOGICAL: + self._insert_logical_bug(problem_state) + elif bug_category == BugCategory.PERFORMANCE: + self._insert_performance_bug(problem_state) + elif bug_category == BugCategory.EDGE_CASE: + self._insert_edge_case_bug(problem_state) + else: + # Default to logical bug + self._insert_logical_bug(problem_state) + + # Update bug count and categories + if "bug_count" not in problem_state.code_context: + problem_state.code_context["bug_count"] = 0 + problem_state.code_context["bug_count"] += 1 + + if "bug_categories" not in problem_state.code_context: + problem_state.code_context["bug_categories"] = [] + if bug_category not in problem_state.code_context["bug_categories"]: + problem_state.code_context["bug_categories"].append(bug_category) + + def _insert_syntax_bug(self, problem_state: ProblemState) -> None: + """ + Insert a syntax bug into the problem state. + + Args: + problem_state: The problem state to modify + """ + code = problem_state.code_context["code"] + lines = code.split('\n') + if not lines: + return + + # Choose a non-empty line to modify + idx = random.randint(0, len(lines) - 1) + line = lines[idx] + + # Skip empty lines or comment lines + attempts = 0 + while (not line.strip() or line.strip().startswith('#')) and attempts < 10: + idx = random.randint(0, len(lines) - 1) + line = lines[idx] + attempts += 1 + + if attempts >= 10: + # Couldn't find a suitable line, use the first non-empty line + for i, line in enumerate(lines): + if line.strip() and not line.strip().startswith('#'): + idx = i + break + else: + return # No suitable line found + + # Choose a modification type + mod_type = random.choice([ + "remove_character", + "add_character", + "swap_characters", + "change_indent" + ]) + + if mod_type == "remove_character" and line: + char_idx = random.randint(0, len(line) - 1) + lines[idx] = line[:char_idx] + line[char_idx+1:] + + elif mod_type == "add_character": + char_idx = random.randint(0, len(line)) + char = random.choice(["(", ")", "{", "}", "[", "]", ":", ";", ",", "."]) + lines[idx] = line[:char_idx] + char + line[char_idx:] + + elif mod_type == "swap_characters" and len(line) >= 2: + char_idx = random.randint(0, len(line) - 2) + lines[idx] = (line[:char_idx] + line[char_idx+1] + + line[char_idx] + line[char_idx+2:]) + + elif mod_type == "change_indent": + # Either add or remove indentation + if line.startswith(" "): + lines[idx] = line[2:] # Remove some indent + else: + lines[idx] = " " + line # Add inconsistent indent + + # Update the code + problem_state.code_context["code"] = '\n'.join(lines) + + # Add information about the bug + if "bugs" not in problem_state.code_context: + problem_state.code_context["bugs"] = [] + + problem_state.code_context["bugs"].append({ + "type": BugCategory.SYNTAX, + "line": idx + 1, + "description": f"Syntax error introduced in line {idx + 1}" + }) + + def _insert_logical_bug(self, problem_state: ProblemState) -> None: + """ + Insert a logical bug into the problem state. 
+ + Args: + problem_state: The problem state to modify + """ + code = problem_state.code_context["code"] + lines = code.split('\n') + if not lines: + return + + # Find all if statements and loops + if_statements = [] + for i, line in enumerate(lines): + if re.search(r'\bif\b|\bwhile\b|\bfor\b', line): + if_statements.append((i, line)) + + # Choose a modification type + mod_type = random.choice([ + "change_comparison", + "invert_condition", + "off_by_one", + "change_operator", + "reverse_logic" + ]) + + if if_statements: + # Choose an if statement to modify + idx, line = random.choice(if_statements) + + if mod_type == "change_comparison": + # Change comparison operators + comparisons = {"==": "!=", "!=": "==", ">": "<", "<": ">", ">=": "<=", "<=": ">="} + for op, new_op in comparisons.items(): + if op in line: + lines[idx] = line.replace(op, new_op, 1) + break + + elif mod_type == "invert_condition": + # Add or remove a "not" to invert the condition + if "not" in line: + lines[idx] = line.replace("not ", "", 1) + else: + match = re.search(r'(if|while)\s+([^:]+):', line) + if match: + condition = match.group(2) + lines[idx] = line.replace(condition, f"not ({condition})", 1) + + elif mod_type == "off_by_one": + # Introduce an off-by-one error + for op in ["+", "-"]: + if op in line: + # If there's a number after the operator, change it + match = re.search(f'\\{op}\\s*(\\d+)', line) + if match: + num = int(match.group(1)) + new_num = num + 1 if op == "+" else max(0, num - 1) + lines[idx] = line.replace(f"{op} {num}", f"{op} {new_num}", 1) + break + + elif mod_type == "change_operator": + # Change arithmetic or logical operators + operators = {"+": "-", "-": "+", "*": "/", "/": "*", "and": "or", "or": "and"} + for op, new_op in operators.items(): + if f" {op} " in line: + lines[idx] = line.replace(f" {op} ", f" {new_op} ", 1) + break + + elif mod_type == "reverse_logic": + # Reverse the logic of a compound condition + if " and " in line: + parts = line.split(" and ") + lines[idx] = line.replace(" and ".join(parts), " or ".join(parts), 1) + elif " or " in line: + parts = line.split(" or ") + lines[idx] = line.replace(" or ".join(parts), " and ".join(parts), 1) + + else: + # If no if statements found, introduce a different kind of logical error + # Find variable assignments + assignments = [] + for i, line in enumerate(lines): + if "=" in line and "==" not in line and "!=" not in line: + assignments.append((i, line)) + + if assignments: + # Choose an assignment to modify + idx, line = random.choice(assignments) + + # Modify the assignment + if "+" in line: + lines[idx] = line.replace("+", "-", 1) + elif "-" in line: + lines[idx] = line.replace("-", "+", 1) + elif "*" in line: + lines[idx] = line.replace("*", "/", 1) + elif "/" in line: + lines[idx] = line.replace("/", "*", 1) + else: + # If no arithmetic operator, change the value + match = re.search(r'=\s*(\d+)', line) + if match: + num = int(match.group(1)) + new_num = num + random.choice([-1, 1]) * random.randint(1, 3) + lines[idx] = line.replace(f"= {num}", f"= {new_num}", 1) + + # Update the code + problem_state.code_context["code"] = '\n'.join(lines) + + # Add information about the bug + if "bugs" not in problem_state.code_context: + problem_state.code_context["bugs"] = [] + + problem_state.code_context["bugs"].append({ + "type": BugCategory.LOGICAL, + "line": idx + 1, + "description": f"Logical error introduced in line {idx + 1}" + }) + + def _insert_performance_bug(self, problem_state: ProblemState) -> None: + """ + Insert a performance 
bug into the problem state.
+        
+        Args:
+            problem_state: The problem state to modify
+        """
+        code = problem_state.code_context["code"]
+        lines = code.split('\n')
+        if not lines:
+            return
+        
+        # Find functions in the code
+        functions = []
+        current_func = None
+        func_start = None
+        for i, line in enumerate(lines):
+            if line.strip().startswith("def "):
+                if current_func:
+                    functions.append((func_start, i - 1, current_func))
+                current_func = line.strip()[4:].split("(")[0]
+                func_start = i
+            elif i == len(lines) - 1 and current_func:
+                functions.append((func_start, i, current_func))
+        
+        if not functions:
+            return
+        
+        # Choose a function to modify
+        start_idx, end_idx, func_name = random.choice(functions)
+        
+        # Choose a modification type
+        mod_type = random.choice([
+            "add_nested_loop",
+            "inefficient_data_structure",
+            "redundant_computation"
+        ])
+        
+        if mod_type == "add_nested_loop":
+            # Find indentation of the function
+            for i in range(start_idx + 1, end_idx + 1):
+                if lines[i].strip():
+                    indent = len(lines[i]) - len(lines[i].lstrip())
+                    break
+            else:
+                indent = 4
+            
+            # Find a suitable place to add a nested loop
+            for i in range(start_idx + 1, end_idx + 1):
+                if "for " in lines[i] or "while " in lines[i]:
+                    # Add a nested loop after this loop
+                    inner_indent = len(lines[i]) - len(lines[i].lstrip()) + 4
+                    inner_indent_str = ' ' * inner_indent
+                    
+                    # Add an unnecessary nested loop
+                    lines.insert(i + 1, f"{inner_indent_str}for _ in range(100): # Inefficient nested loop")
+                    lines.insert(i + 2, f"{inner_indent_str}    pass")
+                    
+                    # Update indices
+                    end_idx += 2
+                    break
+            else:
+                # If no loop found, add one at the beginning of the function
+                inner_indent = indent + 4
+                inner_indent_str = ' ' * inner_indent
+                
+                # Find the first non-docstring line
+                for i in range(start_idx + 1, end_idx + 1):
+                    if lines[i].strip() and not (lines[i].strip().startswith('"""') or lines[i].strip().startswith("'''")):
+                        # Add an unnecessary loop
+                        lines.insert(i, f"{' ' * indent}for i in range(100): # Inefficient loop")
+                        lines.insert(i + 1, f"{inner_indent_str}pass")
+                        
+                        # Update indices
+                        end_idx += 2
+                        break
+        
+        elif mod_type == "inefficient_data_structure":
+            # Find indentation of the function
+            for i in range(start_idx + 1, end_idx + 1):
+                if lines[i].strip():
+                    indent = len(lines[i]) - len(lines[i].lstrip())
+                    break
+            else:
+                indent = 4
+            
+            # Find a suitable place to add inefficient data structure usage
+            for i in range(start_idx + 1, end_idx + 1):
+                if "def " not in lines[i] and lines[i].strip():
+                    # Add inefficient data structure usage after this line
+                    indent_str = ' ' * indent
+                    
+                    # Add inefficient code
+                    lines.insert(i + 1, f"{indent_str}# Inefficient data structure usage")
+                    lines.insert(i + 2, f"{indent_str}results = []")
+                    lines.insert(i + 3, f"{indent_str}for i in range(1000): # Unnecessarily large range")
+                    lines.insert(i + 4, f"{indent_str}    # Using list instead of set for lookups")
+                    lines.insert(i + 5, f"{indent_str}    if i % 10 in results: # O(n) lookup instead of O(1)")
+                    lines.insert(i + 6, f"{indent_str}        results.append(i) # Unnecessary storage")
+                    
+                    # Update indices
+                    end_idx += 6
+                    break
+        
+        elif mod_type == "redundant_computation":
+            # Find indentation of the function
+            for i in range(start_idx + 1, end_idx + 1):
+                if lines[i].strip():
+                    indent = len(lines[i]) - len(lines[i].lstrip())
+                    break
+            else:
+                indent = 4
+            
+            # Find a suitable place to add redundant computation
+            for i in range(start_idx + 1, end_idx + 1):
+                
if "for " in lines[i] or "while " in lines[i]: + # Add redundant computation inside the loop + inner_indent = len(lines[i]) - len(lines[i].lstrip()) + 4 + inner_indent_str = ' ' * inner_indent + + # Add redundant computation + lines.insert(i + 1, f"{inner_indent_str}# Redundant computation in each iteration") + lines.insert(i + 2, f"{inner_indent_str}temp_sum = 0") + lines.insert(i + 3, f"{inner_indent_str}for j in range(100): # Unnecessary nested computation") + lines.insert(i + 4, f"{inner_indent_str} temp_sum += j") + + # Update indices + end_idx += 4 + break + + # Update the code + problem_state.code_context["code"] = '\n'.join(lines) + + # Add information about the bug + if "bugs" not in problem_state.code_context: + problem_state.code_context["bugs"] = [] + + problem_state.code_context["bugs"].append({ + "type": BugCategory.PERFORMANCE, + "line": start_idx + 1, + "description": f"Performance issue introduced in function '{func_name}'" + }) + + def _insert_edge_case_bug(self, problem_state: ProblemState) -> None: + """ + Insert an edge case bug into the problem state. + + Args: + problem_state: The problem state to modify + """ + code = problem_state.code_context["code"] + lines = code.split('\n') + if not lines: + return + + # Find functions in the code + functions = [] + current_func = None + func_start = None + for i, line in enumerate(lines): + if line.strip().startswith("def "): + if current_func: + functions.append((func_start, i - 1, current_func)) + current_func = line.strip()[4:].split("(")[0] + func_start = i + elif i == len(lines) - 1 and current_func: + functions.append((func_start, i, current_func)) + + if not functions: + return + + # Choose a function to modify + start_idx, end_idx, func_name = random.choice(functions) + + # Choose a modification type + mod_type = random.choice([ + "remove_boundary_check", + "missing_edge_case", + "type_assumption" + ]) + + if mod_type == "remove_boundary_check": + # Find boundary checks (if statements with conditions that check boundaries) + boundary_checks = [] + for i in range(start_idx + 1, end_idx + 1): + if (re.search(r'if\s+.*(len|empty|<=|>=|<|>|==|!=)', lines[i]) and + (("if not " in lines[i]) or ("if len(" in lines[i]) or + ("if " in lines[i] and " == 0" in lines[i]) or + ("if " in lines[i] and " == []" in lines[i]) or + ("if " in lines[i] and " == ''" in lines[i]) or + ("if " in lines[i] and " is None" in lines[i]))): + boundary_checks.append(i) + + if boundary_checks: + # Choose a boundary check to remove + idx = random.choice(boundary_checks) + + # Comment out the boundary check + lines[idx] = f"# {lines[idx]} # Boundary check removed" + + # Comment out the body of the if statement + i = idx + 1 + while i <= end_idx and (not lines[i].strip() or len(lines[i]) - len(lines[i].lstrip()) > len(lines[idx]) - len(lines[idx].lstrip())): + lines[i] = f"# {lines[i]}" + i += 1 + else: + # If no boundary check found, add code that assumes a non-empty input + # Find the first non-docstring line in the function + for i in range(start_idx + 1, end_idx + 1): + if lines[i].strip() and not (lines[i].strip().startswith('"""') or lines[i].strip().startswith("'''")): + indent = len(lines[i]) - len(lines[i].lstrip()) + indent_str = ' ' * indent + + # Add code that assumes non-empty input + lines.insert(i, f"{indent_str}# Missing check for empty input") + lines.insert(i + 1, f"{indent_str}first_item = items[0] # Will fail on empty input") + + # Update indices + end_idx += 2 + break + + elif mod_type == "missing_edge_case": + # Find a suitable 
place to insert the bug + for i in range(start_idx + 1, end_idx + 1): + if ("/" in lines[i] or + "if " in lines[i] and "==" in lines[i] or + "if " in lines[i] and "!=" in lines[i]): + + if "/" in lines[i] and not re.search(r'if\s+.*!=\s*0', lines[i-1]): + # Add code that doesn't check for zero division + indent = len(lines[i]) - len(lines[i].lstrip()) + indent_str = ' ' * indent + + # Extract the denominator + match = re.search(r'/\s*(\w+)', lines[i]) + if match: + denominator = match.group(1) + + # Comment out any existing check + j = i - 1 + while j >= start_idx and len(lines[j]) - len(lines[j].lstrip()) >= indent: + if f"if {denominator}" in lines[j] and "== 0" in lines[j]: + lines[j] = f"# {lines[j]} # Zero division check removed" + j -= 1 + + # Add a comment about the missing check + lines.insert(i, f"{indent_str}# Missing check for zero division") + + # Update indices + end_idx += 1 + break + + elif ("==" in lines[i] or "!=" in lines[i]) and "None" not in lines[i]: + # Comment out edge case check + lines[i] = f"# {lines[i]} # Edge case check removed" + break + else: + # If no suitable place found, add code that doesn't handle an edge case + # Find the first non-docstring line in the function + for i in range(start_idx + 1, end_idx + 1): + if lines[i].strip() and not (lines[i].strip().startswith('"""') or lines[i].strip().startswith("'''")): + indent = len(lines[i]) - len(lines[i].lstrip()) + indent_str = ' ' * indent + + # Add code that doesn't handle an edge case + lines.insert(i, f"{indent_str}# Missing handling for edge cases") + lines.insert(i + 1, f"{indent_str}# This function doesn't handle special cases properly") + + # Update indices + end_idx += 2 + break + + elif mod_type == "type_assumption": + # Find a suitable place to insert a type assumption bug + for i in range(start_idx + 1, end_idx + 1): + if re.search(r'for\s+\w+\s+in\s+\w+', lines[i]) or "=" in lines[i] and "[" in lines[i]: + # Extract the variable name + var_match = re.search(r'for\s+\w+\s+in\s+(\w+)', lines[i]) + if not var_match: + var_match = re.search(r'(\w+)\s*=', lines[i]) + + if var_match: + var_name = var_match.group(1) + indent = len(lines[i]) - len(lines[i].lstrip()) + indent_str = ' ' * indent + + # Add code that assumes a specific type + lines.insert(i + 1, f"{indent_str}# Type assumption: {var_name} is assumed to be a list") + lines.insert(i + 2, f"{indent_str}if len({var_name}) > 0: # Will fail if {var_name} doesn't support len()") + lines.insert(i + 3, f"{indent_str} first = {var_name}[0] # Will fail if {var_name} is not subscriptable") + + # Update indices + end_idx += 3 + break + else: + # If no suitable place found, add code at the beginning of the function + for i in range(start_idx + 1, end_idx + 1): + if lines[i].strip() and not (lines[i].strip().startswith('"""') or lines[i].strip().startswith("'''")): + indent = len(lines[i]) - len(lines[i].lstrip()) + indent_str = ' ' * indent + + # Extract parameter name + param_match = re.search(r'def\s+\w+\s*\(\s*(\w+)', lines[start_idx]) + param_name = param_match.group(1) if param_match else "input_data" + + # Add code that assumes a specific type + lines.insert(i, f"{indent_str}# Type assumption: {param_name} is assumed to be a specific type") + lines.insert(i + 1, f"{indent_str}{param_name}_str = str({param_name}) # Will fail if {param_name} can't be converted to string") + + # Update indices + end_idx += 2 + break + + # Update the code + problem_state.code_context["code"] = '\n'.join(lines) + + # Add information about the bug + if "bugs" not in 
problem_state.code_context: + problem_state.code_context["bugs"] = [] + + problem_state.code_context["bugs"].append({ + "type": BugCategory.EDGE_CASE, + "line": start_idx + 1, + "description": f"Edge case bug introduced in function '{func_name}'" + }) + + def _generate_description(self, problem_state: ProblemState) -> str: + """ + Generate a description for the current problem state. + + Args: + problem_state: The problem state + + Returns: + A descriptive prompt for the problem + """ + # Base description + bug_count = problem_state.code_context.get("bug_count", 0) + plural = "bugs" if bug_count != 1 else "bug" + + base_desc = ( + f"Fix the {plural} in the code below. " + f"There {'are' if bug_count != 1 else 'is'} {bug_count} {plural} to find and fix." + ) + + # Add information about bug categories + if "bug_categories" in problem_state.code_context: + categories = problem_state.code_context["bug_categories"] + if categories: + category_desc = ", ".join(categories) + base_desc += f"\n\nThe code contains the following types of issues: {category_desc}." + + # Add requirements + if problem_state.requirements: + base_desc += "\n\nRequirements:" + for i, req in enumerate(problem_state.requirements): + base_desc += f"\n{i+1}. {req['description']}" + + # Add difficulty level + difficulty_desc = "easy" + if problem_state.difficulty > 0.3 and problem_state.difficulty <= 0.6: + difficulty_desc = "moderate" + elif problem_state.difficulty > 0.6 and problem_state.difficulty <= 0.8: + difficulty_desc = "challenging" + elif problem_state.difficulty > 0.8: + difficulty_desc = "very challenging" + + base_desc += f"\n\nThis is a {difficulty_desc} bug fixing task." + + return base_desc + + +# Default implementation of TestRunner for when no custom runner is provided +class DefaultTestRunner: + """ + Default test runner for evaluating solutions. + + This class runs tests against a solution file and collects the results. + """ + + def run_tests( + self, + solution_file: Path, + test_files: List[Path], + code_context: Dict[str, Any] + ) -> Dict[str, Any]: + """ + Run tests against a solution file. 
+ + Args: + solution_file: Path to the solution file + test_files: List of test file paths + code_context: Additional context about the code + + Returns: + Dictionary containing test results + """ + # Initialize results dictionary + results = { + "all_passed": True, + "passed_tests": 0, + "total_tests": 0, + "tests": {}, + "execution": { + "success": True, + "error": None, + "stdout": "", + "stderr": "" + }, + "execution_time": 0.0 + } + + # Check if solution file exists + if not solution_file.exists(): + results["execution"]["success"] = False + results["execution"]["error"] = f"Solution file not found: {solution_file}" + results["all_passed"] = False + return results + + # Try to import the solution module + try: + start_time = time.time() + + # Add solution directory to path + sys.path.insert(0, str(solution_file.parent)) + + # Import the solution module + spec = importlib.util.spec_from_file_location( + "solution", solution_file) + solution_module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(solution_module) + + # Remove the solution directory from path + sys.path.pop(0) + + # Record execution time + end_time = time.time() + results["execution_time"] = end_time - start_time + + except Exception as e: + results["execution"]["success"] = False + results["execution"]["error"] = str(e) + results["all_passed"] = False + return results + + # Run each test file + for test_file in test_files: + # Skip if the test file doesn't exist + if not test_file.exists(): + continue + + try: + # Set up test loading + loader = unittest.TestLoader() + + # Add test directory to path + sys.path.insert(0, str(test_file.parent)) + + # Capture stdout and stderr + stdout_buffer = io.StringIO() + stderr_buffer = io.StringIO() + + # Create a test suite from the test file + test_suite = loader.discover( + str(test_file.parent), + pattern=test_file.name + ) + + # Count test cases + test_count = 0 + for suite in test_suite: + for test_case in suite: + test_count += test_case.countTestCases() + + results["total_tests"] += test_count + + # Run the tests with captured output + with redirect_stdout(stdout_buffer), redirect_stderr(stderr_buffer): + test_runner = unittest.TextTestRunner(verbosity=2) + test_result = test_runner.run(test_suite) + + # Get the captured output + stdout = stdout_buffer.getvalue() + stderr = stderr_buffer.getvalue() + + # Remove the test directory from path + sys.path.pop(0) + + # Check if all tests passed + if not test_result.wasSuccessful(): + results["all_passed"] = False + + # Count passed tests + passed_tests = test_count - len(test_result.failures) - len(test_result.errors) + results["passed_tests"] += passed_tests + + # Store individual test results + test_name = test_file.stem + results["tests"][test_name] = { + "passed": test_result.wasSuccessful(), + "failures": len(test_result.failures), + "errors": len(test_result.errors), + "skipped": len(test_result.skipped), + "total": test_count, + "passed_count": passed_tests, + "stdout": stdout, + "stderr": stderr + } + + # Store details for individual test failures + for failure in test_result.failures + test_result.errors: + test_id = failure[0].id().split('.')[-1] + failure_message = failure[1] + + # Try to extract expected and actual values + expected_match = re.search(r'Expected\s*:(.+)', failure_message) + actual_match = re.search(r'Actual\s*:(.+)', failure_message) + + expected = expected_match.group(1).strip() if expected_match else None + actual = actual_match.group(1).strip() if actual_match else None + + if test_id 
not in results["tests"]: + results["tests"][test_id] = {} + + results["tests"][test_id].update({ + "passed": False, + "message": failure_message, + "expected": expected, + "actual": actual + }) + + except Exception as e: + # If there's an error in the test file itself + results["all_passed"] = False + test_name = test_file.stem + results["tests"][test_name] = { + "passed": False, + "error": str(e), + "failures": 0, + "errors": 1, + "skipped": 0, + "total": 1, + "passed_count": 0 + } + results["total_tests"] += 1 + + return results
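+
+
+# ---------------------------------------------------------------------------
+# Usage sketch (illustrative only, not part of the benchmark API surface).
+# A minimal example of how the pieces in this module are intended to fit
+# together: build a BugFixingTaskGenerator, generate a task, and inspect the
+# evolving problem description and the code with injected bugs. It assumes
+# RecursiveTask exposes the current ProblemState as `.state`, as the methods
+# above do via `self.state`. Submitting candidate solutions and iterating on
+# feedback is handled by the RecursiveTask interface defined in
+# recursive_swe_bench.core.recursive_task, so this sketch stops at task
+# creation.
+# ---------------------------------------------------------------------------
+if __name__ == "__main__":
+    generator = BugFixingTaskGenerator({"max_iterations": 3})
+    task = generator.generate_task(
+        difficulty="easy",
+        bug_categories=[BugCategory.LOGICAL],
+    )
+    # The generated ProblemState carries the prompt shown to a model and the
+    # buggy code that the model is asked to repair.
+    print(task.state.description)
+    print(task.state.code_context["code"])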