# recursive_swe_bench/models/base_model.py

import logging
from abc import ABC, abstractmethod
from typing import Any, Dict, List, Optional


class ModelInterface(ABC):
    """
    Base interface for models that can be evaluated using Recursive-SWE-bench.

    This abstract class defines the core functionality required for a model to
    be evaluated using the recursive evaluation framework. Concrete
    implementations must provide the actual model-specific logic.
    """

    def __init__(self, model_identifier: str, config: Optional[Dict[str, Any]] = None):
        """
        Initialize the model interface.

        Args:
            model_identifier: Identifier for the model
            config: Configuration options
        """
        self.model_identifier = model_identifier
        self.config = config or {}
        self.logger = self._setup_logger()

    def _setup_logger(self) -> logging.Logger:
        """Set up logging for the model."""
        logger = logging.getLogger(f"Model.{self.model_identifier}")
        handler = logging.StreamHandler()
        formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        handler.setFormatter(formatter)
        logger.addHandler(handler)
        logger.setLevel(self.config.get("log_level", logging.INFO))
        return logger

    @abstractmethod
    def solve(self, problem: Dict[str, Any], history: Optional[List[Dict[str, Any]]] = None) -> str:
        """
        Generate a solution for the given problem.

        Args:
            problem: The problem to solve
            history: Optional history of previous solution attempts

        Returns:
            The generated solution
        """
        pass

    @abstractmethod
    def get_meta_information(self) -> Dict[str, Any]:
        """
        Get meta information about the model.

        Returns:
            Dictionary containing model information
        """
        pass
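

# Illustrative sketch (not part of the library; the EchoModel name and
# behavior are hypothetical): a minimal concrete implementation showing
# the contract subclasses must satisfy, usable as a harness smoke test.
class EchoModel(ModelInterface):
    """Trivial model that returns the buggy code unchanged."""

    def solve(self, problem: Dict[str, Any], history: Optional[List[Dict[str, Any]]] = None) -> str:
        # A real model would generate a fix; this stub just echoes the input.
        return problem["code_context"]["code"]

    def get_meta_information(self) -> Dict[str, Any]:
        return {"model_name": self.model_identifier, "provider": "stub", "type": "local"}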


# recursive_swe_bench/models/openai.py

import re
import time
from typing import Any, Dict, List, Optional

import backoff
import openai  # NOTE: targets the pre-1.0 OpenAI SDK (openai<1.0), which exposes openai.error and openai.ChatCompletion

from recursive_swe_bench.models.base_model import ModelInterface


class OpenAIModel(ModelInterface):
    """
    Integration with OpenAI models (GPT-3.5, GPT-4, etc.).

    This class provides integration with OpenAI's API for evaluating
    models such as GPT-3.5 and GPT-4 with Recursive-SWE-bench.
    """

    def __init__(
        self,
        model_identifier: str,
        api_key: Optional[str] = None,
        config: Optional[Dict[str, Any]] = None,
    ):
        """
        Initialize the OpenAI model interface.

        Args:
            model_identifier: OpenAI model identifier (e.g., "gpt-4", "gpt-3.5-turbo")
            api_key: OpenAI API key (optional if set in the environment)
            config: Additional configuration options
        """
        super().__init__(model_identifier, config)

        # Set the API key if provided; otherwise the SDK falls back to the
        # OPENAI_API_KEY environment variable.
        if api_key:
            openai.api_key = api_key

        # Load default prompts, or use config-provided ones.
        self.prompts = self.config.get("prompts", {
            "system": "You are an expert programmer tasked with fixing bugs in code. Fix the code based on the description and tests.",
            "user_template": "# Bug Fixing Task\n\n{description}\n\n# Code\n```python\n{code}\n```\n\n{tests_description}\n\n# Your task\nFix the bugs in the code above. Provide only the corrected code without any explanations.",
        })

        # Configure API parameters.
        self.api_params = self.config.get("api_params", {
            "temperature": 0.2,
            "max_tokens": 2000,
            "top_p": 0.95,
            "frequency_penalty": 0,
            "presence_penalty": 0,
        })

        self.logger.info(f"Initialized OpenAI model: {model_identifier}")
    # Retry transient API failures (rate limits, service outages) with
    # exponential backoff, up to five attempts.
    @backoff.on_exception(
        backoff.expo,
        (
            openai.error.RateLimitError,
            openai.error.ServiceUnavailableError,
            openai.error.APIError,
        ),
        max_tries=5,
    )
    def solve(
        self,
        problem: Dict[str, Any],
        history: Optional[List[Dict[str, Any]]] = None,
    ) -> str:
        """
        Generate a solution using the OpenAI model.

        Args:
            problem: The problem to solve
            history: Optional history of previous solution attempts

        Returns:
            The generated solution
        """
        self.logger.info(f"Solving problem with OpenAI model: {self.model_identifier}")
        start_time = time.time()

        # Format the problem (and any prior attempts) as chat messages.
        messages = self._format_messages(problem, history)

        # Make the API call (pre-1.0 SDK interface).
        response = openai.ChatCompletion.create(
            model=self.model_identifier,
            messages=messages,
            **self.api_params,
        )

        # Extract the solution text from the response.
        solution = response.choices[0].message.content.strip()

        end_time = time.time()
        self.logger.info(f"Solution generated in {end_time - start_time:.2f} seconds")

        return self._extract_code(solution)
    def _format_messages(
        self,
        problem: Dict[str, Any],
        history: Optional[List[Dict[str, Any]]] = None,
    ) -> List[Dict[str, str]]:
        """
        Format the problem and history into messages for the OpenAI API.

        Args:
            problem: The problem to solve
            history: Optional history of previous solution attempts

        Returns:
            List of formatted messages
        """
        messages = [
            {"role": "system", "content": self.prompts["system"]}
        ]

        code = problem["code_context"]["code"]

        # Describe the tests the fixed code must pass, if any are provided.
        tests_description = "# Tests\n"
        if "tests" in problem["code_context"]:
            tests_description += "The code must pass the following tests:\n\n"
            for i, test in enumerate(problem["code_context"]["tests"]):
                tests_description += (
                    f"## Test {i + 1}: {test['name']}\n```python\n{test['content']}\n```\n\n"
                )
        else:
            tests_description += "The code must work correctly according to its intended functionality."

        # Create the user message from the template.
        user_content = self.prompts["user_template"].format(
            description=problem["description"],
            code=code,
            tests_description=tests_description,
        )
        messages.append({"role": "user", "content": user_content})

        # Replay the attempt/feedback history so the model can iterate.
        if history and self.config.get("include_history", True):
            for entry in history:
                # Previous attempt.
                messages.append({
                    "role": "assistant",
                    "content": entry["solution"],
                })

                # Feedback on that attempt.
                feedback_content = "Your solution has the following issues:\n"
                for issue in entry["feedback"]["issues"]:
                    feedback_content += f"- {issue['message']}\n"
                feedback_content += "\nPlease try again with these improvements:\n"
                for suggestion in entry["feedback"]["suggestions"]:
                    feedback_content += f"- {suggestion['message']}\n"
                messages.append({
                    "role": "user",
                    "content": feedback_content,
                })

        return messages
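
    # Illustrative shape of the returned list for one prior attempt
    # (assumption: feedback entries carry "issues" and "suggestions" lists
    # of {"message": ...} dicts, as accessed above):
    #
    #   [{"role": "system",    "content": "<system prompt>"},
    #    {"role": "user",      "content": "<task + code + tests>"},
    #    {"role": "assistant", "content": "<previous solution>"},
    #    {"role": "user",      "content": "<feedback + suggestions>"}]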

    def _extract_code(self, text: str) -> str:
        """
        Extract code from the model's response.

        Args:
            text: The model's response

        Returns:
            Extracted code
        """
        # Prefer the first fenced code block, if any.
        code_blocks = re.findall(r'```(?:python)?\s*(.*?)\s*```', text, re.DOTALL)
        if code_blocks:
            return code_blocks[0].strip()

        # No code blocks: return the full text (it may already be bare code).
        return text.strip()
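
    # Example behavior (illustrative):
    #   _extract_code("Fixed:\n```python\nx = 1\n```")  -> "x = 1"
    #   _extract_code("x = 1")                          -> "x = 1"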

    def get_meta_information(self) -> Dict[str, Any]:
        """
        Get meta information about the model.

        Returns:
            Dictionary containing model information
        """
        return {
            "model_name": self.model_identifier,
            "provider": "OpenAI",
            "type": "API",
            "parameters": self.api_params,
            "system_prompt": self.prompts["system"],
        }
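

# Illustrative usage (assumption: the problem dict shape matches the fields
# accessed in _format_messages; "sk-..." is a placeholder key).
if __name__ == "__main__":
    model = OpenAIModel("gpt-3.5-turbo", api_key="sk-...")
    problem = {
        "description": "add() should return the sum of its arguments.",
        "code_context": {
            "code": "def add(a, b):\n    return a - b",
            "tests": [
                {"name": "test_add", "content": "assert add(1, 2) == 3"},
            ],
        },
    }
    print(model.solve(problem))  # prints the model's proposed fix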