import openai  # OpenAI client library, used here against the OpenAI API directly
import os      # For environment variables if not using config_loader directly here
import json

# Import the API key from our config loader
from utils.config_loader import OPENAI_API_KEY

# Initialize the OpenAI client
client = None
if OPENAI_API_KEY:
    try:
        client = openai.OpenAI(
            api_key=OPENAI_API_KEY
            # No base_url needed for direct OpenAI
        )
        print("OpenAI client initialized successfully in llm_handler.")
    except Exception as e:
        print(f"Error initializing OpenAI client in llm_handler: {e}")
        client = None
else:
    print("WARNING (llm_handler): OPENAI_API_KEY not configured. LLM calls will fail.")

def get_simple_issue_suggestion(
    issues_data: list[dict],
    language: str,
    target_count: int = 1,
    model_name: str = "gpt-4o-mini",  # Or your preferred model
    additional_prompt_context: str = ""
) -> str | None:
    """
    Sends issue data to the OpenAI API and asks which issue(s) might be best for a beginner.
    """
    if not client:
        print("LLM client (OpenAI) in get_simple_issue_suggestion is not initialized.")
        return "LLM client (OpenAI) not initialized. Check API Key configuration."
    if not issues_data:
        print("No issues provided to LLM for suggestion.")
        return "No issues provided to LLM for suggestion."

    # Build a readable block describing each issue for the prompt
    prompt_issues_str = ""
    for i, issue in enumerate(issues_data):
        snippet = issue.get('body_snippet', 'No description available.')
        title = issue.get('title', 'No title')
        url = issue.get('html_url', '#')
        labels = ", ".join(issue.get('labels', [])) if issue.get('labels') else "No labels"
        prompt_issues_str += (
            f"\n--- Issue {i+1} ---\n"
            f"Title: {title}\nURL: {url}\nLabels: {labels}\nSnippet from body: {snippet}\n-----------------\n"
        )

    system_prompt = (
        "You are an expert assistant helping a new open-source contributor. "
        "Your task is to analyze the provided list of GitHub issues and recommend "
        f"the top {target_count} that would be most suitable for a beginner, ideally in {language} (if specified and relevant to the issues). "
        "Consider factors like clarity, labels, and apparent scope. "
        f"{additional_prompt_context}"
        " If the user-specified language seems mismatched with the provided issues, please make your best judgment "
        "based on the issue content itself or note the potential mismatch in your recommendation."
    )
    user_prompt = (
        f"Here is a list of GitHub issues found when searching for the language '{language}'. "
        f"Please review them and suggest the top {target_count} issue(s) that seem most suitable for a beginner. "
        f"For each suggested issue, provide a concise explanation (1-2 sentences) stating *why* it's a good choice for a beginner. "
        f"If you suggest an issue, please refer to it by its number (e.g., 'Issue 1')."
        f"\nHere are the issues:\n{prompt_issues_str}"
    )

    temperature_val = 0.4
    max_tokens_val = 200 + (target_count * 150)
    top_p_val = 0.9  # 1.0 would disable nucleus sampling; 0.9 applies a mild top-p cutoff

    print("\nSending request to OpenAI LLM for issue suggestion...")
    print(f"Model: {model_name}, Temp: {temperature_val}, MaxTokens: {max_tokens_val}")
    try:
        completion = client.chat.completions.create(
            model=model_name,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            temperature=temperature_val,
            max_tokens=max_tokens_val,
            top_p=top_p_val
        )
        suggestion_text = completion.choices[0].message.content
        print("OpenAI LLM Suggestion Received.")
        return suggestion_text.strip()
    except openai.APIConnectionError as e:
        print(f"OpenAI API Connection Error: {e}")
        return f"LLM suggestion failed due to connection error: {e}"
    except openai.RateLimitError as e:
        print(f"OpenAI API Rate Limit Error: {e}")
        return f"LLM suggestion failed due to rate limit: {e}. Check your OpenAI plan and usage."
    except openai.AuthenticationError as e:
        print(f"OpenAI API Authentication Error: {e}. Check your OPENAI_API_KEY.")
        return f"LLM suggestion failed due to authentication error: {e}."
    except openai.APIStatusError as e:
        print(f"OpenAI API Status Error: Status {e.status_code} - Response: {e.response}")
        return f"LLM suggestion failed due to API status error: {e.status_code}"
    except Exception as e:
        print(f"LLM API call to OpenAI failed with an unexpected error: {e}")
        print(f"Type of error: {type(e)}")
        return f"LLM suggestion failed with an unexpected error: {e}"

# --- NEW FUNCTION 1: Summarize Text Content ---
def summarize_text_content(
    text_content: str,
    purpose: str = "contribution guidelines",  # e.g., "issue description", "documentation section"
    max_summary_tokens: int = 200,  # Adjust as needed
    model_name: str = "gpt-4o-mini"  # Or your preferred model
) -> str | None:
    """
    Summarizes a given text content using an LLM.
    """
    if not client:
        print("ERROR (llm_handler.summarize_text_content): LLM client not initialized.")
        return "LLM Client not initialized. Cannot summarize."
    if not text_content or not text_content.strip():
        print("Warning (llm_handler.summarize_text_content): No text content provided to summarize.")
        return "No content provided for summarization."

    # Heuristic: if the text is already short, return it (or a snippet) directly.
    # This avoids wasting API calls on tiny texts (approximate word count).
    if len(text_content.split()) < 75:  # Arbitrary threshold for "short"
        print("Info (llm_handler.summarize_text_content): Content too short, returning as is or snippet.")
        return f"The {purpose} document is brief: \"{text_content[:500]}...\"" if len(text_content) > 500 else text_content

    system_prompt = (
        f"You are an expert summarizer. Your task is to provide a concise summary of the following '{purpose}' document. "
        "Focus on the most critical information a new contributor would need. "
        "For contribution guidelines, highlight key setup steps, coding style conventions, testing requirements, and pull request procedures. "
        "Keep the summary brief and actionable."
    )
    user_prompt = (
        f"Please summarize the key points of the following {purpose} document:\n\n"
        f"```text\n{text_content[:8000]}\n```"
        # Limit the context sent to the LLM: 8000 characters is a rough cap chosen to fit
        # within context windows and manage cost. Adjust based on typical CONTRIBUTING.md
        # length and the model's context limits.
    )

    print(f"LLM Handler: Sending request to summarize {purpose}. Model: {model_name}")
    try:
        completion = client.chat.completions.create(
            model=model_name,
            messages=[{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}],
            temperature=0.2,  # Lower temperature for factual summarization
            max_tokens=max_summary_tokens,
            top_p=1.0
        )
        summary_text = completion.choices[0].message.content
        print(f"LLM Handler: Summary for {purpose} received.")
        return summary_text.strip()
    except Exception as e:
        print(f"ERROR (llm_handler.summarize_text_content): LLM API call failed: {e}")
        return f"Could not summarize the {purpose}: LLM API error."

# --- NEW FUNCTION 2: Suggest Relevant Code Locations ---
def suggest_relevant_code_locations(
    issue_snippet: str,
    file_list: list[str],
    language: str,  # Language of the project
    max_suggestion_tokens: int = 200,  # Adjust as needed
    model_name: str = "gpt-4o-mini"  # Or your preferred model
) -> str | None:
    """
    Suggests relevant files/folders based on an issue snippet and a list of files.
    """
    if not client:
        print("ERROR (llm_handler.suggest_relevant_code_locations): LLM client not initialized.")
        return "LLM Client not initialized. Cannot suggest locations."
    if not issue_snippet or not issue_snippet.strip():
        return "No issue description provided to suggest locations."
    if not file_list:
        return "No file list provided to suggest locations from."

    # Format the file list for the prompt
    formatted_file_list = "\n".join([f"- `{f}`" for f in file_list])
    if not formatted_file_list:  # Should not happen if file_list is not empty
        formatted_file_list = "No files listed."

    system_prompt = (
        f"You are an AI assistant helping a software developer navigate a new '{language}' codebase. "
        "Your goal is to identify potentially relevant files or folders for a given issue, based on a provided list of top-level project files/folders."
    )
    user_prompt = (
        f"A developer is starting work on an issue with the following description snippet:\n"
        f"'''\n{issue_snippet}\n'''\n\n"
        f"The top-level files and folders available in the repository are:\n"
        f"{formatted_file_list}\n\n"
        f"Based *only* on the issue snippet and this file list, please suggest 2-3 files or folders that might be most relevant for investigating this issue. "
        f"For each suggestion, provide a brief (1-sentence) explanation of why it might be relevant. "
        f"If no files seem obviously relevant from the top-level list, say so."
    )

    print(f"LLM Handler: Sending request to suggest relevant code locations. Model: {model_name}")
    try:
        completion = client.chat.completions.create(
            model=model_name,
            messages=[{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}],
            temperature=0.5,  # Moderate temperature for some reasoning
            max_tokens=max_suggestion_tokens,
            top_p=1.0
        )
        suggestion_text = completion.choices[0].message.content
        print("LLM Handler: Code location suggestions received.")
        return suggestion_text.strip()
    except Exception as e:
        print(f"ERROR (llm_handler.suggest_relevant_code_locations): LLM API call failed: {e}")
        return f"Could not suggest code locations: LLM API error."

def plan_onboarding_kit_components(
    issue_data: dict,
    language_searched: str,
    model_name: str = "gpt-4.1-mini"  # Or your preferred model
) -> dict | None:
    """
    Uses an LLM to decide which onboarding kit components are most relevant for a given issue.
    Returns a dictionary based on the LLM's JSON output.
    """
    if not client:
        print("ERROR (llm_handler.plan_kit): LLM client not initialized.")
        return None  # Or: {"error": "LLM Client not initialized"}
    if not issue_data:
        print("ERROR (llm_handler.plan_kit): No issue data provided for planning.")
        return None  # Or: {"error": "No issue data"}

    issue_title = issue_data.get("title", "N/A")
    issue_snippet = issue_data.get("body_snippet", "No description available.")
    issue_labels = issue_data.get("labels", [])

    # Define the available kit components the LLM may choose from
    available_components = [
        "repo_details_and_clone_command",      # Basic repo info, clone command
        "contribution_guidelines_link",        # Link to CONTRIBUTING.md
        "contribution_guidelines_summary_ai",  # AI summary of CONTRIBUTING.md
        "repository_structure_modal_ai",       # File listing via Modal + AI-suggested files
        # "repository_structure_modal_ai" could be broken down further if needed:
        # "repository_files_modal_raw_list",
        # "ai_suggested_start_files_from_list"
    ]
    components_description = (
        "- repo_details_and_clone_command: Basic repository information and git clone command.\n"
        "- contribution_guidelines_link: A direct link to the project's CONTRIBUTING.md file (if found).\n"
        "- contribution_guidelines_summary_ai: An AI-generated summary of the key points from CONTRIBUTING.md.\n"
        "- repository_structure_modal_ai: A top-level file/folder listing from a repository clone (via Modal), followed by AI suggestions for relevant files based on the issue."
    )

    system_prompt = (
        "You are an expert onboarding assistant for open-source contributors. Your task is to intelligently plan "
        "the components of an onboarding kit that would be most helpful for a developer tackling a specific GitHub issue. "
        "You must respond ONLY with a valid JSON object containing a single key 'include_components' whose value is a list of strings, "
        "where each string is one of the component names provided."
    )
    user_prompt = (
        f"Based on the following GitHub issue details for a project searched under the language context '{language_searched}':\n"
        f"Issue Title: \"{issue_title}\"\n"
        f"Issue Snippet: \"{issue_snippet}\"\n"
        f"Issue Labels: {issue_labels}\n\n"
        f"And considering the following available onboarding kit components and their descriptions:\n"
        f"{components_description}\n\n"
        f"Which components should be included in the onboarding kit for this specific issue to be most helpful? "
        f"For example, if the issue is a very simple documentation typo, a full 'repository_structure_modal_ai' might be overkill. "
        f"If no contribution guidelines are typically found for a project, 'contribution_guidelines_summary_ai' would not be applicable. (You don't know this yet, but keep it in mind for general reasoning.) "
        f"Prioritize helpfulness for a beginner. Respond ONLY with a JSON object in the format: "
        f"{{\"include_components\": [\"component_name_1\", \"component_name_2\", ...]}}"
    )

    print(f"LLM Handler (plan_kit): Sending request to plan kit components. Model: {model_name}")
    try:
        # Models such as gpt-4o-mini, gpt-4.1-mini, and newer gpt-3.5-turbo releases usually follow
        # "Respond ONLY with a valid JSON" instructions well. For stronger enforcement, explicit JSON
        # mode is requested below via response_format on models known to support it.
        completion_params = {
            "model": model_name,
            "messages": [{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}],
            "temperature": 0.2,  # Low temperature for more deterministic structural output
            "max_tokens": 200,   # JSON output should be relatively small
            "top_p": 1.0,
        }
        # Check whether the model supports explicit JSON mode via response_format
        # (add other compatible models here if known)
        if ("gpt-4o" in model_name or "gpt-4.1" in model_name
                or "gpt-3.5-turbo-0125" in model_name or "gpt-3.5-turbo-1106" in model_name):
            completion_params["response_format"] = {"type": "json_object"}

        completion = client.chat.completions.create(**completion_params)
        raw_response_content = completion.choices[0].message.content
        print(f"LLM Handler (plan_kit): Raw JSON response received: {raw_response_content}")

        # Attempt to parse the JSON
        parsed_plan = json.loads(raw_response_content)
        if "include_components" in parsed_plan and isinstance(parsed_plan["include_components"], list):
            # Further validation: ensure all component names are valid (optional but good)
            valid_components = [comp for comp in parsed_plan["include_components"] if comp in available_components]
            if len(valid_components) != len(parsed_plan["include_components"]):
                print("Warning (llm_handler.plan_kit): LLM returned some invalid component names.")
            final_plan = {"include_components": valid_components}
            print(f"LLM Handler (plan_kit): Parsed plan: {final_plan}")
            return final_plan
        else:
            print("ERROR (llm_handler.plan_kit): LLM response was not in the expected JSON format (missing 'include_components' list).")
            return {"error": "LLM response format error", "details": "Missing 'include_components' list."}
    except json.JSONDecodeError as json_e:
        print(f"ERROR (llm_handler.plan_kit): Failed to decode JSON from LLM response. Error: {json_e}. Response was: {raw_response_content}")
        return {"error": "JSON decode error", "details": str(json_e), "raw_response": raw_response_content}
    except Exception as e:
        print(f"ERROR (llm_handler.plan_kit): LLM API call failed: {e}")
        return {"error": f"LLM API call failed: {str(e)}"}