"""LLM helper functions built on the OpenAI chat completions API."""

import json

import openai

from utils.config_loader import OPENAI_API_KEY

# Initialize a single module-level client at import time; every helper below
# checks it before making an API call.
client = None
if OPENAI_API_KEY:
    try:
        client = openai.OpenAI(api_key=OPENAI_API_KEY)
        print("OpenAI client initialized successfully in llm_handler.")
    except Exception as e:
        print(f"Error initializing OpenAI client in llm_handler: {e}")
        client = None
else:
    print("WARNING (llm_handler): OPENAI_API_KEY not configured. LLM calls will fail.")

def get_simple_issue_suggestion(
    issues_data: list[dict],
    language: str,
    target_count: int = 1,
    model_name: str = "gpt-4o-mini",
    additional_prompt_context: str = ""
) -> str | None:
    """
    Sends issue data to the OpenAI API to suggest which issue(s) might be best for a beginner.
    """
    if not client:
        print("LLM client (OpenAI) in get_simple_issue_suggestion is not initialized.")
        return "LLM client (OpenAI) not initialized. Check API Key configuration."
    if not issues_data:
        print("No issues provided to LLM for suggestion.")
        return "No issues provided to LLM for suggestion."

    # Flatten the issue dicts into a numbered plain-text block for the prompt.
    prompt_issues_str = ""
    for i, issue in enumerate(issues_data):
        snippet = issue.get('body_snippet', 'No description available.')
        title = issue.get('title', 'No title')
        url = issue.get('html_url', '#')
        labels = ", ".join(issue.get('labels', [])) if issue.get('labels') else "No labels"
        prompt_issues_str += (
            f"\n--- Issue {i+1} ---\n"
            f"Title: {title}\nURL: {url}\nLabels: {labels}\nSnippet from body: {snippet}\n-----------------\n"
        )

    system_prompt = (
        "You are an expert assistant helping a new open-source contributor. "
        "Your task is to analyze the provided list of GitHub issues and recommend "
        f"the top {target_count} that would be most suitable for a beginner, ideally in {language} (if specified and relevant to the issues). "
        "Consider factors like clarity, labels, and apparent scope. "
        f"{additional_prompt_context}"
        " If the user-specified language seems mismatched with the provided issues, use your best judgment "
        "based on the issue content itself, or note the potential mismatch in your recommendation."
    )
    user_prompt = (
        f"Here is a list of GitHub issues found when searching for the language '{language}'. "
        f"Please review them and suggest the top {target_count} issue(s) that seem most suitable for a beginner. "
        f"For each suggested issue, provide a concise explanation (1-2 sentences) stating *why* it's a good choice for a beginner. "
        f"If you suggest an issue, please refer to it by its number (e.g., 'Issue 1')."
        f"\nHere are the issues:\n{prompt_issues_str}"
    )

    temperature_val = 0.4
    # Token budget grows with the number of requested suggestions.
    max_tokens_val = 200 + (target_count * 150)
    top_p_val = 0.9

    print("\nSending request to OpenAI LLM for issue suggestion...")
    print(f"Model: {model_name}, Temp: {temperature_val}, MaxTokens: {max_tokens_val}")

    try:
        completion = client.chat.completions.create(
            model=model_name,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            temperature=temperature_val,
            max_tokens=max_tokens_val,
            top_p=top_p_val
        )

        suggestion_text = completion.choices[0].message.content
        print("OpenAI LLM Suggestion Received.")
        return suggestion_text.strip()

    except openai.APIConnectionError as e:
        print(f"OpenAI API Connection Error: {e}")
        return f"LLM suggestion failed due to connection error: {e}"
    except openai.RateLimitError as e:
        print(f"OpenAI API Rate Limit Error: {e}")
        return f"LLM suggestion failed due to rate limit: {e}. Check your OpenAI plan and usage."
    except openai.AuthenticationError as e:
        print(f"OpenAI API Authentication Error: {e}. Check your OPENAI_API_KEY.")
        return f"LLM suggestion failed due to authentication error: {e}."
    except openai.APIStatusError as e:
        print(f"OpenAI API Status Error: Status {e.status_code} - Response: {e.response}")
        return f"LLM suggestion failed due to API status error: {e.status_code}"
    except Exception as e:
        print(f"LLM API call to OpenAI failed with an unexpected error: {e}")
        print(f"Type of error: {type(e)}")
        return f"LLM suggestion failed with an unexpected error: {e}"

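# Example usage of get_simple_issue_suggestion (a sketch; the issue dict below is
# hypothetical, but its keys mirror the ones the function reads above):
#
#     issues = [{
#         "title": "Fix typo in README",
#         "html_url": "https://github.com/example/repo/issues/1",
#         "labels": ["good first issue", "documentation"],
#         "body_snippet": "The word 'recieve' is misspelled in the Quick Start section.",
#     }]
#     print(get_simple_issue_suggestion(issues, language="Python", target_count=1))
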
def summarize_text_content(
    text_content: str,
    purpose: str = "contribution guidelines",
    max_summary_tokens: int = 200,
    model_name: str = "gpt-4o-mini"
) -> str | None:
    """
    Summarizes a given text content using an LLM.
    """
    if not client:
        print("ERROR (llm_handler.summarize_text_content): LLM client not initialized.")
        return "LLM Client not initialized. Cannot summarize."
    if not text_content or not text_content.strip():
        print("Warning (llm_handler.summarize_text_content): No text content provided to summarize.")
        return "No content provided for summarization."

    # Very short documents are not worth an API call: return them (or a snippet) directly.
    if len(text_content.split()) < 75:
        print("Info (llm_handler.summarize_text_content): Content too short, returning as is or snippet.")
        if len(text_content) > 500:
            return f"The {purpose} document is brief: \"{text_content[:500]}...\""
        return text_content

    system_prompt = (
        f"You are an expert summarizer. Your task is to provide a concise summary of the following '{purpose}' document. "
        "Focus on the most critical information a new contributor would need. "
        "For contribution guidelines, highlight key setup steps, coding style conventions, testing requirements, and pull request procedures. "
        "Keep the summary brief and actionable."
    )
    # Truncate very long documents to keep the prompt within a reasonable size.
    user_prompt = (
        f"Please summarize the key points of the following {purpose} document:\n\n"
        f"```text\n{text_content[:8000]}\n```"
    )

    print(f"LLM Handler: Sending request to summarize {purpose}. Model: {model_name}")
    try:
        completion = client.chat.completions.create(
            model=model_name,
            messages=[{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}],
            temperature=0.2,
            max_tokens=max_summary_tokens,
            top_p=1.0
        )
        summary_text = completion.choices[0].message.content
        print(f"LLM Handler: Summary for {purpose} received.")
        return summary_text.strip()
    except Exception as e:
        print(f"ERROR (llm_handler.summarize_text_content): LLM API call failed: {e}")
        return f"Could not summarize the {purpose}: LLM API error."

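# Example usage of summarize_text_content (a sketch; the file path is hypothetical):
#
#     with open("CONTRIBUTING.md", encoding="utf-8") as f:
#         print(summarize_text_content(f.read(), purpose="contribution guidelines"))
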
def suggest_relevant_code_locations(
    issue_snippet: str,
    file_list: list[str],
    language: str,
    max_suggestion_tokens: int = 200,
    model_name: str = "gpt-4o-mini"
) -> str | None:
    """
    Suggests relevant files/folders based on an issue snippet and a list of files.
    """
    if not client:
        print("ERROR (llm_handler.suggest_relevant_code_locations): LLM client not initialized.")
        return "LLM Client not initialized. Cannot suggest locations."
    if not issue_snippet or not issue_snippet.strip():
        return "No issue description provided to suggest locations."
    if not file_list:
        return "No file list provided to suggest locations from."

    # Render the file list as a markdown bullet list for the prompt.
    # (file_list is guaranteed non-empty by the guard above.)
    formatted_file_list = "\n".join(f"- `{f}`" for f in file_list)

    system_prompt = (
        f"You are an AI assistant helping a software developer navigate a new '{language}' codebase. "
        "Your goal is to identify potentially relevant files or folders for a given issue, based on a provided list of top-level project files/folders."
    )
    user_prompt = (
        f"A developer is starting work on an issue with the following description snippet:\n"
        f"'''\n{issue_snippet}\n'''\n\n"
        f"The top-level files and folders available in the repository are:\n"
        f"{formatted_file_list}\n\n"
        f"Based *only* on the issue snippet and this file list, please suggest 2-3 files or folders that might be most relevant for investigating this issue. "
        f"For each suggestion, provide a brief (1-sentence) explanation of why it might be relevant. "
        f"If no files seem obviously relevant from the top-level list, say so."
    )

    print(f"LLM Handler: Sending request to suggest relevant code locations. Model: {model_name}")
    try:
        completion = client.chat.completions.create(
            model=model_name,
            messages=[{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}],
            temperature=0.5,
            max_tokens=max_suggestion_tokens,
            top_p=1.0
        )
        suggestion_text = completion.choices[0].message.content
        print("LLM Handler: Code location suggestions received.")
        return suggestion_text.strip()
    except Exception as e:
        print(f"ERROR (llm_handler.suggest_relevant_code_locations): LLM API call failed: {e}")
        return "Could not suggest code locations: LLM API error."

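# Example usage of suggest_relevant_code_locations (a sketch; all values hypothetical):
#
#     hints = suggest_relevant_code_locations(
#         issue_snippet="Parser crashes on empty config files.",
#         file_list=["src/", "tests/", "docs/", "setup.py", "README.md"],
#         language="Python",
#     )
#     print(hints)
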
def plan_onboarding_kit_components(
    issue_data: dict,
    language_searched: str,
    model_name: str = "gpt-4.1-mini"
) -> dict | None:
    """
    Uses an LLM to decide which onboarding kit components are most relevant for a given issue.
    Returns a dictionary based on the LLM's JSON output.
    """
    if not client:
        print("ERROR (llm_handler.plan_kit): LLM client not initialized.")
        return None
    if not issue_data:
        print("ERROR (llm_handler.plan_kit): No issue data provided for planning.")
        return None

    issue_title = issue_data.get("title", "N/A")
    issue_snippet = issue_data.get("body_snippet", "No description available.")
    issue_labels = issue_data.get("labels", [])

    # Whitelist of component names; anything else the LLM returns is filtered out below.
    available_components = [
        "repo_details_and_clone_command",
        "contribution_guidelines_link",
        "contribution_guidelines_summary_ai",
        "repository_structure_modal_ai",
    ]
    components_description = (
        "- repo_details_and_clone_command: Basic repository information and git clone command.\n"
        "- contribution_guidelines_link: A direct link to the project's CONTRIBUTING.md file (if found).\n"
        "- contribution_guidelines_summary_ai: An AI-generated summary of the key points from CONTRIBUTING.md.\n"
        "- repository_structure_modal_ai: A top-level file/folder listing from a repository clone (via Modal), followed by AI suggestions for relevant files based on the issue."
    )

    system_prompt = (
        "You are an expert onboarding assistant for open-source contributors. Your task is to intelligently plan "
        "the components of an onboarding kit that would be most helpful for a developer tackling a specific GitHub issue. "
        "You must respond ONLY with a valid JSON object containing a single key 'include_components' whose value is a list of strings, "
        "where each string is one of the component names provided."
    )
    user_prompt = (
        f"Based on the following GitHub issue details for a project searched under the language context '{language_searched}':\n"
        f"Issue Title: \"{issue_title}\"\n"
        f"Issue Snippet: \"{issue_snippet}\"\n"
        f"Issue Labels: {issue_labels}\n\n"
        f"And considering the following available onboarding kit components and their descriptions:\n"
        f"{components_description}\n\n"
        f"Which components should be included in the onboarding kit for this specific issue to be most helpful? "
        f"For example, if the issue is a very simple documentation typo, a full 'repository_structure_modal_ai' might be overkill. "
        f"If no contribution guidelines are typically found for a project, 'contribution_guidelines_summary_ai' would not be applicable. (You don't know this yet, but keep it in mind for general reasoning.) "
        f"Prioritize helpfulness for a beginner. Respond ONLY with a JSON object in the format: "
        f"{{\"include_components\": [\"component_name_1\", \"component_name_2\", ...]}}"
    )

    print(f"LLM Handler (plan_kit): Sending request to plan kit components. Model: {model_name}")
    raw_response_content = None
    try:
        completion_params = {
            "model": model_name,
            "messages": [{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}],
            "temperature": 0.2,
            "max_tokens": 200,
            "top_p": 1.0,
        }

        # Enable JSON mode only for models known to support response_format;
        # this includes the gpt-4.1 family used as the default above.
        if ("gpt-4.1" in model_name or "gpt-4o" in model_name
                or "gpt-3.5-turbo-0125" in model_name or "gpt-3.5-turbo-1106" in model_name):
            completion_params["response_format"] = {"type": "json_object"}

        completion = client.chat.completions.create(**completion_params)

        raw_response_content = completion.choices[0].message.content
        print(f"LLM Handler (plan_kit): Raw JSON response received: {raw_response_content}")

        parsed_plan = json.loads(raw_response_content)
        if "include_components" in parsed_plan and isinstance(parsed_plan["include_components"], list):
            # Keep only component names that appear in the whitelist above.
            valid_components = [comp for comp in parsed_plan["include_components"] if comp in available_components]
            if len(valid_components) != len(parsed_plan["include_components"]):
                print("Warning (llm_handler.plan_kit): LLM returned some invalid component names.")

            final_plan = {"include_components": valid_components}
            print(f"LLM Handler (plan_kit): Parsed plan: {final_plan}")
            return final_plan
        else:
            print("ERROR (llm_handler.plan_kit): LLM response was not in the expected JSON format (missing 'include_components' list).")
            return {"error": "LLM response format error", "details": "Missing 'include_components' list."}

    except json.JSONDecodeError as json_e:
        print(f"ERROR (llm_handler.plan_kit): Failed to decode JSON from LLM response. Error: {json_e}. Response was: {raw_response_content}")
        return {"error": "JSON decode error", "details": str(json_e), "raw_response": raw_response_content}
    except Exception as e:
        print(f"ERROR (llm_handler.plan_kit): LLM API call failed: {e}")
        return {"error": f"LLM API call failed: {str(e)}"}
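

# Minimal manual smoke test, a sketch only: the issue below is hypothetical, and
# running it requires a configured OPENAI_API_KEY (otherwise each helper returns
# its "not initialized" message instead of calling the API).
if __name__ == "__main__":
    sample_issue = {
        "title": "Fix typo in README",
        "body_snippet": "The word 'recieve' is misspelled in the Quick Start section.",
        "labels": ["good first issue", "documentation"],
        "html_url": "https://github.com/example/repo/issues/1",  # hypothetical URL
    }
    print(plan_onboarding_kit_components(sample_issue, language_searched="Python"))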