import openai # Using the openai library for Nebius's OpenAI-compatible API import os # For environment variables if not using config_loader directly here import json # Import API key and base URL from our config loader from utils.config_loader import OPENAI_API_KEY # Initialize the OpenAI client client = None if OPENAI_API_KEY: try: client = openai.OpenAI( api_key=OPENAI_API_KEY # No base_url needed for direct OpenAI ) print("OpenAI client initialized successfully in llm_handler.") except Exception as e: print(f"Error initializing OpenAI client in llm_handler: {e}") client = None else: print("WARNING (llm_handler): OPENAI_API_KEY not configured. LLM calls will fail.") def get_simple_issue_suggestion( issues_data: list[dict], language: str, target_count: int = 1, model_name: str = "gpt-4o-mini", # Or your preferred model additional_prompt_context: str = "" # NEW parameter ) -> str | None: """ Sends issue data to OpenAI API to suggest which one(s) might be best for a beginner. """ if not client: print("LLM client (OpenAI) in get_simple_issue_suggestion is not initialized.") return "LLM client (OpenAI) not initialized. Check API Key configuration." if not issues_data: print("No issues provided to LLM for suggestion.") return "No issues provided to LLM for suggestion." prompt_issues_str = "" # Rebuild this based on your existing logic for i, issue in enumerate(issues_data): snippet = issue.get('body_snippet', 'No description available.') title = issue.get('title', 'No title') url = issue.get('html_url', '#') labels = ", ".join(issue.get('labels', [])) if issue.get('labels') else "No labels" prompt_issues_str += ( f"\n--- Issue {i+1} ---\n" f"Title: {title}\nURL: {url}\nLabels: {labels}\nSnippet from body: {snippet}\n-----------------\n" ) system_prompt = ( "You are an expert assistant helping a new open-source contributor. " "Your task is to analyze the provided list of GitHub issues and recommend " f"the top {target_count} that would be most suitable for a beginner ideally in {language} (if specified and makes sense for the issues). " "Consider factors like clarity, labels, and apparent scope. " f"{additional_prompt_context}" # ADDED additional context here " If the user-specified language seems mismatched with the provided issues, please make your best judgment " "based on the issue content itself or note the potential mismatch in your recommendation." ) user_prompt = ( # ... (user prompt construction as before, including prompt_issues_str) ... f"Here is a list of GitHub issues found when searching for the language '{language}'. " # (The additional_prompt_context is now in the system prompt) f"Please review them and suggest the top {target_count} issue(s) that seem most suitable for a beginner. " f"For each suggested issue, provide a concise explanation (1-2 sentences) stating *why* it's a good choice for a beginner. " f"If you suggest an issue, please refer to it by its number (e.g., 'Issue 1')." f"\nHere are the issues:\n{prompt_issues_str}" ) temperature_val = 0.4 max_tokens_val = 200 + (target_count * 150) top_p_val = 0.9 # Usually 1.0 for temperature-based sampling, or 0.9 if also using top_p print(f"\nSending request to OpenAI LLM for issue suggestion...") print(f"Model: {model_name}, Temp: {temperature_val}, MaxTokens: {max_tokens_val}") try: completion = client.chat.completions.create( # Ensure client is defined model=model_name, messages=[ {"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt} ], # ... other params temperature=0.4, max_tokens=200 + (target_count * 150), top_p=0.9 ) suggestion_text = completion.choices[0].message.content print("OpenAI LLM Suggestion Received.") return suggestion_text.strip() except openai.APIConnectionError as e: print(f"OpenAI API Connection Error: {e}") return f"LLM suggestion failed due to connection error: {e}" except openai.RateLimitError as e: # Good to handle this explicitly print(f"OpenAI API Rate Limit Error: {e}") return f"LLM suggestion failed due to rate limit: {e}. Check your OpenAI plan and usage." except openai.AuthenticationError as e: # Added for bad API key print(f"OpenAI API Authentication Error: {e}. Check your OPENAI_API_KEY.") return f"LLM suggestion failed due to authentication error: {e}." except openai.APIStatusError as e: print(f"OpenAI API Status Error: Status {e.status_code} - Response: {e.response}") return f"LLM suggestion failed due to API status error: {e.status_code}" except Exception as e: print(f"LLM API call to OpenAI failed with an unexpected error: {e}") print(f"Type of error: {type(e)}") return f"LLM suggestion failed with an unexpected error: {e}" # --- NEW FUNCTION 1: Summarize Text Content --- def summarize_text_content( text_content: str, purpose: str = "contribution guidelines", # e.g., "issue description", "documentation section" max_summary_tokens: int = 200, # Adjust as needed model_name: str = "gpt-4o-mini" # Or your preferred model ) -> str | None: """ Summarizes a given text content using an LLM. """ if not client: print("ERROR (llm_handler.summarize_text_content): LLM client not initialized.") return "LLM Client not initialized. Cannot summarize." if not text_content or not text_content.strip(): print("Warning (llm_handler.summarize_text_content): No text content provided to summarize.") return "No content provided for summarization." # Heuristic: If text is already short, just return it or a small part. # This avoids wasting API calls on tiny texts. (Count words approx) if len(text_content.split()) < 75 : # Arbitrary threshold for "short" print("Info (llm_handler.summarize_text_content): Content too short, returning as is or snippet.") return f"The {purpose} document is brief: \"{text_content[:500]}...\"" if len(text_content) > 500 else text_content system_prompt = ( f"You are an expert summarizer. Your task is to provide a concise summary of the following '{purpose}' document. " "Focus on the most critical information a new contributor would need. " "For contribution guidelines, highlight key setup steps, coding style conventions, testing requirements, and pull request procedures. " "Keep the summary brief and actionable." ) user_prompt = ( f"Please summarize the key points of the following {purpose} document:\n\n" f"```text\n{text_content[:8000]}\n```" # Limit context sent to LLM # Using 8000 characters as a rough limit to fit within context windows & manage cost. # Adjust this based on typical CONTRIBUTING.md length and model context limits. ) print(f"LLM Handler: Sending request to summarize {purpose}. Model: {model_name}") try: completion = client.chat.completions.create( model=model_name, messages=[{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}], temperature=0.2, # Lower temperature for factual summarization max_tokens=max_summary_tokens, top_p=1.0 ) summary_text = completion.choices[0].message.content print(f"LLM Handler: Summary for {purpose} received.") return summary_text.strip() except Exception as e: print(f"ERROR (llm_handler.summarize_text_content): LLM API call failed: {e}") return f"Could not summarize the {purpose}: LLM API error." # --- NEW FUNCTION 2: Suggest Relevant Code Locations --- def suggest_relevant_code_locations( issue_snippet: str, file_list: list[str], language: str, # Language of the project max_suggestion_tokens: int = 200, # Adjust as needed model_name: str = "gpt-4o-mini" # Or your preferred model ) -> str | None: """ Suggests relevant files/folders based on an issue snippet and a list of files. """ if not client: print("ERROR (llm_handler.suggest_relevant_code_locations): LLM client not initialized.") return "LLM Client not initialized. Cannot suggest locations." if not issue_snippet or not issue_snippet.strip(): return "No issue description provided to suggest locations." if not file_list: return "No file list provided to suggest locations from." # Format file list for the prompt formatted_file_list = "\n".join([f"- `{f}`" for f in file_list]) if not formatted_file_list: # Should not happen if file_list is not empty formatted_file_list = "No files listed." system_prompt = ( f"You are an AI assistant helping a software developer navigate a new '{language}' codebase. " "Your goal is to identify potentially relevant files or folders for a given issue, based on a provided list of top-level project files/folders." ) user_prompt = ( f"A developer is starting work on an issue with the following description snippet:\n" f"'''\n{issue_snippet}\n'''\n\n" f"The top-level files and folders available in the repository are:\n" f"{formatted_file_list}\n\n" f"Based *only* on the issue snippet and this file list, please suggest 2-3 files or folders that might be most relevant for investigating this issue. " f"For each suggestion, provide a brief (1-sentence) explanation of why it might be relevant. " f"If no files seem obviously relevant from the top-level list, say so." ) print(f"LLM Handler: Sending request to suggest relevant code locations. Model: {model_name}") try: completion = client.chat.completions.create( model=model_name, messages=[{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}], temperature=0.5, # Moderate temperature for some reasoning max_tokens=max_suggestion_tokens, top_p=1.0 ) suggestion_text = completion.choices[0].message.content print("LLM Handler: Code location suggestions received.") return suggestion_text.strip() except Exception as e: print(f"ERROR (llm_handler.suggest_relevant_code_locations): LLM API call failed: {e}") return f"Could not suggest code locations: LLM API error." def plan_onboarding_kit_components( issue_data: dict, language_searched: str, model_name: str = "gpt-4.1-mini" # Or your preferred model ) -> dict | None: """ Uses an LLM to decide which onboarding kit components are most relevant for a given issue. Returns a dictionary based on the LLM's JSON output. """ if not client: print("ERROR (llm_handler.plan_kit): LLM client not initialized.") return None # Or: {"error": "LLM Client not initialized"} if not issue_data: print("ERROR (llm_handler.plan_kit): No issue data provided for planning.") return None # Or: {"error": "No issue data"} issue_title = issue_data.get("title", "N/A") issue_snippet = issue_data.get("body_snippet", "No description available.") issue_labels = issue_data.get("labels", []) # Define available kit components for the LLM to choose from available_components = [ "repo_details_and_clone_command", # Basic repo info, clone command "contribution_guidelines_link", # Link to CONTRIBUTING.md "contribution_guidelines_summary_ai", # AI Summary of CONTRIBUTING.md "repository_structure_modal_ai", # File listing via Modal + AI suggested files # We could break down "repository_structure_modal_ai" further if needed: # "repository_files_modal_raw_list", # "ai_suggested_start_files_from_list" ] components_description = ( "- repo_details_and_clone_command: Basic repository information and git clone command.\n" "- contribution_guidelines_link: A direct link to the project's CONTRIBUTING.md file (if found).\n" "- contribution_guidelines_summary_ai: An AI-generated summary of the key points from CONTRIBUTING.md.\n" "- repository_structure_modal_ai: A top-level file/folder listing from a repository clone (via Modal), followed by AI suggestions for relevant files based on the issue." ) system_prompt = ( "You are an expert onboarding assistant for open-source contributors. Your task is to intelligently plan " "the components of an onboarding kit that would be most helpful for a developer tackling a specific GitHub issue. " "You must respond ONLY with a valid JSON object containing a single key 'include_components' whose value is a list of strings, " "where each string is one of the component names provided." ) user_prompt = ( f"Based on the following GitHub issue details for a project searched under the language context '{language_searched}':\n" f"Issue Title: \"{issue_title}\"\n" f"Issue Snippet: \"{issue_snippet}\"\n" f"Issue Labels: {issue_labels}\n\n" f"And considering the following available onboarding kit components and their descriptions:\n" f"{components_description}\n\n" f"Which components should be included in the onboarding kit for this specific issue to be most helpful? " f"For example, if the issue is a very simple documentation typo, a full 'repository_structure_modal_ai' might be overkill. " f"If no contribution guidelines are typically found for a project, 'contribution_guidelines_summary_ai' would not be applicable. (You don't know this yet, but keep it in mind for general reasoning). " f"Prioritize helpfulness for a beginner. Respond ONLY with a JSON object in the format: " f"{{\"include_components\": [\"component_name_1\", \"component_name_2\", ...]}}" ) print(f"LLM Handler (plan_kit): Sending request to plan kit components. Model: {model_name}") try: # Forcing JSON response mode if available and model supports it well # gpt-4o-mini and newer gpt-3.5-turbo models usually handle "Respond ONLY with a valid JSON" well. # For stronger enforcement, you can use response_format={"type": "json_object"} with compatible models. completion_params = { "model": model_name, "messages": [{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}], "temperature": 0.2, # Low temperature for more deterministic structural output "max_tokens": 200, # JSON output should be relatively small "top_p": 1.0, } # Check if the model might be one that supports explicit JSON mode via response_format if "gpt-4o" in model_name or "gpt-3.5-turbo-0125" in model_name or "gpt-3.5-turbo-1106" in model_name: # Add other compatible models if known completion_params["response_format"] = {"type": "json_object"} completion = client.chat.completions.create(**completion_params) raw_response_content = completion.choices[0].message.content print(f"LLM Handler (plan_kit): Raw JSON response received: {raw_response_content}") # Attempt to parse the JSON parsed_plan = json.loads(raw_response_content) if "include_components" in parsed_plan and isinstance(parsed_plan["include_components"], list): # Further validation: ensure all component names are valid (optional but good) valid_components = [comp for comp in parsed_plan["include_components"] if comp in available_components] if len(valid_components) != len(parsed_plan["include_components"]): print("Warning (llm_handler.plan_kit): LLM returned some invalid component names.") final_plan = {"include_components": valid_components} print(f"LLM Handler (plan_kit): Parsed plan: {final_plan}") return final_plan else: print("ERROR (llm_handler.plan_kit): LLM response was not in the expected JSON format (missing 'include_components' list).") return {"error": "LLM response format error", "details": "Missing 'include_components' list."} except json.JSONDecodeError as json_e: print(f"ERROR (llm_handler.plan_kit): Failed to decode JSON from LLM response. Error: {json_e}. Response was: {raw_response_content}") return {"error": "JSON decode error", "details": str(json_e), "raw_response": raw_response_content} except Exception as e: print(f"ERROR (llm_handler.plan_kit): LLM API call failed: {e}") return {"error": f"LLM API call failed: {str(e)}"}