import openai  # OpenAI Python SDK; this module talks to the OpenAI API directly (no custom base_url)
import json
# Import the API key from our config loader
from utils.config_loader import OPENAI_API_KEY

# Initialize the OpenAI client
client = None
if OPENAI_API_KEY:
    try:
        client = openai.OpenAI(
            api_key=OPENAI_API_KEY
            # No base_url needed for direct OpenAI
        )
        print("OpenAI client initialized successfully in llm_handler.")
    except Exception as e:
        print(f"Error initializing OpenAI client in llm_handler: {e}")
        client = None
else:
    print("WARNING (llm_handler): OPENAI_API_KEY not configured. LLM calls will fail.")


def get_simple_issue_suggestion(
        issues_data: list[dict],
        language: str,
        target_count: int = 1,
        model_name: str = "gpt-4o-mini", # Or your preferred model
        additional_prompt_context: str = "" # Extra guidance appended to the system prompt
    ) -> str | None:
    """
    Sends issue data to the OpenAI API and asks which issue(s) might be best suited for a beginner.
    """
    if not client:
        print("LLM client (OpenAI) in get_simple_issue_suggestion is not initialized.")
        return "LLM client (OpenAI) not initialized. Check API Key configuration."
    if not issues_data:
        print("No issues provided to LLM for suggestion.")
        return "No issues provided to LLM for suggestion."

    prompt_issues_str = "" # Rebuild this based on your existing logic
    for i, issue in enumerate(issues_data):
        snippet = issue.get('body_snippet', 'No description available.')
        title = issue.get('title', 'No title')
        url = issue.get('html_url', '#')
        labels = ", ".join(issue.get('labels', [])) if issue.get('labels') else "No labels"
        prompt_issues_str += (
            f"\n--- Issue {i+1} ---\n"
            f"Title: {title}\nURL: {url}\nLabels: {labels}\nSnippet from body: {snippet}\n-----------------\n"
        )

    system_prompt = (
        "You are an expert assistant helping a new open-source contributor. "
        "Your task is to analyze the provided list of GitHub issues and recommend "
        f"the top {target_count} that would be most suitable for a beginner ideally in {language} (if specified and makes sense for the issues). "
        "Consider factors like clarity, labels, and apparent scope. "
        f"{additional_prompt_context}" # ADDED additional context here
        " If the user-specified language seems mismatched with the provided issues, please make your best judgment "
        "based on the issue content itself or note the potential mismatch in your recommendation."
    )
    user_prompt = (
        f"Here is a list of GitHub issues found when searching for the language '{language}'. "
        # Note: additional_prompt_context is injected via the system prompt above.
        f"Please review them and suggest the top {target_count} issue(s) that seem most suitable for a beginner. "
        f"For each suggested issue, provide a concise explanation (1-2 sentences) stating *why* it's a good choice for a beginner. "
        f"If you suggest an issue, please refer to it by its number (e.g., 'Issue 1')."
        f"\nHere are the issues:\n{prompt_issues_str}"
    )

    temperature_val = 0.4
    max_tokens_val = 200 + (target_count * 150)
    top_p_val = 0.9 # Nucleus-sampling cutoff; use 1.0 to rely on temperature alone

    print(f"\nSending request to OpenAI LLM for issue suggestion...")
    print(f"Model: {model_name}, Temp: {temperature_val}, MaxTokens: {max_tokens_val}")

    try:
        completion = client.chat.completions.create(
            model=model_name,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            temperature=temperature_val,
            max_tokens=max_tokens_val,
            top_p=top_p_val
        )

        suggestion_text = completion.choices[0].message.content
        print("OpenAI LLM Suggestion Received.")
        return suggestion_text.strip()

    except openai.APIConnectionError as e:
        print(f"OpenAI API Connection Error: {e}")
        return f"LLM suggestion failed due to connection error: {e}"
    except openai.RateLimitError as e: # Good to handle this explicitly
        print(f"OpenAI API Rate Limit Error: {e}")
        return f"LLM suggestion failed due to rate limit: {e}. Check your OpenAI plan and usage."
    except openai.AuthenticationError as e: # Added for bad API key
        print(f"OpenAI API Authentication Error: {e}. Check your OPENAI_API_KEY.")
        return f"LLM suggestion failed due to authentication error: {e}."
    except openai.APIStatusError as e:
        print(f"OpenAI API Status Error: Status {e.status_code} - Response: {e.response}")
        return f"LLM suggestion failed due to API status error: {e.status_code}"
    except Exception as e:
        print(f"LLM API call to OpenAI failed with an unexpected error: {e}")
        print(f"Type of error: {type(e)}")
        return f"LLM suggestion failed with an unexpected error: {e}"

# --- NEW FUNCTION 1: Summarize Text Content ---
def summarize_text_content(
        text_content: str,
        purpose: str = "contribution guidelines", # e.g., "issue description", "documentation section"
        max_summary_tokens: int = 200, # Adjust as needed
        model_name: str = "gpt-4o-mini" # Or your preferred model
    ) -> str | None:
    """
    Summarizes a given text content using an LLM.
    """
    if not client:
        print("ERROR (llm_handler.summarize_text_content): LLM client not initialized.")
        return "LLM Client not initialized. Cannot summarize."
    if not text_content or not text_content.strip():
        print("Warning (llm_handler.summarize_text_content): No text content provided to summarize.")
        return "No content provided for summarization."

    # Heuristic: If text is already short, just return it or a small part.
    # This avoids wasting API calls on tiny texts. (Count words approx)
    if len(text_content.split()) < 75:  # Arbitrary threshold for "short" content
        print("Info (llm_handler.summarize_text_content): Content too short, returning as is or snippet.")
        if len(text_content) > 500:
            return f"The {purpose} document is brief: \"{text_content[:500]}...\""
        return text_content


    system_prompt = (
        f"You are an expert summarizer. Your task is to provide a concise summary of the following '{purpose}' document. "
        "Focus on the most critical information a new contributor would need. "
        "For contribution guidelines, highlight key setup steps, coding style conventions, testing requirements, and pull request procedures. "
        "Keep the summary brief and actionable."
    )
    user_prompt = (
        f"Please summarize the key points of the following {purpose} document:\n\n"
        f"```text\n{text_content[:8000]}\n```" # Limit context sent to LLM
        # Using 8000 characters as a rough limit to fit within context windows & manage cost.
        # Adjust this based on typical CONTRIBUTING.md length and model context limits.
    )

    print(f"LLM Handler: Sending request to summarize {purpose}. Model: {model_name}")
    try:
        completion = client.chat.completions.create(
            model=model_name,
            messages=[{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}],
            temperature=0.2, # Lower temperature for factual summarization
            max_tokens=max_summary_tokens,
            top_p=1.0
        )
        summary_text = completion.choices[0].message.content
        print(f"LLM Handler: Summary for {purpose} received.")
        return summary_text.strip()
    except Exception as e:
        print(f"ERROR (llm_handler.summarize_text_content): LLM API call failed: {e}")
        return f"Could not summarize the {purpose}: LLM API error."

# --- NEW FUNCTION 2: Suggest Relevant Code Locations ---
def suggest_relevant_code_locations(
        issue_snippet: str,
        file_list: list[str],
        language: str, # Language of the project
        max_suggestion_tokens: int = 200, # Adjust as needed
        model_name: str = "gpt-4o-mini" # Or your preferred model
    ) -> str | None:
    """
    Suggests relevant files/folders based on an issue snippet and a list of files.
    """
    if not client:
        print("ERROR (llm_handler.suggest_relevant_code_locations): LLM client not initialized.")
        return "LLM Client not initialized. Cannot suggest locations."
    if not issue_snippet or not issue_snippet.strip():
        return "No issue description provided to suggest locations."
    if not file_list:
        return "No file list provided to suggest locations from."

    # Format file list for the prompt
    formatted_file_list = "\n".join([f"- `{f}`" for f in file_list])
    if not formatted_file_list: # Should not happen if file_list is not empty
        formatted_file_list = "No files listed."

    system_prompt = (
        f"You are an AI assistant helping a software developer navigate a new '{language}' codebase. "
        "Your goal is to identify potentially relevant files or folders for a given issue, based on a provided list of top-level project files/folders."
    )
    user_prompt = (
        f"A developer is starting work on an issue with the following description snippet:\n"
        f"'''\n{issue_snippet}\n'''\n\n"
        f"The top-level files and folders available in the repository are:\n"
        f"{formatted_file_list}\n\n"
        f"Based *only* on the issue snippet and this file list, please suggest 2-3 files or folders that might be most relevant for investigating this issue. "
        f"For each suggestion, provide a brief (1-sentence) explanation of why it might be relevant. "
        f"If no files seem obviously relevant from the top-level list, say so."
    )

    print(f"LLM Handler: Sending request to suggest relevant code locations. Model: {model_name}")
    try:
        completion = client.chat.completions.create(
            model=model_name,
            messages=[{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}],
            temperature=0.5, # Moderate temperature for some reasoning
            max_tokens=max_suggestion_tokens,
            top_p=1.0
        )
        suggestion_text = completion.choices[0].message.content
        print("LLM Handler: Code location suggestions received.")
        return suggestion_text.strip()
    except Exception as e:
        print(f"ERROR (llm_handler.suggest_relevant_code_locations): LLM API call failed: {e}")
        return f"Could not suggest code locations: LLM API error."

def plan_onboarding_kit_components(
        issue_data: dict,
        language_searched: str,
        model_name: str = "gpt-4.1-mini" # Or your preferred model
    ) -> dict | None:
    """
    Uses an LLM to decide which onboarding kit components are most relevant for a given issue.
    Returns a dictionary based on the LLM's JSON output.
    """
    if not client:
        print("ERROR (llm_handler.plan_kit): LLM client not initialized.")
        return None # Or: {"error": "LLM Client not initialized"}
    if not issue_data:
        print("ERROR (llm_handler.plan_kit): No issue data provided for planning.")
        return None # Or: {"error": "No issue data"}

    issue_title = issue_data.get("title", "N/A")
    issue_snippet = issue_data.get("body_snippet", "No description available.")
    issue_labels = issue_data.get("labels", [])

    # Define available kit components for the LLM to choose from
    available_components = [
        "repo_details_and_clone_command",      # Basic repo info, clone command
        "contribution_guidelines_link",        # Link to CONTRIBUTING.md
        "contribution_guidelines_summary_ai",  # AI Summary of CONTRIBUTING.md
        "repository_structure_modal_ai",       # File listing via Modal + AI suggested files
        # We could break down "repository_structure_modal_ai" further if needed:
        # "repository_files_modal_raw_list",
        # "ai_suggested_start_files_from_list"
    ]
    components_description = (
        "- repo_details_and_clone_command: Basic repository information and git clone command.\n"
        "- contribution_guidelines_link: A direct link to the project's CONTRIBUTING.md file (if found).\n"
        "- contribution_guidelines_summary_ai: An AI-generated summary of the key points from CONTRIBUTING.md.\n"
        "- repository_structure_modal_ai: A top-level file/folder listing from a repository clone (via Modal), followed by AI suggestions for relevant files based on the issue."
    )

    system_prompt = (
        "You are an expert onboarding assistant for open-source contributors. Your task is to intelligently plan "
        "the components of an onboarding kit that would be most helpful for a developer tackling a specific GitHub issue. "
        "You must respond ONLY with a valid JSON object containing a single key 'include_components' whose value is a list of strings, "
        "where each string is one of the component names provided."
    )
    user_prompt = (
        f"Based on the following GitHub issue details for a project searched under the language context '{language_searched}':\n"
        f"Issue Title: \"{issue_title}\"\n"
        f"Issue Snippet: \"{issue_snippet}\"\n"
        f"Issue Labels: {issue_labels}\n\n"
        f"And considering the following available onboarding kit components and their descriptions:\n"
        f"{components_description}\n\n"
        f"Which components should be included in the onboarding kit for this specific issue to be most helpful? "
        f"For example, if the issue is a very simple documentation typo, a full 'repository_structure_modal_ai' might be overkill. "
        f"If no contribution guidelines are typically found for a project, 'contribution_guidelines_summary_ai' would not be applicable. (You don't know this yet, but keep it in mind for general reasoning). "
        f"Prioritize helpfulness for a beginner. Respond ONLY with a JSON object in the format: "
        f"{{\"include_components\": [\"component_name_1\", \"component_name_2\", ...]}}"
    )

    print(f"LLM Handler (plan_kit): Sending request to plan kit components. Model: {model_name}")
    try:
        # Forcing JSON response mode if available and model supports it well
        # gpt-4o-mini and newer gpt-3.5-turbo models usually handle "Respond ONLY with a valid JSON" well.
        # For stronger enforcement, you can use response_format={"type": "json_object"} with compatible models.
        completion_params = {
            "model": model_name,
            "messages": [{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}],
            "temperature": 0.2, # Low temperature for more deterministic structural output
            "max_tokens": 200, # JSON output should be relatively small
            "top_p": 1.0,
        }
        # Enable explicit JSON mode via response_format for models known to support it
        if ("gpt-4o" in model_name or "gpt-4.1" in model_name
                or "gpt-3.5-turbo-0125" in model_name or "gpt-3.5-turbo-1106" in model_name):
            completion_params["response_format"] = {"type": "json_object"}


        completion = client.chat.completions.create(**completion_params)
        
        raw_response_content = completion.choices[0].message.content
        print(f"LLM Handler (plan_kit): Raw JSON response received: {raw_response_content}")

        # Attempt to parse the JSON
        parsed_plan = json.loads(raw_response_content)
        if "include_components" in parsed_plan and isinstance(parsed_plan["include_components"], list):
            # Further validation: ensure all component names are valid (optional but good)
            valid_components = [comp for comp in parsed_plan["include_components"] if comp in available_components]
            if len(valid_components) != len(parsed_plan["include_components"]):
                print("Warning (llm_handler.plan_kit): LLM returned some invalid component names.")
            
            final_plan = {"include_components": valid_components}
            print(f"LLM Handler (plan_kit): Parsed plan: {final_plan}")
            return final_plan
        else:
            print("ERROR (llm_handler.plan_kit): LLM response was not in the expected JSON format (missing 'include_components' list).")
            return {"error": "LLM response format error", "details": "Missing 'include_components' list."}

    except json.JSONDecodeError as json_e:
        print(f"ERROR (llm_handler.plan_kit): Failed to decode JSON from LLM response. Error: {json_e}. Response was: {raw_response_content}")
        return {"error": "JSON decode error", "details": str(json_e), "raw_response": raw_response_content}
    except Exception as e:
        print(f"ERROR (llm_handler.plan_kit): LLM API call failed: {e}")
        return {"error": f"LLM API call failed: {str(e)}"}