# core/kit_generator.py
"""Build the Markdown "onboarding kit" shown to a contributor for a chosen issue.

The public entry point is :func:`generate_kit_from_plan`, which assembles the
kit from independently generated sections according to a list of component
names. Each section generator returns a Markdown string and degrades to an
explanatory placeholder when the data it needs is unavailable.
"""

from .github_client import get_file_content, get_file_url_from_repo, get_repository_details
from .llm_handler import suggest_relevant_code_locations, summarize_text_content
from .modal_processor import get_repo_file_listing_via_modal

_GITHUB_HTML_PREFIX = "https://github.com/"
_GITHUB_API_REPOS_PREFIX = "https://api.github.com/repos/"

# Label shown to the user when the default branch could not be looked up.
_FALLBACK_BRANCH_LABEL = "main (assumed)"

# Value used by issue_data producers (and by this module's defaults) to mean
# "no repository URL known".
_MISSING_URL = "#"

# Locations probed for contribution guidelines, in priority order.
_CONTRIBUTING_PATHS = [
    "CONTRIBUTING.md",
    ".github/CONTRIBUTING.md",
    "docs/CONTRIBUTING.md",
    "CONTRIBUTING.rst",
    ".github/CONTRIBUTING.rst",
    "CONTRIBUTING",
]

# Substrings that the LLM helpers embed in their return value to signal failure.
_LLM_ERROR_MARKERS = ("LLM Client not initialized", "LLM API error")

_MAX_FILES_TO_DISPLAY = 15


# --- Small URL / text helpers shared by several sections ---

def _parse_repo_full_name(repo_html_url: str) -> str | None:
    """Return ``"owner/repo"`` for a ``https://github.com/owner/repo`` URL, else ``None``."""
    if not repo_html_url or not repo_html_url.startswith(_GITHUB_HTML_PREFIX):
        return None
    segments = repo_html_url.removeprefix(_GITHUB_HTML_PREFIX).split("/")
    if len(segments) < 2:
        return None
    owner = segments[0]
    # A trailing ".git" is clone-URL decoration and must not reach the API URL.
    repo = segments[1].removesuffix(".git")
    if not owner or not repo:
        return None
    return f"{owner}/{repo}"


def _to_clone_url(repo_html_url: str) -> str:
    """Return *repo_html_url* with exactly one ``.git`` suffix and no trailing slash."""
    trimmed = repo_html_url.rstrip("/")
    return trimmed if trimmed.endswith(".git") else f"{trimmed}.git"


def _repo_dir_name(repo_html_url: str) -> str:
    """Return the directory name ``git clone`` would create for *repo_html_url*."""
    last_segment = repo_html_url.rstrip("/").split("/")[-1].removesuffix(".git")
    return last_segment or "repository-name"


def _infer_matched_path(found_url: str, candidate_paths: list[str]) -> str | None:
    """Guess which of *candidate_paths* the URL *found_url* points at.

    Candidates are tried longest first so that ``.github/CONTRIBUTING.md`` is
    not mistaken for the root-level ``CONTRIBUTING.md`` it also contains.
    A suffix match is preferred; a plain substring match is the fallback.
    """
    url_path = found_url.split("?", 1)[0].split("#", 1)[0].rstrip("/").lower()
    longest_first = sorted(candidate_paths, key=len, reverse=True)
    for candidate in longest_first:
        if url_path.endswith("/" + candidate.lower()):
            return candidate
    for candidate in longest_first:
        if candidate.lower() in url_path:
            return candidate
    return None


def _is_usable_llm_output(text: str | None, extra_error_markers: tuple[str, ...] = ()) -> bool:
    """Return True when *text* is non-empty and contains no known error marker."""
    if not text:
        return False
    return not any(marker in text for marker in _LLM_ERROR_MARKERS + extra_error_markers)


# --- Helper function to get common repo info needed by multiple sections ---

def _get_common_repo_info(issue_data: dict) -> tuple[str | None, str | None, str]:
    """Derive repository identity and default-branch information from *issue_data*.

    Reads ``repository_html_url`` and ``repository_api_url`` from *issue_data*
    and asks ``get_repository_details`` for the repository's ``default_branch``.

    Returns:
        A ``(repo_full_name, branch_from_api, default_branch_name)`` tuple:

        * ``repo_full_name`` - ``"owner/repo"``, or ``None`` if the HTML URL is
          not a recognisable GitHub repository URL.
        * ``branch_from_api`` - the default branch reported by the API, or
          ``None`` when it could not be determined.
        * ``default_branch_name`` - ``branch_from_api`` when known, otherwise
          the display fallback ``"main (assumed)"``.
    """
    repo_full_name = _parse_repo_full_name(issue_data.get("repository_html_url") or "")
    if repo_full_name is None:
        return None, None, _FALLBACK_BRANCH_LABEL

    # Prefer the API URL supplied with the issue; rebuild it only if unusable.
    repo_api_url = issue_data.get("repository_api_url")
    if not repo_api_url or not repo_api_url.startswith(_GITHUB_API_REPOS_PREFIX):
        repo_api_url = f"{_GITHUB_API_REPOS_PREFIX}{repo_full_name}"
        print(f"Kit Generator (_get_common_repo_info): Constructed repo_api_url: {repo_api_url}")

    repo_details = get_repository_details(repo_api_url)
    branch_from_api = (repo_details.get("default_branch") if repo_details else None) or None
    return repo_full_name, branch_from_api, branch_from_api or _FALLBACK_BRANCH_LABEL


# --- Helper functions for generating individual kit sections ---

def _generate_repo_details_section(issue_data: dict, default_branch_name: str) -> str:
    """Render the "Issue Details" and "Initial Setup Guide" Markdown sections.

    Args:
        issue_data: Issue record; ``title``, ``html_url`` and
            ``repository_html_url`` are read, each with a placeholder default.
        default_branch_name: Branch label mentioned in the clone note.

    Returns:
        Markdown containing the issue/repository links plus clone and ``cd``
        commands. When no repository URL is known, placeholders are shown
        instead of a command built from the ``#`` sentinel.
    """
    issue_title = issue_data.get("title", "N/A")
    issue_html_url = issue_data.get("html_url", "#")
    repo_html_url = issue_data.get("repository_html_url") or _MISSING_URL

    if repo_html_url != _MISSING_URL:
        # Ensure .git suffix for clone command displayed to user
        clone_url_display = _to_clone_url(repo_html_url)
        repo_name_for_cd = _repo_dir_name(repo_html_url)
    else:
        clone_url_display = "<repository-url>"
        repo_name_for_cd = "repository-name"

    return f"""
## 🔗 Issue Details
- **Issue Link:** [{issue_title}]({issue_html_url})
- **Repository:** [{repo_html_url}]({repo_html_url})

## 🛠️ Initial Setup Guide

1.  **Clone the Repository:**
    Open your terminal and run the following command to clone the repository to your local machine:
    ```bash
    git clone {clone_url_display}
    ```
    *(Note: Ensure you have Git installed. The repository might use a different default branch name than '{default_branch_name}'.)*

2.  **Navigate into the Project Directory:**
    ```bash
    cd {repo_name_for_cd}
    ```
    *(This assumes the directory name matches the repository name. Adjust if needed.)*
"""


def _generate_contribution_guidelines_section(
    repo_full_name: str | None,
    branch_from_api: str | None,
    *,
    include_summary: bool = True,
) -> str:
    """Render the "Contribution Guidelines" section.

    Args:
        repo_full_name: ``"owner/repo"``; when ``None`` no lookup is attempted.
        branch_from_api: Branch passed through to the GitHub helpers
            (``None`` lets them pick their own default).
        include_summary: When False, only the link is produced: the file
            content is not fetched and the LLM is not called.

    Returns:
        Markdown with the section heading, a link to the guidelines (or a
        "could not find" note) and, if requested, an AI summary or a note
        explaining why no summary is available.
    """
    section_title = (
        "## 📖 Contribution Guidelines\n"
        "It's highly recommended to read the project's contribution guidelines before you start coding.\n"
    )
    guidelines_link_markdown = (
        "- **Guidelines Link:** _Could not find contribution guidelines in common locations._"
    )
    summary_markdown = ""

    found_contrib_display_url = None
    if repo_full_name:
        found_contrib_display_url = get_file_url_from_repo(
            repo_full_name, _CONTRIBUTING_PATHS, default_branch=branch_from_api
        )

    if found_contrib_display_url:
        guidelines_link_markdown = (
            f"- **Guidelines Link:** [{found_contrib_display_url}]({found_contrib_display_url})"
        )
        if include_summary:
            summary_markdown = _summarize_guidelines(
                repo_full_name, branch_from_api, found_contrib_display_url
            )

    return f"{section_title}{guidelines_link_markdown}{summary_markdown}"


def _summarize_guidelines(repo_full_name: str, branch_from_api: str | None, found_url: str) -> str:
    """Fetch the guidelines file behind *found_url* and return its summary Markdown."""
    # get_file_url_from_repo only hands back a URL, so the repo-relative path
    # needed for the content fetch has to be inferred from it.
    path_that_worked = _infer_matched_path(found_url, _CONTRIBUTING_PATHS)
    if not path_that_worked:
        return (
            "\n\n_Could not determine specific path of contribution file for AI summary, "
            "but a link was found._"
        )

    print(f"Kit Generator (_contrib_guidelines): Found guidelines at '{path_that_worked}'. Fetching content...")
    contrib_content_text = get_file_content(repo_full_name, path_that_worked, branch=branch_from_api)
    if not contrib_content_text:
        return "\n\n_Could not fetch content of contribution guidelines for AI summary._"

    print("Kit Generator (_contrib_guidelines): Content fetched. Requesting LLM summary...")
    summary = summarize_text_content(contrib_content_text, purpose="contribution guidelines")
    if _is_usable_llm_output(summary, extra_error_markers=("No content provided",)):
        return f"\n\n**Key Takeaways (AI Summary):**\n{summary}"

    print(f"Kit Generator (_contrib_guidelines): LLM summary failed or returned error: {summary}")
    return "\n\n_AI summary for contribution guidelines could not be generated at this time._"


def _generate_modal_repo_structure_section(
    issue_data: dict,  # Contains repo_html_url and issue_body_snippet
    language_searched: str,
) -> str:
    """Render the "Repository Structure (via Modal)" section.

    Requests a file listing for the repository through
    ``get_repo_file_listing_via_modal`` and, when files come back, asks
    ``suggest_relevant_code_locations`` for starting points.

    Args:
        issue_data: Issue record; ``repository_html_url`` and ``body_snippet``
            are read.
        language_searched: Language passed on to the suggestion helper.

    Returns:
        Markdown with the section heading, a truncated file list (or a note
        explaining why none is available) and optional AI suggestions.
    """
    section_title = "## 📂 Quick Look: Repository Structure (via Modal)\n"
    repo_html_url = issue_data.get("repository_html_url", _MISSING_URL)
    issue_body_snippet = issue_data.get("body_snippet", "No issue description snippet available.")

    if not repo_html_url or repo_html_url == _MISSING_URL:
        return f"{section_title}_Repository URL not available to fetch file listing._"

    print(f"Kit Generator (_modal_structure): Requesting file listing for '{repo_html_url}' via Modal...")
    modal_response = get_repo_file_listing_via_modal(_to_clone_url(repo_html_url))

    if not modal_response:
        return f"{section_title}_Could not retrieve repository file listing at this time._"

    if modal_response.get("status") != "success":
        error_message = modal_response.get("message", "Unknown error from Modal")
        return f"{section_title}_Could not retrieve repository file listing via Modal: {error_message}_"

    files_from_modal = modal_response.get("files", [])
    if not files_from_modal:
        return (
            f"{section_title}_Repository cloned successfully via Modal, "
            "but no files were found at the top level._"
        )

    # Show only the first few entries and summarise the remainder in one line.
    file_list_items = [f"- `{item}`" for item in files_from_modal[:_MAX_FILES_TO_DISPLAY]]
    hidden_count = len(files_from_modal) - _MAX_FILES_TO_DISPLAY
    if hidden_count > 0:
        file_list_items.append(f"- ... and {hidden_count} more.")
    modal_file_listing_text = (
        "Here's a quick look at some top-level files and folders:\n" + "\n".join(file_list_items)
    )

    print("Kit Generator (_modal_structure): Sending file list and issue snippet to LLM for relevant file suggestions.")
    ai_suggestions = suggest_relevant_code_locations(
        issue_snippet=issue_body_snippet,
        file_list=files_from_modal,
        language=language_searched,
    )
    if _is_usable_llm_output(ai_suggestions):
        ai_suggested_files_text = (
            f"\n\n**💡 AI Suggested Starting Points (based on issue & file list):**\n{ai_suggestions}"
        )
    else:
        ai_suggested_files_text = (
            "\n\n_AI could not suggest specific files to start with for this issue at this time._"
        )

    return f"{section_title}{modal_file_listing_text}{ai_suggested_files_text}"


# --- Main New Orchestrating Function ---

def generate_kit_from_plan(
    issue_data: dict,
    language_searched: str,
    components_to_include: list[str],
) -> str:
    """Generate Markdown for an onboarding kit according to a component plan.

    Args:
        issue_data: Issue record consumed by the section generators.
        language_searched: Language forwarded to the AI file-suggestion step.
        components_to_include: Names of the sections to generate. Recognised
            values are ``"repo_details_and_clone_command"``,
            ``"contribution_guidelines_link"``,
            ``"contribution_guidelines_summary_ai"`` and
            ``"repository_structure_modal_ai"``; unknown names are ignored.

    Returns:
        The assembled Markdown, or a string starting with ``"Error:"`` when
        *issue_data* or *components_to_include* is empty.
    """
    if not issue_data:
        return "Error: No issue data provided to generate kit."
    if not components_to_include:
        return "Error: No components specified for kit generation plan."

    print(f"Kit Generator (plan): Starting kit generation with components: {components_to_include}")

    # Fetch common repo info once
    repo_full_name, branch_from_api, default_branch_name = _get_common_repo_info(issue_data)

    # Header for the kit
    issue_title = issue_data.get("title", "N/A")
    kit_header = f"""
# 👋 Onboarding Kit for: {issue_title}

Congratulations on choosing this issue! Here's some information to help you get started.
"""
    markdown_parts = [kit_header.strip()]

    # Generate sections based on the plan
    if "repo_details_and_clone_command" in components_to_include:
        print("Kit Generator (plan): Adding repo details and clone command.")
        markdown_parts.append(_generate_repo_details_section(issue_data, default_branch_name))

    # The link and the AI summary share one section. Requesting the summary
    # implies the link; requesting only the link skips the content fetch and
    # the LLM call entirely.
    generate_guidelines_link = "contribution_guidelines_link" in components_to_include
    generate_guidelines_summary = "contribution_guidelines_summary_ai" in components_to_include
    if generate_guidelines_link or generate_guidelines_summary:
        print("Kit Generator (plan): Adding contribution guidelines section (link and/or summary).")
        markdown_parts.append(
            _generate_contribution_guidelines_section(
                repo_full_name,
                branch_from_api,
                include_summary=generate_guidelines_summary,
            )
        )

    if "repository_structure_modal_ai" in components_to_include:
        print("Kit Generator (plan): Adding repository structure (Modal) and AI file suggestions.")
        markdown_parts.append(_generate_modal_repo_structure_section(issue_data, language_searched))

    # Footer
    markdown_parts.append(
        "\nHappy contributing! Remember to communicate with the project maintainers if you have questions."
    )

    return "\n\n".join(markdown_parts).strip()


# --- Backward-compatible entry point, kept until callers use generate_kit_from_plan ---

def generate_basic_kit_content(issue_data: dict, language_searched: str) -> str:
    """Generate the "basic" kit by delegating to :func:`generate_kit_from_plan`.

    The basic plan contains the repository details, the contribution
    guidelines link (without the AI summary) and the Modal repository
    structure with AI file suggestions.
    """
    default_plan_for_basic = [
        "repo_details_and_clone_command",
        "contribution_guidelines_link",  # Basic just wants the link
        # "contribution_guidelines_summary_ai" is deliberately not in "basic"
        "repository_structure_modal_ai",  # Basic had the modal files and AI suggestions
    ]
    return generate_kit_from_plan(issue_data, language_searched, default_plan_for_basic)