Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import os | |
| import logging | |
| from typing import List, Dict, Tuple | |
| from analyzer import combine_repo_files_for_llm | |
| from hf_utils import download_filtered_space_files | |
| # Setup logger | |
| logger = logging.getLogger(__name__) | |
def analyze_repo_chunk_for_context(chunk: str, repo_id: str) -> str:
    """
    Analyze a repository chunk to create conversational context for the chatbot.

    Sends the chunk to the LLM endpoint configured by the ``modal_api``
    (API key) and ``base_url`` environment variables and returns a short
    conversational summary focused on helping users understand the repository.

    Args:
        chunk: Raw text of one repository section (may span several files).
        repo_id: Hugging Face repository ID, used to anchor the prompt.

    Returns:
        The model-generated summary, or a fallback message embedding the
        error text if the import, client setup, or API call fails.
    """
    try:
        from openai import OpenAI

        # Fix: pass base_url through the constructor. The original assigned
        # client.base_url after construction, which sets it to None (breaking
        # the client) when the env var is missing; the constructor falls back
        # to the default endpoint on None instead.
        client = OpenAI(
            api_key=os.getenv("modal_api"),
            base_url=os.getenv("base_url"),
        )
        context_prompt = f"""You are analyzing a chunk of code from the repository '{repo_id}' to create a conversational summary for a chatbot assistant.
Create a concise but informative summary that helps understand:
- What this code section does
- Key functions, classes, or components
- Important features or capabilities
- How it relates to the overall repository purpose
- Any notable patterns or technologies used
Focus on information that would be useful for answering user questions about the repository.
Repository chunk:
{chunk}
Provide a clear, conversational summary in 2-3 paragraphs:"""
        response = client.chat.completions.create(
            model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
            messages=[
                {"role": "system", "content": "You are an expert code analyst creating conversational summaries for a repository assistant chatbot."},
                {"role": "user", "content": context_prompt}
            ],
            max_tokens=600,  # generous budget for detailed analysis of large chunks
            temperature=0.3
        )
        return response.choices[0].message.content
    except Exception as e:
        # Best-effort: summarization failures must not abort repository loading,
        # so return a readable placeholder instead of raising.
        logger.error(f"Error analyzing chunk for context: {e}")
        return f"Code section analysis unavailable: {e}"
def create_repo_context_summary(repo_content: str, repo_id: str) -> str:
    """
    Build a comprehensive chatbot context for *repo_id*.

    Splits the combined repository text into large line-based chunks,
    summarizes each chunk via analyze_repo_chunk_for_context, then asks the
    model for one final overview. Falls back to the raw section summaries
    (or an error message) when a stage fails.
    """
    try:
        content_lines = repo_content.split('\n')
        lines_per_chunk = 1200  # large chunks: better context, fewer API calls
        section_summaries = []
        logger.info(f"Analyzing repository {repo_id} in chunks for chatbot context")
        # Summarize every non-empty slice of the repository text.
        for start in range(0, len(content_lines), lines_per_chunk):
            piece = '\n'.join(content_lines[start:start + lines_per_chunk])
            if not piece.strip():
                continue
            piece_summary = analyze_repo_chunk_for_context(piece, repo_id)
            section_summaries.append(
                f"=== Section {len(section_summaries) + 1} ===\n{piece_summary}"
            )
        joined_sections = '\n'.join(section_summaries)
        # Second pass: condense the section summaries into one overview.
        try:
            from openai import OpenAI
            client = OpenAI(api_key=os.getenv("modal_api"))
            client.base_url = os.getenv("base_url")
            final_prompt = f"""Based on the following section summaries of repository '{repo_id}', create a comprehensive overview that a chatbot can use to answer user questions.
Section Summaries:
{joined_sections}
Create a well-structured overview covering:
1. Repository Purpose & Main Functionality
2. Key Components & Architecture
3. Important Features & Capabilities
4. Technology Stack & Dependencies
5. Usage Patterns & Examples
Make this comprehensive but conversational - it will be used by a chatbot to answer user questions about the repository."""
            response = client.chat.completions.create(
                model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
                messages=[
                    {"role": "system", "content": "You are creating a comprehensive repository summary for a chatbot assistant."},
                    {"role": "user", "content": final_prompt}
                ],
                max_tokens=1500,  # room for a comprehensive overview
                temperature=0.3
            )
            final_summary = response.choices[0].message.content
            # Ship both the overview and the per-section detail to the chatbot.
            full_context = f"""=== REPOSITORY ANALYSIS FOR {repo_id.upper()} ===
{final_summary}
=== DETAILED SECTION SUMMARIES ===
{joined_sections}"""
            logger.info(f"Created comprehensive context summary for {repo_id}")
            return full_context
        except Exception as e:
            # Overview generation failed: fall back to the raw section summaries.
            logger.error(f"Error creating final summary: {e}")
            return f"=== REPOSITORY ANALYSIS FOR {repo_id.upper()} ===\n\n" + '\n\n'.join(section_summaries)
    except Exception as e:
        logger.error(f"Error creating repo context summary: {e}")
        return f"Repository analysis unavailable: {e}"
def create_repo_explorer_tab() -> Tuple[Dict[str, gr.components.Component], Dict[str, gr.State]]:
    """
    Creates the Repo Explorer tab content and returns the component references and state variables.

    Must be called inside an active Gradio Blocks context; the layout is
    registered as a side effect of construction. Event wiring happens
    separately in setup_repo_explorer_events.
    """
    # State variables for repo explorer:
    # - repo_context_summary: LLM-generated repository analysis used as chat context
    # - current_repo_id: the repository ID most recently loaded
    states = {
        "repo_context_summary": gr.State(""),
        "current_repo_id": gr.State("")
    }
    gr.Markdown("### 🗂️ Deep Dive into a Specific Repository")
    # Input row: repository ID textbox (2/3 width) plus the load button (1/3 width).
    with gr.Row():
        with gr.Column(scale=2):
            repo_explorer_input = gr.Textbox(
                label="📁 Repository ID",
                placeholder="microsoft/DialoGPT-medium",
                info="Enter a Hugging Face repository ID to explore"
            )
        with gr.Column(scale=1):
            load_repo_btn = gr.Button("🚀 Load Repository", variant="primary", size="lg")
    # Status row: read-only feedback about loading/analysis progress.
    with gr.Row():
        repo_status_display = gr.Textbox(
            label="📊 Repository Status",
            interactive=False,
            lines=3,
            info="Current repository loading status and basic info"
        )
    # Chat row: assistant chatbot pre-seeded with usage instructions.
    with gr.Row():
        with gr.Column(scale=2):
            repo_chatbot = gr.Chatbot(
                label="🤖 Repository Assistant",
                height=400,
                type="messages",  # history is a list of {"role", "content"} dicts
                avatar_images=(
                    "https://cdn-icons-png.flaticon.com/512/149/149071.png",
                    "https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.png"
                ),
                show_copy_button=True,
                value=[{"role": "assistant", "content": "👋 Welcome to the Repository Explorer! \n\n🔍 **How to get started:**\n1. Enter a Hugging Face repository ID above (e.g., 'microsoft/DialoGPT-medium')\n2. Click '🚀 Load Repository' to download and analyze the repository\n3. Once loaded, I'll have comprehensive knowledge of all the files and can answer questions about:\n • What the repository does\n • How to install and use it\n • Code structure and architecture\n • Key features and capabilities\n • Examples and usage patterns\n\n💡 **Tip:** I analyze repositories in chunks to understand the entire codebase, not just a summary!\n\nPlease load a repository to begin exploring! 🚀"}]
            )
    # Message input row: user prompt box plus send button.
    with gr.Row():
        repo_msg_input = gr.Textbox(
            label="💭 Ask about this repository",
            placeholder="What does this repository do? How do I use it?",
            lines=1,
            scale=4,
            info="Ask anything about the loaded repository"
        )
        repo_send_btn = gr.Button("📤 Send", variant="primary", scale=1)
    # with gr.Column(scale=1):
    #     # Repository content preview
    #     repo_content_display = gr.Textbox(
    #         label="📄 Repository Content Preview",
    #         lines=20,
    #         show_copy_button=True,
    #         interactive=False,
    #         info="Overview of the loaded repository structure and content"
    #     )
    # Component references returned to the caller for event wiring.
    components = {
        "repo_explorer_input": repo_explorer_input,
        "load_repo_btn": load_repo_btn,
        "repo_status_display": repo_status_display,
        "repo_chatbot": repo_chatbot,
        "repo_msg_input": repo_msg_input,
        "repo_send_btn": repo_send_btn,
        # "repo_content_display": repo_content_display
    }
    return components, states
def handle_load_repository(repo_id: str) -> Tuple[str, str]:
    """Download a repository, combine its files, and build the chat context.

    Returns a (status message, context summary) pair. The summary is the
    empty string when no repository ID was given or loading failed.
    """
    # Guard: blank or whitespace-only input.
    if not repo_id.strip():
        return "Status: Please enter a repository ID.", ""
    try:
        logger.info(f"Loading repository for exploration: {repo_id}")
        # Fetch only text-like files and merge them into a single document.
        try:
            download_filtered_space_files(repo_id, local_dir="repo_files", file_extensions=['.py', '.md', '.txt'])
            combined_text_path = combine_repo_files_for_llm()
        except Exception as e:
            logger.error(f"Error downloading repository {repo_id}: {e}")
            return f"❌ Error downloading repository: {e}", ""
        with open(combined_text_path, "r", encoding="utf-8") as f:
            repo_content = f.read()
        status = f"✅ Repository '{repo_id}' loaded successfully!\n📁 Files processed and ready for exploration.\n🔄 Analyzing repository in chunks for comprehensive context...\n💬 You can now ask questions about this repository."
        # Build the comprehensive context summary via chunked analysis.
        logger.info(f"Creating context summary for {repo_id}")
        context_summary = create_repo_context_summary(repo_content, repo_id)
        logger.info(f"Repository {repo_id} loaded and analyzed successfully for exploration")
        return status, context_summary
    except Exception as e:
        logger.error(f"Error loading repository {repo_id}: {e}")
        return f"❌ Error loading repository: {e}", ""
def handle_repo_user_message(user_message: str, history: List[Dict[str, str]], repo_context_summary: str, repo_id: str) -> Tuple[List[Dict[str, str]], str]:
    """Record the user's chat turn and clear the input box.

    Returns the updated messages-format history plus an empty string used
    to reset the textbox. When no repository context exists yet the history
    is returned untouched, keeping the bot silent until a repo is loaded.
    """
    # No repository analyzed yet -> ignore the input entirely.
    if not repo_context_summary.strip():
        return history, ""
    # First turn: seed the chat with a repository-specific greeting.
    if not history:
        history = [{"role": "assistant", "content": f"Hello! I'm your assistant for the '{repo_id}' repository. I have analyzed all the files and created a comprehensive understanding of this repository. I'm ready to answer any questions about its functionality, usage, architecture, and more. What would you like to know?"}]
    # Append the user's turn only when they actually typed something.
    if user_message:
        history.append({"role": "user", "content": user_message})
    return history, ""
def handle_repo_bot_response(history: List[Dict[str, str]], repo_context_summary: str, repo_id: str) -> List[Dict[str, str]]:
    """Generate the assistant's reply to the latest user turn.

    Uses the pre-computed repository context summary as the system prompt so
    the model can answer questions specific to *repo_id*.

    Args:
        history: Chat history in messages format ({"role", "content"} dicts).
        repo_context_summary: Output of create_repo_context_summary; when
            empty, no repository is loaded and the history is returned as-is.
        repo_id: Repository ID used to frame the system prompt.

    Returns:
        The history with one assistant message appended (an apology message
        embedding the error text if the API call fails). The list is mutated
        in place and also returned.
    """
    # Only respond when a repository is loaded and the last turn is the user's.
    if not history or history[-1]["role"] != "user" or not repo_context_summary.strip():
        return history
    user_message = history[-1]["content"]
    # Specialized system prompt built around the comprehensive context summary.
    repo_system_prompt = f"""You are an expert assistant for the Hugging Face repository '{repo_id}'.
You have comprehensive knowledge about this repository based on detailed analysis of all its files and components.
Use the following comprehensive analysis to answer user questions accurately and helpfully:
{repo_context_summary}
Instructions:
- Answer questions clearly and conversationally about this specific repository
- Reference specific components, functions, or features when relevant
- Provide practical guidance on installation, usage, and implementation
- If asked about code details, refer to the analysis above
- Be helpful and informative while staying focused on this repository
- If something isn't covered in the analysis, acknowledge the limitation
Answer the user's question based on your comprehensive knowledge of this repository."""
    try:
        from openai import OpenAI

        # Fix: pass base_url through the constructor. Assigning
        # client.base_url after construction sets it to None (breaking the
        # client) when the env var is missing; the constructor falls back to
        # the default endpoint on None instead.
        client = OpenAI(
            api_key=os.getenv("modal_api"),
            base_url=os.getenv("base_url"),
        )
        response = client.chat.completions.create(
            model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
            messages=[
                {"role": "system", "content": repo_system_prompt},
                {"role": "user", "content": user_message}
            ],
            max_tokens=1024,
            temperature=0.7
        )
        bot_response = response.choices[0].message.content
        history.append({"role": "assistant", "content": bot_response})
    except Exception as e:
        # Surface the failure in-chat rather than crashing the UI callback.
        logger.error(f"Error generating repo bot response: {e}")
        error_response = f"I apologize, but I encountered an error while processing your question: {e}"
        history.append({"role": "assistant", "content": error_response})
    return history
def setup_repo_explorer_events(components: Dict[str, gr.components.Component], states: Dict[str, gr.State]):
    """Setup event handlers for the repo explorer components.

    Expects the component/state dicts returned by create_repo_explorer_tab.
    """
    # Load repository event: download + analyze, then record the repo ID in
    # state once loading has finished (chained via .then so the ID is only
    # stored after handle_load_repository returns).
    components["load_repo_btn"].click(
        fn=handle_load_repository,
        inputs=[components["repo_explorer_input"]],
        outputs=[components["repo_status_display"], states["repo_context_summary"]]
    ).then(
        fn=lambda repo_id: repo_id,  # copy textbox value into current_repo_id state
        inputs=[components["repo_explorer_input"]],
        outputs=[states["current_repo_id"]]
    )
    # Chat submission (Enter key): append the user turn and clear the textbox,
    # then generate the assistant reply from the updated history.
    components["repo_msg_input"].submit(
        fn=handle_repo_user_message,
        inputs=[components["repo_msg_input"], components["repo_chatbot"], states["repo_context_summary"], states["current_repo_id"]],
        outputs=[components["repo_chatbot"], components["repo_msg_input"]]
    ).then(
        fn=handle_repo_bot_response,
        inputs=[components["repo_chatbot"], states["repo_context_summary"], states["current_repo_id"]],
        outputs=[components["repo_chatbot"]]
    )
    # Send button mirrors the textbox submit behavior exactly.
    components["repo_send_btn"].click(
        fn=handle_repo_user_message,
        inputs=[components["repo_msg_input"], components["repo_chatbot"], states["repo_context_summary"], states["current_repo_id"]],
        outputs=[components["repo_chatbot"], components["repo_msg_input"]]
    ).then(
        fn=handle_repo_bot_response,
        inputs=[components["repo_chatbot"], states["repo_context_summary"], states["current_repo_id"]],
        outputs=[components["repo_chatbot"]]
    )