Spaces:

Agents-MCP-Hackathon
/

HF_RepoSense

Running

App Files Files Community

naman1102 committed 13 days ago

Commit

fd7c5f8

1 Parent(s): 3330689

new_bot

Browse files

Files changed (3) hide show

analyzer.py +2 -2
app.py +9 -0
repo_explorer.py +331 -0

analyzer.py CHANGED Viewed

@@ -140,7 +140,7 @@ def analyze_code_chunk(code: str, user_requirements: str = "") -> str:
             {"role": "system", "content": chunk_prompt},
             {"role": "user", "content": code}
         ],
-        max_tokens=512,
         temperature=0.4
     )
     return response.choices[0].message.content
@@ -190,7 +190,7 @@ def analyze_combined_file(output_file="combined_repo.txt", user_requirements: st
     try:
         with open(output_file, "r", encoding="utf-8") as f:
             lines = f.readlines()
-        chunk_size = 500
         chunk_jsons = []
         for i in range(0, len(lines), chunk_size):
             chunk = "".join(lines[i:i+chunk_size])

             {"role": "system", "content": chunk_prompt},
             {"role": "user", "content": code}
         ],
         temperature=0.4
     )
     return response.choices[0].message.content
     try:
         with open(output_file, "r", encoding="utf-8") as f:
             lines = f.readlines()
+        chunk_size = 1200
         chunk_jsons = []
         for i in range(0, len(lines), chunk_size):
             chunk = "".join(lines[i:i+chunk_size])

app.py CHANGED Viewed

@@ -10,6 +10,7 @@ import os
 from analyzer import combine_repo_files_for_llm, analyze_combined_file, parse_llm_json_response
 from hf_utils import download_space_repo, search_top_spaces
 from chatbot_page import chat_with_user, extract_keywords_from_conversation
 # --- Configuration ---
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
@@ -242,6 +243,8 @@ def create_ui() -> gr.Blocks:
         repo_ids_state = gr.State([])
         current_repo_idx_state = gr.State(0)
         user_requirements_state = gr.State("")  # Store user requirements from chatbot
         gr.Markdown(
             """
@@ -365,6 +368,9 @@ def create_ui() -> gr.Blocks:
                             interactive=False,
                             info="Current conversation status"
                         )
         # --- Footer ---
         gr.Markdown(
@@ -561,6 +567,9 @@ def create_ui() -> gr.Blocks:
             outputs=[repo_ids_state, current_repo_idx_state, df_output, status_box_analysis, tabs]
         )
     return app
 if __name__ == "__main__":

 from analyzer import combine_repo_files_for_llm, analyze_combined_file, parse_llm_json_response
 from hf_utils import download_space_repo, search_top_spaces
 from chatbot_page import chat_with_user, extract_keywords_from_conversation
+from repo_explorer import create_repo_explorer_tab, setup_repo_explorer_events
 # --- Configuration ---
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
         repo_ids_state = gr.State([])
         current_repo_idx_state = gr.State(0)
         user_requirements_state = gr.State("")  # Store user requirements from chatbot
+        loaded_repo_content_state = gr.State("")  # Store loaded repository content
+        current_repo_id_state = gr.State("")  # Store current repository ID
         gr.Markdown(
             """
                             interactive=False,
                             info="Current conversation status"
                         )
+            # --- Repo Explorer Tab ---
+            repo_explorer_tab, repo_components, repo_states = create_repo_explorer_tab()
         # --- Footer ---
         gr.Markdown(
             outputs=[repo_ids_state, current_repo_idx_state, df_output, status_box_analysis, tabs]
         )
+        # Repo Explorer Tab
+        setup_repo_explorer_events(repo_components, repo_states)
     return app
 if __name__ == "__main__":

repo_explorer.py ADDED Viewed

	@@ -0,0 +1,331 @@

+import gradio as gr
+import os
+import logging
+from typing import List, Dict, Tuple
+from analyzer import combine_repo_files_for_llm
+from hf_utils import download_space_repo
+# Setup logger
+logger = logging.getLogger(__name__)
def analyze_repo_chunk_for_context(chunk: str, repo_id: str) -> str:
    """
    Summarize one chunk of repository text so the chatbot can use it as context.

    The chunk is embedded in a prompt and sent to the chat-completions endpoint
    configured through the `modal_api` (API key) and `base_url` environment
    variables. The model's reply is returned as-is.

    Any `Exception` raised along the way is logged and converted into a
    "Code section analysis unavailable: ..." string instead of propagating.
    """
    try:
        from openai import OpenAI

        llm = OpenAI(api_key=os.getenv("modal_api"))
        llm.base_url = os.getenv("base_url")

        context_prompt = f"""You are analyzing a chunk of code from the repository '{repo_id}' to create a conversational summary for a chatbot assistant.
Create a concise but informative summary that helps understand:
- What this code section does
- Key functions, classes, or components
- Important features or capabilities
- How it relates to the overall repository purpose
- Any notable patterns or technologies used
Focus on information that would be useful for answering user questions about the repository.
Repository chunk:
{chunk}
Provide a clear, conversational summary in 2-3 paragraphs:"""

        conversation = [
            {
                "role": "system",
                "content": "You are an expert code analyst creating conversational summaries for a repository assistant chatbot.",
            },
            {"role": "user", "content": context_prompt},
        ]
        completion = llm.chat.completions.create(
            model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
            messages=conversation,
            max_tokens=600,  # room for a 2-3 paragraph summary of a large chunk
            temperature=0.3,
        )
        return completion.choices[0].message.content
    except Exception as exc:
        # Best-effort: the caller keeps this text in place of a real summary.
        logger.error(f"Error analyzing chunk for context: {exc}")
        return f"Code section analysis unavailable: {exc}"
def create_repo_context_summary(repo_content: str, repo_id: str) -> str:
    """
    Build the chatbot's context for a repository by summarizing it in chunks.

    The content is split into 1200-line chunks, each non-blank chunk is
    summarized with `analyze_repo_chunk_for_context`, and the section summaries
    are then condensed into one overview by a final model call.

    Args:
        repo_content: Combined text of the repository files.
        repo_id: Repository identifier, used in prompts and headings.

    Returns:
        The overview followed by the per-section summaries. If the final model
        call fails, only the section summaries are returned under the same
        heading. If the content is empty or blank, or any other error occurs,
        a string starting with "Repository analysis unavailable:" is returned.
        This function does not raise for `Exception` subclasses.
    """
    try:
        # With no real content there would be zero section summaries; asking the
        # model to write an overview from nothing only invites invented details.
        if not repo_content or not repo_content.strip():
            return f"Repository analysis unavailable: no analyzable content found for '{repo_id}'"

        lines = repo_content.split('\n')
        chunk_size = 1200  # Larger chunks give better context and fewer API calls
        chunk_summaries = []
        logger.info(f"Analyzing repository {repo_id} in chunks for chatbot context")
        for start in range(0, len(lines), chunk_size):
            chunk = '\n'.join(lines[start:start + chunk_size])
            if chunk.strip():  # Only analyze non-empty chunks
                summary = analyze_repo_chunk_for_context(chunk, repo_id)
                chunk_summaries.append(f"=== Section {len(chunk_summaries) + 1} ===\n{summary}")

        sections_text = "\n".join(chunk_summaries)

        # Create final comprehensive summary
        try:
            from openai import OpenAI
            client = OpenAI(api_key=os.getenv("modal_api"))
            client.base_url = os.getenv("base_url")
            final_prompt = f"""Based on the following section summaries of repository '{repo_id}', create a comprehensive overview that a chatbot can use to answer user questions.
Section Summaries:
{sections_text}
Create a well-structured overview covering:
1. Repository Purpose & Main Functionality
2. Key Components & Architecture
3. Important Features & Capabilities
4. Technology Stack & Dependencies
5. Usage Patterns & Examples
Make this comprehensive but conversational - it will be used by a chatbot to answer user questions about the repository."""
            response = client.chat.completions.create(
                model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
                messages=[
                    {"role": "system", "content": "You are creating a comprehensive repository summary for a chatbot assistant."},
                    {"role": "user", "content": final_prompt}
                ],
                max_tokens=1500,  # Increased for more comprehensive summaries
                temperature=0.3
            )
            final_summary = response.choices[0].message.content
            # Combine everything for the chatbot context
            full_context = f"""=== REPOSITORY ANALYSIS FOR {repo_id.upper()} ===
{final_summary}
=== DETAILED SECTION SUMMARIES ===
{sections_text}"""
            logger.info(f"Created comprehensive context summary for {repo_id}")
            return full_context
        except Exception as e:
            logger.error(f"Error creating final summary: {e}")
            # Fallback to just section summaries
            return f"=== REPOSITORY ANALYSIS FOR {repo_id.upper()} ===\n\n" + '\n\n'.join(chunk_summaries)
    except Exception as e:
        logger.error(f"Error creating repo context summary: {e}")
        return f"Repository analysis unavailable: {e}"
def create_repo_explorer_tab() -> Tuple[gr.TabItem, Dict[str, gr.components.Component], Dict[str, gr.State]]:
    """
    Build the "Repo Explorer" tab and return it with its components and states.

    Must be called while a Gradio layout context is open, because the tab and
    its children are created with `with` blocks.

    Returns:
        A `(tab, components, states)` tuple:
        - `tab`: the created `gr.TabItem`.
        - `components`: dict of the interactive components, keyed by
          "repo_explorer_input", "load_repo_btn", "repo_status_display",
          "repo_chatbot", "repo_msg_input", "repo_send_btn" and
          "repo_content_display".
        - `states`: dict of `gr.State` holders, keyed by
          "repo_context_summary" and "current_repo_id".
    """
    # State variables for repo explorer
    states = {
        "repo_context_summary": gr.State(""),
        "current_repo_id": gr.State("")
    }
    with gr.TabItem("🔍 Repo Explorer", id="repo_explorer_tab") as tab:
        gr.Markdown("### 🗂️ Deep Dive into a Specific Repository")
        with gr.Row():
            with gr.Column(scale=2):
                repo_explorer_input = gr.Textbox(
                    label="📁 Repository ID",
                    placeholder="microsoft/DialoGPT-medium",
                    info="Enter a Hugging Face repository ID to explore"
                )
            with gr.Column(scale=1):
                load_repo_btn = gr.Button("🚀 Load Repository", variant="primary", size="lg")
        with gr.Row():
            repo_status_display = gr.Textbox(
                label="📊 Repository Status",
                interactive=False,
                lines=3,
                info="Current repository loading status and basic info"
            )
        with gr.Row():
            with gr.Column(scale=2):
                # gr.Chatbot has no `info` parameter (that is a form-field
                # option on components such as Textbox), so the hint is shown
                # through `placeholder`, which renders while the chat is empty.
                repo_chatbot = gr.Chatbot(
                    label="🤖 Repository Assistant",
                    height=500,
                    type="messages",
                    avatar_images=(
                        "https://cdn-icons-png.flaticon.com/512/149/149071.png",
                        "https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.png"
                    ),
                    show_copy_button=True,
                    placeholder="Ask questions about the loaded repository"
                )
                with gr.Row():
                    repo_msg_input = gr.Textbox(
                        label="💭 Ask about this repository",
                        placeholder="What does this repository do? How do I use it?",
                        lines=1,
                        scale=4,
                        info="Ask anything about the loaded repository"
                    )
                    repo_send_btn = gr.Button("📤 Send", variant="primary", scale=1)
            with gr.Column(scale=1):
                repo_content_display = gr.Textbox(
                    label="📄 Repository Content Preview",
                    lines=25,
                    interactive=False,
                    show_copy_button=True,
                    info="Preview of the repository files and content"
                )
    # Component references
    components = {
        "repo_explorer_input": repo_explorer_input,
        "load_repo_btn": load_repo_btn,
        "repo_status_display": repo_status_display,
        "repo_chatbot": repo_chatbot,
        "repo_msg_input": repo_msg_input,
        "repo_send_btn": repo_send_btn,
        "repo_content_display": repo_content_display
    }
    return tab, components, states
def handle_load_repository(repo_id: str) -> Tuple[str, str, str]:
    """Load a repository and prepare it for exploration with chunk-based analysis.

    Downloads the repository into "repo_files", combines its files into one
    text file, reads that file, and builds a context summary from it.

    Args:
        repo_id: Hugging Face repository ID as typed by the user. Surrounding
            whitespace is ignored and `None` is treated as empty.

    Returns:
        A `(status, preview, context_summary)` tuple. `status` is a
        human-readable message, `preview` is at most the first 2000 characters
        of the combined content (with "..." appended when truncated), and
        `context_summary` is the text produced by
        `create_repo_context_summary`. On empty input or on any error, the
        preview and summary are empty strings and `status` explains why.
    """
    repo_id = (repo_id or "").strip()
    if not repo_id:
        # The message belongs in the first slot (status). Returning it second
        # would show it in the content preview and blank the status box.
        return "Status: Please enter a repository ID.", "", ""
    try:
        logger.info(f"Loading repository for exploration: {repo_id}")
        # Download and combine repository files
        download_space_repo(repo_id, local_dir="repo_files")
        txt_path = combine_repo_files_for_llm()
        with open(txt_path, "r", encoding="utf-8") as f:
            repo_content = f.read()
        # Create a preview (first 2000 characters)
        preview = repo_content[:2000] + "..." if len(repo_content) > 2000 else repo_content
        status = f"✅ Repository '{repo_id}' loaded successfully!\n📁 Files processed and ready for exploration.\n🔄 Analyzing repository in chunks for comprehensive context...\n💬 You can now ask questions about this repository."
        # Create comprehensive context summary using chunk analysis
        logger.info(f"Creating context summary for {repo_id}")
        context_summary = create_repo_context_summary(repo_content, repo_id)
        logger.info(f"Repository {repo_id} loaded and analyzed successfully for exploration")
        return status, preview, context_summary
    except Exception as e:
        logger.error(f"Error loading repository {repo_id}: {e}")
        error_status = f"❌ Error loading repository: {e}"
        return error_status, "", ""
def handle_repo_user_message(user_message: str, history: List[Dict[str, str]], repo_context_summary: str, repo_id: str) -> Tuple[List[Dict[str, str]], str]:
    """Record the user's message in the repo-specific chat history.

    Args:
        user_message: Text typed by the user. Empty or whitespace-only input
            is not added to the history.
        history: Existing chat messages as `{"role", "content"}` dicts. It is
            not modified; `None` is treated as an empty history.
        repo_context_summary: Analysis text for the loaded repository. When it
            is empty, blank or `None`, no repository is considered loaded and
            the history is returned without changes.
        repo_id: Repository identifier shown in the welcome message.

    Returns:
        `(new_history, "")`. The empty string is the new value for the message
        input box, which clears it.
    """
    # Work on a copy so the caller's list is never mutated behind its back.
    updated = list(history or [])
    if not (repo_context_summary or "").strip():
        return updated, ""
    # Initialize with repository-specific welcome message if empty
    if not updated:
        welcome_msg = f"Hello! I'm your assistant for the '{repo_id}' repository. I have analyzed all the files and created a comprehensive understanding of this repository. I'm ready to answer any questions about its functionality, usage, architecture, and more. What would you like to know?"
        updated.append({"role": "assistant", "content": welcome_msg})
    # A blank message would otherwise trigger a model call with nothing to answer.
    if user_message and user_message.strip():
        updated.append({"role": "user", "content": user_message})
    return updated, ""
def handle_repo_bot_response(history: List[Dict[str, str]], repo_context_summary: str, repo_id: str) -> List[Dict[str, str]]:
    """Generate the assistant's reply to the latest user message.

    Args:
        history: Chat messages as `{"role", "content"}` dicts. It is not
            modified; `None` is treated as an empty history.
        repo_context_summary: Analysis text for the loaded repository,
            embedded in the system prompt.
        repo_id: Repository identifier, embedded in the system prompt.

    Returns:
        A new history list. If the last message is from the user and a
        non-blank context summary is available, one assistant message is
        appended: the model's reply, or an apology containing the error text
        if the request fails. Otherwise the history is returned unchanged.
    """
    # Work on a copy so the caller's list is never mutated behind its back.
    updated = list(history or [])
    if not updated or updated[-1]["role"] != "user" or not (repo_context_summary or "").strip():
        return updated
    # NOTE(review): only the latest user message is sent to the model; earlier
    # turns of the conversation are not included in the request.
    user_message = updated[-1]["content"]
    # Create a specialized prompt using the comprehensive context summary
    repo_system_prompt = f"""You are an expert assistant for the Hugging Face repository '{repo_id}'.
You have comprehensive knowledge about this repository based on detailed analysis of all its files and components.
Use the following comprehensive analysis to answer user questions accurately and helpfully:
{repo_context_summary}
Instructions:
- Answer questions clearly and conversationally about this specific repository
- Reference specific components, functions, or features when relevant
- Provide practical guidance on installation, usage, and implementation
- If asked about code details, refer to the analysis above
- Be helpful and informative while staying focused on this repository
- If something isn't covered in the analysis, acknowledge the limitation
Answer the user's question based on your comprehensive knowledge of this repository."""
    try:
        from openai import OpenAI
        client = OpenAI(api_key=os.getenv("modal_api"))
        client.base_url = os.getenv("base_url")
        response = client.chat.completions.create(
            model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
            messages=[
                {"role": "system", "content": repo_system_prompt},
                {"role": "user", "content": user_message}
            ],
            max_tokens=1024,
            temperature=0.7
        )
        # The API may return no text content; never store None as a message.
        bot_response = response.choices[0].message.content or (
            "I'm sorry, I couldn't generate a response. Please try rephrasing your question."
        )
        updated.append({"role": "assistant", "content": bot_response})
    except Exception as e:
        logger.error(f"Error generating repo bot response: {e}")
        error_response = f"I apologize, but I encountered an error while processing your question: {e}"
        updated.append({"role": "assistant", "content": error_response})
    return updated
def setup_repo_explorer_events(components: Dict[str, gr.components.Component], states: Dict[str, gr.State]):
    """
    Wire the Repo Explorer components to their handlers.

    `components` and `states` are the dictionaries returned by
    `create_repo_explorer_tab`. Two flows are registered:

    - Clicking the load button runs `handle_load_repository`, then copies the
      entered repository ID into the "current_repo_id" state.
    - Submitting the message box or clicking the send button runs
      `handle_repo_user_message`, then `handle_repo_bot_response`.
    """
    repo_input = components["repo_explorer_input"]
    chatbot = components["repo_chatbot"]
    message_box = components["repo_msg_input"]
    summary_state = states["repo_context_summary"]
    repo_id_state = states["current_repo_id"]

    # Load repository event
    load_event = components["load_repo_btn"].click(
        fn=handle_load_repository,
        inputs=[repo_input],
        outputs=[components["repo_status_display"], components["repo_content_display"], summary_state],
    )
    load_event.then(
        fn=lambda repo_id: repo_id,
        inputs=[repo_input],
        outputs=[repo_id_state],
    )

    # Chat message submission events: Enter in the textbox and the send button
    # share one two-step chain (store the user message, then ask the model).
    for register in (message_box.submit, components["repo_send_btn"].click):
        user_step = register(
            fn=handle_repo_user_message,
            inputs=[message_box, chatbot, summary_state, repo_id_state],
            outputs=[chatbot, message_box],
        )
        user_step.then(
            fn=handle_repo_bot_response,
            inputs=[chatbot, summary_state, repo_id_state],
            outputs=[chatbot],
        )