# HF_RepoSense / repo_explorer.py
# naman1102's picture
# hi
# 953203e
# raw
# history blame
# 14.6 kB
import gradio as gr
import os
import logging
from typing import List, Dict, Tuple
from analyzer import combine_repo_files_for_llm
from hf_utils import download_space_repo
# Setup logger
logger = logging.getLogger(__name__)
def analyze_repo_chunk_for_context(chunk: str, repo_id: str) -> str:
    """Summarize one chunk of repository code for use as chatbot context.

    Sends the chunk to the LLM endpoint configured through the ``modal_api``
    and ``base_url`` environment variables. On any failure (missing client,
    bad credentials, network error) a short fallback string is returned
    instead of raising, so one bad chunk never aborts a whole repo scan.
    """
    try:
        from openai import OpenAI

        # Client is configured entirely from environment variables.
        llm = OpenAI(api_key=os.getenv("modal_api"))
        llm.base_url = os.getenv("base_url")

        prompt = f"""You are analyzing a chunk of code from the repository '{repo_id}' to create a conversational summary for a chatbot assistant.
Create a concise but informative summary that helps understand:
- What this code section does
- Key functions, classes, or components
- Important features or capabilities
- How it relates to the overall repository purpose
- Any notable patterns or technologies used
Focus on information that would be useful for answering user questions about the repository.
Repository chunk:
{chunk}
Provide a clear, conversational summary in 2-3 paragraphs:"""

        conversation = [
            {"role": "system", "content": "You are an expert code analyst creating conversational summaries for a repository assistant chatbot."},
            {"role": "user", "content": prompt},
        ]
        completion = llm.chat.completions.create(
            model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
            messages=conversation,
            max_tokens=600,  # generous budget for detailed analysis of large chunks
            temperature=0.3,
        )
        return completion.choices[0].message.content
    except Exception as e:
        # Best-effort: log and degrade gracefully rather than propagate.
        logger.error(f"Error analyzing chunk for context: {e}")
        return f"Code section analysis unavailable: {e}"
def create_repo_context_summary(repo_content: str, repo_id: str, chunk_size: int = 1200) -> str:
    """
    Create a comprehensive context summary by analyzing the repository in chunks.

    Args:
        repo_content: Full combined text of the repository's files.
        repo_id: Hugging Face repository ID, used in prompts and headers.
        chunk_size: Number of lines per analysis chunk. The 1200-line default
            balances per-chunk context quality against the number of API calls.

    Returns:
        A structured context string the chatbot can answer questions from.
        Falls back to the raw section summaries if the final LLM pass fails,
        or to a short notice if analysis fails entirely.
    """
    # Empty repository: nothing to analyze, so skip all LLM calls and return
    # the bare header in the same format the fallback path would produce.
    if not repo_content.strip():
        return f"=== REPOSITORY ANALYSIS FOR {repo_id.upper()} ===\n\n"
    try:
        lines = repo_content.split('\n')
        chunk_summaries = []
        logger.info(f"Analyzing repository {repo_id} in chunks for chatbot context")
        for i in range(0, len(lines), chunk_size):
            chunk = '\n'.join(lines[i:i + chunk_size])
            if chunk.strip():  # Only analyze non-empty chunks
                summary = analyze_repo_chunk_for_context(chunk, repo_id)
                chunk_summaries.append(f"=== Section {len(chunk_summaries) + 1} ===\n{summary}")
        # No analyzable sections: avoid a pointless final LLM call.
        if not chunk_summaries:
            return f"=== REPOSITORY ANALYSIS FOR {repo_id.upper()} ===\n\n"
        # Create final comprehensive summary from the per-section summaries
        try:
            from openai import OpenAI

            client = OpenAI(api_key=os.getenv("modal_api"))
            client.base_url = os.getenv("base_url")
            final_prompt = f"""Based on the following section summaries of repository '{repo_id}', create a comprehensive overview that a chatbot can use to answer user questions.
Section Summaries:
{chr(10).join(chunk_summaries)}
Create a well-structured overview covering:
1. Repository Purpose & Main Functionality
2. Key Components & Architecture
3. Important Features & Capabilities
4. Technology Stack & Dependencies
5. Usage Patterns & Examples
Make this comprehensive but conversational - it will be used by a chatbot to answer user questions about the repository."""
            response = client.chat.completions.create(
                model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
                messages=[
                    {"role": "system", "content": "You are creating a comprehensive repository summary for a chatbot assistant."},
                    {"role": "user", "content": final_prompt}
                ],
                max_tokens=1500,  # roomy budget for a comprehensive overview
                temperature=0.3
            )
            final_summary = response.choices[0].message.content
            # Combine the overview with the per-section details so the chatbot
            # has both a high-level and a granular view of the repository.
            full_context = f"""=== REPOSITORY ANALYSIS FOR {repo_id.upper()} ===
{final_summary}
=== DETAILED SECTION SUMMARIES ===
{chr(10).join(chunk_summaries)}"""
            logger.info(f"Created comprehensive context summary for {repo_id}")
            return full_context
        except Exception as e:
            logger.error(f"Error creating final summary: {e}")
            # Fallback to just section summaries
            return f"=== REPOSITORY ANALYSIS FOR {repo_id.upper()} ===\n\n" + '\n\n'.join(chunk_summaries)
    except Exception as e:
        logger.error(f"Error creating repo context summary: {e}")
        return f"Repository analysis unavailable: {e}"
def create_repo_explorer_tab() -> Tuple[Dict[str, gr.components.Component], Dict[str, gr.State]]:
    """
    Creates the Repo Explorer tab content and returns the component references and state variables.

    Returns:
        components: Mapping of short names to the tab's interactive Gradio
            components, consumed by setup_repo_explorer_events for wiring.
        states: Mapping of state names to gr.State holders — the LLM-built
            context summary and the ID of the currently loaded repository.
    """
    # State variables for repo explorer:
    # - repo_context_summary: chunk-analysis output the chatbot answers from
    # - current_repo_id: which repository that summary belongs to
    states = {
        "repo_context_summary": gr.State(""),
        "current_repo_id": gr.State("")
    }
    gr.Markdown("### πŸ—‚οΈ Deep Dive into a Specific Repository")
    # Top row: repository ID input plus the load trigger.
    with gr.Row():
        with gr.Column(scale=2):
            repo_explorer_input = gr.Textbox(
                label="πŸ“ Repository ID",
                placeholder="microsoft/DialoGPT-medium",
                info="Enter a Hugging Face repository ID to explore"
            )
        with gr.Column(scale=1):
            load_repo_btn = gr.Button("πŸš€ Load Repository", variant="primary", size="lg")
    # Status banner updated by handle_load_repository.
    with gr.Row():
        repo_status_display = gr.Textbox(
            label="πŸ“Š Repository Status",
            interactive=False,
            lines=3,
            info="Current repository loading status and basic info"
        )
    # Main area: chatbot with an initial usage guide, plus the message input.
    with gr.Row():
        with gr.Column(scale=2):
            repo_chatbot = gr.Chatbot(
                label="πŸ€– Repository Assistant",
                height=400,
                type="messages",
                avatar_images=(
                    "https://cdn-icons-png.flaticon.com/512/149/149071.png",
                    "https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.png"
                ),
                show_copy_button=True,
                value=[{"role": "assistant", "content": "πŸ‘‹ Welcome to the Repository Explorer! \n\nπŸ” **How to get started:**\n1. Enter a Hugging Face repository ID above (e.g., 'microsoft/DialoGPT-medium')\n2. Click 'πŸš€ Load Repository' to download and analyze the repository\n3. Once loaded, I'll have comprehensive knowledge of all the files and can answer questions about:\n β€’ What the repository does\n β€’ How to install and use it\n β€’ Code structure and architecture\n β€’ Key features and capabilities\n β€’ Examples and usage patterns\n\nπŸ’‘ **Tip:** I analyze repositories in chunks to understand the entire codebase, not just a summary!\n\nPlease load a repository to begin exploring! πŸš€"}]
            )
            with gr.Row():
                repo_msg_input = gr.Textbox(
                    label="πŸ’­ Ask about this repository",
                    placeholder="What does this repository do? How do I use it?",
                    lines=1,
                    scale=4,
                    info="Ask anything about the loaded repository"
                )
                repo_send_btn = gr.Button("πŸ“€ Send", variant="primary", scale=1)
        # with gr.Column(scale=1):
        #     # Repository content preview
        #     repo_content_display = gr.Textbox(
        #         label="πŸ“„ Repository Content Preview",
        #         lines=20,
        #         show_copy_button=True,
        #         interactive=False,
        #         info="Overview of the loaded repository structure and content"
        #     )
    # Component references handed to the event-wiring helper.
    components = {
        "repo_explorer_input": repo_explorer_input,
        "load_repo_btn": load_repo_btn,
        "repo_status_display": repo_status_display,
        "repo_chatbot": repo_chatbot,
        "repo_msg_input": repo_msg_input,
        "repo_send_btn": repo_send_btn,
        # "repo_content_display": repo_content_display
    }
    return components, states
def handle_load_repository(repo_id: str) -> Tuple[str, str]:
    """Load a specific repository and prepare it for exploration with chunk-based analysis.

    Args:
        repo_id: Hugging Face repository ID. Leading/trailing whitespace is
            stripped before use.

    Returns:
        Tuple of (status message for the UI, context summary for chatbot
        state). The summary is an empty string when loading fails or no ID
        was provided.
    """
    # Normalize once: previously only the emptiness check stripped, so a
    # pasted ID with stray whitespace was passed verbatim to the downloader.
    repo_id = repo_id.strip()
    if not repo_id:
        return "Status: Please enter a repository ID.", ""
    try:
        logger.info(f"Loading repository for exploration: {repo_id}")
        # Download the Space's files and merge them into one text file for the LLM.
        download_space_repo(repo_id, local_dir="repo_files")
        txt_path = combine_repo_files_for_llm()
        with open(txt_path, "r", encoding="utf-8") as f:
            repo_content = f.read()
        status = f"βœ… Repository '{repo_id}' loaded successfully!\nπŸ“ Files processed and ready for exploration.\nπŸ”„ Analyzing repository in chunks for comprehensive context...\nπŸ’¬ You can now ask questions about this repository."
        # Chunk-based analysis builds the context the chatbot answers from.
        logger.info(f"Creating context summary for {repo_id}")
        context_summary = create_repo_context_summary(repo_content, repo_id)
        logger.info(f"Repository {repo_id} loaded and analyzed successfully for exploration")
        return status, context_summary
    except Exception as e:
        logger.error(f"Error loading repository {repo_id}: {e}")
        error_status = f"❌ Error loading repository: {e}"
        return error_status, ""
def handle_repo_user_message(user_message: str, history: List[Dict[str, str]], repo_context_summary: str, repo_id: str) -> Tuple[List[Dict[str, str]], str]:
    """Record the user's chat turn and clear the input textbox.

    Ignores input until a repository has been analyzed. On the very first
    message a repository-specific greeting is seeded ahead of the user's
    turn. Returns the updated history plus "" to reset the textbox.
    """
    # No analyzed repository yet -> drop the message silently.
    if not repo_context_summary.strip():
        return history, ""
    greeting = f"Hello! I'm your assistant for the '{repo_id}' repository. I have analyzed all the files and created a comprehensive understanding of this repository. I'm ready to answer any questions about its functionality, usage, architecture, and more. What would you like to know?"
    # Seed the greeting only when the conversation is brand new.
    updated = history if history else [{"role": "assistant", "content": greeting}]
    if user_message:
        updated.append({"role": "user", "content": user_message})
    return updated, ""
def handle_repo_bot_response(history: List[Dict[str, str]], repo_context_summary: str, repo_id: str) -> List[Dict[str, str]]:
    """Append the assistant's answer to the most recent user question.

    A reply is generated only when a context summary exists and the latest
    turn came from the user; otherwise the history is returned untouched.
    LLM failures surface as an apologetic assistant message rather than an
    exception.
    """
    # Only respond to a fresh user turn on an analyzed repository.
    if not history or not repo_context_summary.strip():
        return history
    if history[-1]["role"] != "user":
        return history
    question = history[-1]["content"]
    # The full analysis is embedded in the system prompt so the model can
    # answer repository-specific questions without retrieval.
    system_prompt = f"""You are an expert assistant for the Hugging Face repository '{repo_id}'.
You have comprehensive knowledge about this repository based on detailed analysis of all its files and components.
Use the following comprehensive analysis to answer user questions accurately and helpfully:
{repo_context_summary}
Instructions:
- Answer questions clearly and conversationally about this specific repository
- Reference specific components, functions, or features when relevant
- Provide practical guidance on installation, usage, and implementation
- If asked about code details, refer to the analysis above
- Be helpful and informative while staying focused on this repository
- If something isn't covered in the analysis, acknowledge the limitation
Answer the user's question based on your comprehensive knowledge of this repository."""
    try:
        from openai import OpenAI

        llm = OpenAI(api_key=os.getenv("modal_api"))
        llm.base_url = os.getenv("base_url")
        completion = llm.chat.completions.create(
            model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": question},
            ],
            max_tokens=1024,
            temperature=0.7,
        )
        history.append({"role": "assistant", "content": completion.choices[0].message.content})
    except Exception as e:
        logger.error(f"Error generating repo bot response: {e}")
        history.append({"role": "assistant", "content": f"I apologize, but I encountered an error while processing your question: {e}"})
    return history
def setup_repo_explorer_events(components: Dict[str, gr.components.Component], states: Dict[str, gr.State]):
    """Setup event handlers for the repo explorer components.

    Args:
        components: Component mapping returned by create_repo_explorer_tab.
        states: State mapping (context summary + current repo ID) from the
            same call; updated by the handlers wired here.
    """
    # Load repository event: analyze the repo into repo_context_summary,
    # then record which repo ID that summary belongs to.
    components["load_repo_btn"].click(
        fn=handle_load_repository,
        inputs=[components["repo_explorer_input"]],
        outputs=[components["repo_status_display"], states["repo_context_summary"]]
    ).then(
        fn=lambda repo_id: repo_id,
        inputs=[components["repo_explorer_input"]],
        outputs=[states["current_repo_id"]]
    )
    # Chat message submission events: first append the user's turn and clear
    # the textbox, then generate the assistant's reply from the stored context.
    components["repo_msg_input"].submit(
        fn=handle_repo_user_message,
        inputs=[components["repo_msg_input"], components["repo_chatbot"], states["repo_context_summary"], states["current_repo_id"]],
        outputs=[components["repo_chatbot"], components["repo_msg_input"]]
    ).then(
        fn=handle_repo_bot_response,
        inputs=[components["repo_chatbot"], states["repo_context_summary"], states["current_repo_id"]],
        outputs=[components["repo_chatbot"]]
    )
    # Same two-step flow for the explicit Send button.
    components["repo_send_btn"].click(
        fn=handle_repo_user_message,
        inputs=[components["repo_msg_input"], components["repo_chatbot"], states["repo_context_summary"], states["current_repo_id"]],
        outputs=[components["repo_chatbot"], components["repo_msg_input"]]
    ).then(
        fn=handle_repo_bot_response,
        inputs=[components["repo_chatbot"], states["repo_context_summary"], states["current_repo_id"]],
        outputs=[components["repo_chatbot"]]
    )