# NOTE(review): the three lines that were here ("Spaces:" / "Sleeping" /
# "Sleeping") were Hugging Face Spaces page chrome captured during extraction,
# not source code — converted to this comment so the module parses.
# Standard library
import logging
import os
from typing import Dict, List, Tuple

# Third-party
import gradio as gr

# Local application
from analyzer import combine_repo_files_for_llm
from hf_utils import download_filtered_space_files

# Module-level logger, one per module by name (standard logging convention).
logger = logging.getLogger(__name__)
def analyze_repo_chunk_for_context(chunk: str, repo_id: str) -> str:
    """
    Summarize one chunk of repository content for the chatbot's context.

    Sends the chunk to the LLM endpoint configured through the ``modal_api``
    (API key) and ``base_url`` environment variables and returns a short
    conversational summary. Never raises: any failure is logged and a
    human-readable fallback string is returned instead.
    """
    try:
        from openai import OpenAI

        llm = OpenAI(api_key=os.getenv("modal_api"))
        llm.base_url = os.getenv("base_url")

        prompt = f"""You are analyzing a chunk of code from the repository '{repo_id}' to create a conversational summary for a chatbot assistant.
Create a concise but informative summary that helps understand:
- What this code section does
- Key functions, classes, or components
- Important features or capabilities
- How it relates to the overall repository purpose
- Any notable patterns or technologies used
Focus on information that would be useful for answering user questions about the repository.
Repository chunk:
{chunk}
Provide a clear, conversational summary in 2-3 paragraphs:"""

        reply = llm.chat.completions.create(
            model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
            messages=[
                {"role": "system", "content": "You are an expert code analyst creating conversational summaries for a repository assistant chatbot."},
                {"role": "user", "content": prompt},
            ],
            max_tokens=600,  # room for detailed analysis of large chunks
            temperature=0.3,
        )
        return reply.choices[0].message.content
    except Exception as e:
        logger.error(f"Error analyzing chunk for context: {e}")
        return f"Code section analysis unavailable: {e}"
def create_repo_context_summary(repo_content: str, repo_id: str) -> str:
    """
    Build a comprehensive chatbot context by analyzing the repository in chunks.

    The content is split into fixed-size line chunks, each chunk is summarized
    with analyze_repo_chunk_for_context, and a final LLM pass merges the
    section summaries into one overview. If the merge fails, the raw section
    summaries are returned; if chunking itself fails, an error string is.
    """
    CHUNK_LINES = 1200  # larger chunks: better context, fewer API calls
    try:
        lines = repo_content.split('\n')
        section_notes: List[str] = []

        logger.info(f"Analyzing repository {repo_id} in chunks for chatbot context")

        for start in range(0, len(lines), CHUNK_LINES):
            piece = '\n'.join(lines[start:start + CHUNK_LINES])
            if not piece.strip():
                continue  # nothing to summarize in an empty chunk
            note = analyze_repo_chunk_for_context(piece, repo_id)
            section_notes.append(f"=== Section {len(section_notes) + 1} ===\n{note}")

        # Second pass: merge the per-section notes into a single overview.
        try:
            from openai import OpenAI

            llm = OpenAI(api_key=os.getenv("modal_api"))
            llm.base_url = os.getenv("base_url")

            merge_prompt = f"""Based on the following section summaries of repository '{repo_id}', create a comprehensive overview that a chatbot can use to answer user questions.
Section Summaries:
{chr(10).join(section_notes)}
Create a well-structured overview covering:
1. Repository Purpose & Main Functionality
2. Key Components & Architecture
3. Important Features & Capabilities
4. Technology Stack & Dependencies
5. Usage Patterns & Examples
Make this comprehensive but conversational - it will be used by a chatbot to answer user questions about the repository."""

            reply = llm.chat.completions.create(
                model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
                messages=[
                    {"role": "system", "content": "You are creating a comprehensive repository summary for a chatbot assistant."},
                    {"role": "user", "content": merge_prompt},
                ],
                max_tokens=1500,  # allow a long, comprehensive overview
                temperature=0.3,
            )
            overview = reply.choices[0].message.content

            # Final context = merged overview followed by the raw section notes.
            full_context = f"""=== REPOSITORY ANALYSIS FOR {repo_id.upper()} ===
{overview}
=== DETAILED SECTION SUMMARIES ===
{chr(10).join(section_notes)}"""
            logger.info(f"Created comprehensive context summary for {repo_id}")
            return full_context
        except Exception as e:
            logger.error(f"Error creating final summary: {e}")
            # Fallback: expose just the per-section summaries.
            return f"=== REPOSITORY ANALYSIS FOR {repo_id.upper()} ===\n\n" + '\n\n'.join(section_notes)
    except Exception as e:
        logger.error(f"Error creating repo context summary: {e}")
        return f"Repository analysis unavailable: {e}"
def create_repo_explorer_tab() -> Tuple[Dict[str, gr.components.Component], Dict[str, gr.State]]:
    """
    Build the Repo Explorer tab UI.

    Returns:
        A (components, states) pair: ``components`` maps fixed names to the
        Gradio widgets that the event wiring needs; ``states`` holds the
        per-session state (analyzed repo context and current repo id).
    """
    # Per-session state consumed by the event handlers.
    states = {
        "repo_context_summary": gr.State(""),
        "current_repo_id": gr.State(""),
    }

    gr.Markdown("### 🗂️ Deep Dive into a Specific Repository")

    with gr.Row():
        with gr.Column(scale=2):
            repo_explorer_input = gr.Textbox(
                label="📁 Repository ID",
                placeholder="microsoft/DialoGPT-medium",
                info="Enter a Hugging Face repository ID to explore",
            )
        with gr.Column(scale=1):
            load_repo_btn = gr.Button("🚀 Load Repository", variant="primary", size="lg")

    with gr.Row():
        repo_status_display = gr.Textbox(
            label="📊 Repository Status",
            interactive=False,
            lines=3,
            info="Current repository loading status and basic info",
        )

    with gr.Row():
        # NOTE(review): nesting reconstructed from a whitespace-mangled source —
        # the chat input row is assumed to sit inside this scale-2 column,
        # beside a (currently disabled) scale-1 content-preview column. Confirm.
        with gr.Column(scale=2):
            repo_chatbot = gr.Chatbot(
                label="🤖 Repository Assistant",
                height=400,
                type="messages",
                avatar_images=(
                    "https://cdn-icons-png.flaticon.com/512/149/149071.png",
                    "https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.png"
                ),
                show_copy_button=True,
                value=[{"role": "assistant", "content": "👋 Welcome to the Repository Explorer! \n\n🔍 **How to get started:**\n1. Enter a Hugging Face repository ID above (e.g., 'microsoft/DialoGPT-medium')\n2. Click '🚀 Load Repository' to download and analyze the repository\n3. Once loaded, I'll have comprehensive knowledge of all the files and can answer questions about:\n   • What the repository does\n   • How to install and use it\n   • Code structure and architecture\n   • Key features and capabilities\n   • Examples and usage patterns\n\n💡 **Tip:** I analyze repositories in chunks to understand the entire codebase, not just a summary!\n\nPlease load a repository to begin exploring! 🚀"}]
            )

            with gr.Row():
                repo_msg_input = gr.Textbox(
                    label="💭 Ask about this repository",
                    placeholder="What does this repository do? How do I use it?",
                    lines=1,
                    scale=4,
                    info="Ask anything about the loaded repository",
                )
                repo_send_btn = gr.Button("📤 Send", variant="primary", scale=1)

        # A scale-1 "repo_content_display" preview column was planned here but
        # is currently disabled.

    # Named references for setup_repo_explorer_events.
    components = {
        "repo_explorer_input": repo_explorer_input,
        "load_repo_btn": load_repo_btn,
        "repo_status_display": repo_status_display,
        "repo_chatbot": repo_chatbot,
        "repo_msg_input": repo_msg_input,
        "repo_send_btn": repo_send_btn,
    }
    return components, states
def handle_load_repository(repo_id: str) -> Tuple[str, str]:
    """
    Download a repository, flatten its files, and build the chatbot context.

    Args:
        repo_id: Hugging Face repository id (e.g. "user/space").

    Returns:
        (status_message, context_summary) — the summary is "" on any failure.
    """
    if not repo_id.strip():
        return "Status: Please enter a repository ID.", ""

    try:
        logger.info(f"Loading repository for exploration: {repo_id}")

        # Fetch only text-like files and combine them into a single document.
        try:
            download_filtered_space_files(repo_id, local_dir="repo_files", file_extensions=['.py', '.md', '.txt'])
            combined_text_path = combine_repo_files_for_llm()
        except Exception as e:
            logger.error(f"Error downloading repository {repo_id}: {e}")
            return f"❌ Error downloading repository: {e}", ""

        with open(combined_text_path, "r", encoding="utf-8") as f:
            repo_content = f.read()

        status = f"✅ Repository '{repo_id}' loaded successfully!\n📁 Files processed and ready for exploration.\n🔄 Analyzing repository in chunks for comprehensive context...\n💬 You can now ask questions about this repository."

        # Chunk-based analysis produces the context the chatbot answers from.
        logger.info(f"Creating context summary for {repo_id}")
        context_summary = create_repo_context_summary(repo_content, repo_id)

        logger.info(f"Repository {repo_id} loaded and analyzed successfully for exploration")
        return status, context_summary
    except Exception as e:
        logger.error(f"Error loading repository {repo_id}: {e}")
        return f"❌ Error loading repository: {e}", ""
def handle_repo_user_message(user_message: str, history: List[Dict[str, str]], repo_context_summary: str, repo_id: str) -> Tuple[List[Dict[str, str]], str]:
    """
    Append the user's turn to the chat history and clear the input box.

    A no-op (history returned unchanged) until a repository context exists.
    When the history is empty, a repo-specific assistant greeting is seeded
    before the user message is appended.
    """
    # Without analyzed context the assistant cannot answer; ignore the input.
    if not repo_context_summary.strip():
        return history, ""

    if not history:
        history = [{
            "role": "assistant",
            "content": f"Hello! I'm your assistant for the '{repo_id}' repository. I have analyzed all the files and created a comprehensive understanding of this repository. I'm ready to answer any questions about its functionality, usage, architecture, and more. What would you like to know?",
        }]

    if user_message:
        history.append({"role": "user", "content": user_message})

    # Second value clears the textbox component.
    return history, ""
def handle_repo_bot_response(history: List[Dict[str, str]], repo_context_summary: str, repo_id: str) -> List[Dict[str, str]]:
    """
    Generate the assistant's reply to the most recent user message.

    Requires a non-empty context summary and a trailing user turn; otherwise
    the history is returned untouched. On LLM failure an apologetic error
    message is appended instead of raising.
    """
    no_user_turn = not history or history[-1]["role"] != "user"
    if no_user_turn or not repo_context_summary.strip():
        return history

    question = history[-1]["content"]

    # The system prompt grounds the model in the pre-computed repo analysis.
    system_prompt = f"""You are an expert assistant for the Hugging Face repository '{repo_id}'.
You have comprehensive knowledge about this repository based on detailed analysis of all its files and components.
Use the following comprehensive analysis to answer user questions accurately and helpfully:
{repo_context_summary}
Instructions:
- Answer questions clearly and conversationally about this specific repository
- Reference specific components, functions, or features when relevant
- Provide practical guidance on installation, usage, and implementation
- If asked about code details, refer to the analysis above
- Be helpful and informative while staying focused on this repository
- If something isn't covered in the analysis, acknowledge the limitation
Answer the user's question based on your comprehensive knowledge of this repository."""

    try:
        from openai import OpenAI

        llm = OpenAI(api_key=os.getenv("modal_api"))
        llm.base_url = os.getenv("base_url")

        reply = llm.chat.completions.create(
            model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": question},
            ],
            max_tokens=1024,
            temperature=0.7,
        )
        history.append({"role": "assistant", "content": reply.choices[0].message.content})
    except Exception as e:
        logger.error(f"Error generating repo bot response: {e}")
        history.append({"role": "assistant", "content": f"I apologize, but I encountered an error while processing your question: {e}"})

    return history
def setup_repo_explorer_events(components: Dict[str, gr.components.Component], states: Dict[str, gr.State]):
    """
    Wire up the Repo Explorer event handlers.

    - Load button: downloads/analyzes the repository, then records its id.
    - Chat (textbox Enter or Send button): appends the user turn, then
      generates the assistant reply from the analyzed context.
    """
    # Repository loading: analyze first, then remember which repo was loaded.
    components["load_repo_btn"].click(
        fn=handle_load_repository,
        inputs=[components["repo_explorer_input"]],
        outputs=[components["repo_status_display"], states["repo_context_summary"]],
    ).then(
        fn=lambda repo_id: repo_id,
        inputs=[components["repo_explorer_input"]],
        outputs=[states["current_repo_id"]],
    )

    # Both chat triggers share identical wiring, so register them in one loop.
    user_turn_inputs = [
        components["repo_msg_input"],
        components["repo_chatbot"],
        states["repo_context_summary"],
        states["current_repo_id"],
    ]
    bot_turn_inputs = [
        components["repo_chatbot"],
        states["repo_context_summary"],
        states["current_repo_id"],
    ]
    for register in (components["repo_msg_input"].submit, components["repo_send_btn"].click):
        register(
            fn=handle_repo_user_message,
            inputs=user_turn_inputs,
            outputs=[components["repo_chatbot"], components["repo_msg_input"]],
        ).then(
            fn=handle_repo_bot_response,
            inputs=bot_turn_inputs,
            outputs=[components["repo_chatbot"]],
        )