Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import os | |
| import logging | |
| from typing import List, Dict, Tuple | |
| from analyzer import combine_repo_files_for_llm | |
| from hf_utils import download_filtered_space_files | |
| # Setup logger | |
| logger = logging.getLogger(__name__) | |
def analyze_repo_chunk_for_context(chunk: str, repo_id: str) -> str:
    """
    Analyze a repository chunk to create conversational context for the chatbot.

    Sends the chunk to the LLM endpoint configured by the ``modal_api``
    (API key) and ``base_url`` environment variables and returns a short
    conversational summary focused on helping users understand the repository.

    Args:
        chunk: Raw text of one repository section (may span several files).
        repo_id: Hugging Face repository ID, used to anchor the prompt.

    Returns:
        The model-generated summary, or a fallback message embedding the
        error text if the import, client setup, or API call fails.
    """
    try:
        from openai import OpenAI

        # Fix: pass base_url through the constructor. The original assigned
        # client.base_url after construction, which sets it to None (breaking
        # the client) when the env var is missing; the constructor falls back
        # to the default endpoint on None instead.
        client = OpenAI(
            api_key=os.getenv("modal_api"),
            base_url=os.getenv("base_url"),
        )
        context_prompt = f"""You are analyzing a chunk of code from the repository '{repo_id}' to create a conversational summary for a chatbot assistant.
Create a concise but informative summary that helps understand:
- What this code section does
- Key functions, classes, or components
- Important features or capabilities
- How it relates to the overall repository purpose
- Any notable patterns or technologies used
Focus on information that would be useful for answering user questions about the repository.
Repository chunk:
{chunk}
Provide a clear, conversational summary in 2-3 paragraphs:"""
        response = client.chat.completions.create(
            model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
            messages=[
                {"role": "system", "content": "You are an expert code analyst creating conversational summaries for a repository assistant chatbot."},
                {"role": "user", "content": context_prompt}
            ],
            max_tokens=600,  # generous budget for detailed analysis of large chunks
            temperature=0.3
        )
        return response.choices[0].message.content
    except Exception as e:
        # Best-effort: summarization failures must not abort repository loading,
        # so return a readable placeholder instead of raising.
        logger.error(f"Error analyzing chunk for context: {e}")
        return f"Code section analysis unavailable: {e}"
def create_repo_context_summary(repo_content: str, repo_id: str) -> str:
    """
    Build a comprehensive chatbot context for *repo_id*.

    Splits the combined repository text into large line-based chunks,
    summarizes each chunk via analyze_repo_chunk_for_context, then asks the
    model for one final overview. Falls back to the raw section summaries
    (or an error message) when a stage fails.
    """
    try:
        content_lines = repo_content.split('\n')
        lines_per_chunk = 1200  # large chunks: better context, fewer API calls
        section_summaries = []
        logger.info(f"Analyzing repository {repo_id} in chunks for chatbot context")
        # Summarize every non-empty slice of the repository text.
        for start in range(0, len(content_lines), lines_per_chunk):
            piece = '\n'.join(content_lines[start:start + lines_per_chunk])
            if not piece.strip():
                continue
            piece_summary = analyze_repo_chunk_for_context(piece, repo_id)
            section_summaries.append(
                f"=== Section {len(section_summaries) + 1} ===\n{piece_summary}"
            )
        joined_sections = '\n'.join(section_summaries)
        # Second pass: condense the section summaries into one overview.
        try:
            from openai import OpenAI
            client = OpenAI(api_key=os.getenv("modal_api"))
            client.base_url = os.getenv("base_url")
            final_prompt = f"""Based on the following section summaries of repository '{repo_id}', create a comprehensive overview that a chatbot can use to answer user questions.
Section Summaries:
{joined_sections}
Create a well-structured overview covering:
1. Repository Purpose & Main Functionality
2. Key Components & Architecture
3. Important Features & Capabilities
4. Technology Stack & Dependencies
5. Usage Patterns & Examples
Make this comprehensive but conversational - it will be used by a chatbot to answer user questions about the repository."""
            response = client.chat.completions.create(
                model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
                messages=[
                    {"role": "system", "content": "You are creating a comprehensive repository summary for a chatbot assistant."},
                    {"role": "user", "content": final_prompt}
                ],
                max_tokens=1500,  # room for a comprehensive overview
                temperature=0.3
            )
            final_summary = response.choices[0].message.content
            # Ship both the overview and the per-section detail to the chatbot.
            full_context = f"""=== REPOSITORY ANALYSIS FOR {repo_id.upper()} ===
{final_summary}
=== DETAILED SECTION SUMMARIES ===
{joined_sections}"""
            logger.info(f"Created comprehensive context summary for {repo_id}")
            return full_context
        except Exception as e:
            # Overview generation failed: fall back to the raw section summaries.
            logger.error(f"Error creating final summary: {e}")
            return f"=== REPOSITORY ANALYSIS FOR {repo_id.upper()} ===\n\n" + '\n\n'.join(section_summaries)
    except Exception as e:
        logger.error(f"Error creating repo context summary: {e}")
        return f"Repository analysis unavailable: {e}"
def create_repo_explorer_tab() -> Tuple[Dict[str, gr.components.Component], Dict[str, gr.State]]:
    """
    Creates the Repo Explorer tab content and returns the component references and state variables.

    Must be called inside an active Gradio Blocks context; the layout is
    registered as a side effect of construction. Event wiring happens
    separately in setup_repo_explorer_events.
    """
    # State variables for repo explorer:
    # - repo_context_summary: LLM-generated repository analysis used as chat context
    # - current_repo_id: the repository ID most recently loaded
    states = {
        "repo_context_summary": gr.State(""),
        "current_repo_id": gr.State("")
    }
    gr.Markdown("### 🗂️ Deep Dive into a Specific Repository")
    # Input row: repository ID textbox (2/3 width) plus the load button (1/3 width).
    with gr.Row():
        with gr.Column(scale=2):
            repo_explorer_input = gr.Textbox(
                label="📁 Repository ID",
                placeholder="microsoft/DialoGPT-medium",
                info="Enter a Hugging Face repository ID to explore"
            )
        with gr.Column(scale=1):
            load_repo_btn = gr.Button("🚀 Load Repository", variant="primary", size="lg")
    # Status row: read-only feedback about loading/analysis progress.
    with gr.Row():
        repo_status_display = gr.Textbox(
            label="📊 Repository Status",
            interactive=False,
            lines=3,
            info="Current repository loading status and basic info"
        )
    # Chat row: assistant chatbot pre-seeded with usage instructions.
    with gr.Row():
        with gr.Column(scale=2):
            repo_chatbot = gr.Chatbot(
                label="🤖 Repository Assistant",
                height=400,
                type="messages",  # history is a list of {"role", "content"} dicts
                avatar_images=(
                    "https://cdn-icons-png.flaticon.com/512/149/149071.png",
                    "https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.png"
                ),
                show_copy_button=True,
                value=[{"role": "assistant", "content": "👋 Welcome to the Repository Explorer! \n\n🔍 **How to get started:**\n1. Enter a Hugging Face repository ID above (e.g., 'microsoft/DialoGPT-medium')\n2. Click '🚀 Load Repository' to download and analyze the repository\n3. Once loaded, I'll have comprehensive knowledge of all the files and can answer questions about:\n • What the repository does\n • How to install and use it\n • Code structure and architecture\n • Key features and capabilities\n • Examples and usage patterns\n\n💡 **Tip:** I analyze repositories in chunks to understand the entire codebase, not just a summary!\n\nPlease load a repository to begin exploring! 🚀"}]
            )
    # Message input row: user prompt box plus send button.
    with gr.Row():
        repo_msg_input = gr.Textbox(
            label="💭 Ask about this repository",
            placeholder="What does this repository do? How do I use it?",
            lines=1,
            scale=4,
            info="Ask anything about the loaded repository"
        )
        repo_send_btn = gr.Button("📤 Send", variant="primary", scale=1)
    # with gr.Column(scale=1):
    #     # Repository content preview
    #     repo_content_display = gr.Textbox(
    #         label="📄 Repository Content Preview",
    #         lines=20,
    #         show_copy_button=True,
    #         interactive=False,
    #         info="Overview of the loaded repository structure and content"
    #     )
    # Component references returned to the caller for event wiring.
    components = {
        "repo_explorer_input": repo_explorer_input,
        "load_repo_btn": load_repo_btn,
        "repo_status_display": repo_status_display,
        "repo_chatbot": repo_chatbot,
        "repo_msg_input": repo_msg_input,
        "repo_send_btn": repo_send_btn,
        # "repo_content_display": repo_content_display
    }
    return components, states
def handle_load_repository(repo_id: str) -> Tuple[str, str]:
    """Download a repository, combine its files, and build the chat context.

    Returns a (status message, context summary) pair. The summary is the
    empty string when no repository ID was given or loading failed.
    """
    # Guard: blank or whitespace-only input.
    if not repo_id.strip():
        return "Status: Please enter a repository ID.", ""
    try:
        logger.info(f"Loading repository for exploration: {repo_id}")
        # Fetch only text-like files and merge them into a single document.
        try:
            download_filtered_space_files(repo_id, local_dir="repo_files", file_extensions=['.py', '.md', '.txt'])
            combined_text_path = combine_repo_files_for_llm()
        except Exception as e:
            logger.error(f"Error downloading repository {repo_id}: {e}")
            return f"❌ Error downloading repository: {e}", ""
        with open(combined_text_path, "r", encoding="utf-8") as f:
            repo_content = f.read()
        status = f"✅ Repository '{repo_id}' loaded successfully!\n📁 Files processed and ready for exploration.\n🔄 Analyzing repository in chunks for comprehensive context...\n💬 You can now ask questions about this repository."
        # Build the comprehensive context summary via chunked analysis.
        logger.info(f"Creating context summary for {repo_id}")
        context_summary = create_repo_context_summary(repo_content, repo_id)
        logger.info(f"Repository {repo_id} loaded and analyzed successfully for exploration")
        return status, context_summary
    except Exception as e:
        logger.error(f"Error loading repository {repo_id}: {e}")
        return f"❌ Error loading repository: {e}", ""
def handle_repo_user_message(user_message: str, history: List[Dict[str, str]], repo_context_summary: str, repo_id: str) -> Tuple[List[Dict[str, str]], str]:
    """Record the user's chat turn and clear the input box.

    Returns the updated messages-format history plus an empty string used
    to reset the textbox. When no repository context exists yet the history
    is returned untouched, keeping the bot silent until a repo is loaded.
    """
    # No repository analyzed yet -> ignore the input entirely.
    if not repo_context_summary.strip():
        return history, ""
    # First turn: seed the chat with a repository-specific greeting.
    if not history:
        history = [{"role": "assistant", "content": f"Hello! I'm your assistant for the '{repo_id}' repository. I have analyzed all the files and created a comprehensive understanding of this repository. I'm ready to answer any questions about its functionality, usage, architecture, and more. What would you like to know?"}]
    # Append the user's turn only when they actually typed something.
    if user_message:
        history.append({"role": "user", "content": user_message})
    return history, ""
def handle_repo_bot_response(history: List[Dict[str, str]], repo_context_summary: str, repo_id: str) -> List[Dict[str, str]]:
    """Generate the assistant's reply to the latest user turn.

    Uses the pre-computed repository context summary as the system prompt so
    the model can answer questions specific to *repo_id*.

    Args:
        history: Chat history in messages format ({"role", "content"} dicts).
        repo_context_summary: Output of create_repo_context_summary; when
            empty, no repository is loaded and the history is returned as-is.
        repo_id: Repository ID used to frame the system prompt.

    Returns:
        The history with one assistant message appended (an apology message
        embedding the error text if the API call fails). The list is mutated
        in place and also returned.
    """
    # Only respond when a repository is loaded and the last turn is the user's.
    if not history or history[-1]["role"] != "user" or not repo_context_summary.strip():
        return history
    user_message = history[-1]["content"]
    # Specialized system prompt built around the comprehensive context summary.
    repo_system_prompt = f"""You are an expert assistant for the Hugging Face repository '{repo_id}'.
You have comprehensive knowledge about this repository based on detailed analysis of all its files and components.
Use the following comprehensive analysis to answer user questions accurately and helpfully:
{repo_context_summary}
Instructions:
- Answer questions clearly and conversationally about this specific repository
- Reference specific components, functions, or features when relevant
- Provide practical guidance on installation, usage, and implementation
- If asked about code details, refer to the analysis above
- Be helpful and informative while staying focused on this repository
- If something isn't covered in the analysis, acknowledge the limitation
Answer the user's question based on your comprehensive knowledge of this repository."""
    try:
        from openai import OpenAI

        # Fix: pass base_url through the constructor. Assigning
        # client.base_url after construction sets it to None (breaking the
        # client) when the env var is missing; the constructor falls back to
        # the default endpoint on None instead.
        client = OpenAI(
            api_key=os.getenv("modal_api"),
            base_url=os.getenv("base_url"),
        )
        response = client.chat.completions.create(
            model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
            messages=[
                {"role": "system", "content": repo_system_prompt},
                {"role": "user", "content": user_message}
            ],
            max_tokens=1024,
            temperature=0.7
        )
        bot_response = response.choices[0].message.content
        history.append({"role": "assistant", "content": bot_response})
    except Exception as e:
        # Surface the failure in-chat rather than crashing the UI callback.
        logger.error(f"Error generating repo bot response: {e}")
        error_response = f"I apologize, but I encountered an error while processing your question: {e}"
        history.append({"role": "assistant", "content": error_response})
    return history
def setup_repo_explorer_events(components: Dict[str, gr.components.Component], states: Dict[str, gr.State]):
    """Setup event handlers for the repo explorer components.

    Expects the component/state dicts returned by create_repo_explorer_tab.
    """
    # Load repository event: download + analyze, then record the repo ID in
    # state once loading has finished (chained via .then so the ID is only
    # stored after handle_load_repository returns).
    components["load_repo_btn"].click(
        fn=handle_load_repository,
        inputs=[components["repo_explorer_input"]],
        outputs=[components["repo_status_display"], states["repo_context_summary"]]
    ).then(
        fn=lambda repo_id: repo_id,  # copy textbox value into current_repo_id state
        inputs=[components["repo_explorer_input"]],
        outputs=[states["current_repo_id"]]
    )
    # Chat submission (Enter key): append the user turn and clear the textbox,
    # then generate the assistant reply from the updated history.
    components["repo_msg_input"].submit(
        fn=handle_repo_user_message,
        inputs=[components["repo_msg_input"], components["repo_chatbot"], states["repo_context_summary"], states["current_repo_id"]],
        outputs=[components["repo_chatbot"], components["repo_msg_input"]]
    ).then(
        fn=handle_repo_bot_response,
        inputs=[components["repo_chatbot"], states["repo_context_summary"], states["current_repo_id"]],
        outputs=[components["repo_chatbot"]]
    )
    # Send button mirrors the textbox submit behavior exactly.
    components["repo_send_btn"].click(
        fn=handle_repo_user_message,
        inputs=[components["repo_msg_input"], components["repo_chatbot"], states["repo_context_summary"], states["current_repo_id"]],
        outputs=[components["repo_chatbot"], components["repo_msg_input"]]
    ).then(
        fn=handle_repo_bot_response,
        inputs=[components["repo_chatbot"], states["repo_context_summary"], states["current_repo_id"]],
        outputs=[components["repo_chatbot"]]
    )