|
import gradio as gr |
|
import os |
|
import logging |
|
from typing import List, Dict, Tuple |
|
from analyzer import combine_repo_files_for_llm |
|
from hf_utils import download_space_repo |
|
|
|
|
|
# Module-level logger; handler/level configuration is left to the application entry point.
logger = logging.getLogger(__name__)
|
|
|
def analyze_repo_chunk_for_context(chunk: str, repo_id: str) -> str:
    """
    Analyze a repository chunk to create conversational context for the chatbot.

    Sends the chunk to the LLM endpoint configured via the ``modal_api`` and
    ``base_url`` environment variables and returns a 2-3 paragraph summary.
    On any failure a short fallback string is returned instead of raising, so
    callers can always embed the result in the aggregate context.
    """
    try:
        from openai import OpenAI

        # Endpoint credentials and location come from the environment.
        llm_client = OpenAI(api_key=os.getenv("modal_api"))
        llm_client.base_url = os.getenv("base_url")

        context_prompt = f"""You are analyzing a chunk of code from the repository '{repo_id}' to create a conversational summary for a chatbot assistant.

Create a concise but informative summary that helps understand:
- What this code section does
- Key functions, classes, or components
- Important features or capabilities
- How it relates to the overall repository purpose
- Any notable patterns or technologies used

Focus on information that would be useful for answering user questions about the repository.

Repository chunk:
{chunk}

Provide a clear, conversational summary in 2-3 paragraphs:"""

        chat_messages = [
            {"role": "system", "content": "You are an expert code analyst creating conversational summaries for a repository assistant chatbot."},
            {"role": "user", "content": context_prompt},
        ]

        completion = llm_client.chat.completions.create(
            model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
            messages=chat_messages,
            max_tokens=600,
            temperature=0.3,
        )

        return completion.choices[0].message.content

    except Exception as e:
        # Degrade gracefully — the caller stitches this string into the summary.
        logger.error(f"Error analyzing chunk for context: {e}")
        return f"Code section analysis unavailable: {e}"
|
|
|
def create_repo_context_summary(repo_content: str, repo_id: str) -> str:
    """
    Create a comprehensive context summary by analyzing the repository in chunks.

    Splits the combined repository text into fixed-size line windows, summarizes
    each window via :func:`analyze_repo_chunk_for_context`, then asks the LLM to
    roll the per-section summaries into one overview. If the roll-up fails, the
    raw section summaries are returned; if everything fails, an error string.
    """
    try:
        all_lines = repo_content.split('\n')
        lines_per_chunk = 1200  # window size in lines, not characters
        section_summaries = []

        logger.info(f"Analyzing repository {repo_id} in chunks for chatbot context")

        for start in range(0, len(all_lines), lines_per_chunk):
            window = '\n'.join(all_lines[start:start + lines_per_chunk])
            if not window.strip():
                continue  # skip whitespace-only windows
            window_summary = analyze_repo_chunk_for_context(window, repo_id)
            section_summaries.append(f"=== Section {len(section_summaries) + 1} ===\n{window_summary}")

        joined_sections = chr(10).join(section_summaries)

        try:
            from openai import OpenAI

            llm_client = OpenAI(api_key=os.getenv("modal_api"))
            llm_client.base_url = os.getenv("base_url")

            final_prompt = f"""Based on the following section summaries of repository '{repo_id}', create a comprehensive overview that a chatbot can use to answer user questions.

Section Summaries:
{joined_sections}

Create a well-structured overview covering:
1. Repository Purpose & Main Functionality
2. Key Components & Architecture
3. Important Features & Capabilities
4. Technology Stack & Dependencies
5. Usage Patterns & Examples

Make this comprehensive but conversational - it will be used by a chatbot to answer user questions about the repository."""

            completion = llm_client.chat.completions.create(
                model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
                messages=[
                    {"role": "system", "content": "You are creating a comprehensive repository summary for a chatbot assistant."},
                    {"role": "user", "content": final_prompt},
                ],
                max_tokens=1500,
                temperature=0.3,
            )
            final_summary = completion.choices[0].message.content

            full_context = f"""=== REPOSITORY ANALYSIS FOR {repo_id.upper()} ===

{final_summary}

=== DETAILED SECTION SUMMARIES ===
{joined_sections}"""

            logger.info(f"Created comprehensive context summary for {repo_id}")
            return full_context

        except Exception as e:
            # Roll-up failed — still hand back the per-section material.
            logger.error(f"Error creating final summary: {e}")
            return f"=== REPOSITORY ANALYSIS FOR {repo_id.upper()} ===\n\n" + '\n\n'.join(section_summaries)

    except Exception as e:
        logger.error(f"Error creating repo context summary: {e}")
        return f"Repository analysis unavailable: {e}"
|
|
|
def create_repo_explorer_tab() -> Tuple[Dict[str, gr.components.Component], Dict[str, gr.State]]:
    """
    Creates the Repo Explorer tab content and returns the component references and state variables.

    Returns:
        components: name -> Gradio component mapping; consumed by
            setup_repo_explorer_events() to wire event handlers.
        states: gr.State holders shared across events — the analyzed
            repository context summary and the currently loaded repo ID.
    """

    # Session state shared between the load and chat event handlers.
    states = {
        "repo_context_summary": gr.State(""),
        "current_repo_id": gr.State("")
    }

    gr.Markdown("### ποΈ Deep Dive into a Specific Repository")

    # Row 1: repository ID input plus the load trigger.
    with gr.Row():
        with gr.Column(scale=2):
            repo_explorer_input = gr.Textbox(
                label="π Repository ID",
                placeholder="microsoft/DialoGPT-medium",
                info="Enter a Hugging Face repository ID to explore"
            )
        with gr.Column(scale=1):
            load_repo_btn = gr.Button("π Load Repository", variant="primary", size="lg")

    # Row 2: read-only status line, populated by handle_load_repository().
    with gr.Row():
        repo_status_display = gr.Textbox(
            label="π Repository Status",
            interactive=False,
            lines=3,
            info="Current repository loading status and basic info"
        )

    # Row 3: the chatbot pane, pre-seeded with usage instructions.
    with gr.Row():
        with gr.Column(scale=2):
            repo_chatbot = gr.Chatbot(
                label="π€ Repository Assistant",
                height=400,
                # "messages" format: list of {"role": ..., "content": ...} dicts,
                # matching what handle_repo_user_message/-bot_response produce.
                type="messages",
                avatar_images=(
                    "https://cdn-icons-png.flaticon.com/512/149/149071.png",
                    "https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.png"
                ),
                show_copy_button=True,
                value=[{"role": "assistant", "content": "π Welcome to the Repository Explorer! \n\nπ **How to get started:**\n1. Enter a Hugging Face repository ID above (e.g., 'microsoft/DialoGPT-medium')\n2. Click 'π Load Repository' to download and analyze the repository\n3. Once loaded, I'll have comprehensive knowledge of all the files and can answer questions about:\n β’ What the repository does\n β’ How to install and use it\n β’ Code structure and architecture\n β’ Key features and capabilities\n β’ Examples and usage patterns\n\nπ‘ **Tip:** I analyze repositories in chunks to understand the entire codebase, not just a summary!\n\nPlease load a repository to begin exploring! π"}]
            )

    # Row 4: free-text question box and its send button.
    with gr.Row():
        repo_msg_input = gr.Textbox(
            label="π Ask about this repository",
            placeholder="What does this repository do? How do I use it?",
            lines=1,
            scale=4,
            info="Ask anything about the loaded repository"
        )
        repo_send_btn = gr.Button("π€ Send", variant="primary", scale=1)

    # Expose components by name so event wiring can live in a separate function.
    components = {
        "repo_explorer_input": repo_explorer_input,
        "load_repo_btn": load_repo_btn,
        "repo_status_display": repo_status_display,
        "repo_chatbot": repo_chatbot,
        "repo_msg_input": repo_msg_input,
        "repo_send_btn": repo_send_btn,
    }

    return components, states
|
|
|
def handle_load_repository(repo_id: str) -> Tuple[str, str]:
    """Load a specific repository and prepare it for exploration with chunk-based analysis.

    Args:
        repo_id: Hugging Face repository ID (e.g. "microsoft/DialoGPT-medium").
            Surrounding whitespace is ignored.

    Returns:
        (status_message, context_summary). On failure or empty input the
        summary is "" so downstream chat handlers treat no repo as loaded.
    """
    # Normalize once: previously only the emptiness check stripped, so a padded
    # ID like " owner/repo " was passed verbatim to the downloader and failed.
    repo_id = repo_id.strip()
    if not repo_id:
        return "Status: Please enter a repository ID.", ""

    try:
        logger.info(f"Loading repository for exploration: {repo_id}")

        # Download the Space and flatten its files into one text file for the LLM.
        download_space_repo(repo_id, local_dir="repo_files")
        txt_path = combine_repo_files_for_llm()

        with open(txt_path, "r", encoding="utf-8") as f:
            repo_content = f.read()

        status = f"β Repository '{repo_id}' loaded successfully!\nπ Files processed and ready for exploration.\nπ Analyzing repository in chunks for comprehensive context...\nπ¬ You can now ask questions about this repository."

        # Chunk-by-chunk analysis; this is the expensive step.
        logger.info(f"Creating context summary for {repo_id}")
        context_summary = create_repo_context_summary(repo_content, repo_id)

        logger.info(f"Repository {repo_id} loaded and analyzed successfully for exploration")
        return status, context_summary

    except Exception as e:
        logger.error(f"Error loading repository {repo_id}: {e}")
        error_status = f"β Error loading repository: {e}"
        return error_status, ""
|
|
|
def handle_repo_user_message(user_message: str, history: List[Dict[str, str]], repo_context_summary: str, repo_id: str) -> Tuple[List[Dict[str, str]], str]:
    """Handle user messages in the repo-specific chatbot.

    Appends the user's turn to the chat history (seeding a welcome message on
    first contact) and clears the input textbox.

    Args:
        user_message: Raw text from the input textbox.
        history: Chat history in Gradio "messages" format (role/content dicts);
            mutated in place when a turn is appended.
        repo_context_summary: Analysis text for the loaded repo; empty means
            no repository is loaded yet.
        repo_id: ID of the currently loaded repository (used in the welcome).

    Returns:
        (updated_history, "") — the empty string clears the input textbox.
    """
    # No repository loaded yet: drop the message silently.
    if not repo_context_summary.strip():
        return history, ""

    # Seed the conversation with a welcome turn on first contact.
    if not history:
        welcome_msg = f"Hello! I'm your assistant for the '{repo_id}' repository. I have analyzed all the files and created a comprehensive understanding of this repository. I'm ready to answer any questions about its functionality, usage, architecture, and more. What would you like to know?"
        history = [{"role": "assistant", "content": welcome_msg}]

    # Fix: ignore whitespace-only submissions — previously "   " was appended
    # as a user turn and the bot handler then tried to answer a blank question.
    if user_message and user_message.strip():
        history.append({"role": "user", "content": user_message})
    return history, ""
|
|
|
def handle_repo_bot_response(history: List[Dict[str, str]], repo_context_summary: str, repo_id: str) -> List[Dict[str, str]]:
    """Generate bot response for repo-specific questions using comprehensive context.

    Answers only when the most recent turn is a user question and a repository
    context summary is available; otherwise the history is returned untouched.
    On LLM failure an apology turn is appended instead of raising.
    """
    # Guard: need a loaded repo and a pending user question to respond to.
    awaiting_question = bool(history) and history[-1]["role"] == "user"
    if not awaiting_question or not repo_context_summary.strip():
        return history

    question = history[-1]["content"]

    # Ground the model in the precomputed repository analysis.
    repo_system_prompt = f"""You are an expert assistant for the Hugging Face repository '{repo_id}'.
You have comprehensive knowledge about this repository based on detailed analysis of all its files and components.

Use the following comprehensive analysis to answer user questions accurately and helpfully:

{repo_context_summary}

Instructions:
- Answer questions clearly and conversationally about this specific repository
- Reference specific components, functions, or features when relevant
- Provide practical guidance on installation, usage, and implementation
- If asked about code details, refer to the analysis above
- Be helpful and informative while staying focused on this repository
- If something isn't covered in the analysis, acknowledge the limitation

Answer the user's question based on your comprehensive knowledge of this repository."""

    try:
        from openai import OpenAI

        llm_client = OpenAI(api_key=os.getenv("modal_api"))
        llm_client.base_url = os.getenv("base_url")

        completion = llm_client.chat.completions.create(
            model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
            messages=[
                {"role": "system", "content": repo_system_prompt},
                {"role": "user", "content": question},
            ],
            max_tokens=1024,
            temperature=0.7,
        )
        history.append({"role": "assistant", "content": completion.choices[0].message.content})

    except Exception as e:
        logger.error(f"Error generating repo bot response: {e}")
        error_response = f"I apologize, but I encountered an error while processing your question: {e}"
        history.append({"role": "assistant", "content": error_response})

    return history
|
|
|
def setup_repo_explorer_events(components: Dict[str, gr.components.Component], states: Dict[str, gr.State]):
    """Setup event handlers for the repo explorer components.

    Args:
        components: Component mapping returned by create_repo_explorer_tab().
        states: State mapping returned by create_repo_explorer_tab().
    """

    # Load button: fetch + analyze the repository, then record the active repo ID.
    components["load_repo_btn"].click(
        fn=handle_load_repository,
        inputs=[components["repo_explorer_input"]],
        outputs=[components["repo_status_display"], states["repo_context_summary"]]
    ).then(
        fn=lambda repo_id: repo_id,
        inputs=[components["repo_explorer_input"]],
        outputs=[states["current_repo_id"]]
    )

    # Textbox submit and Send button previously duplicated the identical two-step
    # chat wiring verbatim; register both triggers in one loop so they can't drift.
    user_turn_inputs = [components["repo_msg_input"], components["repo_chatbot"], states["repo_context_summary"], states["current_repo_id"]]
    bot_turn_inputs = [components["repo_chatbot"], states["repo_context_summary"], states["current_repo_id"]]
    for register_trigger in (components["repo_msg_input"].submit, components["repo_send_btn"].click):
        register_trigger(
            fn=handle_repo_user_message,
            inputs=user_turn_inputs,
            outputs=[components["repo_chatbot"], components["repo_msg_input"]]
        ).then(
            fn=handle_repo_bot_response,
            inputs=bot_turn_inputs,
            outputs=[components["repo_chatbot"]]
        )