|
import gradio as gr |
|
import regex as re |
|
import csv |
|
import pandas as pd |
|
from typing import List, Dict, Tuple, Any |
|
import logging |
|
import os |
|
import time |
|
|
|
|
|
from analyzer import ( |
|
combine_repo_files_for_llm, |
|
parse_llm_json_response, |
|
analyze_combined_file, |
|
handle_load_repository |
|
) |
|
from repo_explorer import handle_load_repository_with_vectorization |
|
from hf_utils import download_filtered_space_files, search_top_spaces |
|
from chatbot_page import chat_with_user, extract_keywords_from_conversation |
|
from repo_explorer import create_repo_explorer_tab, setup_repo_explorer_events, initialize_repo_chatbot |
|
|
|
|
|
# Configure logging once at import time: INFO level with timestamped records.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Module-level logger shared by the helper functions below.
logger = logging.getLogger(__name__)

# CSV file holding one row per repository: the repo ID plus its analysis
# columns (strength, weaknesses, speciality, relevance rating).
CSV_FILE = "repo_ids.csv"

# System prompt for the requirements-gathering chatbot. The closing sentence it
# asks the model to say contains the phrases "enough information" and
# "automatically", which should_auto_extract_keywords() looks for.
# NOTE(review): the code that sends this prompt is outside this view.
CHATBOT_SYSTEM_PROMPT = (
    "You are a helpful assistant whose ONLY job is to gather information about the user's ideal repository requirements. "
    "DO NOT suggest any specific repositories or give repository recommendations. "
    "Your role is to ask clarifying questions to understand exactly what the user is looking for. "
    "Ask about their use case, preferred programming language, specific features needed, project type, etc. "
    "When you feel you have gathered enough detailed information about their requirements, "
    "tell the user: 'I think I have enough information about your requirements. I'll now search for relevant repositories automatically.' "
    "Focus on understanding their needs, not providing solutions."
)

# Greeting text for the chat; presumably shown as the opening assistant
# message - TODO confirm against the UI wiring, which is outside this view.
CHATBOT_INITIAL_MESSAGE = "Hello! I'm here to help you find the perfect Hugging Face repository. Tell me about your project - what are you trying to build? I'll ask some questions to understand your needs and then automatically find relevant repositories for you."
|
|
|
|
|
|
|
def is_repo_id_format(text: str) -> bool:
    """Heuristically decide whether ``text`` is a list of repository IDs.

    The text is split on commas and newlines. It is treated as repository IDs
    when at least half of the non-empty entries contain a forward slash.
    Returns False when there are no non-empty entries.
    """
    entries = []
    for chunk in re.split(r'[\n,]+', text):
        cleaned = chunk.strip()
        if cleaned:
            entries.append(cleaned)

    if len(entries) == 0:
        return False

    entries_with_slash = len([entry for entry in entries if '/' in entry])
    return entries_with_slash >= len(entries) * 0.5
|
|
|
def should_auto_extract_keywords(history: List[Dict[str, str]]) -> bool:
    """Decide whether the conversation is ready for automatic keyword extraction.

    Only the most recent assistant message is inspected. The function returns
    True when that message (case-insensitively) contains one of the "ready"
    phrases, and False when the history has fewer than four messages, holds no
    assistant message, or the latest assistant message has no text content.

    Args:
        history: Chat history as a list of ``{"role": ..., "content": ...}`` dicts.

    Returns:
        True if the assistant signalled it has enough information.
    """
    if not history or len(history) < 4:
        return False

    ready_phrases = (
        "enough information",
        "search for repositories",
        "find repositories",
        "look for repositories",
        "automatically",
        "ready to search",
    )

    for msg in reversed(history):
        if msg.get('role') != 'assistant':
            continue
        content = msg.get('content')
        # Content may be None or a non-text payload; treat that as "no text"
        # instead of crashing on .lower().
        last_assistant_msg = content.lower() if isinstance(content, str) else ""
        return any(phrase in last_assistant_msg for phrase in ready_phrases)

    # No assistant message in the history at all.
    return False
|
|
|
def _parse_repo_id_list(llm_response: str) -> List[str]:
    """Extract the JSON list of repository IDs from raw LLM text.

    Returns the string items found between the first ``[`` and the last ``]``,
    stripped and de-duplicated in order. Returns an empty list when no JSON
    list is present. Raises ``json.JSONDecodeError`` if the span is not valid
    JSON.
    """
    import json  # only needed here

    # DOTALL so that a pretty-printed (multi-line) JSON array still matches.
    json_match = re.search(r'\[.*\]', llm_response, re.DOTALL)
    if not json_match:
        return []

    parsed = json.loads(json_match.group())
    if not isinstance(parsed, list):
        return []

    cleaned = (item.strip() for item in parsed if isinstance(item, str))
    return list(dict.fromkeys(item for item in cleaned if item))


def get_top_relevant_repos(df: pd.DataFrame, user_requirements: str, top_n: int = 3) -> pd.DataFrame:
    """Select the ``top_n`` most relevant analyzed repositories using an LLM.

    Rows whose analysis columns are all blank are ignored. The remaining rows
    are sent to the LLM together with the user's requirements, and the LLM is
    asked for a JSON list of repository IDs ordered by relevance.

    Args:
        df: Analysis table with the columns "repo id", "strength",
            "weaknesses", "speciality" and "relevance rating".
        user_requirements: Free-text requirements; a generic context is used
            when blank.
        top_n: Maximum number of repositories to return.

    Returns:
        The selected rows in LLM order. If the LLM call or the parsing of its
        answer fails, the first ``top_n`` analyzed rows are returned instead.
        An empty frame with the five expected columns is returned when ``df``
        is empty, nothing has been analyzed yet, or an unexpected error occurs.
    """
    result_columns = ["repo id", "strength", "weaknesses", "speciality", "relevance rating"]

    try:
        if df.empty:
            return pd.DataFrame(columns=result_columns)

        # Keep only rows where at least one analysis field has been filled in.
        has_analysis = (
            (df['strength'].str.strip() != '')
            | (df['weaknesses'].str.strip() != '')
            | (df['speciality'].str.strip() != '')
            | (df['relevance rating'].str.strip() != '')
        )
        analyzed_df = df[has_analysis].copy()

        if analyzed_df.empty:
            logger.warning("No analyzed repositories found for LLM selection")
            return pd.DataFrame(columns=result_columns)

        # Plain-text report of every analyzed repository for the prompt.
        csv_data = "".join(
            f"Repository: {row['repo id']}\n"
            f"Strengths: {row['strength']}\n"
            f"Weaknesses: {row['weaknesses']}\n"
            f"Speciality: {row['speciality']}\n"
            f"Relevance: {row['relevance rating']}\n\n"
            for _, row in analyzed_df.iterrows()
        )

        user_context = user_requirements if user_requirements.strip() else "General repository recommendation"

        prompt = f"""Based on the user's requirements and the analysis of repositories below, select the top {top_n} most relevant repositories.

User Requirements:
{user_context}

Repository Analysis Data:
{csv_data}

Please analyze all repositories and select the {top_n} most relevant ones based on:
1. How well they match the user's specific requirements
2. Their strengths and capabilities
3. Their relevance rating
4. Their speciality alignment with user needs

Return ONLY a JSON list of the repository IDs in order of relevance (most relevant first). Example format:
["repo1", "repo2", "repo3"]

Selected repositories:"""

        try:
            # Imported lazily so a missing openai package only disables the
            # LLM ranking and falls through to the fallback below.
            from openai import OpenAI

            client = OpenAI(api_key=os.getenv("modal_api"))
            client.base_url = os.getenv("base_url")

            response = client.chat.completions.create(
                model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
                messages=[
                    {"role": "system", "content": "You are an expert at analyzing and ranking repositories based on user requirements. Always return valid JSON."},
                    {"role": "user", "content": prompt},
                ],
                max_tokens=200,
                temperature=0.3,
            )

            llm_response = response.choices[0].message.content.strip()
            logger.info(f"LLM response for top repos: {llm_response}")

            selected_repos = _parse_repo_id_list(llm_response)
            if selected_repos:
                logger.info(f"LLM selected repositories: {selected_repos}")

                # Walk the whole ranking so IDs the LLM invented do not
                # shrink the result when later entries are valid.
                top_repos_list = []
                for repo_id in selected_repos:
                    if len(top_repos_list) >= top_n:
                        break
                    matching_rows = analyzed_df[analyzed_df['repo id'] == repo_id]
                    if not matching_rows.empty:
                        top_repos_list.append(matching_rows.iloc[0])

                if top_repos_list:
                    top_repos = pd.DataFrame(top_repos_list)
                    logger.info(f"Successfully selected {len(top_repos)} repositories using LLM")
                    return top_repos

            logger.warning("Failed to parse LLM response, using fallback selection")
            return analyzed_df.head(top_n)

        except Exception as llm_error:
            # Best effort: any LLM/network/parsing problem degrades to the
            # first analyzed rows instead of failing the request.
            logger.error(f"LLM selection failed: {llm_error}")
            return analyzed_df.head(top_n)

    except Exception as e:
        logger.error(f"Error in LLM-based repo selection: {e}")
        return pd.DataFrame(columns=result_columns)
|
|
|
def write_repos_to_csv(repo_ids: List[str]) -> None:
    """Overwrite the CSV file with a header and one blank-analysis row per repo ID.

    Each row holds the repo ID followed by four empty analysis fields. Any
    error while writing is logged and swallowed; nothing is raised.
    """
    header = ["repo id", "strength", "weaknesses", "speciality", "relevance rating"]
    try:
        with open(CSV_FILE, mode="w", newline='', encoding="utf-8") as handle:
            out = csv.writer(handle)
            out.writerow(header)
            out.writerows([rid, "", "", "", ""] for rid in repo_ids)
        logger.info(f"Wrote {len(repo_ids)} repo IDs to {CSV_FILE}")
    except Exception as write_error:
        logger.error(f"Error writing to CSV: {write_error}")
|
|
|
def format_text_for_dataframe(text: str, max_length: int = 200) -> str:
    """Clean and truncate text for compact display in a dataframe cell.

    Leading/trailing whitespace is removed and every run of whitespace is
    collapsed to a single space. Text longer than ``max_length`` is cut and
    terminated with "..." so that the result never exceeds ``max_length``
    characters.

    Args:
        text: Value to format; falsy values and NaN yield "".
        max_length: Maximum length of the returned string.

    Returns:
        The cleaned, possibly truncated text.
    """
    if not text or pd.isna(text):
        return ""

    text = re.sub(r'\s+', ' ', str(text).strip())

    if len(text) <= max_length:
        return text

    if max_length <= 3:
        # No room for text plus an ellipsis: a plain slice with a negative
        # bound would cut from the end and overshoot max_length.
        return "..."[:max(max_length, 0)]

    return text[:max_length - 3] + "..."
|
|
|
def read_csv_to_dataframe() -> pd.DataFrame:
    """Read the CSV file into a DataFrame of strings with full text preserved.

    All columns are read as strings and missing values become "".

    Returns:
        The CSV contents. If the file does not exist, or cannot be read or
        parsed (the error is logged in that case), an empty frame with the
        five expected columns is returned so callers always see the same
        schema.
    """
    expected_columns = ["repo id", "strength", "weaknesses", "speciality", "relevance rating"]
    try:
        return pd.read_csv(CSV_FILE, dtype=str).fillna('')
    except FileNotFoundError:
        # Nothing has been written yet; not an error.
        return pd.DataFrame(columns=expected_columns)
    except Exception as e:
        logger.error(f"Error reading CSV: {e}")
        # Same schema as the missing-file case instead of a column-less frame.
        return pd.DataFrame(columns=expected_columns)
|
|
|
def format_dataframe_for_display(df: pd.DataFrame) -> pd.DataFrame:
    """Return the frame to display, with full text and no truncation.

    An empty frame is handed back as-is; otherwise a copy is returned so the
    caller's frame is not shared with the display.
    """
    return df if df.empty else df.copy()
|
|
|
def analyze_and_update_single_repo(repo_id: str, user_requirements: str = "") -> Tuple[str, str, pd.DataFrame]:
    """Download and analyze one repository, then record the result in the CSV.

    Steps: download the repo's .py/.md/.txt files into ``repo_files``, combine
    them into a single text file, run the LLM analysis on it together with
    ``user_requirements``, parse the JSON found in the LLM output, and write
    the analysis into the row of the CSV whose "repo id" equals ``repo_id``.

    Args:
        repo_id: Repository to analyze.
        user_requirements: Requirements passed to the analysis for relevance
            rating.

    Returns:
        ``(combined_content, summary, df)``: the combined file text, a
        human-readable summary of the JSON extraction, and the updated table.
        On any unexpected error the content is "", the summary describes the
        error, and the table is whatever the CSV currently holds.
    """
    try:
        logger.info(f"Starting analysis for repo: {repo_id}")
        download_filtered_space_files(repo_id, local_dir="repo_files", file_extensions=['.py', '.md', '.txt'])
        txt_path = combine_repo_files_for_llm()

        with open(txt_path, "r", encoding="utf-8") as f:
            combined_content = f.read()

        llm_output = analyze_combined_file(txt_path, user_requirements)

        # Take the text from the last '{' to the last '}' as the JSON answer.
        # NOTE(review): a nested JSON object would be cut at its inner '{';
        # this looks like it expects a flat object - confirm against the analyzer prompt.
        last_start = llm_output.rfind('{')
        last_end = llm_output.rfind('}')
        final_json_str = llm_output[last_start:last_end+1] if last_start != -1 and last_end != -1 else "{}"

        llm_json = parse_llm_json_response(final_json_str)

        # An "error" key in the parsed dict is treated as a failed extraction.
        if isinstance(llm_json, dict) and "error" not in llm_json:
            strengths = llm_json.get("strength", "N/A")
            weaknesses = llm_json.get("weaknesses", "N/A")
            relevance = llm_json.get("relevance rating", "N/A")
            summary = f"JSON extraction: SUCCESS\n\nStrengths:\n{strengths}\n\nWeaknesses:\n{weaknesses}\n\nRelevance: {relevance}"
        else:
            summary = "JSON extraction: FAILED\nRaw response might not be valid JSON."

        # Update the first row that belongs to this repository.
        df = read_csv_to_dataframe()
        repo_found_in_df = False
        for idx, row in df.iterrows():
            if row["repo id"] != repo_id:
                continue
            if isinstance(llm_json, dict):
                for column in ("strength", "weaknesses", "speciality", "relevance rating"):
                    df.at[idx, column] = llm_json.get(column, "")
            repo_found_in_df = True
            break

        if not repo_found_in_df:
            logger.warning(f"Repo ID {repo_id} not found in CSV for updating.")

        # Persist the table; retry once after a short pause if the write fails.
        csv_written = False
        try:
            df.to_csv(CSV_FILE, index=False)
            if hasattr(os, 'sync'):
                os.sync()
            csv_written = True
            logger.info(f"Successfully updated CSV for {repo_id}")
        except Exception as csv_error:
            logger.error(f"Failed to write CSV for {repo_id}: {csv_error}")
            time.sleep(0.2)
            try:
                df.to_csv(CSV_FILE, index=False)
                csv_written = True
                logger.info(f"Successfully updated CSV for {repo_id} on retry")
            except Exception as retry_error:
                logger.error(f"Failed to write CSV for {repo_id} on retry: {retry_error}")

        # Only claim success when the results actually reached the file.
        if csv_written:
            logger.info(f"Successfully analyzed and updated CSV for {repo_id}")
        else:
            logger.warning(f"Analyzed {repo_id} but could not save the results to {CSV_FILE}")
        return combined_content, summary, df

    except Exception as e:
        logger.error(f"An error occurred during analysis of {repo_id}: {e}")
        error_summary = f"Error analyzing repo: {e}"
        return "", error_summary, format_dataframe_for_display(read_csv_to_dataframe())
|
|
|
|
|
def convert_messages_to_tuples(history: List[Dict[str, str]]) -> List[Tuple[str, str]]:
    """Turn a Gradio 'messages' history into the legacy list of (user, assistant) tuples.

    A pair is produced for every user message that is immediately followed by
    an assistant message. Anything else - a leading assistant greeting, a user
    message with no reply yet, back-to-back user messages - is left out.
    """
    pairs = []
    total = len(history)

    for position, message in enumerate(history):
        if message['role'] != 'user':
            continue

        follow_up = position + 1
        if follow_up >= total:
            continue

        reply = history[follow_up]
        if reply['role'] == 'assistant':
            question = message['content']
            answer = reply['content']
            pairs.append((question, answer))

    return pairs
|
|
|
|
|
|
|
def create_ui() -> gr.Blocks: |
|
"""Creates and configures the entire Gradio interface.""" |
|
|
|
css = """ |
|
/* Modern sleek design */ |
|
.gradio-container { |
|
font-family: 'Inter', 'system-ui', sans-serif; |
|
background: linear-gradient(135deg, #0a0a0a 0%, #1a1a1a 100%); |
|
min-height: 100vh; |
|
} |
|
|
|
.gr-form { |
|
background: rgba(255, 255, 255, 0.95); |
|
backdrop-filter: blur(10px); |
|
border-radius: 16px; |
|
box-shadow: 0 8px 32px rgba(0, 0, 0, 0.1); |
|
padding: 24px; |
|
margin: 16px; |
|
border: 1px solid rgba(255, 255, 255, 0.2); |
|
} |
|
|
|
.gr-button { |
|
background: linear-gradient(45deg, #667eea, #764ba2); |
|
border: none; |
|
border-radius: 12px; |
|
color: white; |
|
font-weight: 600; |
|
padding: 12px 24px; |
|
transition: all 0.3s ease; |
|
box-shadow: 0 4px 15px rgba(102, 126, 234, 0.4); |
|
} |
|
|
|
.gr-button:hover { |
|
transform: translateY(-2px); |
|
box-shadow: 0 6px 20px rgba(102, 126, 234, 0.6); |
|
} |
|
|
|
.gr-textbox { |
|
border: 2px solid rgba(102, 126, 234, 0.2); |
|
border-radius: 12px; |
|
background: rgba(255, 255, 255, 0.9); |
|
transition: all 0.3s ease; |
|
} |
|
|
|
.gr-textbox:focus { |
|
border-color: #667eea; |
|
box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1); |
|
} |
|
|
|
.gr-panel { |
|
background: rgba(255, 255, 255, 0.95); |
|
border-radius: 16px; |
|
box-shadow: 0 8px 32px rgba(0, 0, 0, 0.1); |
|
border: 1px solid rgba(255, 255, 255, 0.2); |
|
} |
|
|
|
.gr-tab-nav { |
|
background: rgba(255, 255, 255, 0.95); |
|
border-radius: 12px 12px 0 0; |
|
backdrop-filter: blur(10px); |
|
} |
|
|
|
.gr-tab-nav button { |
|
background: transparent; |
|
border: none; |
|
padding: 16px 24px; |
|
font-weight: 600; |
|
color: #666; |
|
transition: all 0.3s ease; |
|
} |
|
|
|
.gr-tab-nav button.selected { |
|
background: linear-gradient(45deg, #667eea, #764ba2); |
|
color: white; |
|
border-radius: 8px; |
|
} |
|
|
|
.chatbot { |
|
border-radius: 16px; |
|
box-shadow: 0 4px 20px rgba(0, 0, 0, 0.1); |
|
} |
|
|
|
/* Hide Gradio footer */ |
|
footer { |
|
display: none !important; |
|
} |
|
|
|
/* Custom scrollbar */ |
|
::-webkit-scrollbar { |
|
width: 8px; |
|
} |
|
|
|
::-webkit-scrollbar-track { |
|
background: rgba(255, 255, 255, 0.1); |
|
border-radius: 4px; |
|
} |
|
|
|
::-webkit-scrollbar-thumb { |
|
background: linear-gradient(45deg, #667eea, #764ba2); |
|
border-radius: 4px; |
|
} |
|
|
|
/* Improved dataframe styling for full text display */ |
|
.gr-dataframe { |
|
border-radius: 12px; |
|
overflow: hidden; |
|
box-shadow: 0 4px 20px rgba(0, 0, 0, 0.1); |
|
background: rgba(255, 255, 255, 0.98); |
|
} |
|
|
|
.gr-dataframe table { |
|
width: 100%; |
|
table-layout: fixed; |
|
border-collapse: collapse; |
|
} |
|
|
|
/* Column width specifications for both dataframes */ |
|
.gr-dataframe th, |
|
.gr-dataframe td { |
|
padding: 12px 16px; |
|
text-align: left; |
|
border-bottom: 1px solid rgba(0, 0, 0, 0.1); |
|
font-size: 0.95rem; |
|
line-height: 1.4; |
|
} |
|
|
|
/* Specific column widths - applying to both dataframes */ |
|
.gr-dataframe th:nth-child(1), |
|
.gr-dataframe td:nth-child(1) { width: 16.67% !important; min-width: 16.67% !important; max-width: 16.67% !important; } |
|
.gr-dataframe th:nth-child(2), |
|
.gr-dataframe td:nth-child(2) { width: 25% !important; min-width: 25% !important; max-width: 25% !important; } |
|
.gr-dataframe th:nth-child(3), |
|
.gr-dataframe td:nth-child(3) { width: 25% !important; min-width: 25% !important; max-width: 25% !important; } |
|
.gr-dataframe th:nth-child(4), |
|
.gr-dataframe td:nth-child(4) { width: 20.83% !important; min-width: 20.83% !important; max-width: 20.83% !important; } |
|
.gr-dataframe th:nth-child(5), |
|
.gr-dataframe td:nth-child(5) { width: 12.5% !important; min-width: 12.5% !important; max-width: 12.5% !important; } |
|
|
|
/* Additional specific targeting for both dataframes */ |
|
div[data-testid="dataframe"] table th:nth-child(1), |
|
div[data-testid="dataframe"] table td:nth-child(1) { width: 16.67% !important; } |
|
div[data-testid="dataframe"] table th:nth-child(2), |
|
div[data-testid="dataframe"] table td:nth-child(2) { width: 25% !important; } |
|
div[data-testid="dataframe"] table th:nth-child(3), |
|
div[data-testid="dataframe"] table td:nth-child(3) { width: 25% !important; } |
|
div[data-testid="dataframe"] table th:nth-child(4), |
|
div[data-testid="dataframe"] table td:nth-child(4) { width: 20.83% !important; } |
|
div[data-testid="dataframe"] table th:nth-child(5), |
|
div[data-testid="dataframe"] table td:nth-child(5) { width: 12.5% !important; } |
|
|
|
/* Make repository names clickable */ |
|
.gr-dataframe td:nth-child(1) { |
|
cursor: pointer; |
|
color: #667eea; |
|
font-weight: 600; |
|
transition: all 0.3s ease; |
|
} |
|
|
|
.gr-dataframe td:nth-child(1):hover { |
|
background-color: rgba(102, 126, 234, 0.1); |
|
color: #764ba2; |
|
transform: scale(1.02); |
|
} |
|
|
|
/* Content columns - readable styling with scroll for long text */ |
|
.gr-dataframe td:nth-child(2), |
|
.gr-dataframe td:nth-child(3), |
|
.gr-dataframe td:nth-child(4), |
|
.gr-dataframe td:nth-child(5) { |
|
cursor: default; |
|
font-size: 0.9rem; |
|
} |
|
|
|
.gr-dataframe tbody tr:hover { |
|
background-color: rgba(102, 126, 234, 0.05); |
|
} |
|
|
|
/* JavaScript for auto-scroll to top on tab change */ |
|
<script> |
|
document.addEventListener('DOMContentLoaded', function() { |
|
// Function to scroll to top |
|
function scrollToTop() { |
|
window.scrollTo({ |
|
top: 0, |
|
behavior: 'smooth' |
|
}); |
|
} |
|
|
|
// Observer for tab changes |
|
const observer = new MutationObserver(function(mutations) { |
|
mutations.forEach(function(mutation) { |
|
if (mutation.type === 'attributes' && mutation.attributeName === 'class') { |
|
const target = mutation.target; |
|
if (target.classList && target.classList.contains('selected')) { |
|
// Tab was selected, scroll to top |
|
setTimeout(scrollToTop, 100); |
|
} |
|
} |
|
}); |
|
}); |
|
|
|
// Observe tab navigation buttons |
|
const tabButtons = document.querySelectorAll('.gr-tab-nav button'); |
|
tabButtons.forEach(button => { |
|
observer.observe(button, { attributes: true }); |
|
|
|
// Also add click listener for immediate scroll |
|
button.addEventListener('click', function() { |
|
setTimeout(scrollToTop, 150); |
|
}); |
|
}); |
|
|
|
// Enhanced listener for programmatic tab changes (button-triggered navigation) |
|
let lastSelectedTab = null; |
|
const checkInterval = setInterval(function() { |
|
const currentSelectedTab = document.querySelector('.gr-tab-nav button.selected'); |
|
if (currentSelectedTab && currentSelectedTab !== lastSelectedTab) { |
|
lastSelectedTab = currentSelectedTab; |
|
setTimeout(scrollToTop, 100); |
|
} |
|
}, 100); |
|
|
|
// Additional scroll trigger for repo explorer navigation |
|
window.addEventListener('repoExplorerNavigation', function() { |
|
setTimeout(scrollToTop, 200); |
|
}); |
|
|
|
// Watch for specific tab transitions to repo explorer |
|
const repoExplorerObserver = new MutationObserver(function(mutations) { |
|
mutations.forEach(function(mutation) { |
|
if (mutation.type === 'attributes' && mutation.attributeName === 'class') { |
|
const target = mutation.target; |
|
if (target.textContent && target.textContent.includes('🔍 Repo Explorer') && target.classList.contains('selected')) { |
|
setTimeout(scrollToTop, 150); |
|
} |
|
} |
|
}); |
|
}); |
|
|
|
// Start observing for repo explorer specific changes |
|
setTimeout(function() { |
|
const repoExplorerTab = Array.from(document.querySelectorAll('.gr-tab-nav button')).find(btn => |
|
btn.textContent && btn.textContent.includes('🔍 Repo Explorer') |
|
); |
|
if (repoExplorerTab) { |
|
repoExplorerObserver.observe(repoExplorerTab, { attributes: true }); |
|
} |
|
}, 1000); |
|
}); |
|
</script> |
|
""" |
|
|
|
with gr.Blocks( |
|
theme=gr.themes.Soft( |
|
primary_hue="blue", |
|
secondary_hue="purple", |
|
neutral_hue="gray", |
|
font=["Inter", "system-ui", "sans-serif"] |
|
), |
|
css=css, |
|
title="🚀 HF RepoSense - AI Repository Intelligence" |
|
) as app: |
|
|
|
|
|
|
|
repo_ids_state = gr.State([]) |
|
current_repo_idx_state = gr.State(0) |
|
user_requirements_state = gr.State("") |
|
loaded_repo_content_state = gr.State("") |
|
current_repo_id_state = gr.State("") |
|
selected_repo_id_state = gr.State("") |
|
|
|
gr.Markdown( |
|
""" |
|
<div style="text-align: center; padding: 40px 20px; background: rgba(255, 255, 255, 0.1); border-radius: 20px; margin: 20px auto; max-width: 900px; backdrop-filter: blur(10px);"> |
|
<h1 style="font-size: 3.5rem; font-weight: 800; margin: 0; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); -webkit-background-clip: text; -webkit-text-fill-color: transparent; background-clip: text;"> |
|
🚀 HF RepoSense |
|
</h1> |
|
<p style="font-size: 1.3rem; color: rgba(255, 255, 255, 0.9); margin: 16px 0 0 0; font-weight: 400; line-height: 1.6;"> |
|
AI-powered HuggingFace repository intelligence |
|
</p> |
|
<div style="height: 4px; width: 80px; background: linear-gradient(45deg, #667eea, #764ba2); margin: 24px auto; border-radius: 2px;"></div> |
|
</div> |
|
""" |
|
) |
|
|
|
|
|
with gr.Row(): |
|
with gr.Column(scale=2): |
|
pass |
|
with gr.Column(scale=2): |
|
with gr.Row(): |
|
help_btn = gr.Button("❓ Help", variant="secondary", size="lg", scale=1) |
|
reset_all_btn = gr.Button("🔄 Reset Everything", variant="stop", size="lg", scale=1) |
|
with gr.Column(scale=1): |
|
pass |
|
|
|
|
|
with gr.Row(): |
|
with gr.Column(): |
|
help_modal = gr.Column(visible=False) |
|
with help_modal: |
|
gr.Markdown( |
|
""" |
|
<div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); padding: 20px; border-radius: 16px; text-align: center; margin-bottom: 20px;"> |
|
<h2 style="color: white; margin: 0; font-size: 2rem;">📚 How to Use HF RepoSense</h2> |
|
<p style="color: rgba(255,255,255,0.9); margin: 10px 0 0 0;">Step-by-step guide to find and analyze repositories</p> |
|
</div> |
|
""" |
|
) |
|
|
|
with gr.Accordion("🚀 Method 1: AI Assistant (Recommended)", open=True): |
|
gr.Markdown( |
|
""" |
|
### **Step 1: Start Conversation** |
|
- Go to the **🤖 AI Assistant** tab |
|
- Describe your project: *"I'm building a sentiment analysis tool"* |
|
- The AI will ask clarifying questions about your needs |
|
|
|
### **Step 2: Let AI Work Its Magic** |
|
- Answer the AI's questions about your requirements |
|
- When ready, the AI will automatically: |
|
- Extract keywords from your conversation |
|
- Search for matching repositories |
|
- Analyze and rank them by relevance |
|
|
|
### **Step 3: Review Results** |
|
- Interface automatically switches to **🔬 Analysis & Results** |
|
- View **Top 3** most relevant repositories |
|
- Browse detailed analysis with strengths/weaknesses |
|
- Click repository names to visit or explore them |
|
|
|
**💡 Tip**: This method gives the best personalized results! |
|
""" |
|
) |
|
|
|
with gr.Accordion("📝 Method 2: Smart Search (Direct Input)", open=False): |
|
gr.Markdown( |
|
""" |
|
### **Step 1: Choose Input Type** |
|
Go to **📝 Smart Search** tab and enter either: |
|
|
|
**Repository IDs** (with `/`): |
|
``` |
|
microsoft/DialoGPT-medium |
|
openai/whisper |
|
huggingface/transformers |
|
``` |
|
|
|
**Keywords** (no `/`): |
|
``` |
|
text generation |
|
image classification |
|
sentiment analysis |
|
``` |
|
|
|
### **Step 2: Auto-Detection & Processing** |
|
- System automatically detects input type |
|
- Repository IDs → Direct analysis |
|
- Keywords → Search + analysis |
|
- Enable **🚀 Auto-analyze** for instant results |
|
|
|
### **Step 3: Get Results** |
|
- Click **🔍 Find & Process Repositories** |
|
- View results in **🔬 Analysis & Results** tab |
|
""" |
|
) |
|
|
|
with gr.Accordion("🔬 Understanding Analysis Results", open=False): |
|
gr.Markdown( |
|
""" |
|
### **🏆 Top 3 Repositories** |
|
- AI-selected most relevant for your needs |
|
- Ranked by requirement matching and quality |
|
|
|
### **📊 Detailed Analysis Table** |
|
- **Repository**: Click names to visit/explore |
|
- **Strengths**: Key capabilities and advantages |
|
- **Weaknesses**: Limitations and considerations |
|
- **Speciality**: Primary use case and domain |
|
- **Relevance**: How well it matches your needs |
|
|
|
### **🔗 Quick Actions** |
|
Click repository names to: |
|
- **🌐 Visit Hugging Face Space**: See live demo |
|
- **🔍 Open in Repo Explorer**: Deep dive analysis |
|
""" |
|
) |
|
|
|
with gr.Accordion("🔍 Repository Explorer Deep Dive", open=False): |
|
gr.Markdown( |
|
""" |
|
### **Access Repository Explorer** |
|
- Click **🔍 Open in Repo Explorer** from results |
|
- Or manually enter repo ID in **🔍 Repo Explorer** tab |
|
|
|
### **Features Available** |
|
- **Auto-loading**: Repository content analysis |
|
- **AI Chat**: Ask questions about the code |
|
- **File Exploration**: Browse repository structure |
|
- **Code Analysis**: Get explanations and insights |
|
|
|
### **Sample Questions to Ask** |
|
- *"How do I use this repository?"* |
|
- *"What are the main functions?"* |
|
- *"Show me example usage"* |
|
- *"Explain the architecture"* |
|
""" |
|
) |
|
|
|
with gr.Accordion("🎯 Pro Tips & Best Practices", open=False): |
|
gr.Markdown( |
|
""" |
|
### **🤖 Getting Better AI Results** |
|
- Be specific about your use case |
|
- Mention programming language preferences |
|
- Describe your experience level |
|
- Include performance requirements |
|
|
|
### **🔍 Search Optimization** |
|
- Use multiple relevant keywords |
|
- Try different keyword combinations |
|
- Check both general and specific terms |
|
|
|
### **📊 Analyzing Results** |
|
- Read both strengths AND weaknesses |
|
- Check speciality alignment with your needs |
|
- Use Repository Explorer for detailed investigation |
|
- Compare multiple options before deciding |
|
|
|
### **🔄 Workflow Tips** |
|
- Start with AI Assistant for personalized results |
|
- Use Smart Search for known repositories |
|
- Explore multiple repositories before choosing |
|
- Save interesting repositories for later comparison |
|
""" |
|
) |
|
|
|
with gr.Accordion("⚠️ Important Notice: Server Startup Times", open=True): |
|
gr.Markdown( |
|
""" |
|
<div style="background: linear-gradient(135deg, #ff9a56 0%, #ff6b6b 100%); padding: 15px; border-radius: 12px; margin: 10px 0;"> |
|
<h3 style="color: white; margin: 0 0 10px 0; font-size: 1.3rem;">🕐 Model Response Times</h3> |
|
<p style="color: white; margin: 0; font-size: 1rem; line-height: 1.5;"> |
|
<strong>If the AI model takes longer than 5 minutes to respond:</strong><br/> |
|
📡 The servers are starting up from sleep mode<br/> |
|
⏳ This happens when the service hasn't been used recently<br/> |
|
🚀 Once live, responses will be fast and smooth<br/> |
|
💝 Thank you for your patience! |
|
</p> |
|
</div> |
|
|
|
### **What to Expect** |
|
- **First request**: May take 3-7 minutes (server startup) |
|
- **Subsequent requests**: Fast responses (10-30 seconds) |
|
- **If timeout occurs**: Simply retry your request |
|
|
|
### **Best Practices During Startup** |
|
- Start with a simple conversation or small repository list |
|
- Avoid analyzing many repositories simultaneously on first use |
|
- Once the first response comes through, normal speed resumes |
|
""" |
|
) |
|
|
|
with gr.Row(): |
|
close_help_btn = gr.Button("✅ Got It, Let's Start!", variant="primary", size="lg") |
|
|
|
with gr.Tabs() as tabs: |
|
|
|
with gr.TabItem("🤖 AI Assistant", id="chatbot_tab"): |
|
gr.Markdown("### 💬 Intelligent Repository Discovery Assistant") |
|
gr.Markdown("🎯 **Tell me what you're building, and I'll automatically find the best repositories for you!**") |
|
|
|
chatbot = gr.Chatbot( |
|
label="🤖 AI Assistant", |
|
height=500, |
|
type="messages", |
|
avatar_images=( |
|
"https://cdn-icons-png.flaticon.com/512/149/149071.png", |
|
"https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.png" |
|
), |
|
show_copy_button=True |
|
) |
|
|
|
with gr.Row(): |
|
msg_input = gr.Textbox( |
|
label="💭 Your Message", |
|
placeholder="Tell me about your project...", |
|
lines=1, |
|
scale=5, |
|
info="Describe what you're building and I'll find the perfect repositories" |
|
) |
|
send_btn = gr.Button("📤", variant="primary", scale=1) |
|
|
|
with gr.Row(): |
|
extract_analyze_btn = gr.Button("🎯 Extract Keywords & Analyze Now", variant="secondary", size="lg") |
|
|
|
|
|
with gr.Row(): |
|
with gr.Column(): |
|
chat_status = gr.Textbox( |
|
label="🎯 Chat Status", |
|
interactive=False, |
|
lines=2, |
|
info="Conversation progress and auto-actions" |
|
) |
|
with gr.Column(): |
|
extracted_keywords_output = gr.Textbox( |
|
label="🏷️ Auto-Extracted Keywords", |
|
interactive=False, |
|
show_copy_button=True, |
|
info="Keywords automatically extracted and used for search" |
|
) |
|
|
|
|
|
with gr.TabItem("📝 Smart Search", id="input_tab"): |
|
gr.Markdown("### 🔍 Intelligent Repository Discovery") |
|
gr.Markdown("💡 **Enter repository IDs (owner/repo) or keywords - I'll automatically detect which type and process accordingly!**") |
|
|
|
with gr.Row(): |
|
smart_input = gr.Textbox( |
|
label="Repository IDs or Keywords", |
|
lines=6, |
|
placeholder="Examples:\n• Repository IDs: microsoft/DialoGPT-medium, openai/whisper\n• Keywords: text generation, image classification, sentiment analysis", |
|
info="Smart detection: Use / for repo IDs, or enter keywords for search" |
|
) |
|
|
|
with gr.Row(): |
|
auto_analyze_checkbox = gr.Checkbox( |
|
label="🚀 Auto-analyze repositories", |
|
value=True, |
|
info="Automatically start analysis when repositories are found" |
|
) |
|
smart_submit_btn = gr.Button("🔍 Find & Process Repositories", variant="primary", size="lg", scale=1) |
|
|
|
status_box_input = gr.Textbox(label="📊 Status", interactive=False, lines=2) |
|
|
|
|
|
with gr.TabItem("🔬 Analysis & Results", id="analysis_tab"): |
|
gr.Markdown("### 🧪 Repository Analysis Results") |
|
|
|
|
|
with gr.Row(): |
|
current_requirements_display = gr.Textbox( |
|
label="📋 Active Requirements Context", |
|
interactive=False, |
|
lines=2, |
|
info="Requirements from AI chat for better relevance scoring" |
|
) |
|
|
|
|
|
with gr.Row(visible=False) as manual_analysis_row: |
|
analyze_all_btn = gr.Button("🚀 Analyze All Repositories", variant="primary", size="lg") |
|
status_box_analysis = gr.Textbox(label="📈 Analysis Status", interactive=False, lines=2) |
|
|
|
|
|
analysis_progress = gr.Progress() |
|
|
|
gr.Markdown("### 📊 Results Dashboard") |
|
|
|
|
|
with gr.Column(visible=False) as top_repos_section: |
|
gr.Markdown("### 🏆 Top 3 Most Relevant Repositories") |
|
gr.Markdown("🎯 **Click repository names to visit them directly on Hugging Face:**") |
|
top_repos_df = gr.Dataframe( |
|
headers=["Repository", "Strengths", "Weaknesses", "Speciality", "Relevance"], |
|
column_widths=["16.67%", "25%", "25%", "20.83%", "12.5%"], |
|
wrap=True, |
|
interactive=False |
|
) |
|
|
|
|
|
with gr.Row(): |
|
top_repo_links = gr.HTML( |
|
value="", |
|
label="🔗 Quick Links", |
|
visible=False |
|
) |
|
|
|
|
|
with gr.Row(): |
|
with gr.Column(): |
|
repo_action_modal = gr.Column(visible=False) |
|
with repo_action_modal: |
|
gr.Markdown("### 🔗 Repository Actions") |
|
selected_repo_display = gr.Textbox( |
|
label="Selected Repository", |
|
interactive=False, |
|
info="Choose what you'd like to do with this repository" |
|
) |
|
with gr.Row(): |
|
visit_repo_btn = gr.Button("🌐 Visit Hugging Face Space", variant="primary", size="lg") |
|
explore_repo_btn = gr.Button("🔍 Open in Repo Explorer", variant="secondary", size="lg") |
|
cancel_modal_btn = gr.Button("❌ Cancel", size="lg") |
|
|
|
gr.Markdown("### 📋 All Analysis Results") |
|
gr.Markdown("💡 **Click repository names to visit them on Hugging Face**") |
|
df_output = gr.Dataframe( |
|
headers=["Repository", "Strengths", "Weaknesses", "Speciality", "Relevance"], |
|
column_widths=["16.67%", "25%", "25%", "20.83%", "12.5%"], |
|
wrap=True, |
|
interactive=False |
|
) |
|
|
|
|
|
with gr.Row(): |
|
all_repo_links = gr.HTML( |
|
value="", |
|
label="🔗 Repository Quick Links" |
|
) |
|
|
|
|
|
with gr.TabItem("🔍 Repo Explorer", id="repo_explorer_tab"): |
|
repo_components, repo_states = create_repo_explorer_tab() |
|
|
|
|
|
gr.Markdown( |
|
""" |
|
<div style="text-align: center; padding: 30px 20px; margin-top: 40px; background: rgba(255, 255, 255, 0.1); border-radius: 16px; backdrop-filter: blur(10px);"> |
|
<p style="margin: 0; color: rgba(255, 255, 255, 0.8); font-size: 0.95rem; font-weight: 500;"> |
|
🚀 <span style="background: linear-gradient(45deg, #667eea, #764ba2); -webkit-background-clip: text; -webkit-text-fill-color: transparent; font-weight: 700;">HF RepoSense</span> - Powered by |
|
<span style="background: linear-gradient(45deg, #667eea, #764ba2); -webkit-background-clip: text; -webkit-text-fill-color: transparent; font-weight: 700;">Gradio</span> |
|
& <span style="background: linear-gradient(45deg, #667eea, #764ba2); -webkit-background-clip: text; -webkit-text-fill-color: transparent; font-weight: 700;">Hugging Face</span> |
|
</p> |
|
<div style="height: 2px; width: 60px; background: linear-gradient(45deg, #667eea, #764ba2); margin: 16px auto; border-radius: 1px;"></div> |
|
</div> |
|
""" |
|
) |
|
|
|
|
|
|
|
def handle_smart_input(text: str, auto_analyze: bool) -> Tuple[List[str], int, pd.DataFrame, str, Any, str]:
    """Route free-form input to repo-ID parsing or keyword search.

    The text is split on newlines/commas. When ``is_repo_id_format(text)`` is
    true the fragments are used directly as repository IDs; otherwise each
    fragment is passed to ``search_top_spaces(..., limit=5)`` and the results
    are concatenated. IDs are de-duplicated preserving first-seen order, written
    to the CSV, and the CSV is read back and formatted for display.

    Args:
        text: Raw textbox content (repo IDs or keywords). ``None``/blank is
            treated as "nothing entered".
        auto_analyze: Whether the caller wants analysis to start right away.

    Returns:
        ``(repo_ids, 0, dataframe, status, tabs_update, trigger)`` where
        ``trigger`` is ``"auto_analyze"`` when ``auto_analyze`` is set and ``""``
        otherwise. For blank input the tab update selects ``"input_tab"``;
        in every other case it selects ``"analysis_tab"``.
    """
    # A textbox value may arrive as None; guard before calling .strip().
    if not text or not text.strip():
        return [], 0, pd.DataFrame(), "Status: Please enter repository IDs or keywords.", gr.update(selected="input_tab"), ""

    # Both modes tokenise identically: split on newlines/commas, drop blanks.
    entries = [part.strip() for part in re.split(r'[\n,]+', text) if part.strip()]

    if is_repo_id_format(text):
        # dict.fromkeys de-duplicates while keeping the user's ordering.
        repo_ids = list(dict.fromkeys(entries))
        status = f"✅ Found {len(repo_ids)} repository IDs. "
    else:
        search_hits = []
        for keyword in entries:
            search_hits.extend(search_top_spaces(keyword, limit=5))
        repo_ids = list(dict.fromkeys(search_hits))
        status = f"🔍 Found {len(repo_ids)} repositories from keywords. "

    write_repos_to_csv(repo_ids)
    df = format_dataframe_for_display(read_csv_to_dataframe())

    if auto_analyze:
        status += "Starting automatic analysis..."
        trigger = "auto_analyze"
    else:
        status += "Ready for manual analysis."
        trigger = ""

    return repo_ids, 0, df, status, gr.update(selected="analysis_tab"), trigger
|
|
|
def handle_auto_analyze_toggle(auto_analyze: bool) -> Any:
    """Toggle visibility of the manual-analysis controls.

    Args:
        auto_analyze: Current state of the auto-analyze checkbox.

    Returns:
        A Gradio update whose ``visible`` flag is the negation of
        ``auto_analyze``.
    """
    manual_controls_visible = not auto_analyze
    return gr.update(visible=manual_controls_visible)
|
|
|
def handle_user_message(user_message: str, history: List[Dict[str, str]]) -> Tuple[List[Dict[str, str]], str]:
    """Record the user's turn in the chat history ahead of the bot reply.

    Args:
        user_message: Text the user submitted; a falsy value adds nothing.
        history: Chat history as ``{"role", "content"}`` dicts. A falsy history
            is replaced by a new list holding only the assistant greeting;
            a non-empty one is appended to in place.

    Returns:
        ``(history, "")`` -- the (possibly seeded) history and an empty string.
    """
    # `or` keeps a non-empty list as the same object and seeds an empty one.
    history = history or [{"role": "assistant", "content": CHATBOT_INITIAL_MESSAGE}]

    if user_message:
        user_turn = {"role": "user", "content": user_message}
        history.append(user_turn)

    return history, ""
|
|
|
def handle_bot_response(history: List[Dict[str, str]]) -> Tuple[List[Dict[str, str]], str, str, str, List[str], int, pd.DataFrame, Any]:
    """Produce the assistant reply and, when the chat is ready, kick off a search.

    Args:
        history: Chat history as ``{"role", "content"}`` dicts; the last entry
            must be a user turn for anything to happen. The assistant reply is
            appended to this list in place.

    Returns:
        An 8-tuple ``(history, chat_status, keywords, requirements, repo_ids,
        0, dataframe, tabs_update)``. When ``should_auto_extract_keywords``
        approves and at least one keyword survives cleaning, the keywords,
        requirements, found repo IDs, the display dataframe and a switch to
        ``"analysis_tab"`` are returned; otherwise the data fields are empty and
        the tab update is a no-op.
    """
    # Nothing to answer unless the most recent turn belongs to the user.
    if not history or history[-1]["role"] != "user":
        return history, "", "", "", [], 0, pd.DataFrame(), gr.update()

    latest_user_text = history[-1]["content"]
    earlier_turns = convert_messages_to_tuples(history[:-1])
    reply = chat_with_user(latest_user_text, earlier_turns)
    history.append({"role": "assistant", "content": reply})

    keywords = []
    if should_auto_extract_keywords(history):
        full_transcript = convert_messages_to_tuples(history)
        raw_keywords = extract_keywords_from_conversation(full_transcript)
        # Keep only runs of word characters, whitespace and hyphens, trimmed.
        candidates = re.findall(r'[\w\s-]+', raw_keywords)
        keywords = [chunk.strip() for chunk in candidates if chunk.strip()]

    if not keywords:
        return history, "💬 Conversation continuing...", "", "", [], 0, pd.DataFrame(), gr.update()

    keywords_text = ", ".join(keywords)
    requirements = extract_user_requirements_from_chat(history)

    # Only the first three keywords are searched, five results each.
    search_hits = []
    for keyword in keywords[:3]:
        search_hits.extend(search_top_spaces(keyword, limit=5))

    found_repo_ids = list(dict.fromkeys(search_hits))
    write_repos_to_csv(found_repo_ids)
    table = format_dataframe_for_display(read_csv_to_dataframe())

    chat_status = f"🎯 Auto-extracted keywords and found {len(found_repo_ids)} repositories. Analysis starting automatically..."

    return history, chat_status, keywords_text, requirements, found_repo_ids, 0, table, gr.update(selected="analysis_tab")
|
|
|
def handle_dataframe_select(evt: gr.SelectData, df_data) -> Tuple[str, Any, str]:
    """React to a cell click in a results table.

    Only a click on column 0 of a valid row, whose cell holds a usable
    repository ID, opens the action modal.

    Args:
        evt: Gradio selection event; ``evt.index`` is read as ``(row, column)``.
        df_data: Current value of the clicked table; anything other than a
            non-empty ``pandas.DataFrame`` is ignored.

    Returns:
        ``(repo_id, modal_update, repo_id)`` with the modal made visible when a
        repository was picked, otherwise ``("", hide-update, "")``. Any
        exception is logged and treated as "nothing picked".
    """
    if evt is None:
        return "", gr.update(visible=False), ""

    try:
        row_idx, col_idx = evt.index[0], evt.index[1]

        row_is_valid = (
            isinstance(df_data, pd.DataFrame)
            and not df_data.empty
            and row_idx < len(df_data)
        )
        if row_is_valid and col_idx == 0:
            cell = df_data.iloc[row_idx, 0]
            # Falsy cells and the literal text 'nan' are not repository IDs.
            clean_repo_id = str(cell).strip() if cell else ""
            if clean_repo_id and clean_repo_id != 'nan':
                logger.info(f"Showing modal for repository: {clean_repo_id}")
                return clean_repo_id, gr.update(visible=True), clean_repo_id
    except Exception as e:
        logger.error(f"Error handling dataframe selection: {e}")

    # Every non-selecting path ends here: keep the modal hidden.
    return "", gr.update(visible=False), ""
|
|
|
def handle_visit_repo(repo_id: str) -> Tuple[Any, str]:
    """Close the modal and report the Hugging Face Space URL for ``repo_id``.

    Args:
        repo_id: Repository ID; falsy or whitespace-only values yield no URL.

    Returns:
        ``(hide-modal update, url)`` where ``url`` is
        ``https://huggingface.co/spaces/<repo_id>`` or ``""``.
    """
    target = repo_id.strip() if repo_id else ""
    if not target:
        return gr.update(visible=False), ""

    hf_url = f"https://huggingface.co/spaces/{target}"
    logger.info(f"User chose to visit: {hf_url}")
    return gr.update(visible=False), hf_url
|
|
|
def handle_explore_repo(selected_repo_id: str) -> Tuple[Any, Any, Any, str, str]:
    """Close the modal, switch to the Repo Explorer tab and request an auto-load.

    Args:
        selected_repo_id: Repository ID chosen in the modal.

    Returns:
        ``(hide-modal update, tab update, input update, repo_id, signal)``.
        For a usable ID the input update carries the trimmed ID and ``signal``
        is ``"auto_load"``; otherwise the input update is a no-op and both
        strings are empty. The tab switch happens in either case.
    """
    clean_repo_id = selected_repo_id.strip() if selected_repo_id else ""
    # Empty text and the literal 'nan' do not identify a repository.
    should_load = bool(clean_repo_id) and clean_repo_id != 'nan'

    hide_modal = gr.update(visible=False)
    open_explorer = gr.update(selected="repo_explorer_tab")

    if should_load:
        return hide_modal, open_explorer, gr.update(value=clean_repo_id), clean_repo_id, "auto_load"
    return hide_modal, open_explorer, gr.update(), "", ""
|
|
|
def handle_cancel_modal() -> Any:
    """Dismiss the repository action modal.

    Returns:
        A Gradio update with ``visible=False``.
    """
    hidden = gr.update(visible=False)
    return hidden
|
|
|
def generate_repo_links_html(df: pd.DataFrame) -> str:
    """Render one styled Hugging Face Space link per repository in ``df``.

    Repository IDs are read from the ``'repo id'`` column when present and
    from the first column otherwise. Falsy cells, blank text and the literal
    ``'nan'`` are skipped. IDs are percent-encoded in the URL and HTML-escaped
    in both the attribute and the link text, so an ID containing markup cannot
    break out of the anchor.

    Args:
        df: Table holding repository IDs.

    Returns:
        A ``<div>`` wrapping the anchors, or ``""`` when ``df`` is empty or no
        usable ID was found.
    """
    # Local imports keep this block self-contained.
    import html
    from urllib.parse import quote

    if df.empty:
        return ""

    # A non-empty frame always has at least one column to fall back to.
    id_column = 'repo id' if 'repo id' in df.columns else df.columns[0]

    anchor_style = (
        "display: inline-block; margin: 5px 10px; padding: 8px 16px; "
        "background: linear-gradient(45deg, #667eea, #764ba2); color: white; "
        "text-decoration: none; border-radius: 8px; font-weight: 600; "
        "transition: all 0.3s ease;"
    )

    html_links = []
    for repo_id in df[id_column].tolist():
        clean_repo_id = str(repo_id).strip() if repo_id else ""
        if not clean_repo_id or clean_repo_id == 'nan':
            continue

        # '/' stays literal so "owner/name" keeps its path shape in the URL.
        hf_url = f"https://huggingface.co/spaces/{quote(clean_repo_id, safe='/')}"
        html_links.append(
            f'<a href="{html.escape(hf_url, quote=True)}" target="_blank" '
            f'style="{anchor_style}">{html.escape(clean_repo_id)}</a>'
        )

    if html_links:
        return f'<div style="margin: 10px 0; padding: 15px; background: rgba(255, 255, 255, 0.1); border-radius: 12px; backdrop-filter: blur(10px);">{"".join(html_links)}</div>'
    return ""
|
|
|
def handle_extract_and_analyze(history: List[Dict[str, str]]) -> Tuple[str, str, str, List[str], int, pd.DataFrame, Any, pd.DataFrame, str, Any, str, str]:
    """Turn the chat into keywords, search for repositories and analyze them.

    Args:
        history: Chat history as ``{"role", "content"}`` dicts.

    Returns:
        A 12-tuple ``(chat_status, keywords, requirements, repo_ids, 0,
        results_df, tabs_update, top_repos_df, analysis_status,
        top_section_update, all_links_html, top_links_html)``. Early exits
        (no history, no convertible conversation, no keywords, no repositories)
        carry an explanatory status and empty data. If the analysis itself
        raises, the found repositories are still returned with the
        pre-analysis table and a status describing the failure.
    """
    def _bail(message: str, keywords: str = "", requirements: str = ""):
        # Shared shape of every early exit: status text plus empty outputs.
        return (
            message, keywords, requirements, [], 0, pd.DataFrame(), gr.update(),
            pd.DataFrame(), "", gr.update(visible=False), "", "",
        )

    if not history:
        return _bail("❌ No conversation to extract from.")

    tuple_history = convert_messages_to_tuples(history)
    if not tuple_history:
        return _bail("❌ No completed conversations to analyze.")

    raw_keywords_str = extract_keywords_from_conversation(tuple_history)
    # Keep only runs of word characters, whitespace and hyphens, trimmed.
    keywords = [chunk.strip() for chunk in re.findall(r'[\w\s-]+', raw_keywords_str) if chunk.strip()]
    if not keywords:
        return _bail(f"❌ Could not extract valid keywords. Raw output: '{raw_keywords_str}'")

    keywords_text = ", ".join(keywords)
    requirements = extract_user_requirements_from_chat(history)

    # Only the first three keywords are searched, five results each.
    search_hits = []
    for keyword in keywords[:3]:
        search_hits.extend(search_top_spaces(keyword, limit=5))
    found_repo_ids = list(dict.fromkeys(search_hits))

    if not found_repo_ids:
        return _bail(f"❌ No repositories found for keywords: {keywords_text}", keywords_text, requirements)

    write_repos_to_csv(found_repo_ids)
    pre_analysis_table = format_dataframe_for_display(read_csv_to_dataframe())

    try:
        (analyzed_df, analysis_status, top_repos,
         top_section_update, all_links, top_links) = handle_analyze_all_repos(found_repo_ids, requirements)

        chat_status = f"🎉 Extracted keywords → Found {len(found_repo_ids)} repositories → Analysis complete!"

        return (
            chat_status, keywords_text, requirements, found_repo_ids, 0, analyzed_df,
            gr.update(selected="analysis_tab"), top_repos, analysis_status,
            top_section_update, all_links, top_links,
        )
    except Exception as e:
        logger.error(f"Error during extract and analyze: {e}")
        error_status = f"✅ Found {len(found_repo_ids)} repositories, but analysis failed: {e}"
        return (
            error_status, keywords_text, requirements, found_repo_ids, 0, pre_analysis_table,
            gr.update(selected="analysis_tab"), pd.DataFrame(), "",
            gr.update(visible=False), "", "",
        )
|
|
|
def extract_user_requirements_from_chat(history: List[Dict[str, str]]) -> str:
    """Collect the user's own messages into a bulleted requirements list.

    Args:
        history: Chat history as ``{"role", "content"}`` dicts.

    Returns:
        One ``"- <message>"`` line per non-blank user message, joined by
        newlines; ``""`` when there is no history or no such message.
    """
    if not history:
        return ""

    user_texts = [turn.get('content', '') for turn in history if turn.get('role') == 'user']

    # Whitespace-only messages carry no requirement and are left out.
    bullet_lines = [f"- {text}" for text in user_texts if text.strip()]
    return "\n".join(bullet_lines)
|
|
|
def handle_analyze_all_repos(repo_ids: List[str], user_requirements: str, progress=gr.Progress()) -> Tuple[pd.DataFrame, str, pd.DataFrame, Any, str, str]:
    """Analyze every repository in ``repo_ids`` and summarise the outcome.

    Each repository is analyzed with ``analyze_and_update_single_repo``; the
    CSV is then re-read to confirm that at least one analysis column was
    filled for it. When that check fails and the analysis summary contains
    ``"JSON extraction: SUCCESS"``, the analysis is run once more and
    re-checked. Short sleeps separate the steps.

    Args:
        repo_ids: Repository IDs to process, in order.
        user_requirements: Requirements text forwarded to the analysis and to
            the top-repository selection.
        progress: Gradio progress tracker (default supplied for Gradio to
            inject; NOTE(review): relies on Gradio's progress-injection
            convention -- confirm against the Gradio version in use).

    Returns:
        ``(analyzed_df, status, top_repos_df, top_section_update,
        all_links_html, top_links_html)``. ``analyzed_df`` holds only rows with
        at least one analysis column filled; both frames are formatted for
        display. With no ``repo_ids`` or on an unexpected batch-level error the
        data outputs are empty and ``status`` explains why.
    """
    analysis_columns = ("strength", "weaknesses", "speciality", "relevance rating")

    def _has_analysis(frame: pd.DataFrame, repo_id: str) -> bool:
        """Tell whether the first row for ``repo_id`` has any analysis column filled."""
        # NOTE(review): only the first matching row is inspected; the callers
        # visible in this file de-duplicate IDs before writing the CSV.
        matches = frame[frame["repo id"] == repo_id]
        if matches.empty:
            return False
        first_row = matches.iloc[0]
        # Non-string cells (e.g. NaN) count as "not filled" instead of raising.
        return any(
            isinstance(value, str) and value.strip()
            for value in (first_row.get(column, "") for column in analysis_columns)
        )

    if not repo_ids:
        return pd.DataFrame(), "Status: No repositories to analyze. Please submit repo IDs first.", pd.DataFrame(), gr.update(visible=False), "", ""

    total_repos = len(repo_ids)

    try:
        progress(0, desc="Initializing batch analysis...")

        successful_analyses = 0
        failed_analyses = 0
        csv_update_failures = 0

        for i, repo_id in enumerate(repo_ids):
            progress(i / total_repos, desc=f"Analyzing {repo_id} ({i+1}/{total_repos})")

            try:
                logger.info(f"Batch analysis: Processing {repo_id} ({i+1}/{total_repos})")

                _, summary, _ = analyze_and_update_single_repo(repo_id, user_requirements)

                if _has_analysis(read_csv_to_dataframe(), repo_id):
                    successful_analyses += 1
                else:
                    logger.warning(f"CSV update failed for {repo_id}, attempting retry...")
                    time.sleep(0.5)

                    retry_success = False
                    # Retry only when the analysis itself reported parsed output,
                    # i.e. the result exists but did not reach the CSV.
                    if summary and "JSON extraction: SUCCESS" in summary:
                        logger.info(f"Attempting to re-update CSV for {repo_id}")
                        analyze_and_update_single_repo(repo_id, user_requirements)
                        retry_success = _has_analysis(read_csv_to_dataframe(), repo_id)

                    if retry_success:
                        successful_analyses += 1
                    else:
                        csv_update_failures += 1

                # Brief pause between repositories.
                time.sleep(0.3)

            except Exception as e:
                # One bad repository must not abort the whole batch.
                logger.error(f"Error analyzing {repo_id}: {e}")
                failed_analyses += 1
                time.sleep(0.2)

        progress(1.0, desc="Batch analysis completed!")

        updated_df = read_csv_to_dataframe()

        # Keep rows where at least one analysis column is non-blank.
        has_any_analysis = pd.Series(False, index=updated_df.index)
        for column in analysis_columns:
            has_any_analysis |= updated_df[column].str.strip() != ''
        analyzed_df = updated_df[has_any_analysis]

        top_repos = get_top_relevant_repos(updated_df, user_requirements, top_n=3)

        all_links_html = generate_repo_links_html(analyzed_df)
        top_links_html = generate_repo_links_html(top_repos) if not top_repos.empty else ""

        final_status = f"🎉 Batch Analysis Complete!\n✅ Successful: {successful_analyses}/{total_repos}\n❌ Failed: {failed_analyses}/{total_repos}"
        if csv_update_failures > 0:
            final_status += f"\n⚠️ CSV Update Issues: {csv_update_failures}/{total_repos}"

        if not top_repos.empty:
            final_status += f"\n\n🏆 Top {len(top_repos)} most relevant repositories selected!"

        show_top_section = gr.update(visible=not top_repos.empty)

        logger.info(f"Batch analysis completed: {successful_analyses} successful, {failed_analyses} failed, {csv_update_failures} CSV update issues")
        return format_dataframe_for_display(analyzed_df), final_status, format_dataframe_for_display(top_repos), show_top_section, all_links_html, top_links_html

    except Exception as e:
        logger.error(f"Error in batch analysis: {e}")
        error_status = f"❌ Batch analysis failed: {e}"
        return format_dataframe_for_display(read_csv_to_dataframe()), error_status, pd.DataFrame(), gr.update(visible=False), "", ""
|
|
|
def handle_reset_everything() -> Tuple[List[str], int, str, pd.DataFrame, pd.DataFrame, Any, List[Dict[str, str]], str, str, str]:
    """Wipe the CSV file and hand back pristine values for the UI.

    Returns:
        A 10-tuple ``(repo_ids, current_index, user_requirements, results_df,
        top_repos_df, top_section_update, chat_history, status,
        requirements_text, keywords_text)``: empty list, ``0``, ``""``, the
        same empty five-column frame twice, a hide update, the greeting-only
        chat, a status line, the "no requirements" placeholder and ``""``.
        If anything raises, the same shape is returned with column-less
        frames and a status describing the failure.
    """
    try:
        if os.path.exists(CSV_FILE):
            os.remove(CSV_FILE)
            logger.info("CSV file deleted for reset")

        blank_table = pd.DataFrame(
            columns=["repo id", "strength", "weaknesses", "speciality", "relevance rating"]
        )
        greeting_only_chat = [{"role": "assistant", "content": CHATBOT_INITIAL_MESSAGE}]

        logger.info("Complete system reset performed")

        return (
            [],                          # repo IDs
            0,                           # current repo index
            "",                          # user requirements
            blank_table,                 # first table
            blank_table,                 # second table (same frame object)
            gr.update(visible=False),
            greeting_only_chat,
            "Status: Everything has been reset. Ready to start fresh!",
            "No requirements extracted yet.",
            "",                          # extracted keywords
        )
    except Exception as e:
        logger.error(f"Error during reset: {e}")
        error_status = f"Reset failed: {e}"
        return (
            [],
            0,
            "",
            pd.DataFrame(),
            pd.DataFrame(),
            gr.update(visible=False),
            [{"role": "assistant", "content": CHATBOT_INITIAL_MESSAGE}],
            error_status,
            "No requirements extracted yet.",
            "",
        )
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Event wiring
# ---------------------------------------------------------------------------

# Seed the chat window with the assistant greeting on page load.
app.load(
    fn=lambda: [{"role": "assistant", "content": CHATBOT_INITIAL_MESSAGE}],
    outputs=[chatbot],
)

# Pressing Enter in the smart input and clicking its button run the same
# two-step chain: parse/search the input, then optionally analyze.
# NOTE(review): status_box_input appears twice in the first step's outputs and
# is read back as `trigger` by the second step -- it doubles as a signal
# channel; confirm how Gradio resolves duplicate outputs.
for smart_trigger in (smart_input.submit, smart_submit_btn.click):
    smart_trigger(
        fn=handle_smart_input,
        inputs=[smart_input, auto_analyze_checkbox],
        outputs=[repo_ids_state, current_repo_idx_state, df_output, status_box_input, tabs, status_box_input],
    ).then(
        fn=lambda repo_ids, user_reqs, trigger: handle_analyze_all_repos(repo_ids, user_reqs) if trigger == "auto_analyze" and repo_ids else (pd.DataFrame(), "Ready for analysis.", pd.DataFrame(), gr.update(visible=False), "", ""),
        inputs=[repo_ids_state, user_requirements_state, status_box_input],
        outputs=[df_output, status_box_input, top_repos_df, top_repos_section, all_repo_links, top_repo_links],
    )

# The manual-analysis row is shown only while auto-analyze is unchecked.
auto_analyze_checkbox.change(
    fn=handle_auto_analyze_toggle,
    inputs=[auto_analyze_checkbox],
    outputs=[manual_analysis_row],
)

# Manual "analyze all" button.
analyze_all_btn.click(
    fn=handle_analyze_all_repos,
    inputs=[repo_ids_state, user_requirements_state],
    outputs=[df_output, status_box_analysis, top_repos_df, top_repos_section, all_repo_links, top_repo_links],
)

# Chat: Enter key and Send button share one four-step chain --
# append the user turn, get the bot reply (and maybe search), refresh the
# requirements display, then analyze when repositories were found.
for chat_trigger in (msg_input.submit, send_btn.click):
    chat_trigger(
        fn=handle_user_message,
        inputs=[msg_input, chatbot],
        outputs=[chatbot, msg_input],
    ).then(
        fn=handle_bot_response,
        inputs=[chatbot],
        outputs=[chatbot, chat_status, extracted_keywords_output, user_requirements_state, repo_ids_state, current_repo_idx_state, df_output, tabs],
    ).then(
        fn=lambda req: req if req.strip() else "No specific requirements extracted from conversation.",
        inputs=[user_requirements_state],
        outputs=[current_requirements_display],
    ).then(
        fn=lambda repo_ids, user_reqs: handle_analyze_all_repos(repo_ids, user_reqs) if repo_ids else (pd.DataFrame(), "", pd.DataFrame(), gr.update(visible=False), "", ""),
        inputs=[repo_ids_state, user_requirements_state],
        outputs=[df_output, chat_status, top_repos_df, top_repos_section, all_repo_links, top_repo_links],
    )

# Explicit "extract & analyze" button, followed by the requirements refresh.
extract_analyze_btn.click(
    fn=handle_extract_and_analyze,
    inputs=[chatbot],
    outputs=[chat_status, extracted_keywords_output, user_requirements_state, repo_ids_state, current_repo_idx_state, df_output, tabs, top_repos_df, status_box_analysis, top_repos_section, all_repo_links, top_repo_links],
).then(
    fn=lambda req: req if req.strip() else "No specific requirements extracted from conversation.",
    inputs=[user_requirements_state],
    outputs=[current_requirements_display],
)

# Events owned by the Repo Explorer tab.
setup_repo_explorer_events(repo_components, repo_states)

# Cell clicks in either results table feed the same selection handler.
for results_table in (df_output, top_repos_df):
    results_table.select(
        fn=handle_dataframe_select,
        inputs=[results_table],
        outputs=[selected_repo_display, repo_action_modal, selected_repo_id_state],
    )

# Modal action: open the Space in a new browser tab (done client-side via js).
visit_repo_btn.click(
    fn=handle_visit_repo,
    inputs=[selected_repo_display],
    outputs=[repo_action_modal, selected_repo_display],
    js="(repo_id) => { if(repo_id && repo_id.trim()) { window.open('https://huggingface.co/spaces/' + repo_id.trim(), '_blank'); } }",
)

# Modal action: jump to the Repo Explorer, then load the repository and start
# its chatbot when handle_explore_repo emitted the "auto_load" signal.
# NOTE(review): the signal travels through status_box_input here as well.
explore_repo_btn.click(
    fn=handle_explore_repo,
    inputs=[selected_repo_id_state],
    outputs=[
        repo_action_modal,
        tabs,
        repo_components["repo_explorer_input"],
        repo_states["current_repo_id"],
        status_box_input,
    ],
    js="""(repo_id) => {
        setTimeout(() => {
            window.scrollTo({top: 0, behavior: 'smooth'});
        }, 200);
    }""",
).then(
    fn=lambda repo_id, signal: handle_load_repository_with_vectorization(repo_id) if signal == "auto_load" and repo_id else ("", "", gr.update(value="", visible=False)),
    inputs=[repo_states["current_repo_id"], status_box_input],
    outputs=[repo_components["repo_status_display"], repo_states["repo_context_summary"], repo_components["visit_hf_link"]],
).then(
    fn=lambda repo_status, repo_id, repo_context, signal: (
        initialize_repo_chatbot(repo_status, repo_id, repo_context)
        if signal == "auto_load" and repo_id else []
    ),
    inputs=[repo_components["repo_status_display"], repo_states["current_repo_id"], repo_states["repo_context_summary"], status_box_input],
    outputs=[repo_components["repo_chatbot"]],
)

# Modal action: cancel.
cancel_modal_btn.click(
    fn=handle_cancel_modal,
    outputs=[repo_action_modal],
)

# Global reset.
reset_all_btn.click(
    fn=handle_reset_everything,
    outputs=[repo_ids_state, current_repo_idx_state, user_requirements_state, df_output, top_repos_df, top_repos_section, chatbot, status_box_input, current_requirements_display, extracted_keywords_output],
)

# Help modal open/close.
help_btn.click(
    fn=lambda: gr.update(visible=True),
    outputs=[help_modal],
)

close_help_btn.click(
    fn=lambda: gr.update(visible=False),
    outputs=[help_modal],
)
|
|
|
return app |
|
|
|
if __name__ == "__main__":
    # Script entry point: build the UI and serve it with debug mode off.
    app = create_ui()
    app.launch(debug=False)
|
|