"""Gradio app for looking up and summarising Hugging Face Spaces.

The user enters Space repo IDs (or search keywords); the IDs are written to a
CSV file, shown in a table, and the first repository's card is summarised.
A placeholder chat panel is also provided.
"""

import csv
import logging
import os
from datetime import datetime
from typing import Dict, List, Optional, Tuple

import gradio as gr
import pandas as pd
import regex as re
from huggingface_hub import HfApi, SpaceCard

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Constants
CSV_FILE = "repo_ids.csv"
# Single source of truth for the CSV header / results-table columns.
CSV_COLUMNS = ["repo id", "strength", "weaknesses", "speciality", "relevance rating"]
CHATBOT_SYSTEM_PROMPT = (
    "You are a helpful AI assistant that analyzes Hugging Face repositories. "
    "Your task is to help users understand repositories, extract key information, "
    "and provide insights. Be concise, clear, and focus on the most important "
    "aspects of each repository."
)
# Substrings that switch `process_input` from "repo IDs" mode to "keyword search" mode.
SEARCH_TRIGGERS = ("search", "find", "look for")
# Entries in the input box are separated by commas and/or newlines.
_ENTRY_SEPARATOR = re.compile(r"[\n,]+")


class AppState:
    """Simple state management for the application."""

    def __init__(self):
        # Repository IDs produced by the most recent submission.
        self.repo_ids: List[str] = []
        # Index into `repo_ids` of the repository currently being shown.
        self.current_repo_idx: int = 0
        # Chat messages as {"role": ..., "content": ...} dicts.
        self.chat_history: List[Dict[str, str]] = []


def _empty_results() -> pd.DataFrame:
    """Return an empty results table that still carries the expected columns."""
    return pd.DataFrame(columns=CSV_COLUMNS)


def _split_entries(text: str) -> List[str]:
    """Split comma/newline separated text into stripped, non-empty entries."""
    return [part.strip() for part in _ENTRY_SEPARATOR.split(text) if part.strip()]


def read_csv_as_text(filename: str) -> pd.DataFrame:
    """Read a CSV file into a DataFrame of strings.

    Every cell is read as text and empty cells stay empty strings (instead of
    NaN), so the not-yet-filled columns display as blanks.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        The file contents, or an empty frame with ``CSV_COLUMNS`` if the file
        cannot be read (the error is logged, not raised).
    """
    try:
        return pd.read_csv(filename, dtype=str, keep_default_na=False)
    except Exception:
        logger.exception("Error reading CSV: %s", filename)
        return _empty_results()


def write_repos_to_csv(repo_ids: List[str]) -> None:
    """Overwrite ``CSV_FILE`` with one row per repository ID.

    Only the "repo id" column is filled; the remaining columns are written as
    empty strings. Failures are logged and swallowed (best effort).

    Args:
        repo_ids: Repository IDs to write, in order.
    """
    try:
        with open(CSV_FILE, "w", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            writer.writerow(CSV_COLUMNS)
            blanks = [""] * (len(CSV_COLUMNS) - 1)
            writer.writerows([repo_id, *blanks] for repo_id in repo_ids)
    except Exception:
        logger.exception("Error writing to CSV: %s", CSV_FILE)


def search_top_spaces(keyword: str, limit: int = 5) -> List[str]:
    """Search Hugging Face Spaces by keyword.

    Args:
        keyword: Search term passed to ``HfApi.list_spaces``.
        limit: Maximum number of Spaces to return.

    Returns:
        IDs of the matching Spaces; an empty list for a blank keyword or when
        the request fails (the error is logged).
    """
    keyword = (keyword or "").strip()
    if not keyword:
        return []
    try:
        spaces = HfApi().list_spaces(search=keyword, limit=limit)
        return [space.id for space in spaces]
    except Exception:
        logger.exception("Error searching spaces for %r", keyword)
        return []


def analyze_repo(repo_id: str) -> Tuple[str, str]:
    """Fetch a Space's card and build a content dump plus a short summary.

    Args:
        repo_id: ID of the Space, e.g. ``"owner/name"``.

    Returns:
        ``(content, summary)`` strings. On any failure both strings describe
        the error instead; nothing is raised.
    """
    try:
        api = HfApi()
        # Fails fast if the Space does not exist or is not accessible; its
        # tags also serve as a fallback when the card metadata has none.
        space = api.space_info(repo_id)
        card = SpaceCard.load(repo_id)
        # Card metadata (the README's YAML header) lives on `card.data`,
        # not on the card object itself.
        metadata = card.data

        title = getattr(metadata, "title", None) or "N/A"
        # Space cards have no `description` field; use the optional
        # `short_description` metadata and fall back to the README body.
        description = (
            getattr(metadata, "short_description", None)
            or (card.text or "").strip()
            or "N/A"
        )
        raw_tags = getattr(metadata, "tags", None) or getattr(space, "tags", None) or []
        tags = [str(tag) for tag in raw_tags]

        content = (
            "\n"
            f"Repository: {repo_id}\n"
            f"Title: {title}\n"
            f"Description: {description}\n"
            f"Tags: {', '.join(tags)}\n"
        )
        summary = (
            f"Analysis of {repo_id}:\n"
            f"- Title: {title}\n"
            f"- Main focus: {description[:200]}...\n"
            f"- Key tags: {', '.join(tags[:5])}\n"
        )
        return content, summary
    except Exception as e:
        logger.exception("Error analyzing repo %s", repo_id)
        return f"Error analyzing {repo_id}", f"Error: {str(e)}"


def chat_with_user(message: str, history: List[Dict[str, str]], system_prompt: str) -> str:
    """Return a canned chat reply that echoes the user's message.

    Placeholder implementation: ``history`` and ``system_prompt`` are accepted
    for interface stability but are not used yet.
    """
    # For now, return a simple response
    return f"I understand you're asking about: {message}. How can I help you analyze this repository?"


def process_input(text: str, state: AppState) -> Tuple[pd.DataFrame, str, str, str]:
    """Turn the input box text into repo IDs, persist them and analyse the first.

    If the text contains any of ``SEARCH_TRIGGERS`` every entry is used as a
    search keyword; otherwise every entry is taken as a repo ID verbatim.

    Args:
        text: Comma/newline separated repo IDs or keywords.
        state: Session state; ``repo_ids`` and ``current_repo_idx`` are updated.

    Returns:
        ``(results_table, status_message, content, summary)``.
    """
    try:
        text = text or ""
        entries = _split_entries(text)
        lowered = text.lower()
        # NOTE(review): this is a plain substring test, so the trigger words
        # themselves become part of the search terms and a repo ID containing
        # e.g. "find" is treated as a keyword. Behaviour kept as-is.
        if any(trigger in lowered for trigger in SEARCH_TRIGGERS):
            repo_ids: List[str] = []
            for keyword in entries:
                repo_ids.extend(search_top_spaces(keyword, limit=5))
        else:
            repo_ids = entries

        # Remove duplicates while preserving first-seen order.
        repo_ids = list(dict.fromkeys(repo_ids))
        if not repo_ids:
            return _empty_results(), "No repositories found", "", ""

        # Update state and CSV
        state.repo_ids = repo_ids
        state.current_repo_idx = 0
        write_repos_to_csv(repo_ids)

        # Get first repo analysis
        content, summary = analyze_repo(repo_ids[0])
        return read_csv_as_text(CSV_FILE), f"Found {len(repo_ids)} repositories", content, summary
    except Exception as e:
        # UI boundary: report the failure in the status box instead of raising.
        logger.exception("Error processing input")
        return _empty_results(), f"Error: {str(e)}", "", ""


def send_message(
    message: str, history: List[Dict[str, str]], state: AppState
) -> Tuple[List[Dict[str, str]], str]:
    """Append the user's message and the assistant's reply to the chat.

    Args:
        message: Text from the message box; blank input is ignored.
        history: Current chat messages (may be ``None`` for an empty chat).
        state: Session state (currently unused).

    Returns:
        ``(updated_history, "")`` -- the empty string clears the message box.
    """
    # Work on a copy so the caller's list is never mutated in place.
    updated = list(history or [])
    if not message:
        return updated, ""
    updated.append({"role": "user", "content": message})
    response = chat_with_user(message, updated, CHATBOT_SYSTEM_PROMPT)
    updated.append({"role": "assistant", "content": response})
    return updated, ""


def clear_chat() -> Tuple[List[Dict[str, str]], str]:
    """Clear chat history and the message box."""
    return [], ""


def create_ui() -> gr.Blocks:
    """Create a simplified Gradio interface.

    Returns:
        The assembled ``gr.Blocks`` app, ready for ``launch()``.
    """
    with gr.Blocks(title="Hugging Face Repo Analyzer", theme=gr.themes.Soft()) as app:
        # State must be created inside the Blocks context so it is part of
        # the app and can be used as an event input.
        state = gr.State(AppState())

        gr.Markdown("# Hugging Face Repository Analyzer")

        with gr.Row():
            with gr.Column():
                # Input Section
                gr.Markdown("### Enter Repository Information")
                repo_input = gr.Textbox(
                    label="Enter repo IDs (comma or newline separated) or keywords to search",
                    lines=5,
                    placeholder="Enter repository IDs or keywords to search",
                )
                submit_btn = gr.Button("Submit", variant="primary")
                status = gr.Textbox(label="Status", visible=True)

                # Results Section
                df_output = gr.Dataframe(
                    headers=CSV_COLUMNS,
                    datatype=["str"] * len(CSV_COLUMNS),
                )

                # Analysis Section
                content_output = gr.Textbox(label="Repository Content", lines=10)
                summary_output = gr.Textbox(label="Analysis Summary", lines=5)

                # Chat Section
                # History is a list of {"role", "content"} dicts, which is the
                # "messages" format; the default format expects pairs instead.
                chatbot = gr.Chatbot(label="Chat with Assistant", height=400, type="messages")
                msg = gr.Textbox(label="Message", placeholder="Ask about the repository...")
                with gr.Row():
                    send_btn = gr.Button("Send", variant="primary")
                    clear_btn = gr.Button("Clear Chat", variant="secondary")

        # Event handlers
        submit_btn.click(
            fn=process_input,
            inputs=[repo_input, state],
            outputs=[df_output, status, content_output, summary_output],
        )
        send_btn.click(
            fn=send_message,
            inputs=[msg, chatbot, state],
            outputs=[chatbot, msg],
        )
        clear_btn.click(
            fn=clear_chat,
            inputs=[],
            outputs=[chatbot, msg],
        )

    return app


if __name__ == "__main__":
    app = create_ui()
    app.launch()