File size: 7,740 Bytes
1efd29f 6245b3b b138e3b 3c63f39 f367387 3c63f39 f367387 3c63f39 885c1f9 f367387 3c63f39 f367387 3c63f39 10a33ac f367387 3c63f39 f367387 3c63f39 f367387 3c63f39 f367387 3c63f39 f367387 3c63f39 b138e3b 5b7f342 3c63f39 f367387 3c63f39 f367387 3c63f39 d27a85c 3c63f39 f367387 3c63f39 5b7f342 3c63f39 5b7f342 f367387 3c63f39 f367387 3c63f39 1a943f1 3c63f39 1a943f1 3c63f39 885c1f9 f367387 3c63f39 f367387 3c63f39 f367387 3c63f39 2001b9b 3c63f39 2001b9b 3c63f39 2001b9b 3c63f39 2001b9b 3c63f39 2001b9b 3c63f39 2001b9b 3c63f39 2001b9b 3c63f39 9169bdf 3c63f39 f367387 3c63f39 f367387 3c63f39 f367387 3c63f39 f367387 3c63f39 bc78434 f367387 3c63f39 f367387 3c63f39 d27a85c 3c63f39 f367387 3fa421f f367387 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 |
import gradio as gr
import regex as re
import csv
import pandas as pd
from typing import List, Dict, Tuple, Optional
import logging
from datetime import datetime
import os
from huggingface_hub import HfApi, SpaceCard
# Logging: configure the root logger at INFO and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Constants
# CSV file that write_repos_to_csv fills and the results table is read from.
CSV_FILE = "repo_ids.csv"
# System prompt handed to chat_with_user; built from adjacent literals, one
# per line of the prompt.
CHATBOT_SYSTEM_PROMPT = (
    "You are a helpful AI assistant that analyzes Hugging Face repositories.\n"
    "Your task is to help users understand repositories, extract key information, and provide insights.\n"
    "Be concise, clear, and focus on the most important aspects of each repository."
)
class AppState:
    """Mutable bag of values shared between the UI callbacks."""

    def __init__(self) -> None:
        # Repository IDs from the most recent submission.
        self.repo_ids: List[str] = list()
        # Position within repo_ids; reset to 0 whenever a new list is stored.
        self.current_repo_idx: int = 0
        # Chat messages as dicts; starts out empty.
        self.chat_history: List[Dict[str, str]] = list()
def read_csv_as_text(filename: str) -> pd.DataFrame:
    """Read a CSV file into a DataFrame whose cells are all plain text.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        The file's contents with every cell kept as a string. If the file
        cannot be read for any reason, the error is logged and an empty
        DataFrame with the five result columns is returned instead.
    """
    try:
        # dtype=str stops numeric-looking cells from being converted to
        # numbers; keep_default_na=False keeps blank cells as "" rather than
        # NaN, so the table shows empty fields instead of "NaN".
        return pd.read_csv(filename, dtype=str, keep_default_na=False)
    except Exception as e:
        # Best-effort: the caller feeds the result straight into the UI table.
        logger.error(f"Error reading CSV: {e}")
        return pd.DataFrame(columns=["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
def write_repos_to_csv(repo_ids: List[str]) -> None:
    """Overwrite CSV_FILE with a header row and one row per repository ID.

    Each data row holds the repository ID followed by four empty fields
    (strength, weaknesses, speciality, relevance rating).

    Args:
        repo_ids: Repository IDs to write, in order.

    Errors are logged rather than raised, so a failed write does not abort
    the calling UI callback.
    """
    try:
        # Explicit UTF-8 so the file does not depend on the platform's locale
        # encoding (pandas.read_csv decodes as UTF-8 by default).
        with open(CSV_FILE, 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerow(["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
            writer.writerows([repo_id, "", "", "", ""] for repo_id in repo_ids)
    except Exception as e:
        logger.error(f"Error writing to CSV: {e}")
def search_top_spaces(keyword: str, limit: int = 5) -> List[str]:
    """Look up Spaces on the Hub for a keyword and return their IDs.

    Args:
        keyword: Search term forwarded to ``HfApi.list_spaces``.
        limit: Maximum number of results requested from the Hub.

    Returns:
        The ``id`` of every Space yielded by the search, or an empty list if
        anything goes wrong (the error is logged).
    """
    found: List[str] = []
    try:
        hub = HfApi()
        for entry in hub.list_spaces(search=keyword, limit=limit):
            found.append(entry.id)
    except Exception as e:
        logger.error(f"Error searching spaces: {e}")
        # Discard partial results, exactly as a failed search yields nothing.
        return []
    return found
def analyze_repo(repo_id: str) -> Tuple[str, str]:
    """Load a Space's card and turn its metadata into display text.

    Args:
        repo_id: Identifier of the Space, passed to ``SpaceCard.load``.

    Returns:
        A ``(content, summary)`` pair. ``content`` lists the repository ID,
        title, description and all tags; ``summary`` is a shorter bullet list
        with the description cut to 200 characters and at most five tags.
        On any failure the error is logged and
        ``("Error analyzing <repo_id>", "Error: <message>")`` is returned.
    """
    try:
        # Loading the card already fails for an unknown Space, so no separate
        # existence lookup through HfApi is made here.
        card = SpaceCard.load(repo_id)

        # The YAML metadata is exposed on card.data, not on the card object.
        # getattr with a default tolerates cards that omit a field.
        meta = card.data
        title = getattr(meta, "title", None) or repo_id

        # NOTE(review): Space metadata documents `short_description`; a plain
        # `description` key is also accepted in case a card defines one. With
        # neither present, fall back to the README body.
        description = str(
            getattr(meta, "short_description", None)
            or getattr(meta, "description", None)
            or (getattr(card, "text", "") or "").strip()
            or "No description available"
        )

        raw_tags = getattr(meta, "tags", None) or []
        if isinstance(raw_tags, str):
            # A scalar `tags: foo` entry would otherwise be split per character.
            raw_tags = [raw_tags]
        tags = [str(tag) for tag in raw_tags]

        content = (
            f"\nRepository: {repo_id}\n"
            f"Title: {title}\n"
            f"Description: {description}\n"
            f"Tags: {', '.join(tags)}\n"
        )
        summary = (
            f"Analysis of {repo_id}:\n"
            f"- Title: {title}\n"
            f"- Main focus: {description[:200]}...\n"
            f"- Key tags: {', '.join(tags[:5])}\n"
        )
        return content, summary
    except Exception as e:
        logger.error(f"Error analyzing repo {repo_id}: {e}")
        return f"Error analyzing {repo_id}", f"Error: {str(e)}"
def chat_with_user(message: str, history: List[Dict[str, str]], system_prompt: str) -> str:
    """Produce the assistant's reply to a chat message.

    This is a placeholder: the reply is a fixed acknowledgement that echoes
    ``message``. ``history`` and ``system_prompt`` are accepted but not used.

    Returns:
        The reply text, or an apology string if building it fails (the error
        is logged).
    """
    try:
        # Placeholder reply until real generation is implemented.
        reply = f"I understand you're asking about: {message}. How can I help you analyze this repository?"
    except Exception as e:
        logger.error(f"Error in chat: {e}")
        reply = "I apologize, but I encountered an error. Please try again."
    return reply
def create_ui() -> gr.Blocks:
    """Build the Gradio Blocks interface for the repository analyzer.

    The page contains an input box with a Submit button and a status line, a
    results table, two analysis text boxes and a chat panel. Three callbacks
    are wired up: ``process_input`` (Submit), ``send_message`` (Send) and
    ``clear_chat`` (Clear Chat).

    Returns:
        The assembled ``gr.Blocks`` app; launching it is left to the caller.
    """
    columns = ["repo id", "strength", "weaknesses", "speciality", "relevance rating"]

    def process_input(text: str, state: AppState) -> Tuple[pd.DataFrame, str, str, str]:
        """Resolve the submitted text to repo IDs and analyze the first one.

        Returns ``(table, status message, repository content, summary)``.
        """
        try:
            text = text or ""  # an untouched textbox may deliver None
            entries = [part.strip() for part in re.split(r'[\n,]+', text) if part.strip()]

            # Text mentioning a search verb is treated as keywords to search
            # for; anything else is taken as literal repo IDs.
            if any(kw in text.lower() for kw in ['search', 'find', 'look for']):
                repo_ids = []
                for kw in entries:
                    repo_ids.extend(search_top_spaces(kw, limit=5))
            else:
                repo_ids = entries

            # Remove duplicates while keeping first-seen order.
            repo_ids = list(dict.fromkeys(repo_ids))
            if not repo_ids:
                return pd.DataFrame(columns=columns), "No repositories found", "", ""

            # Update state and CSV
            state.repo_ids = repo_ids
            state.current_repo_idx = 0
            write_repos_to_csv(repo_ids)

            # Only the first repository is analyzed on submit.
            content, summary = analyze_repo(repo_ids[0])
            return read_csv_as_text(CSV_FILE), f"Found {len(repo_ids)} repositories", content, summary
        except Exception as e:
            logger.error(f"Error processing input: {e}")
            return pd.DataFrame(columns=columns), f"Error: {str(e)}", "", ""

    def send_message(message: str, history: List[Dict[str, str]], state: AppState) -> Tuple[List[Dict[str, str]], str]:
        """Append the user's message and the assistant's reply to the chat.

        Returns the updated history and an empty string that clears the
        message box.
        """
        # Work on a copy: the incoming list belongs to the Chatbot component
        # and may be None before any message has been sent.
        history = list(history or [])
        if not message:
            return history, ""
        history.append({"role": "user", "content": message})
        response = chat_with_user(message, history, CHATBOT_SYSTEM_PROMPT)
        history.append({"role": "assistant", "content": response})
        return history, ""

    def clear_chat() -> Tuple[List[Dict[str, str]], str]:
        """Reset the chat panel and the message box."""
        return [], ""

    with gr.Blocks(title="Hugging Face Repo Analyzer", theme=gr.themes.Soft()) as app:
        # State has to be created inside the Blocks context so that it is
        # registered with this app and can be used as an event input.
        state = gr.State(AppState())

        gr.Markdown("# Hugging Face Repository Analyzer")

        with gr.Row():
            with gr.Column():
                # Input Section
                gr.Markdown("### Enter Repository Information")
                repo_input = gr.Textbox(
                    label="Enter repo IDs (comma or newline separated) or keywords to search",
                    lines=5,
                    placeholder="Enter repository IDs or keywords to search"
                )
                submit_btn = gr.Button("Submit", variant="primary")
                status = gr.Textbox(label="Status", visible=True)

        # Results Section
        df_output = gr.Dataframe(
            headers=columns,
            datatype=["str", "str", "str", "str", "str"]
        )

        # Analysis Section
        content_output = gr.Textbox(label="Repository Content", lines=10)
        summary_output = gr.Textbox(label="Analysis Summary", lines=5)

        # Chat Section
        # send_message produces role/content dicts, i.e. the "messages"
        # format. NOTE(review): Gradio releases without the `type` argument
        # reject it with TypeError, hence the fallback — confirm against the
        # pinned Gradio version.
        try:
            chatbot = gr.Chatbot(label="Chat with Assistant", height=400, type="messages")
        except TypeError:
            chatbot = gr.Chatbot(label="Chat with Assistant", height=400)
        msg = gr.Textbox(label="Message", placeholder="Ask about the repository...")
        with gr.Row():
            send_btn = gr.Button("Send", variant="primary")
            clear_btn = gr.Button("Clear Chat", variant="secondary")

        # Event handlers
        submit_btn.click(
            fn=process_input,
            inputs=[repo_input, state],
            outputs=[df_output, status, content_output, summary_output]
        )
        send_btn.click(
            fn=send_message,
            inputs=[msg, chatbot, state],
            outputs=[chatbot, msg]
        )
        clear_btn.click(
            fn=clear_chat,
            inputs=[],
            outputs=[chatbot, msg]
        )

    return app
if __name__ == "__main__":
    # Script entry point: build the interface, then start serving it.
    app = create_ui()
    app.launch()
|