HF_RepoSense / app.py
naman1102's picture
Update app.py
2c3873c
raw
history blame
7.74 kB
import gradio as gr
import regex as re
import csv
import pandas as pd
from typing import List, Dict, Tuple, Optional
import logging
from datetime import datetime
import os
from huggingface_hub import HfApi, SpaceCard
# Logging: INFO-level root configuration plus a logger named after this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Path of the CSV file that holds the repository table.
CSV_FILE = "repo_ids.csv"

# System prompt passed to the chat helper. Built from adjacent literals so each
# sentence sits on its own source line; the resulting text is three
# newline-separated sentences with no trailing newline.
CHATBOT_SYSTEM_PROMPT = (
    "You are a helpful AI assistant that analyzes Hugging Face repositories.\n"
    "Your task is to help users understand repositories, extract key information, and provide insights.\n"
    "Be concise, clear, and focus on the most important aspects of each repository."
)
class AppState:
    """Mutable per-application state container.

    Holds the list of repository IDs being worked on, the index of the
    repository currently selected, and the chat transcript.
    """

    # Attribute types are declared once here; values are assigned per instance
    # in __init__ so that no list is shared between instances.
    repo_ids: List[str]
    current_repo_idx: int
    chat_history: List[Dict[str, str]]

    def __init__(self):
        self.repo_ids = []
        self.current_repo_idx = 0
        self.chat_history = []
def read_csv_as_text(filename: str) -> pd.DataFrame:
    """Read a CSV file and return it as a DataFrame of text cells.

    Every column is read as ``str`` and blank cells are kept as empty strings
    instead of being converted to NaN, so the table round-trips the empty
    placeholders written for not-yet-analyzed repositories.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        The file contents, or an empty DataFrame with the standard result
        columns if the file cannot be read (the error is logged).
    """
    try:
        # dtype=str + keep_default_na=False: without these, pandas turns the
        # empty cells into float NaN, which does not match the all-string table.
        return pd.read_csv(filename, dtype=str, keep_default_na=False)
    except Exception as e:
        logger.error(f"Error reading CSV: {e}")
        return pd.DataFrame(columns=["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
def write_repos_to_csv(repo_ids: List[str], filename: Optional[str] = None) -> None:
    """Write repository IDs to a CSV file, one row per repository.

    The file is overwritten. Each row holds the repo ID followed by four empty
    cells for the analysis columns.

    Args:
        repo_ids: Repository IDs to write, in output order.
        filename: Destination path. Defaults to the module-level ``CSV_FILE``.

    Errors are logged rather than raised.
    """
    try:
        target = CSV_FILE if filename is None else filename
        # Explicit UTF-8: pandas.read_csv decodes as UTF-8 by default, while
        # open() would otherwise fall back to the platform's locale encoding.
        with open(target, 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerow(["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
            writer.writerows([repo_id, "", "", "", ""] for repo_id in repo_ids)
    except Exception as e:
        logger.error(f"Error writing to CSV: {e}")
def search_top_spaces(keyword: str, limit: int = 5) -> List[str]:
    """Search Hugging Face Spaces by keyword and return their repo IDs.

    Args:
        keyword: Search text. Surrounding whitespace is ignored.
        limit: Maximum number of results to return.

    Returns:
        Up to ``limit`` Space IDs, or an empty list when the keyword is blank,
        the limit is not positive, or the lookup fails (the error is logged).
    """
    query = (keyword or "").strip()
    # A blank query would list arbitrary Spaces rather than search results, and
    # a non-positive limit can never yield anything: skip the network call.
    if not query or limit <= 0:
        return []
    try:
        from itertools import islice

        api = HfApi()
        spaces = api.list_spaces(search=query, limit=limit)
        # islice enforces the cap on our side as well, whatever the iterable yields.
        return [space.id for space in islice(spaces, limit)]
    except Exception as e:
        logger.error(f"Error searching spaces: {e}")
        return []
def analyze_repo(repo_id: str) -> Tuple[str, str]:
    """Analyze a single Space from its repository card.

    Loads the Space's card and formats its title, description and tags.

    Args:
        repo_id: ID of the Space to analyze.

    Returns:
        A ``(content, summary)`` pair of display strings. If the card cannot be
        loaded, both strings carry an error message instead (the error is
        logged).
    """
    try:
        # The previous `api.get_space(repo_id)` call was dropped: HfApi exposes
        # `space_info`, not `get_space`, so it raised on every call, and its
        # result was never used.
        card = SpaceCard.load(repo_id)
        # Card metadata (the README front matter) lives on `card.data`, not on
        # the card object itself.
        metadata = card.data

        # Any of these fields may be absent or None; fall back to empty values
        # so that slicing and joining below cannot fail.
        title = getattr(metadata, "title", None) or ""
        # NOTE(review): Spaces usually carry `short_description` rather than
        # `description` in their metadata - confirm which one is wanted here.
        description = str(
            getattr(metadata, "description", None)
            or getattr(metadata, "short_description", None)
            or ""
        )
        raw_tags = getattr(metadata, "tags", None) or []
        if isinstance(raw_tags, str):
            # A scalar `tags: foo` entry would otherwise be joined per character.
            raw_tags = [raw_tags]
        tags = [str(tag) for tag in raw_tags]

        content = (
            f"\nRepository: {repo_id}\n"
            f"Title: {title}\n"
            f"Description: {description}\n"
            f"Tags: {', '.join(tags)}\n"
        )
        summary = (
            f"Analysis of {repo_id}:\n"
            f"- Title: {title}\n"
            f"- Main focus: {description[:200]}...\n"
            f"- Key tags: {', '.join(tags[:5])}\n"
        )
        return content, summary
    except Exception as e:
        logger.error(f"Error analyzing repo {repo_id}: {e}")
        return f"Error analyzing {repo_id}", f"Error: {str(e)}"
def chat_with_user(message: str, history: List[Dict[str, str]], system_prompt: str) -> str:
    """Produce the assistant's reply to a user message.

    This is currently a placeholder: the reply only echoes the message, and
    neither ``history`` nor ``system_prompt`` is consulted.

    Args:
        message: The user's latest message.
        history: Conversation so far (unused for now).
        system_prompt: System prompt for the assistant (unused for now).

    Returns:
        The reply text, or an apology string if building the reply fails.
    """
    try:
        reply = (
            f"I understand you're asking about: {message}. "
            "How can I help you analyze this repository?"
        )
    except Exception as e:
        logger.error(f"Error in chat: {e}")
        reply = "I apologize, but I encountered an error. Please try again."
    return reply
# Column layout shared by the results table and the empty-result placeholder.
_RESULT_COLUMNS = ["repo id", "strength", "weaknesses", "speciality", "relevance rating"]

# An entry shaped like "<owner>/<name>" (no whitespace) is taken as a repo ID.
_REPO_ID_PATTERN = re.compile(r"^[\w.\-]+/[\w.\-]+$")

# Optional leading trigger words ("search", "search for", "find", "look for")
# that are removed from a keyword entry before it is sent to the Hub search.
_SEARCH_PREFIX_PATTERN = re.compile(
    r"^(?:search(?:\s+for)?|find|look\s+for)\b[\s:]*", re.IGNORECASE
)


def _resolve_repo_ids(text: str) -> List[str]:
    """Turn raw user input into an ordered, de-duplicated list of repo IDs.

    The input is split on commas and newlines. Each entry is classified on its
    own: entries shaped like ``owner/name`` are used as repo IDs directly, any
    other entry is treated as a search keyword. Classifying per entry avoids
    mistaking an ID such as ``user/semantic-search`` for a search request just
    because it contains the substring "search".
    """
    resolved: List[str] = []
    for raw_entry in re.split(r'[\n,]+', text or ""):
        entry = raw_entry.strip()
        if not entry:
            continue
        if _REPO_ID_PATTERN.match(entry):
            resolved.append(entry)
            continue
        query = _SEARCH_PREFIX_PATTERN.sub("", entry).strip()
        if query:
            resolved.extend(search_top_spaces(query, limit=5))
    # dict.fromkeys drops duplicates while keeping first-seen order.
    return list(dict.fromkeys(resolved))


def create_ui() -> gr.Blocks:
    """Build the Gradio interface and wire up its event handlers.

    The page has an input box for repo IDs or search keywords, a results
    table, two text boxes for the analysis of the first repository, and a chat
    panel.

    Returns:
        The assembled ``gr.Blocks`` app, ready to be launched.
    """
    with gr.Blocks(title="Hugging Face Repo Analyzer", theme=gr.themes.Soft()) as app:
        # The state component is created inside the Blocks context so that it
        # belongs to this app and can be used as an event input below.
        state = gr.State(AppState())

        gr.Markdown("# Hugging Face Repository Analyzer")
        with gr.Row():
            with gr.Column():
                # Input Section
                gr.Markdown("### Enter Repository Information")
                repo_input = gr.Textbox(
                    label="Enter repo IDs (comma or newline separated) or keywords to search",
                    lines=5,
                    placeholder="Enter repository IDs or keywords to search"
                )
                submit_btn = gr.Button("Submit", variant="primary")
                status = gr.Textbox(label="Status", visible=True)

        # Results Section
        df_output = gr.Dataframe(
            headers=_RESULT_COLUMNS,
            datatype=["str", "str", "str", "str", "str"]
        )

        # Analysis Section
        content_output = gr.Textbox(label="Repository Content", lines=10)
        summary_output = gr.Textbox(label="Analysis Summary", lines=5)

        # Chat Section
        # NOTE(review): the handlers below treat the history as a list of
        # {"role", "content"} dicts; depending on the installed Gradio version
        # the Chatbot may need to be told to use that message format - confirm
        # against the pinned version.
        chatbot = gr.Chatbot(label="Chat with Assistant", height=400)
        msg = gr.Textbox(label="Message", placeholder="Ask about the repository...")
        with gr.Row():
            send_btn = gr.Button("Send", variant="primary")
            clear_btn = gr.Button("Clear Chat", variant="secondary")

        def process_input(text: str, state: AppState) -> Tuple[pd.DataFrame, str, str, str]:
            """Resolve the input to repo IDs, save them and analyze the first one.

            Returns the results table, a status message, and the content and
            summary strings for the first repository.
            """
            try:
                repo_ids = _resolve_repo_ids(text)
                if not repo_ids:
                    # Keep the column headers visible even when nothing was found.
                    return pd.DataFrame(columns=_RESULT_COLUMNS), "No repositories found", "", ""

                # Update state and CSV
                state.repo_ids = repo_ids
                state.current_repo_idx = 0
                write_repos_to_csv(repo_ids)

                # Get first repo analysis
                content, summary = analyze_repo(repo_ids[0])
                return read_csv_as_text(CSV_FILE), f"Found {len(repo_ids)} repositories", content, summary
            except Exception as e:
                logger.error(f"Error processing input: {e}")
                return pd.DataFrame(columns=_RESULT_COLUMNS), f"Error: {str(e)}", "", ""

        def send_message(message: str, history: List[Dict[str, str]], state: AppState) -> Tuple[List[Dict[str, str]], str]:
            """Append the user's message and the assistant's reply to the chat.

            Returns the updated history and an empty string that clears the
            message box. Blank messages leave the history unchanged.
            """
            # Work on a copy: the incoming history may be None before the first
            # message, and the caller's list should not be mutated in place.
            updated = list(history or [])
            if not message or not message.strip():
                return updated, ""
            updated.append({"role": "user", "content": message})
            response = chat_with_user(message, updated, CHATBOT_SYSTEM_PROMPT)
            updated.append({"role": "assistant", "content": response})
            return updated, ""

        def clear_chat() -> Tuple[List[Dict[str, str]], str]:
            """Reset the chat panel and the message box."""
            return [], ""

        # Event handlers
        submit_btn.click(
            fn=process_input,
            inputs=[repo_input, state],
            outputs=[df_output, status, content_output, summary_output]
        )
        send_btn.click(
            fn=send_message,
            inputs=[msg, chatbot, state],
            outputs=[chatbot, msg]
        )
        clear_btn.click(
            fn=clear_chat,
            inputs=[],
            outputs=[chatbot, msg]
        )
    return app
if __name__ == "__main__":
    # Script entry point: build the Gradio interface and start serving it.
    app = create_ui()
    app.launch()