File size: 7,740 Bytes
1efd29f
6245b3b
b138e3b
 
3c63f39
f367387
3c63f39
f367387
3c63f39
885c1f9
f367387
3c63f39
f367387
 
 
3c63f39
 
 
 
10a33ac
f367387
3c63f39
f367387
 
 
3c63f39
f367387
3c63f39
 
f367387
3c63f39
f367387
3c63f39
b138e3b
5b7f342
3c63f39
 
f367387
3c63f39
 
f367387
 
 
 
3c63f39
d27a85c
3c63f39
 
 
 
 
 
 
 
 
f367387
3c63f39
 
5b7f342
3c63f39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5b7f342
f367387
3c63f39
f367387
3c63f39
 
1a943f1
3c63f39
 
1a943f1
3c63f39
 
885c1f9
f367387
3c63f39
f367387
 
 
 
 
3c63f39
 
 
 
 
 
 
 
 
 
 
f367387
3c63f39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2001b9b
3c63f39
 
 
 
 
 
 
 
 
 
2001b9b
3c63f39
 
2001b9b
3c63f39
 
2001b9b
3c63f39
 
2001b9b
3c63f39
 
 
 
2001b9b
3c63f39
2001b9b
 
3c63f39
 
 
 
 
 
 
 
 
9169bdf
3c63f39
f367387
3c63f39
 
 
f367387
3c63f39
 
 
 
 
f367387
 
 
3c63f39
f367387
3c63f39
bc78434
f367387
3c63f39
 
f367387
3c63f39
d27a85c
3c63f39
f367387
3fa421f
f367387
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
import gradio as gr
import regex as re
import csv
import pandas as pd
from typing import List, Dict, Tuple, Optional
import logging
from datetime import datetime
import os
from huggingface_hub import HfApi, SpaceCard

# Configure logging
# The root logger is set to INFO; `logger` is the module-level logger that the
# functions in this file use to report errors.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Constants
# Path of the CSV file that write_repos_to_csv() writes and that the UI reads
# back to populate the results table.
CSV_FILE = "repo_ids.csv"
# System prompt handed to chat_with_user() by the chat callback.
CHATBOT_SYSTEM_PROMPT = """You are a helpful AI assistant that analyzes Hugging Face repositories. 
Your task is to help users understand repositories, extract key information, and provide insights.
Be concise, clear, and focus on the most important aspects of each repository."""

class AppState:
    """Mutable container for the values the UI callbacks share.

    Fields:
        repo_ids: repository IDs from the most recent submission.
        current_repo_idx: index into ``repo_ids``; starts at 0.
        chat_history: list of string-to-string dicts; starts empty.
    """

    def __init__(self):
        # Every instance gets its own fresh lists, so nothing is shared
        # between instances.
        self.chat_history: List[Dict[str, str]] = list()
        self.current_repo_idx: int = 0
        self.repo_ids: List[str] = list()

def read_csv_as_text(filename: str) -> pd.DataFrame:
    """Read a CSV file into a DataFrame whose cells are all text.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        The file's contents with every cell kept as a string. Blank cells
        come back as "" rather than NaN. If the file cannot be read for any
        reason, the error is logged and an empty DataFrame with the five
        standard columns is returned instead of raising.
    """
    try:
        # dtype=str disables type inference and keep_default_na=False stops
        # pandas from turning blank cells into NaN, so the analysis columns
        # that are written as empty strings read back as empty strings.
        return pd.read_csv(filename, dtype=str, keep_default_na=False)
    except Exception as e:
        logger.error(f"Error reading CSV: {e}")
        return pd.DataFrame(columns=["repo id", "strength", "weaknesses", "speciality", "relevance rating"])

def write_repos_to_csv(repo_ids: List[str]) -> None:
    """Overwrite CSV_FILE with a header row and one row per repository ID.

    Each data row holds the repository ID followed by four empty cells
    (strength, weaknesses, speciality, relevance rating).

    Args:
        repo_ids: Repository IDs to record, in the order they should appear.

    Returns:
        None. Any failure is logged and swallowed (best-effort write).
    """
    try:
        # Explicit UTF-8: the default encoding of open() is platform-dependent,
        # while pandas.read_csv decodes as UTF-8 by default. Pinning it keeps
        # non-ASCII input readable when the file is loaded back.
        with open(CSV_FILE, 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerow(["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
            writer.writerows([repo_id, "", "", "", ""] for repo_id in repo_ids)
    except Exception as e:
        logger.error(f"Error writing to CSV: {e}")

def search_top_spaces(keyword: str, limit: int = 5) -> List[str]:
    """Look up Spaces matching a keyword and return their IDs.

    Args:
        keyword: Search term forwarded to ``HfApi.list_spaces``.
        limit: Maximum number of results requested (default 5).

    Returns:
        The ``id`` of each Space returned by the search. If anything goes
        wrong the error is logged and an empty list is returned.
    """
    try:
        matches = HfApi().list_spaces(search=keyword, limit=limit)
        found_ids = []
        for entry in matches:
            found_ids.append(entry.id)
        return found_ids
    except Exception as err:
        logger.error(f"Error searching spaces: {err}")
        return []

def analyze_repo(repo_id: str) -> Tuple[str, str]:
    """Fetch a Space's card metadata and build a content dump plus a summary.

    Args:
        repo_id: Identifier of the Space to analyze.

    Returns:
        A ``(content, summary)`` pair of strings. ``content`` lists the
        repository ID, title, description and all tags; ``summary`` is a
        shorter bullet list (description cut to 200 characters, at most five
        tags). On any failure the error is logged and
        ``("Error analyzing <repo_id>", "Error: <message>")`` is returned
        instead of raising.
    """
    try:
        api = HfApi()
        # HfApi exposes space_info() (there is no get_space()); the call
        # fails if the Space is missing or inaccessible, so we stop before
        # trying to load the card.
        space = api.space_info(repo_id)
        card = SpaceCard.load(repo_id)

        # The README front-matter lives on card.data, not on the card object.
        meta = card.data
        title = str(getattr(meta, "title", None) or "")
        # NOTE(review): Space READMEs usually carry `short_description`;
        # `description` is kept as a fallback. Confirm which key matters here.
        description = str(
            getattr(meta, "short_description", None)
            or getattr(meta, "description", None)
            or ""
        )
        # Fall back to the tags reported by the Hub when the card has none;
        # either source may be None, so always end up with a list.
        raw_tags = getattr(meta, "tags", None) or getattr(space, "tags", None) or []
        tags = [str(tag) for tag in raw_tags]
        
        content = f"""
        Repository: {repo_id}
        Title: {title}
        Description: {description}
        Tags: {', '.join(tags)}
        """
        
        summary = f"Analysis of {repo_id}:\n"
        summary += f"- Title: {title}\n"
        summary += f"- Main focus: {description[:200]}...\n"
        summary += f"- Key tags: {', '.join(tags[:5])}\n"
        
        return content, summary
    except Exception as e:
        logger.error(f"Error analyzing repo {repo_id}: {e}")
        return f"Error analyzing {repo_id}", f"Error: {str(e)}"

def chat_with_user(message: str, history: List[Dict[str, str]], system_prompt: str) -> str:
    """Produce the assistant's reply to a user message.

    This is a placeholder: the reply is a fixed template that echoes
    ``message``. ``history`` and ``system_prompt`` are accepted but not used.

    Returns:
        The templated reply, or a generic apology if building it fails.
    """
    try:
        # Canned response for now; no model is called.
        reply = f"I understand you're asking about: {message}. How can I help you analyze this repository?"
    except Exception as err:
        logger.error(f"Error in chat: {err}")
        return "I apologize, but I encountered an error. Please try again."
    return reply

def create_ui() -> gr.Blocks:
    """Build the Gradio interface and wire up its event handlers.

    The layout is a single column: an input box with a submit button and a
    status line, a results table, two analysis text boxes, and a chat area
    with send/clear buttons.

    Returns:
        The assembled ``gr.Blocks`` app (not yet launched).
    """
    with gr.Blocks(title="Hugging Face Repo Analyzer", theme=gr.themes.Soft()) as app:
        # The State component has to be created inside the Blocks context:
        # components used as event inputs must belong to the same Blocks app.
        state = gr.State(AppState())

        gr.Markdown("# Hugging Face Repository Analyzer")
        
        with gr.Row():
            with gr.Column():
                # Input Section
                gr.Markdown("### Enter Repository Information")
                repo_input = gr.Textbox(
                    label="Enter repo IDs (comma or newline separated) or keywords to search",
                    lines=5,
                    placeholder="Enter repository IDs or keywords to search"
                )
                submit_btn = gr.Button("Submit", variant="primary")
                status = gr.Textbox(label="Status", visible=True)
                
                # Results Section
                df_output = gr.Dataframe(
                    headers=["repo id", "strength", "weaknesses", "speciality", "relevance rating"],
                    datatype=["str", "str", "str", "str", "str"]
                )
                
                # Analysis Section
                content_output = gr.Textbox(label="Repository Content", lines=10)
                summary_output = gr.Textbox(label="Analysis Summary", lines=5)
                
                # Chat Section
                # NOTE(review): send_message() stores history as
                # {"role", "content"} dicts; depending on the installed Gradio
                # version the Chatbot may need type="messages" - confirm.
                chatbot = gr.Chatbot(label="Chat with Assistant", height=400)
                msg = gr.Textbox(label="Message", placeholder="Ask about the repository...")
                with gr.Row():
                    send_btn = gr.Button("Send", variant="primary")
                    clear_btn = gr.Button("Clear Chat", variant="secondary")
        
        def process_input(text: str, state: AppState) -> Tuple[pd.DataFrame, str, str, str]:
            """Turn the submitted text into a repo list and analyze the first entry.

            Text containing 'search', 'find' or 'look for' is treated as a
            keyword query (each comma/newline-separated piece is searched);
            anything else is treated as a list of repo IDs.

            Returns:
                (results table, status message, repository content, summary).
                On failure or when nothing is found, the table is empty and
                the two analysis strings are "".
            """
            try:
                # Check if input is keywords or repo IDs
                if any(kw in text.lower() for kw in ['search', 'find', 'look for']):
                    # Handle as keyword search
                    keywords = [k.strip() for k in re.split(r'[\n,]+', text) if k.strip()]
                    repo_ids = []
                    for kw in keywords:
                        repo_ids.extend(search_top_spaces(kw, limit=5))
                else:
                    # Handle as repo IDs
                    repo_ids = [rid.strip() for rid in re.split(r'[\n,]+', text) if rid.strip()]
                
                # Remove duplicates (dict.fromkeys keeps first-seen order)
                repo_ids = list(dict.fromkeys(repo_ids))
                
                if not repo_ids:
                    return pd.DataFrame(), "No repositories found", "", ""
                
                # Update state and CSV
                state.repo_ids = repo_ids
                state.current_repo_idx = 0
                write_repos_to_csv(repo_ids)
                
                # Get first repo analysis
                content, summary = analyze_repo(repo_ids[0])
                
                return read_csv_as_text(CSV_FILE), f"Found {len(repo_ids)} repositories", content, summary
                
            except Exception as e:
                logger.error(f"Error processing input: {e}")
                return pd.DataFrame(), f"Error: {str(e)}", "", ""
        
        def send_message(message: str, history: List[Dict[str, str]], state: AppState) -> Tuple[List[Dict[str, str]], str]:
            """Append the user's message and the assistant's reply to the chat.

            Returns:
                (updated history, "") - the empty string clears the message box.
                An empty message leaves the history untouched.
            """
            if not message:
                return history, ""
            # Work on a copy so the component's current value is not mutated
            # in place, and tolerate a chatbot whose value is still None.
            history = list(history or [])
            history.append({"role": "user", "content": message})
            response = chat_with_user(message, history, CHATBOT_SYSTEM_PROMPT)
            history.append({"role": "assistant", "content": response})
            return history, ""
        
        def clear_chat() -> Tuple[List[Dict[str, str]], str]:
            """Reset the chat: empty history and an empty message box."""
            return [], ""
        
        # Event handlers
        submit_btn.click(
            fn=process_input,
            inputs=[repo_input, state],
            outputs=[df_output, status, content_output, summary_output]
        )
        
        send_btn.click(
            fn=send_message,
            inputs=[msg, chatbot, state],
            outputs=[chatbot, msg]
        )
        
        clear_btn.click(
            fn=clear_chat,
            inputs=[],
            outputs=[chatbot, msg]
        )
    
    return app

# Script entry point: build the Gradio app and start it when this file is
# run directly (nothing is launched on import).
if __name__ == "__main__":
    app = create_ui()
    app.launch()