Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -2,607 +2,198 @@ import gradio as gr
|
|
2 |
import regex as re
|
3 |
import csv
|
4 |
import pandas as pd
|
5 |
-
from typing import
|
6 |
import logging
|
7 |
-
from
|
8 |
import os
|
9 |
-
|
10 |
-
from
|
11 |
-
|
12 |
-
|
13 |
-
parse_llm_json_response,
|
14 |
-
analyze_code
|
15 |
-
)
|
16 |
-
from hf_utils import download_space_repo, search_top_spaces
|
17 |
-
from chatbot_page import chat_with_user, extract_keywords_from_conversation
|
18 |
|
19 |
# Configure logging
|
20 |
-
logging.basicConfig(
|
21 |
-
level=logging.INFO,
|
22 |
-
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
23 |
-
)
|
24 |
logger = logging.getLogger(__name__)
|
25 |
|
26 |
# Constants
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
"Return only the keywords as a comma-separated list."
|
32 |
-
)
|
33 |
-
|
34 |
-
CHATBOT_INITIAL_MESSAGE = (
|
35 |
-
"Hello! Please tell me about your ideal Hugging Face repo. "
|
36 |
-
"What use case, preferred language, or features are you looking for?"
|
37 |
-
)
|
38 |
|
39 |
-
# State management
|
40 |
class AppState:
|
|
|
41 |
def __init__(self):
|
42 |
self.repo_ids: List[str] = []
|
43 |
self.current_repo_idx: int = 0
|
44 |
-
self.
|
45 |
-
self.analysis_results: Dict[str, Dict[str, Any]] = {}
|
46 |
-
self.chat_history: List[Tuple[str, str]] = []
|
47 |
-
|
48 |
-
def reset(self):
|
49 |
-
self.__init__()
|
50 |
|
51 |
-
|
52 |
-
|
53 |
-
"""Read CSV file and return as DataFrame with string dtype."""
|
54 |
try:
|
55 |
-
return pd.read_csv(
|
56 |
except Exception as e:
|
57 |
-
logger.error(f"Error reading CSV
|
58 |
return pd.DataFrame(columns=["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
|
59 |
|
60 |
-
def write_repos_to_csv(repo_ids: List[str]
|
61 |
-
"""Write
|
62 |
try:
|
63 |
-
with open(
|
64 |
-
writer = csv.writer(
|
65 |
writer.writerow(["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
|
66 |
for repo_id in repo_ids:
|
67 |
writer.writerow([repo_id, "", "", "", ""])
|
68 |
except Exception as e:
|
69 |
-
logger.error(f"Error writing to CSV
|
70 |
|
71 |
-
def
|
72 |
-
"""
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
return
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
state.current_repo_idx = 0
|
81 |
-
|
82 |
-
write_repos_to_csv(repo_ids)
|
83 |
-
return read_csv_as_text("repo_ids.csv")
|
84 |
|
85 |
-
def
|
86 |
-
"""Analyze a single repository
|
87 |
try:
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
else:
|
106 |
-
summary = f"JSON extraction: FAILED\nRaw: {llm_json.get('raw', '') if isinstance(llm_json, dict) else llm_json}"
|
107 |
-
|
108 |
-
return combined_content, summary, llm_json
|
109 |
-
|
110 |
except Exception as e:
|
111 |
logger.error(f"Error analyzing repo {repo_id}: {e}")
|
112 |
-
return
|
113 |
|
114 |
-
def
|
115 |
-
"""
|
116 |
try:
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
for idx, row in df.iterrows():
|
121 |
-
if row["repo id"] == repo_id:
|
122 |
-
if isinstance(analysis_results, dict) and "error" not in analysis_results:
|
123 |
-
df.at[idx, "strength"] = analysis_results.get("strength", "")
|
124 |
-
df.at[idx, "weaknesses"] = analysis_results.get("weaknesses", "")
|
125 |
-
df.at[idx, "speciality"] = analysis_results.get("speciality", "")
|
126 |
-
df.at[idx, "relevance rating"] = analysis_results.get("relevance rating", "")
|
127 |
-
updated = True
|
128 |
-
break
|
129 |
-
|
130 |
-
if not updated and isinstance(analysis_results, dict) and "error" not in analysis_results:
|
131 |
-
new_row = {
|
132 |
-
"repo id": repo_id,
|
133 |
-
"strength": analysis_results.get("strength", ""),
|
134 |
-
"weaknesses": analysis_results.get("weaknesses", ""),
|
135 |
-
"speciality": analysis_results.get("speciality", ""),
|
136 |
-
"relevance rating": analysis_results.get("relevance rating", "")
|
137 |
-
}
|
138 |
-
df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
|
139 |
-
|
140 |
-
df.to_csv(csv_filename, index=False)
|
141 |
-
return df
|
142 |
-
|
143 |
except Exception as e:
|
144 |
-
logger.error(f"Error
|
145 |
-
return
|
146 |
-
|
147 |
-
def show_combined_repo_and_llm(state: AppState) -> Tuple[str, str, pd.DataFrame]:
|
148 |
-
"""Show combined repo content and LLM analysis for current repo."""
|
149 |
-
if not state.repo_ids:
|
150 |
-
return "No repo ID available. Please submit repo IDs first.", "", pd.DataFrame()
|
151 |
-
|
152 |
-
if state.current_repo_idx >= len(state.repo_ids):
|
153 |
-
return "All repo IDs have been processed.", "", read_csv_as_text("repo_ids.csv")
|
154 |
-
|
155 |
-
repo_id = state.repo_ids[state.current_repo_idx]
|
156 |
-
combined_content, summary, analysis_results = analyze_single_repo(repo_id)
|
157 |
-
df = update_csv_with_analysis(repo_id, analysis_results)
|
158 |
-
|
159 |
-
state.current_repo_idx += 1
|
160 |
-
return combined_content, summary, df
|
161 |
|
162 |
-
def keyword_search_and_update(keyword: str, state: AppState) -> pd.DataFrame:
|
163 |
-
"""Search for repos using keywords and update state."""
|
164 |
-
if not keyword:
|
165 |
-
return pd.DataFrame(columns=["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
|
166 |
-
|
167 |
-
keyword_list = [k.strip() for k in re.split(r'[\n,]+', keyword) if k.strip()]
|
168 |
-
repo_ids = []
|
169 |
-
|
170 |
-
for kw in keyword_list:
|
171 |
-
repo_ids.extend(search_top_spaces(kw, limit=5))
|
172 |
-
|
173 |
-
# Remove duplicates while preserving order
|
174 |
-
seen = set()
|
175 |
-
unique_repo_ids = []
|
176 |
-
for rid in repo_ids:
|
177 |
-
if rid not in seen:
|
178 |
-
unique_repo_ids.append(rid)
|
179 |
-
seen.add(rid)
|
180 |
-
|
181 |
-
state.repo_ids = unique_repo_ids
|
182 |
-
state.current_repo_idx = 0
|
183 |
-
|
184 |
-
write_repos_to_csv(unique_repo_ids)
|
185 |
-
return read_csv_as_text("repo_ids.csv")
|
186 |
-
|
187 |
-
# UI Components
|
188 |
def create_ui() -> gr.Blocks:
|
189 |
-
"""Create
|
190 |
state = gr.State(AppState())
|
191 |
|
192 |
with gr.Blocks(title="Hugging Face Repo Analyzer", theme=gr.themes.Soft()) as app:
|
193 |
gr.Markdown("# Hugging Face Repository Analyzer")
|
194 |
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
- Chat with an AI assistant to find the perfect repository
|
207 |
-
- Get detailed analysis of repositories
|
208 |
-
|
209 |
-
Click 'Start Analysis' to begin!
|
210 |
-
""")
|
211 |
-
with gr.Row():
|
212 |
-
start_btn = gr.Button("Start Analysis", variant="primary")
|
213 |
-
help_btn = gr.Button("View Help Guide", variant="secondary")
|
214 |
-
|
215 |
-
# Help Guide
|
216 |
-
with gr.Group(visible=False) as help_page:
|
217 |
-
gr.Markdown("""
|
218 |
-
# Help Guide
|
219 |
-
|
220 |
-
## Quick Start
|
221 |
-
1. Enter repository IDs or search by keywords
|
222 |
-
2. Start the analysis
|
223 |
-
3. Review the results
|
224 |
-
|
225 |
-
## Features
|
226 |
-
- **Repository Analysis**: Get detailed insights about repositories
|
227 |
-
- **Keyword Search**: Find repositories matching your criteria
|
228 |
-
- **AI Assistant**: Chat to find the perfect repository
|
229 |
-
- **Comparison**: Compare repositories side by side
|
230 |
-
|
231 |
-
## Keyboard Shortcuts
|
232 |
-
- `Ctrl + Enter`: Send message in chat
|
233 |
-
- `Ctrl + S`: Start new analysis
|
234 |
-
- `Ctrl + H`: Toggle help guide
|
235 |
-
""")
|
236 |
-
back_btn = gr.Button("Back to Start", variant="primary")
|
237 |
-
|
238 |
-
# Input Page
|
239 |
-
with gr.Group(visible=False) as input_page:
|
240 |
-
with gr.Row():
|
241 |
-
with gr.Column():
|
242 |
-
gr.Markdown("### Enter Repository IDs")
|
243 |
-
repo_id_input = gr.Textbox(
|
244 |
-
label="Enter repo IDs (comma or newline separated)",
|
245 |
-
lines=5,
|
246 |
-
placeholder="repo1, repo2\nrepo3"
|
247 |
-
)
|
248 |
-
submit_btn = gr.Button("Submit Repo IDs", variant="primary")
|
249 |
-
submit_status = gr.Textbox(label="Status", visible=False)
|
250 |
-
|
251 |
-
with gr.Column():
|
252 |
-
gr.Markdown("### Or Search by Keywords")
|
253 |
-
keyword_input = gr.Textbox(
|
254 |
-
label="Enter keywords to search",
|
255 |
-
lines=3,
|
256 |
-
placeholder="Enter keywords separated by commas"
|
257 |
-
)
|
258 |
-
search_btn = gr.Button("Search by Keywords", variant="primary")
|
259 |
-
search_status = gr.Textbox(label="Status", visible=False)
|
260 |
-
|
261 |
-
df_output = gr.Dataframe(
|
262 |
-
headers=["repo id", "strength", "weaknesses", "speciality", "relevance rating"],
|
263 |
-
datatype=["str", "str", "str", "str", "str"]
|
264 |
-
)
|
265 |
-
with gr.Row():
|
266 |
-
analyze_btn = gr.Button("Start Analysis", variant="primary")
|
267 |
-
analyze_status = gr.Textbox(label="Status", visible=False)
|
268 |
-
compare_btn = gr.Button("Compare Repositories", variant="secondary")
|
269 |
-
|
270 |
-
# Analysis Page
|
271 |
-
with gr.Group(visible=False) as analysis_page:
|
272 |
-
gr.Markdown("### Repository Analysis")
|
273 |
-
progress = gr.Slider(
|
274 |
-
minimum=0,
|
275 |
-
maximum=100,
|
276 |
-
value=0,
|
277 |
-
label="Analysis Progress",
|
278 |
-
interactive=False
|
279 |
-
)
|
280 |
-
with gr.Row():
|
281 |
-
with gr.Column():
|
282 |
-
content_output = gr.Textbox(label="Repository Content", lines=10)
|
283 |
-
with gr.Column():
|
284 |
-
summary_output = gr.Textbox(label="Analysis Summary", lines=10)
|
285 |
-
with gr.Row():
|
286 |
-
next_btn = gr.Button("Analyze Next Repository", variant="primary")
|
287 |
-
next_status = gr.Textbox(label="Status", visible=False)
|
288 |
-
finish_btn = gr.Button("Finish Analysis", variant="secondary")
|
289 |
-
export_btn = gr.Button("Export Results", variant="secondary")
|
290 |
-
export_status = gr.Textbox(label="Status", visible=False)
|
291 |
-
|
292 |
-
# Comparison Page
|
293 |
-
with gr.Group(visible=False) as comparison_page:
|
294 |
-
gr.Markdown("### Repository Comparison")
|
295 |
-
with gr.Row():
|
296 |
-
with gr.Column():
|
297 |
-
repo1_select = gr.Dropdown(
|
298 |
-
label="Select First Repository",
|
299 |
-
choices=[],
|
300 |
-
interactive=True
|
301 |
-
)
|
302 |
-
repo1_content = gr.Textbox(label="Repository 1 Content", lines=10)
|
303 |
-
repo1_summary = gr.Textbox(label="Repository 1 Summary", lines=10)
|
304 |
-
with gr.Column():
|
305 |
-
repo2_select = gr.Dropdown(
|
306 |
-
label="Select Second Repository",
|
307 |
-
choices=[],
|
308 |
-
interactive=True
|
309 |
-
)
|
310 |
-
repo2_content = gr.Textbox(label="Repository 2 Content", lines=10)
|
311 |
-
repo2_summary = gr.Textbox(label="Repository 2 Summary", lines=10)
|
312 |
-
compare_btn = gr.Button("Compare", variant="primary")
|
313 |
-
back_to_analysis_btn = gr.Button("Back to Analysis", variant="secondary")
|
314 |
-
|
315 |
-
# Chatbot Page
|
316 |
-
with gr.Group(visible=False) as chatbot_page:
|
317 |
-
gr.Markdown("### Chat with Assistant")
|
318 |
-
gr.Markdown("""
|
319 |
-
Tell me about your ideal repository. I'll help you find the perfect match!
|
320 |
-
What are you looking for? Consider:
|
321 |
-
- Your use case
|
322 |
-
- Preferred programming language
|
323 |
-
- Required features
|
324 |
-
- Any specific requirements
|
325 |
-
""")
|
326 |
-
chatbot = gr.Chatbot(
|
327 |
-
label="Chat with Assistant",
|
328 |
-
height=400,
|
329 |
-
type="messages"
|
330 |
-
)
|
331 |
-
msg = gr.Textbox(
|
332 |
-
label="Message",
|
333 |
-
placeholder="Type your message here...",
|
334 |
-
lines=2
|
335 |
-
)
|
336 |
-
with gr.Row():
|
337 |
-
send_btn = gr.Button("Send", variant="primary")
|
338 |
-
send_status = gr.Textbox(label="Status", visible=False)
|
339 |
-
end_chat_btn = gr.Button("End Chat", variant="secondary")
|
340 |
-
end_chat_status = gr.Textbox(label="Status", visible=False)
|
341 |
|
342 |
-
|
343 |
-
|
344 |
-
|
345 |
-
|
346 |
-
|
347 |
-
|
348 |
-
|
349 |
-
|
350 |
-
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
|
355 |
-
|
356 |
-
|
357 |
-
|
358 |
-
|
359 |
-
|
360 |
-
|
361 |
-
gr.Markdown("### Analysis History")
|
362 |
-
history_df = gr.Dataframe(
|
363 |
-
headers=["Date", "Repositories", "Keywords", "Results"],
|
364 |
-
datatype=["str", "str", "str", "str"]
|
365 |
-
)
|
366 |
-
back_to_results_btn = gr.Button("Back to Results", variant="primary")
|
367 |
-
|
368 |
-
# Navigation functions
|
369 |
-
def navigate_to(page: str) -> List[gr.update]:
|
370 |
-
"""Navigate to a specific page."""
|
371 |
-
updates = []
|
372 |
-
for p in ["start", "input", "analysis", "chatbot", "results", "help", "comparison", "history"]:
|
373 |
-
updates.append(gr.update(visible=(p == page)))
|
374 |
-
return updates
|
375 |
-
|
376 |
-
# Event handlers
|
377 |
-
start_btn.click(
|
378 |
-
fn=lambda: navigate_to("input"),
|
379 |
-
inputs=[],
|
380 |
-
outputs=[start_page, input_page, analysis_page, chatbot_page, results_page, help_page, comparison_page, history_page]
|
381 |
-
)
|
382 |
-
|
383 |
-
help_btn.click(
|
384 |
-
fn=lambda: navigate_to("help"),
|
385 |
-
inputs=[],
|
386 |
-
outputs=[start_page, input_page, analysis_page, chatbot_page, results_page, help_page, comparison_page, history_page]
|
387 |
-
)
|
388 |
-
|
389 |
-
back_btn.click(
|
390 |
-
fn=lambda: navigate_to("start"),
|
391 |
-
inputs=[],
|
392 |
-
outputs=[start_page, input_page, analysis_page, chatbot_page, results_page, help_page, comparison_page, history_page]
|
393 |
-
)
|
394 |
-
|
395 |
-
# Modified event handlers with status updates
|
396 |
-
def process_repo_input_with_status(text: str, state: AppState) -> Tuple[pd.DataFrame, str]:
|
397 |
-
"""Process repo input with status update."""
|
398 |
-
df = process_repo_input(text, state)
|
399 |
-
return df, ""
|
400 |
-
|
401 |
-
def keyword_search_with_status(keyword: str, state: AppState) -> Tuple[pd.DataFrame, str]:
|
402 |
-
"""Search keywords with status update."""
|
403 |
try:
|
404 |
-
if
|
405 |
-
|
406 |
-
|
407 |
-
|
408 |
-
|
|
|
|
|
|
|
|
|
|
|
409 |
|
410 |
-
|
411 |
-
|
412 |
-
results = search_top_spaces(kw, limit=5)
|
413 |
-
repo_ids.extend(results)
|
414 |
-
except Exception as e:
|
415 |
-
logger.error(f"Error searching for keyword {kw}: {e}")
|
416 |
-
continue
|
417 |
|
418 |
-
|
419 |
-
|
420 |
-
unique_repo_ids = []
|
421 |
-
for rid in repo_ids:
|
422 |
-
if rid not in seen:
|
423 |
-
unique_repo_ids.append(rid)
|
424 |
-
seen.add(rid)
|
425 |
|
426 |
-
state
|
|
|
427 |
state.current_repo_idx = 0
|
|
|
|
|
|
|
|
|
428 |
|
429 |
-
|
430 |
-
df = read_csv_as_text("repo_ids.csv")
|
431 |
-
return df, ""
|
432 |
|
433 |
except Exception as e:
|
434 |
-
logger.error(f"Error
|
435 |
-
return pd.DataFrame(
|
436 |
-
|
437 |
-
def
|
438 |
-
"""
|
439 |
-
|
440 |
-
|
441 |
-
|
442 |
-
|
443 |
-
"""Send message with status update."""
|
444 |
-
if not user_message:
|
445 |
-
return history, "", ""
|
446 |
-
history.append({"role": "user", "content": user_message})
|
447 |
-
response = chat_with_user(user_message, history, CHATBOT_SYSTEM_PROMPT)
|
448 |
history.append({"role": "assistant", "content": response})
|
449 |
-
return history, ""
|
450 |
-
|
451 |
-
def end_chat_with_status(history: List[Dict[str, str]], state: AppState) -> Tuple[List[str], gr.update, str]:
|
452 |
-
"""End chat and extract keywords."""
|
453 |
-
if not history:
|
454 |
-
return [], gr.update(visible=True), ""
|
455 |
-
keywords = extract_keywords_from_conversation(history)
|
456 |
-
state.generated_keywords = keywords
|
457 |
-
return keywords, gr.update(visible=True), ""
|
458 |
-
|
459 |
-
def export_with_status(df: pd.DataFrame) -> Tuple[str, str]:
|
460 |
-
"""Export with status update."""
|
461 |
-
result = export_results(df)
|
462 |
-
return result, ""
|
463 |
-
|
464 |
-
# Update event handlers with status updates
|
465 |
-
submit_btn.click(
|
466 |
-
fn=lambda: "Processing...",
|
467 |
-
inputs=[],
|
468 |
-
outputs=[submit_status]
|
469 |
-
).then(
|
470 |
-
fn=process_repo_input_with_status,
|
471 |
-
inputs=[repo_id_input, state],
|
472 |
-
outputs=[df_output, submit_status]
|
473 |
-
)
|
474 |
|
475 |
-
def
|
476 |
-
"""
|
477 |
-
return "
|
478 |
-
|
479 |
-
def search_complete(keyword: str, state: AppState):
|
480 |
-
"""Complete search operation."""
|
481 |
-
return keyword_search_with_status(keyword, state)
|
482 |
-
|
483 |
-
search_btn.click(
|
484 |
-
fn=search_click,
|
485 |
-
inputs=[],
|
486 |
-
outputs=[search_status]
|
487 |
-
).then(
|
488 |
-
fn=search_complete,
|
489 |
-
inputs=[keyword_input, state],
|
490 |
-
outputs=[df_output, search_status]
|
491 |
-
)
|
492 |
|
493 |
-
|
494 |
-
|
495 |
-
|
496 |
-
|
497 |
-
|
498 |
-
fn=analyze_with_status,
|
499 |
-
inputs=[state],
|
500 |
-
outputs=[content_output, summary_output, df_output, next_status]
|
501 |
)
|
502 |
|
503 |
send_btn.click(
|
504 |
-
fn=
|
505 |
-
inputs=[],
|
506 |
-
outputs=[send_status]
|
507 |
-
).then(
|
508 |
-
fn=send_message_with_status,
|
509 |
inputs=[msg, chatbot, state],
|
510 |
-
outputs=[chatbot, msg
|
511 |
-
)
|
512 |
-
|
513 |
-
end_chat_btn.click(
|
514 |
-
fn=lambda: "Processing...",
|
515 |
-
inputs=[],
|
516 |
-
outputs=[end_chat_status]
|
517 |
-
).then(
|
518 |
-
fn=end_chat_with_status,
|
519 |
-
inputs=[chatbot, state],
|
520 |
-
outputs=[gr.Textbox(label="Extracted Keywords"), results_page, end_chat_status]
|
521 |
-
)
|
522 |
-
|
523 |
-
export_btn.click(
|
524 |
-
fn=lambda: "Exporting...",
|
525 |
-
inputs=[],
|
526 |
-
outputs=[export_status]
|
527 |
-
).then(
|
528 |
-
fn=export_with_status,
|
529 |
-
inputs=[results_df],
|
530 |
-
outputs=[gr.Textbox(label="Export Status"), export_status]
|
531 |
-
)
|
532 |
-
|
533 |
-
restart_btn.click(
|
534 |
-
fn=lambda: (state.reset(), navigate_to("start")),
|
535 |
-
inputs=[state],
|
536 |
-
outputs=[start_page, input_page, analysis_page, chatbot_page, results_page]
|
537 |
-
)
|
538 |
-
|
539 |
-
def update_progress(current: int, total: int) -> float:
|
540 |
-
"""Update progress bar."""
|
541 |
-
return (current / total) * 100
|
542 |
-
|
543 |
-
def export_results(df: pd.DataFrame) -> str:
|
544 |
-
"""Export results to CSV."""
|
545 |
-
try:
|
546 |
-
filename = f"analysis_results_{pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')}.csv"
|
547 |
-
df.to_csv(filename, index=False)
|
548 |
-
return f"Results exported to {filename}"
|
549 |
-
except Exception as e:
|
550 |
-
return f"Error exporting results: {e}"
|
551 |
-
|
552 |
-
def load_history() -> pd.DataFrame:
|
553 |
-
"""Load analysis history."""
|
554 |
-
try:
|
555 |
-
return pd.read_csv("analysis_history.csv")
|
556 |
-
except:
|
557 |
-
return pd.DataFrame(columns=["Date", "Repositories", "Keywords", "Results"])
|
558 |
-
|
559 |
-
def save_to_history(repos: List[str], keywords: List[str], results: pd.DataFrame) -> None:
|
560 |
-
"""Save current analysis to history."""
|
561 |
-
try:
|
562 |
-
history_df = load_history()
|
563 |
-
new_row = {
|
564 |
-
"Date": pd.Timestamp.now().strftime("%Y-%m-%d %H:%M:%S"),
|
565 |
-
"Repositories": ", ".join(repos),
|
566 |
-
"Keywords": ", ".join(keywords),
|
567 |
-
"Results": results.to_json()
|
568 |
-
}
|
569 |
-
history_df = pd.concat([history_df, pd.DataFrame([new_row])], ignore_index=True)
|
570 |
-
history_df.to_csv("analysis_history.csv", index=False)
|
571 |
-
except Exception as e:
|
572 |
-
logger.error(f"Error saving to history: {e}")
|
573 |
-
|
574 |
-
# Add new event handlers for new features
|
575 |
-
history_btn.click(
|
576 |
-
fn=lambda: (load_history(), navigate_to("history")),
|
577 |
-
inputs=[],
|
578 |
-
outputs=[history_df, start_page, input_page, analysis_page, chatbot_page, results_page, help_page, comparison_page, history_page]
|
579 |
)
|
580 |
|
581 |
-
|
582 |
-
fn=
|
583 |
inputs=[],
|
584 |
-
outputs=[
|
585 |
-
)
|
586 |
-
|
587 |
-
compare_btn.click(
|
588 |
-
fn=lambda: (update_repo_choices(state), navigate_to("comparison")),
|
589 |
-
inputs=[state],
|
590 |
-
outputs=[repo1_select, repo2_select, start_page, input_page, analysis_page, chatbot_page, results_page, help_page, comparison_page, history_page]
|
591 |
)
|
592 |
-
|
593 |
-
back_to_analysis_btn.click(
|
594 |
-
fn=lambda: navigate_to("analysis"),
|
595 |
-
inputs=[],
|
596 |
-
outputs=[start_page, input_page, analysis_page, chatbot_page, results_page, help_page, comparison_page, history_page]
|
597 |
-
)
|
598 |
-
|
599 |
return app
|
600 |
|
601 |
-
def update_repo_choices(state: AppState) -> Tuple[List[str], List[str]]:
|
602 |
-
"""Update repository choices for comparison."""
|
603 |
-
choices = state.repo_ids
|
604 |
-
return choices, choices
|
605 |
-
|
606 |
if __name__ == "__main__":
|
607 |
app = create_ui()
|
608 |
app.launch()
|
|
|
2 |
import regex as re
|
3 |
import csv
|
4 |
import pandas as pd
|
5 |
+
from typing import List, Dict, Tuple, Optional
|
6 |
import logging
|
7 |
+
from datetime import datetime
|
8 |
import os
|
9 |
+
from huggingface_hub import HfApi, SpaceCard
|
10 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM
|
11 |
+
import torch
|
12 |
+
import json
|
|
|
|
|
|
|
|
|
|
|
13 |
|
14 |
# Configure logging
|
15 |
+
logging.basicConfig(level=logging.INFO)
|
|
|
|
|
|
|
16 |
logger = logging.getLogger(__name__)
|
17 |
|
18 |
# Constants
|
19 |
+
CSV_FILE = "repo_ids.csv"
|
20 |
+
CHATBOT_SYSTEM_PROMPT = """You are a helpful AI assistant that analyzes Hugging Face repositories.
|
21 |
+
Your task is to help users understand repositories, extract key information, and provide insights.
|
22 |
+
Be concise, clear, and focus on the most important aspects of each repository."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
|
|
|
24 |
class AppState:
|
25 |
+
"""Simple state management for the application."""
|
26 |
def __init__(self):
|
27 |
self.repo_ids: List[str] = []
|
28 |
self.current_repo_idx: int = 0
|
29 |
+
self.chat_history: List[Dict[str, str]] = []
|
|
|
|
|
|
|
|
|
|
|
30 |
|
31 |
+
def read_csv_as_text(filename: str) -> pd.DataFrame:
|
32 |
+
"""Read CSV file and return as DataFrame."""
|
|
|
33 |
try:
|
34 |
+
return pd.read_csv(filename)
|
35 |
except Exception as e:
|
36 |
+
logger.error(f"Error reading CSV: {e}")
|
37 |
return pd.DataFrame(columns=["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
|
38 |
|
39 |
+
def write_repos_to_csv(repo_ids: List[str]) -> None:
|
40 |
+
"""Write repository IDs to CSV file."""
|
41 |
try:
|
42 |
+
with open(CSV_FILE, 'w', newline='') as f:
|
43 |
+
writer = csv.writer(f)
|
44 |
writer.writerow(["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
|
45 |
for repo_id in repo_ids:
|
46 |
writer.writerow([repo_id, "", "", "", ""])
|
47 |
except Exception as e:
|
48 |
+
logger.error(f"Error writing to CSV: {e}")
|
49 |
|
50 |
+
def search_top_spaces(keyword: str, limit: int = 5) -> List[str]:
|
51 |
+
"""Search for repositories by keyword."""
|
52 |
+
try:
|
53 |
+
api = HfApi()
|
54 |
+
spaces = api.list_spaces(search=keyword, limit=limit)
|
55 |
+
return [space.id for space in spaces]
|
56 |
+
except Exception as e:
|
57 |
+
logger.error(f"Error searching spaces: {e}")
|
58 |
+
return []
|
|
|
|
|
|
|
|
|
59 |
|
60 |
+
def analyze_repo(repo_id: str) -> Tuple[str, str]:
|
61 |
+
"""Analyze a single repository."""
|
62 |
try:
|
63 |
+
api = HfApi()
|
64 |
+
space = api.get_space(repo_id)
|
65 |
+
card = SpaceCard.load(repo_id)
|
66 |
+
|
67 |
+
content = f"""
|
68 |
+
Repository: {repo_id}
|
69 |
+
Title: {card.title}
|
70 |
+
Description: {card.description}
|
71 |
+
Tags: {', '.join(card.tags)}
|
72 |
+
"""
|
73 |
+
|
74 |
+
summary = f"Analysis of {repo_id}:\n"
|
75 |
+
summary += f"- Title: {card.title}\n"
|
76 |
+
summary += f"- Main focus: {card.description[:200]}...\n"
|
77 |
+
summary += f"- Key tags: {', '.join(card.tags[:5])}\n"
|
78 |
+
|
79 |
+
return content, summary
|
|
|
|
|
|
|
|
|
|
|
80 |
except Exception as e:
|
81 |
logger.error(f"Error analyzing repo {repo_id}: {e}")
|
82 |
+
return f"Error analyzing {repo_id}", f"Error: {str(e)}"
|
83 |
|
84 |
+
def chat_with_user(message: str, history: List[Dict[str, str]], system_prompt: str) -> str:
|
85 |
+
"""Simple chat response."""
|
86 |
try:
|
87 |
+
# For now, return a simple response
|
88 |
+
return f"I understand you're asking about: {message}. How can I help you analyze this repository?"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
except Exception as e:
|
90 |
+
logger.error(f"Error in chat: {e}")
|
91 |
+
return "I apologize, but I encountered an error. Please try again."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
def create_ui() -> gr.Blocks:
|
94 |
+
"""Create a simplified Gradio interface."""
|
95 |
state = gr.State(AppState())
|
96 |
|
97 |
with gr.Blocks(title="Hugging Face Repo Analyzer", theme=gr.themes.Soft()) as app:
|
98 |
gr.Markdown("# Hugging Face Repository Analyzer")
|
99 |
|
100 |
+
with gr.Row():
|
101 |
+
with gr.Column():
|
102 |
+
# Input Section
|
103 |
+
gr.Markdown("### Enter Repository Information")
|
104 |
+
repo_input = gr.Textbox(
|
105 |
+
label="Enter repo IDs (comma or newline separated) or keywords to search",
|
106 |
+
lines=5,
|
107 |
+
placeholder="Enter repository IDs or keywords to search"
|
108 |
+
)
|
109 |
+
submit_btn = gr.Button("Submit", variant="primary")
|
110 |
+
status = gr.Textbox(label="Status", visible=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
111 |
|
112 |
+
# Results Section
|
113 |
+
df_output = gr.Dataframe(
|
114 |
+
headers=["repo id", "strength", "weaknesses", "speciality", "relevance rating"],
|
115 |
+
datatype=["str", "str", "str", "str", "str"]
|
116 |
+
)
|
117 |
+
|
118 |
+
# Analysis Section
|
119 |
+
content_output = gr.Textbox(label="Repository Content", lines=10)
|
120 |
+
summary_output = gr.Textbox(label="Analysis Summary", lines=5)
|
121 |
+
|
122 |
+
# Chat Section
|
123 |
+
chatbot = gr.Chatbot(label="Chat with Assistant", height=400)
|
124 |
+
msg = gr.Textbox(label="Message", placeholder="Ask about the repository...")
|
125 |
+
with gr.Row():
|
126 |
+
send_btn = gr.Button("Send", variant="primary")
|
127 |
+
clear_btn = gr.Button("Clear Chat", variant="secondary")
|
128 |
+
|
129 |
+
def process_input(text: str, state: AppState) -> Tuple[pd.DataFrame, str, str, str]:
|
130 |
+
"""Process input and return results."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
131 |
try:
|
132 |
+
# Check if input is keywords or repo IDs
|
133 |
+
if any(kw in text.lower() for kw in ['search', 'find', 'look for']):
|
134 |
+
# Handle as keyword search
|
135 |
+
keywords = [k.strip() for k in re.split(r'[\n,]+', text) if k.strip()]
|
136 |
+
repo_ids = []
|
137 |
+
for kw in keywords:
|
138 |
+
repo_ids.extend(search_top_spaces(kw, limit=5))
|
139 |
+
else:
|
140 |
+
# Handle as repo IDs
|
141 |
+
repo_ids = [rid.strip() for rid in re.split(r'[\n,]+', text) if rid.strip()]
|
142 |
|
143 |
+
# Remove duplicates
|
144 |
+
repo_ids = list(dict.fromkeys(repo_ids))
|
|
|
|
|
|
|
|
|
|
|
145 |
|
146 |
+
if not repo_ids:
|
147 |
+
return pd.DataFrame(), "No repositories found", "", ""
|
|
|
|
|
|
|
|
|
|
|
148 |
|
149 |
+
# Update state and CSV
|
150 |
+
state.repo_ids = repo_ids
|
151 |
state.current_repo_idx = 0
|
152 |
+
write_repos_to_csv(repo_ids)
|
153 |
+
|
154 |
+
# Get first repo analysis
|
155 |
+
content, summary = analyze_repo(repo_ids[0])
|
156 |
|
157 |
+
return read_csv_as_text(CSV_FILE), f"Found {len(repo_ids)} repositories", content, summary
|
|
|
|
|
158 |
|
159 |
except Exception as e:
|
160 |
+
logger.error(f"Error processing input: {e}")
|
161 |
+
return pd.DataFrame(), f"Error: {str(e)}", "", ""
|
162 |
+
|
163 |
+
def send_message(message: str, history: List[Dict[str, str]], state: AppState) -> Tuple[List[Dict[str, str]], str]:
|
164 |
+
"""Send message to chat."""
|
165 |
+
if not message:
|
166 |
+
return history, ""
|
167 |
+
history.append({"role": "user", "content": message})
|
168 |
+
response = chat_with_user(message, history, CHATBOT_SYSTEM_PROMPT)
|
|
|
|
|
|
|
|
|
|
|
169 |
history.append({"role": "assistant", "content": response})
|
170 |
+
return history, ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
171 |
|
172 |
+
def clear_chat() -> Tuple[List[Dict[str, str]], str]:
|
173 |
+
"""Clear chat history."""
|
174 |
+
return [], ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
175 |
|
176 |
+
# Event handlers
|
177 |
+
submit_btn.click(
|
178 |
+
fn=process_input,
|
179 |
+
inputs=[repo_input, state],
|
180 |
+
outputs=[df_output, status, content_output, summary_output]
|
|
|
|
|
|
|
181 |
)
|
182 |
|
183 |
send_btn.click(
|
184 |
+
fn=send_message,
|
|
|
|
|
|
|
|
|
185 |
inputs=[msg, chatbot, state],
|
186 |
+
outputs=[chatbot, msg]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
187 |
)
|
188 |
|
189 |
+
clear_btn.click(
|
190 |
+
fn=clear_chat,
|
191 |
inputs=[],
|
192 |
+
outputs=[chatbot, msg]
|
|
|
|
|
|
|
|
|
|
|
|
|
193 |
)
|
194 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
195 |
return app
|
196 |
|
|
|
|
|
|
|
|
|
|
|
197 |
if __name__ == "__main__":
|
198 |
app = create_ui()
|
199 |
app.launch()
|