naman1102 committed on
Commit
3c63f39
·
1 Parent(s): 2001b9b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +133 -542
app.py CHANGED
@@ -2,607 +2,198 @@ import gradio as gr
2
  import regex as re
3
  import csv
4
  import pandas as pd
5
- from typing import Dict, List, Tuple, Optional, Any
6
  import logging
7
- from pathlib import Path
8
  import os
9
-
10
- from analyzer import (
11
- combine_repo_files_for_llm,
12
- analyze_combined_file,
13
- parse_llm_json_response,
14
- analyze_code
15
- )
16
- from hf_utils import download_space_repo, search_top_spaces
17
- from chatbot_page import chat_with_user, extract_keywords_from_conversation
18
 
19
  # Configure logging
20
- logging.basicConfig(
21
- level=logging.INFO,
22
- format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
23
- )
24
  logger = logging.getLogger(__name__)
25
 
26
  # Constants
27
- CHATBOT_SYSTEM_PROMPT = (
28
- "You are a helpful assistant. Your goal is to help the user describe their ideal open-source repo. "
29
- "Ask questions to clarify what they want, their use case, preferred language, features, etc. "
30
- "When the user clicks 'End Chat', analyze the conversation and return about 5 keywords for repo search. "
31
- "Return only the keywords as a comma-separated list."
32
- )
33
-
34
- CHATBOT_INITIAL_MESSAGE = (
35
- "Hello! Please tell me about your ideal Hugging Face repo. "
36
- "What use case, preferred language, or features are you looking for?"
37
- )
38
 
39
- # State management
40
  class AppState:
 
41
  def __init__(self):
42
  self.repo_ids: List[str] = []
43
  self.current_repo_idx: int = 0
44
- self.generated_keywords: List[str] = []
45
- self.analysis_results: Dict[str, Dict[str, Any]] = {}
46
- self.chat_history: List[Tuple[str, str]] = []
47
-
48
- def reset(self):
49
- self.__init__()
50
 
51
- # Helper functions
52
- def read_csv_as_text(csv_filename: str) -> pd.DataFrame:
53
- """Read CSV file and return as DataFrame with string dtype."""
54
  try:
55
- return pd.read_csv(csv_filename, dtype=str)
56
  except Exception as e:
57
- logger.error(f"Error reading CSV file {csv_filename}: {e}")
58
  return pd.DataFrame(columns=["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
59
 
60
- def write_repos_to_csv(repo_ids: List[str], csv_filename: str = "repo_ids.csv") -> None:
61
- """Write repo IDs to CSV file."""
62
  try:
63
- with open(csv_filename, mode="w", newline='', encoding="utf-8") as csvfile:
64
- writer = csv.writer(csvfile)
65
  writer.writerow(["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
66
  for repo_id in repo_ids:
67
  writer.writerow([repo_id, "", "", "", ""])
68
  except Exception as e:
69
- logger.error(f"Error writing to CSV file {csv_filename}: {e}")
70
 
71
- def process_repo_input(text: str, state: AppState) -> pd.DataFrame:
72
- """Process input text containing repo IDs and update state."""
73
- if not text:
74
- state.repo_ids = []
75
- state.current_repo_idx = 0
76
- return pd.DataFrame(columns=["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
77
-
78
- repo_ids = [repo.strip() for repo in re.split(r'[\n,]+', text) if repo.strip()]
79
- state.repo_ids = repo_ids
80
- state.current_repo_idx = 0
81
-
82
- write_repos_to_csv(repo_ids)
83
- return read_csv_as_text("repo_ids.csv")
84
 
85
- def analyze_single_repo(repo_id: str) -> Tuple[str, str, Dict[str, Any]]:
86
- """Analyze a single repository and return combined content, summary, and analysis results."""
87
  try:
88
- download_space_repo(repo_id, local_dir="repo_files")
89
- txt_path = combine_repo_files_for_llm()
90
-
91
- with open(txt_path, "r", encoding="utf-8") as f:
92
- combined_content = f.read()
93
-
94
- llm_output = analyze_combined_file(txt_path)
95
- last_start = llm_output.rfind('{')
96
- last_end = llm_output.rfind('}')
97
-
98
- final_json_str = llm_output[last_start:last_end+1] if last_start != -1 and last_end != -1 and last_end > last_start else llm_output
99
- llm_json = parse_llm_json_response(final_json_str)
100
-
101
- if isinstance(llm_json, dict) and "error" not in llm_json:
102
- strengths = llm_json.get("strength", "")
103
- weaknesses = llm_json.get("weaknesses", "")
104
- summary = f"JSON extraction: SUCCESS\n\nStrengths:\n{strengths}\n\nWeaknesses:\n{weaknesses}"
105
- else:
106
- summary = f"JSON extraction: FAILED\nRaw: {llm_json.get('raw', '') if isinstance(llm_json, dict) else llm_json}"
107
-
108
- return combined_content, summary, llm_json
109
-
110
  except Exception as e:
111
  logger.error(f"Error analyzing repo {repo_id}: {e}")
112
- return "", f"Error analyzing repo: {e}", {"error": str(e)}
113
 
114
- def update_csv_with_analysis(repo_id: str, analysis_results: Dict[str, Any], csv_filename: str = "repo_ids.csv") -> pd.DataFrame:
115
- """Update CSV file with analysis results for a repository."""
116
  try:
117
- df = read_csv_as_text(csv_filename)
118
- updated = False
119
-
120
- for idx, row in df.iterrows():
121
- if row["repo id"] == repo_id:
122
- if isinstance(analysis_results, dict) and "error" not in analysis_results:
123
- df.at[idx, "strength"] = analysis_results.get("strength", "")
124
- df.at[idx, "weaknesses"] = analysis_results.get("weaknesses", "")
125
- df.at[idx, "speciality"] = analysis_results.get("speciality", "")
126
- df.at[idx, "relevance rating"] = analysis_results.get("relevance rating", "")
127
- updated = True
128
- break
129
-
130
- if not updated and isinstance(analysis_results, dict) and "error" not in analysis_results:
131
- new_row = {
132
- "repo id": repo_id,
133
- "strength": analysis_results.get("strength", ""),
134
- "weaknesses": analysis_results.get("weaknesses", ""),
135
- "speciality": analysis_results.get("speciality", ""),
136
- "relevance rating": analysis_results.get("relevance rating", "")
137
- }
138
- df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
139
-
140
- df.to_csv(csv_filename, index=False)
141
- return df
142
-
143
  except Exception as e:
144
- logger.error(f"Error updating CSV for repo {repo_id}: {e}")
145
- return read_csv_as_text(csv_filename)
146
-
147
- def show_combined_repo_and_llm(state: AppState) -> Tuple[str, str, pd.DataFrame]:
148
- """Show combined repo content and LLM analysis for current repo."""
149
- if not state.repo_ids:
150
- return "No repo ID available. Please submit repo IDs first.", "", pd.DataFrame()
151
-
152
- if state.current_repo_idx >= len(state.repo_ids):
153
- return "All repo IDs have been processed.", "", read_csv_as_text("repo_ids.csv")
154
-
155
- repo_id = state.repo_ids[state.current_repo_idx]
156
- combined_content, summary, analysis_results = analyze_single_repo(repo_id)
157
- df = update_csv_with_analysis(repo_id, analysis_results)
158
-
159
- state.current_repo_idx += 1
160
- return combined_content, summary, df
161
 
162
- def keyword_search_and_update(keyword: str, state: AppState) -> pd.DataFrame:
163
- """Search for repos using keywords and update state."""
164
- if not keyword:
165
- return pd.DataFrame(columns=["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
166
-
167
- keyword_list = [k.strip() for k in re.split(r'[\n,]+', keyword) if k.strip()]
168
- repo_ids = []
169
-
170
- for kw in keyword_list:
171
- repo_ids.extend(search_top_spaces(kw, limit=5))
172
-
173
- # Remove duplicates while preserving order
174
- seen = set()
175
- unique_repo_ids = []
176
- for rid in repo_ids:
177
- if rid not in seen:
178
- unique_repo_ids.append(rid)
179
- seen.add(rid)
180
-
181
- state.repo_ids = unique_repo_ids
182
- state.current_repo_idx = 0
183
-
184
- write_repos_to_csv(unique_repo_ids)
185
- return read_csv_as_text("repo_ids.csv")
186
-
187
- # UI Components
188
  def create_ui() -> gr.Blocks:
189
- """Create the Gradio interface."""
190
  state = gr.State(AppState())
191
 
192
  with gr.Blocks(title="Hugging Face Repo Analyzer", theme=gr.themes.Soft()) as app:
193
  gr.Markdown("# Hugging Face Repository Analyzer")
194
 
195
- # Navigation state
196
- current_page = gr.State("start")
197
-
198
- # Start Page
199
- with gr.Group(visible=True) as start_page:
200
- gr.Markdown("""
201
- # Welcome to the Hugging Face Repository Analyzer!
202
-
203
- This tool helps you analyze and understand Hugging Face repositories. You can:
204
- - Enter repository IDs directly
205
- - Search repositories using keywords
206
- - Chat with an AI assistant to find the perfect repository
207
- - Get detailed analysis of repositories
208
-
209
- Click 'Start Analysis' to begin!
210
- """)
211
- with gr.Row():
212
- start_btn = gr.Button("Start Analysis", variant="primary")
213
- help_btn = gr.Button("View Help Guide", variant="secondary")
214
-
215
- # Help Guide
216
- with gr.Group(visible=False) as help_page:
217
- gr.Markdown("""
218
- # Help Guide
219
-
220
- ## Quick Start
221
- 1. Enter repository IDs or search by keywords
222
- 2. Start the analysis
223
- 3. Review the results
224
-
225
- ## Features
226
- - **Repository Analysis**: Get detailed insights about repositories
227
- - **Keyword Search**: Find repositories matching your criteria
228
- - **AI Assistant**: Chat to find the perfect repository
229
- - **Comparison**: Compare repositories side by side
230
-
231
- ## Keyboard Shortcuts
232
- - `Ctrl + Enter`: Send message in chat
233
- - `Ctrl + S`: Start new analysis
234
- - `Ctrl + H`: Toggle help guide
235
- """)
236
- back_btn = gr.Button("Back to Start", variant="primary")
237
-
238
- # Input Page
239
- with gr.Group(visible=False) as input_page:
240
- with gr.Row():
241
- with gr.Column():
242
- gr.Markdown("### Enter Repository IDs")
243
- repo_id_input = gr.Textbox(
244
- label="Enter repo IDs (comma or newline separated)",
245
- lines=5,
246
- placeholder="repo1, repo2\nrepo3"
247
- )
248
- submit_btn = gr.Button("Submit Repo IDs", variant="primary")
249
- submit_status = gr.Textbox(label="Status", visible=False)
250
-
251
- with gr.Column():
252
- gr.Markdown("### Or Search by Keywords")
253
- keyword_input = gr.Textbox(
254
- label="Enter keywords to search",
255
- lines=3,
256
- placeholder="Enter keywords separated by commas"
257
- )
258
- search_btn = gr.Button("Search by Keywords", variant="primary")
259
- search_status = gr.Textbox(label="Status", visible=False)
260
-
261
- df_output = gr.Dataframe(
262
- headers=["repo id", "strength", "weaknesses", "speciality", "relevance rating"],
263
- datatype=["str", "str", "str", "str", "str"]
264
- )
265
- with gr.Row():
266
- analyze_btn = gr.Button("Start Analysis", variant="primary")
267
- analyze_status = gr.Textbox(label="Status", visible=False)
268
- compare_btn = gr.Button("Compare Repositories", variant="secondary")
269
-
270
- # Analysis Page
271
- with gr.Group(visible=False) as analysis_page:
272
- gr.Markdown("### Repository Analysis")
273
- progress = gr.Slider(
274
- minimum=0,
275
- maximum=100,
276
- value=0,
277
- label="Analysis Progress",
278
- interactive=False
279
- )
280
- with gr.Row():
281
- with gr.Column():
282
- content_output = gr.Textbox(label="Repository Content", lines=10)
283
- with gr.Column():
284
- summary_output = gr.Textbox(label="Analysis Summary", lines=10)
285
- with gr.Row():
286
- next_btn = gr.Button("Analyze Next Repository", variant="primary")
287
- next_status = gr.Textbox(label="Status", visible=False)
288
- finish_btn = gr.Button("Finish Analysis", variant="secondary")
289
- export_btn = gr.Button("Export Results", variant="secondary")
290
- export_status = gr.Textbox(label="Status", visible=False)
291
-
292
- # Comparison Page
293
- with gr.Group(visible=False) as comparison_page:
294
- gr.Markdown("### Repository Comparison")
295
- with gr.Row():
296
- with gr.Column():
297
- repo1_select = gr.Dropdown(
298
- label="Select First Repository",
299
- choices=[],
300
- interactive=True
301
- )
302
- repo1_content = gr.Textbox(label="Repository 1 Content", lines=10)
303
- repo1_summary = gr.Textbox(label="Repository 1 Summary", lines=10)
304
- with gr.Column():
305
- repo2_select = gr.Dropdown(
306
- label="Select Second Repository",
307
- choices=[],
308
- interactive=True
309
- )
310
- repo2_content = gr.Textbox(label="Repository 2 Content", lines=10)
311
- repo2_summary = gr.Textbox(label="Repository 2 Summary", lines=10)
312
- compare_btn = gr.Button("Compare", variant="primary")
313
- back_to_analysis_btn = gr.Button("Back to Analysis", variant="secondary")
314
-
315
- # Chatbot Page
316
- with gr.Group(visible=False) as chatbot_page:
317
- gr.Markdown("### Chat with Assistant")
318
- gr.Markdown("""
319
- Tell me about your ideal repository. I'll help you find the perfect match!
320
- What are you looking for? Consider:
321
- - Your use case
322
- - Preferred programming language
323
- - Required features
324
- - Any specific requirements
325
- """)
326
- chatbot = gr.Chatbot(
327
- label="Chat with Assistant",
328
- height=400,
329
- type="messages"
330
- )
331
- msg = gr.Textbox(
332
- label="Message",
333
- placeholder="Type your message here...",
334
- lines=2
335
- )
336
- with gr.Row():
337
- send_btn = gr.Button("Send", variant="primary")
338
- send_status = gr.Textbox(label="Status", visible=False)
339
- end_chat_btn = gr.Button("End Chat", variant="secondary")
340
- end_chat_status = gr.Textbox(label="Status", visible=False)
341
 
342
- # Results Page
343
- with gr.Group(visible=False) as results_page:
344
- gr.Markdown("### Analysis Results")
345
- with gr.Row():
346
- with gr.Column():
347
- results_df = gr.Dataframe(
348
- headers=["repo id", "strength", "weaknesses", "speciality", "relevance rating"],
349
- datatype=["str", "str", "str", "str", "str"]
350
- )
351
- with gr.Column():
352
- gr.Markdown("### Repository Metrics")
353
- metrics_plot = gr.Plot(label="Repository Metrics")
354
- with gr.Row():
355
- restart_btn = gr.Button("Start New Analysis", variant="primary")
356
- export_btn = gr.Button("Export Results", variant="secondary")
357
- history_btn = gr.Button("View History", variant="secondary")
358
-
359
- # History Page
360
- with gr.Group(visible=False) as history_page:
361
- gr.Markdown("### Analysis History")
362
- history_df = gr.Dataframe(
363
- headers=["Date", "Repositories", "Keywords", "Results"],
364
- datatype=["str", "str", "str", "str"]
365
- )
366
- back_to_results_btn = gr.Button("Back to Results", variant="primary")
367
-
368
- # Navigation functions
369
- def navigate_to(page: str) -> List[gr.update]:
370
- """Navigate to a specific page."""
371
- updates = []
372
- for p in ["start", "input", "analysis", "chatbot", "results", "help", "comparison", "history"]:
373
- updates.append(gr.update(visible=(p == page)))
374
- return updates
375
-
376
- # Event handlers
377
- start_btn.click(
378
- fn=lambda: navigate_to("input"),
379
- inputs=[],
380
- outputs=[start_page, input_page, analysis_page, chatbot_page, results_page, help_page, comparison_page, history_page]
381
- )
382
-
383
- help_btn.click(
384
- fn=lambda: navigate_to("help"),
385
- inputs=[],
386
- outputs=[start_page, input_page, analysis_page, chatbot_page, results_page, help_page, comparison_page, history_page]
387
- )
388
-
389
- back_btn.click(
390
- fn=lambda: navigate_to("start"),
391
- inputs=[],
392
- outputs=[start_page, input_page, analysis_page, chatbot_page, results_page, help_page, comparison_page, history_page]
393
- )
394
-
395
- # Modified event handlers with status updates
396
- def process_repo_input_with_status(text: str, state: AppState) -> Tuple[pd.DataFrame, str]:
397
- """Process repo input with status update."""
398
- df = process_repo_input(text, state)
399
- return df, ""
400
-
401
- def keyword_search_with_status(keyword: str, state: AppState) -> Tuple[pd.DataFrame, str]:
402
- """Search keywords with status update."""
403
  try:
404
- if not keyword:
405
- return pd.DataFrame(columns=["repo id", "strength", "weaknesses", "speciality", "relevance rating"]), ""
406
-
407
- keyword_list = [k.strip() for k in re.split(r'[\n,]+', keyword) if k.strip()]
408
- repo_ids = []
 
 
 
 
 
409
 
410
- for kw in keyword_list:
411
- try:
412
- results = search_top_spaces(kw, limit=5)
413
- repo_ids.extend(results)
414
- except Exception as e:
415
- logger.error(f"Error searching for keyword {kw}: {e}")
416
- continue
417
 
418
- # Remove duplicates while preserving order
419
- seen = set()
420
- unique_repo_ids = []
421
- for rid in repo_ids:
422
- if rid not in seen:
423
- unique_repo_ids.append(rid)
424
- seen.add(rid)
425
 
426
- state.repo_ids = unique_repo_ids
 
427
  state.current_repo_idx = 0
 
 
 
 
428
 
429
- write_repos_to_csv(unique_repo_ids)
430
- df = read_csv_as_text("repo_ids.csv")
431
- return df, ""
432
 
433
  except Exception as e:
434
- logger.error(f"Error in keyword search: {e}")
435
- return pd.DataFrame(columns=["repo id", "strength", "weaknesses", "speciality", "relevance rating"]), f"Error: {str(e)}"
436
-
437
- def analyze_with_status(state: AppState) -> Tuple[str, str, pd.DataFrame, str]:
438
- """Analyze with status update."""
439
- content, summary, df = show_combined_repo_and_llm(state)
440
- return content, summary, df, ""
441
-
442
- def send_message_with_status(user_message: str, history: List[Dict[str, str]], state: AppState) -> Tuple[List[Dict[str, str]], str, str]:
443
- """Send message with status update."""
444
- if not user_message:
445
- return history, "", ""
446
- history.append({"role": "user", "content": user_message})
447
- response = chat_with_user(user_message, history, CHATBOT_SYSTEM_PROMPT)
448
  history.append({"role": "assistant", "content": response})
449
- return history, "", ""
450
-
451
- def end_chat_with_status(history: List[Dict[str, str]], state: AppState) -> Tuple[List[str], gr.update, str]:
452
- """End chat and extract keywords."""
453
- if not history:
454
- return [], gr.update(visible=True), ""
455
- keywords = extract_keywords_from_conversation(history)
456
- state.generated_keywords = keywords
457
- return keywords, gr.update(visible=True), ""
458
-
459
- def export_with_status(df: pd.DataFrame) -> Tuple[str, str]:
460
- """Export with status update."""
461
- result = export_results(df)
462
- return result, ""
463
-
464
- # Update event handlers with status updates
465
- submit_btn.click(
466
- fn=lambda: "Processing...",
467
- inputs=[],
468
- outputs=[submit_status]
469
- ).then(
470
- fn=process_repo_input_with_status,
471
- inputs=[repo_id_input, state],
472
- outputs=[df_output, submit_status]
473
- )
474
 
475
- def search_click():
476
- """Handle search button click."""
477
- return "Searching..."
478
-
479
- def search_complete(keyword: str, state: AppState):
480
- """Complete search operation."""
481
- return keyword_search_with_status(keyword, state)
482
-
483
- search_btn.click(
484
- fn=search_click,
485
- inputs=[],
486
- outputs=[search_status]
487
- ).then(
488
- fn=search_complete,
489
- inputs=[keyword_input, state],
490
- outputs=[df_output, search_status]
491
- )
492
 
493
- next_btn.click(
494
- fn=lambda: "Analyzing...",
495
- inputs=[],
496
- outputs=[next_status]
497
- ).then(
498
- fn=analyze_with_status,
499
- inputs=[state],
500
- outputs=[content_output, summary_output, df_output, next_status]
501
  )
502
 
503
  send_btn.click(
504
- fn=lambda: "Sending...",
505
- inputs=[],
506
- outputs=[send_status]
507
- ).then(
508
- fn=send_message_with_status,
509
  inputs=[msg, chatbot, state],
510
- outputs=[chatbot, msg, send_status]
511
- )
512
-
513
- end_chat_btn.click(
514
- fn=lambda: "Processing...",
515
- inputs=[],
516
- outputs=[end_chat_status]
517
- ).then(
518
- fn=end_chat_with_status,
519
- inputs=[chatbot, state],
520
- outputs=[gr.Textbox(label="Extracted Keywords"), results_page, end_chat_status]
521
- )
522
-
523
- export_btn.click(
524
- fn=lambda: "Exporting...",
525
- inputs=[],
526
- outputs=[export_status]
527
- ).then(
528
- fn=export_with_status,
529
- inputs=[results_df],
530
- outputs=[gr.Textbox(label="Export Status"), export_status]
531
- )
532
-
533
- restart_btn.click(
534
- fn=lambda: (state.reset(), navigate_to("start")),
535
- inputs=[state],
536
- outputs=[start_page, input_page, analysis_page, chatbot_page, results_page]
537
- )
538
-
539
- def update_progress(current: int, total: int) -> float:
540
- """Update progress bar."""
541
- return (current / total) * 100
542
-
543
- def export_results(df: pd.DataFrame) -> str:
544
- """Export results to CSV."""
545
- try:
546
- filename = f"analysis_results_{pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')}.csv"
547
- df.to_csv(filename, index=False)
548
- return f"Results exported to {filename}"
549
- except Exception as e:
550
- return f"Error exporting results: {e}"
551
-
552
- def load_history() -> pd.DataFrame:
553
- """Load analysis history."""
554
- try:
555
- return pd.read_csv("analysis_history.csv")
556
- except:
557
- return pd.DataFrame(columns=["Date", "Repositories", "Keywords", "Results"])
558
-
559
- def save_to_history(repos: List[str], keywords: List[str], results: pd.DataFrame) -> None:
560
- """Save current analysis to history."""
561
- try:
562
- history_df = load_history()
563
- new_row = {
564
- "Date": pd.Timestamp.now().strftime("%Y-%m-%d %H:%M:%S"),
565
- "Repositories": ", ".join(repos),
566
- "Keywords": ", ".join(keywords),
567
- "Results": results.to_json()
568
- }
569
- history_df = pd.concat([history_df, pd.DataFrame([new_row])], ignore_index=True)
570
- history_df.to_csv("analysis_history.csv", index=False)
571
- except Exception as e:
572
- logger.error(f"Error saving to history: {e}")
573
-
574
- # Add new event handlers for new features
575
- history_btn.click(
576
- fn=lambda: (load_history(), navigate_to("history")),
577
- inputs=[],
578
- outputs=[history_df, start_page, input_page, analysis_page, chatbot_page, results_page, help_page, comparison_page, history_page]
579
  )
580
 
581
- back_to_results_btn.click(
582
- fn=lambda: navigate_to("results"),
583
  inputs=[],
584
- outputs=[start_page, input_page, analysis_page, chatbot_page, results_page, help_page, comparison_page, history_page]
585
- )
586
-
587
- compare_btn.click(
588
- fn=lambda: (update_repo_choices(state), navigate_to("comparison")),
589
- inputs=[state],
590
- outputs=[repo1_select, repo2_select, start_page, input_page, analysis_page, chatbot_page, results_page, help_page, comparison_page, history_page]
591
  )
592
-
593
- back_to_analysis_btn.click(
594
- fn=lambda: navigate_to("analysis"),
595
- inputs=[],
596
- outputs=[start_page, input_page, analysis_page, chatbot_page, results_page, help_page, comparison_page, history_page]
597
- )
598
-
599
  return app
600
 
601
- def update_repo_choices(state: AppState) -> Tuple[List[str], List[str]]:
602
- """Update repository choices for comparison."""
603
- choices = state.repo_ids
604
- return choices, choices
605
-
606
  if __name__ == "__main__":
607
  app = create_ui()
608
  app.launch()
 
2
  import regex as re
3
  import csv
4
  import pandas as pd
5
+ from typing import List, Dict, Tuple, Optional
6
  import logging
7
+ from datetime import datetime
8
  import os
9
+ from huggingface_hub import HfApi, SpaceCard
10
+ from transformers import AutoTokenizer, AutoModelForCausalLM
11
+ import torch
12
+ import json
 
 
 
 
 
13
 
14
  # Configure logging
15
+ logging.basicConfig(level=logging.INFO)
 
 
 
16
  logger = logging.getLogger(__name__)
17
 
18
  # Constants
19
+ CSV_FILE = "repo_ids.csv"
20
+ CHATBOT_SYSTEM_PROMPT = """You are a helpful AI assistant that analyzes Hugging Face repositories.
21
+ Your task is to help users understand repositories, extract key information, and provide insights.
22
+ Be concise, clear, and focus on the most important aspects of each repository."""
 
 
 
 
 
 
 
23
 
 
24
  class AppState:
25
+ """Simple state management for the application."""
26
  def __init__(self):
27
  self.repo_ids: List[str] = []
28
  self.current_repo_idx: int = 0
29
+ self.chat_history: List[Dict[str, str]] = []
 
 
 
 
 
30
 
31
def read_csv_as_text(filename: str) -> pd.DataFrame:
    """Load a CSV file with every cell kept as text.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A DataFrame whose cells are all strings; blank cells come back as
        ``""``. If the file cannot be read or parsed, the error is logged and
        an empty DataFrame with the five analysis columns is returned instead.
    """
    try:
        # dtype=str keeps identifiers such as "007" intact instead of letting
        # pandas coerce them to numbers; keep_default_na=False stops blank
        # cells (and literal names like "NA" or "null") from becoming float NaN.
        return pd.read_csv(filename, dtype=str, keep_default_na=False)
    except Exception as e:
        # Best-effort by design: callers always get a DataFrame to display.
        logger.error(f"Error reading CSV: {e}")
        return pd.DataFrame(columns=["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
38
 
39
def write_repos_to_csv(repo_ids: List[str], csv_filename: Optional[str] = None) -> None:
    """Write repository IDs to a CSV file with blank analysis columns.

    The file is overwritten. It gets a header row followed by one row per
    repository ID, with the four analysis columns left empty.

    Args:
        repo_ids: Repository identifiers to store, in order.
        csv_filename: Destination path. When omitted, the module-level
            ``CSV_FILE`` is used, which matches the previous behaviour.

    Returns:
        None. I/O errors are logged rather than raised.
    """
    # Resolve the default at call time so the signature does not depend on the
    # constant being defined before this function.
    target = CSV_FILE if csv_filename is None else csv_filename
    try:
        # Explicit UTF-8 so the file does not depend on the platform locale and
        # can be read back by pandas, which decodes as UTF-8 by default.
        with open(target, 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerow(["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
            writer.writerows([repo_id, "", "", "", ""] for repo_id in repo_ids)
    except Exception as e:
        logger.error(f"Error writing to CSV: {e}")
49
 
50
def search_top_spaces(keyword: str, limit: int = 5) -> List[str]:
    """Look up Spaces matching a keyword and return their IDs.

    Args:
        keyword: Search term passed to ``HfApi.list_spaces``.
        limit: Maximum number of results requested from the API.

    Returns:
        The ``id`` of each Space returned, in API order. Any failure is logged
        and an empty list is returned.
    """
    try:
        matches = HfApi().list_spaces(search=keyword, limit=limit)
        found = []
        for space in matches:
            found.append(space.id)
        return found
    except Exception as e:
        logger.error(f"Error searching spaces: {e}")
        return []
 
 
 
 
59
 
60
def analyze_repo(repo_id: str) -> Tuple[str, str]:
    """Build a plain-text overview and a short summary from a Space's card.

    Args:
        repo_id: Identifier of the Space whose card (README) is loaded.

    Returns:
        ``(content, summary)``. ``content`` lists the repository ID, title,
        description and tags; ``summary`` is a condensed version with the
        description cut to 200 characters and at most five tags. If the card
        cannot be loaded, the error is logged and
        ``("Error analyzing <repo_id>", "Error: <message>")`` is returned.
    """
    try:
        # A SpaceCard keeps its YAML front matter under `.data` and the README
        # body under `.text`; the card object itself has no title, description
        # or tags attributes, so reading them directly always raised.
        # The previous, unused metadata lookup is dropped: loading the card
        # already fails for repositories that cannot be fetched.
        card = SpaceCard.load(repo_id)
        meta = card.data

        # Any of these fields may be absent from a card, so fall back to safe
        # defaults instead of slicing or joining None.
        title = getattr(meta, "title", None) or "N/A"
        description = getattr(meta, "short_description", None) or (card.text or "").strip()
        tags = [str(tag) for tag in (getattr(meta, "tags", None) or [])]

        content = (
            "\n"
            f"Repository: {repo_id}\n"
            f"Title: {title}\n"
            f"Description: {description}\n"
            f"Tags: {', '.join(tags)}\n"
        )

        summary = f"Analysis of {repo_id}:\n"
        summary += f"- Title: {title}\n"
        summary += f"- Main focus: {description[:200]}...\n"
        summary += f"- Key tags: {', '.join(tags[:5])}\n"

        return content, summary
    except Exception as e:
        logger.error(f"Error analyzing repo {repo_id}: {e}")
        return f"Error analyzing {repo_id}", f"Error: {str(e)}"
83
 
84
def chat_with_user(message: str, history: List[Dict[str, str]], system_prompt: str) -> str:
    """Produce the assistant's reply to a user message.

    This is a placeholder: the reply only echoes ``message``. ``history`` and
    ``system_prompt`` are accepted but not used.

    Args:
        message: The user's latest message.
        history: Prior conversation turns (unused).
        system_prompt: System prompt for the assistant (unused).

    Returns:
        The canned reply, or an apology string if building it fails.
    """
    try:
        # Canned echo response until a real model is wired in.
        reply = f"I understand you're asking about: {message}. How can I help you analyze this repository?"
    except Exception as e:
        logger.error(f"Error in chat: {e}")
        reply = "I apologize, but I encountered an error. Please try again."
    return reply
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  def create_ui() -> gr.Blocks:
94
+ """Create a simplified Gradio interface."""
95
  state = gr.State(AppState())
96
 
97
  with gr.Blocks(title="Hugging Face Repo Analyzer", theme=gr.themes.Soft()) as app:
98
  gr.Markdown("# Hugging Face Repository Analyzer")
99
 
100
+ with gr.Row():
101
+ with gr.Column():
102
+ # Input Section
103
+ gr.Markdown("### Enter Repository Information")
104
+ repo_input = gr.Textbox(
105
+ label="Enter repo IDs (comma or newline separated) or keywords to search",
106
+ lines=5,
107
+ placeholder="Enter repository IDs or keywords to search"
108
+ )
109
+ submit_btn = gr.Button("Submit", variant="primary")
110
+ status = gr.Textbox(label="Status", visible=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
 
112
+ # Results Section
113
+ df_output = gr.Dataframe(
114
+ headers=["repo id", "strength", "weaknesses", "speciality", "relevance rating"],
115
+ datatype=["str", "str", "str", "str", "str"]
116
+ )
117
+
118
+ # Analysis Section
119
+ content_output = gr.Textbox(label="Repository Content", lines=10)
120
+ summary_output = gr.Textbox(label="Analysis Summary", lines=5)
121
+
122
+ # Chat Section
123
+ chatbot = gr.Chatbot(label="Chat with Assistant", height=400)
124
+ msg = gr.Textbox(label="Message", placeholder="Ask about the repository...")
125
+ with gr.Row():
126
+ send_btn = gr.Button("Send", variant="primary")
127
+ clear_btn = gr.Button("Clear Chat", variant="secondary")
128
+
129
+ def process_input(text: str, state: AppState) -> Tuple[pd.DataFrame, str, str, str]:
130
+ """Process input and return results."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  try:
132
+ # Check if input is keywords or repo IDs
133
+ if any(kw in text.lower() for kw in ['search', 'find', 'look for']):
134
+ # Handle as keyword search
135
+ keywords = [k.strip() for k in re.split(r'[\n,]+', text) if k.strip()]
136
+ repo_ids = []
137
+ for kw in keywords:
138
+ repo_ids.extend(search_top_spaces(kw, limit=5))
139
+ else:
140
+ # Handle as repo IDs
141
+ repo_ids = [rid.strip() for rid in re.split(r'[\n,]+', text) if rid.strip()]
142
 
143
+ # Remove duplicates
144
+ repo_ids = list(dict.fromkeys(repo_ids))
 
 
 
 
 
145
 
146
+ if not repo_ids:
147
+ return pd.DataFrame(), "No repositories found", "", ""
 
 
 
 
 
148
 
149
+ # Update state and CSV
150
+ state.repo_ids = repo_ids
151
  state.current_repo_idx = 0
152
+ write_repos_to_csv(repo_ids)
153
+
154
+ # Get first repo analysis
155
+ content, summary = analyze_repo(repo_ids[0])
156
 
157
+ return read_csv_as_text(CSV_FILE), f"Found {len(repo_ids)} repositories", content, summary
 
 
158
 
159
  except Exception as e:
160
+ logger.error(f"Error processing input: {e}")
161
+ return pd.DataFrame(), f"Error: {str(e)}", "", ""
162
+
163
+ def send_message(message: str, history: List[Dict[str, str]], state: AppState) -> Tuple[List[Dict[str, str]], str]:
164
+ """Send message to chat."""
165
+ if not message:
166
+ return history, ""
167
+ history.append({"role": "user", "content": message})
168
+ response = chat_with_user(message, history, CHATBOT_SYSTEM_PROMPT)
 
 
 
 
 
169
  history.append({"role": "assistant", "content": response})
170
+ return history, ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
 
172
+ def clear_chat() -> Tuple[List[Dict[str, str]], str]:
173
+ """Clear chat history."""
174
+ return [], ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
 
176
+ # Event handlers
177
+ submit_btn.click(
178
+ fn=process_input,
179
+ inputs=[repo_input, state],
180
+ outputs=[df_output, status, content_output, summary_output]
 
 
 
181
  )
182
 
183
  send_btn.click(
184
+ fn=send_message,
 
 
 
 
185
  inputs=[msg, chatbot, state],
186
+ outputs=[chatbot, msg]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
  )
188
 
189
+ clear_btn.click(
190
+ fn=clear_chat,
191
  inputs=[],
192
+ outputs=[chatbot, msg]
 
 
 
 
 
 
193
  )
194
+
 
 
 
 
 
 
195
  return app
196
 
 
 
 
 
 
197
  # Script entry point: build the interface and start the Gradio server only
  # when this file is executed directly, not when it is imported.
  if __name__ == "__main__":
198
  app = create_ui()
199
  app.launch()