openfree committed on
Commit
ce3d342
·
verified ·
1 Parent(s): 04927f0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +96 -68
app.py CHANGED
@@ -214,9 +214,9 @@ def get_user_commit_stats(username):
214
  "score": 0
215
  }
216
 
217
- # Enhanced function to get trending accounts (simplified without commit fetching)
218
- @st.cache_data(ttl=3600) # Cache for 1 hour
219
- def get_trending_accounts_simple(limit=100):
220
  try:
221
  # First, get top accounts by model/space count
222
  spaces_response = requests.get("https://huggingface.co/api/spaces",
@@ -227,10 +227,9 @@ def get_trending_accounts_simple(limit=100):
227
  timeout=30)
228
 
229
  # Process spaces data
230
- top_space_owners = []
231
  if spaces_response.status_code == 200:
232
  spaces = spaces_response.json()
233
- owner_counts_spaces = {}
234
  for space in spaces:
235
  if '/' in space.get('id', ''):
236
  owner, _ = space.get('id', '').split('/', 1)
@@ -239,14 +238,11 @@ def get_trending_accounts_simple(limit=100):
239
 
240
  if owner != 'None':
241
  owner_counts_spaces[owner] = owner_counts_spaces.get(owner, 0) + 1
242
-
243
- top_space_owners = sorted(owner_counts_spaces.items(), key=lambda x: x[1], reverse=True)[:limit]
244
 
245
  # Process models data
246
- top_model_owners = []
247
  if models_response.status_code == 200:
248
  models = models_response.json()
249
- owner_counts_models = {}
250
  for model in models:
251
  if '/' in model.get('id', ''):
252
  owner, _ = model.get('id', '').split('/', 1)
@@ -255,51 +251,56 @@ def get_trending_accounts_simple(limit=100):
255
 
256
  if owner != 'None':
257
  owner_counts_models[owner] = owner_counts_models.get(owner, 0) + 1
258
-
259
- top_model_owners = sorted(owner_counts_models.items(), key=lambda x: x[1], reverse=True)[:limit]
260
 
261
- # Get unique users from top 100 of both lists
262
- unique_users = set()
263
- for owner, _ in top_space_owners[:100]:
264
- unique_users.add(owner)
265
- for owner, _ in top_model_owners[:100]:
266
- unique_users.add(owner)
 
 
 
 
267
 
268
- # For now, use a simple ranking based on total repos
269
- # (We'll fetch commits only for the selected user)
270
  user_stats = []
271
- for user in unique_users:
272
- models_count = next((count for owner, count in top_model_owners if owner == user), 0)
273
- spaces_count = next((count for owner, count in top_space_owners if owner == user), 0)
274
-
275
- user_stats.append({
276
- "username": user,
277
- "models": models_count,
278
- "spaces": spaces_count,
279
- "datasets": 0, # We'll skip datasets for initial ranking
280
- "estimated_commits": 0, # Will be calculated later for selected user
281
- "score": models_count + spaces_count # Temporary score for initial display
282
- })
283
 
284
- # Sort by total repo count for initial display
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
285
  user_stats.sort(key=lambda x: x["score"], reverse=True)
286
 
287
- # Extract rankings
288
- trending_authors = [stat["username"] for stat in user_stats[:limit]]
 
289
 
290
- # Create separate rankings sorted by spaces and models count
291
- spaces_rank_data = [(stat["username"], stat["spaces"]) for stat in user_stats if stat["spaces"] > 0]
292
- spaces_rank_data.sort(key=lambda x: x[1], reverse=True) # Sort by spaces count
293
- spaces_rank_data = spaces_rank_data[:limit]
294
 
295
- models_rank_data = [(stat["username"], stat["models"]) for stat in user_stats if stat["models"] > 0]
296
- models_rank_data.sort(key=lambda x: x[1], reverse=True) # Sort by models count
297
- models_rank_data = models_rank_data[:limit]
298
 
299
- return trending_authors, spaces_rank_data, models_rank_data, user_stats[:limit]
300
 
301
  except Exception as e:
302
- st.error(f"Error fetching trending accounts: {str(e)}")
303
  fallback_authors = ["ritvik77", "facebook", "google", "stabilityai", "Salesforce", "tiiuae", "bigscience"]
304
  fallback_stats = [{"username": author, "models": 0, "spaces": 0, "datasets": 0, "estimated_commits": 0, "score": 0} for author in fallback_authors]
305
  return fallback_authors, [(author, 0) for author in fallback_authors], [(author, 0) for author in fallback_authors], fallback_stats
@@ -688,9 +689,33 @@ def simulate_follower_data(username, spaces_count, models_count, total_commits):
688
 
689
  return fig
690
 
691
- # Fetch trending accounts with a loading spinner (do this once at the beginning)
692
- with st.spinner("Loading top contributors..."):
693
- trending_accounts, top_owners_spaces, top_owners_models, user_stats = get_trending_accounts_simple(limit=100)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
694
 
695
  # Sidebar
696
  with st.sidebar:
@@ -698,14 +723,14 @@ with st.sidebar:
698
 
699
  # Create tabs for rankings
700
  tab1, tab2 = st.tabs([
701
- "Top Contributors",
702
- "Repository Rankings"
703
  ])
704
 
705
  with tab1:
706
- # Show combined trending accounts list
707
- st.markdown('<div class="subheader"><h3>🔥 Top 100 Contributors</h3></div>', unsafe_allow_html=True)
708
- st.markdown('<p style="font-size: 0.9rem; color: #666; margin-bottom: 10px;">Initial ranking by total repositories. Select a user to see commit-based analysis.</p>', unsafe_allow_html=True)
709
 
710
  # Create a data frame for the table
711
  if user_stats:
@@ -723,14 +748,15 @@ with st.sidebar:
723
 
724
  overall_data.append([
725
  f"{rank_display}{stat['username']}",
 
726
  str(stat['models']),
727
  str(stat['spaces']),
728
- str(stat['models'] + stat['spaces']) # Total repos
729
  ])
730
 
731
  ranking_data_overall = pd.DataFrame(
732
  overall_data,
733
- columns=["Contributor", "Models", "Spaces", "Total Repos"]
734
  )
735
  ranking_data_overall.index = ranking_data_overall.index + 1 # Start index from 1 for ranking
736
 
@@ -739,17 +765,19 @@ with st.sidebar:
739
  height=900, # Pixel height chosen so that roughly 30 rows are visible
740
  column_config={
741
  "Contributor": st.column_config.TextColumn("Contributor"),
 
742
  "Models": st.column_config.TextColumn("Models"),
743
  "Spaces": st.column_config.TextColumn("Spaces"),
744
- "Total Repos": st.column_config.TextColumn("Total Repos")
745
  },
746
  use_container_width=True,
747
  hide_index=False
748
  )
749
 
750
  with tab2:
751
- # Show accounts sorted by Spaces count
752
- st.markdown('<div class="subheader"><h3>🚀 Top 50 by Spaces Count</h3></div>', unsafe_allow_html=True)
 
753
 
754
  # Create a data frame for the Spaces table with medals for top 3
755
  if top_owners_spaces:
@@ -779,8 +807,9 @@ with st.sidebar:
779
  hide_index=False
780
  )
781
 
782
- # Display accounts sorted by Models count
783
- st.markdown('<div class="subheader"><h3>🧠 Top 50 by Models Count</h3></div>', unsafe_allow_html=True)
 
784
 
785
  # Create a data frame for the Models table with medals for top 3
786
  if top_owners_models:
@@ -869,7 +898,7 @@ if username:
869
  st.markdown(f'<div style="background-color: #F3E5F5; padding: 20px; border-radius: 10px; border-left: 5px solid #9C27B0;">'
870
  f'<h3 style="color: #9C27B0;">About This Analysis</h3>'
871
  f'<p>This dashboard analyzes {username}\'s contributions to Hugging Face in {selected_year}, including models, datasets, and spaces.</p>'
872
- f'<p style="font-style: italic; font-size: 12px;">* Detailed commit analysis will be calculated after selection.</p>'
873
  f'</div>', unsafe_allow_html=True)
874
 
875
  with st.spinner(f"Fetching detailed contribution data for {username}..."):
@@ -887,8 +916,7 @@ if username:
887
 
888
  # Create a prominent ranking display
889
  st.markdown(f'<div style="background-color: #FFF8E1; padding: 20px; border-radius: 10px; border-left: 5px solid #FFC107; margin: 1rem 0;">'
890
- f'<h2 style="color: #FFA000; text-align: center;">📊 Featured Contributor</h2>'
891
- f'<p style="text-align: center; font-size: 16px;">Analyzing detailed commit statistics...</p>'
892
  f'</div>', unsafe_allow_html=True)
893
 
894
  # Find user in spaces ranking
@@ -997,21 +1025,21 @@ if username:
997
 
998
  # Process repos in chunks
999
  chunk_size = 5
1000
- total_commits = 0
1001
  all_commit_dates = []
1002
 
1003
  for i in range(0, len(repo_ids), chunk_size):
1004
  chunk = repo_ids[i:i + chunk_size]
1005
  with ThreadPoolExecutor(max_workers=min(5, len(chunk))) as executor:
1006
- future_to_repo = {
1007
- executor.submit(fetch_commits_for_repo, repo_id, kind, username, selected_year): repo_id
1008
- for repo_id in chunk
1009
- }
 
 
1010
  for future in as_completed(future_to_repo):
1011
  repo_commits, repo_count = future.result()
1012
  if repo_commits:
1013
  all_commit_dates.extend(repo_commits)
1014
- total_commits += repo_count
1015
 
1016
  # Update progress for all types
1017
  progress_per_type = 1.0 / len(types_to_fetch)
@@ -1020,7 +1048,7 @@ if username:
1020
  progress_bar.progress(overall_progress)
1021
 
1022
  commits_by_type[kind] = all_commit_dates
1023
- commit_counts_by_type[kind] = total_commits
1024
 
1025
  except Exception as e:
1026
  st.warning(f"Error fetching {kind}s for {username}: {str(e)}")
 
214
  "score": 0
215
  }
216
 
217
+ # Enhanced function to get trending accounts with commit-based ranking for top 100
218
+ @st.cache_data(ttl=3600, show_spinner=False) # Cache for 1 hour
219
+ def get_trending_accounts_with_commits(limit=100):
220
  try:
221
  # First, get top accounts by model/space count
222
  spaces_response = requests.get("https://huggingface.co/api/spaces",
 
227
  timeout=30)
228
 
229
  # Process spaces data
230
+ owner_counts_spaces = {}
231
  if spaces_response.status_code == 200:
232
  spaces = spaces_response.json()
 
233
  for space in spaces:
234
  if '/' in space.get('id', ''):
235
  owner, _ = space.get('id', '').split('/', 1)
 
238
 
239
  if owner != 'None':
240
  owner_counts_spaces[owner] = owner_counts_spaces.get(owner, 0) + 1
 
 
241
 
242
  # Process models data
243
+ owner_counts_models = {}
244
  if models_response.status_code == 200:
245
  models = models_response.json()
 
246
  for model in models:
247
  if '/' in model.get('id', ''):
248
  owner, _ = model.get('id', '').split('/', 1)
 
251
 
252
  if owner != 'None':
253
  owner_counts_models[owner] = owner_counts_models.get(owner, 0) + 1
 
 
254
 
255
+ # Get top accounts by total repos (models + spaces)
256
+ top_accounts = {}
257
+ for owner, count in owner_counts_models.items():
258
+ top_accounts[owner] = count + owner_counts_spaces.get(owner, 0)
259
+ for owner, count in owner_counts_spaces.items():
260
+ if owner not in top_accounts:
261
+ top_accounts[owner] = count
262
+
263
+ # Sort and get top 200 accounts for commit analysis
264
+ sorted_accounts = sorted(top_accounts.items(), key=lambda x: x[1], reverse=True)[:200]
265
 
266
+ # Fetch commit stats for top accounts
 
267
  user_stats = []
 
 
 
 
 
 
 
 
 
 
 
 
268
 
269
+ for username, _ in sorted_accounts:
270
+ try:
271
+ # Get commit stats
272
+ stats = get_user_commit_stats(username)
273
+ stats["models"] = owner_counts_models.get(username, 0)
274
+ stats["spaces"] = owner_counts_spaces.get(username, 0)
275
+ user_stats.append(stats)
276
+ except:
277
+ user_stats.append({
278
+ "username": username,
279
+ "models": owner_counts_models.get(username, 0),
280
+ "spaces": owner_counts_spaces.get(username, 0),
281
+ "datasets": 0,
282
+ "estimated_commits": 0,
283
+ "score": 0
284
+ })
285
+
286
+ # Sort by commits (score) for overall ranking
287
  user_stats.sort(key=lambda x: x["score"], reverse=True)
288
 
289
+ # Get top 100 by commits
290
+ top_100_by_commits = user_stats[:limit]
291
+ trending_authors = [stat["username"] for stat in top_100_by_commits]
292
 
293
+ # Create rankings within top 100 - sorted by spaces and models count
294
+ spaces_rank_data = [(stat["username"], stat["spaces"]) for stat in top_100_by_commits if stat["spaces"] > 0]
295
+ spaces_rank_data.sort(key=lambda x: x[1], reverse=True)
 
296
 
297
+ models_rank_data = [(stat["username"], stat["models"]) for stat in top_100_by_commits if stat["models"] > 0]
298
+ models_rank_data.sort(key=lambda x: x[1], reverse=True)
 
299
 
300
+ return trending_authors, spaces_rank_data, models_rank_data, top_100_by_commits
301
 
302
  except Exception as e:
303
+ # Return fallback data
304
  fallback_authors = ["ritvik77", "facebook", "google", "stabilityai", "Salesforce", "tiiuae", "bigscience"]
305
  fallback_stats = [{"username": author, "models": 0, "spaces": 0, "datasets": 0, "estimated_commits": 0, "score": 0} for author in fallback_authors]
306
  return fallback_authors, [(author, 0) for author in fallback_authors], [(author, 0) for author in fallback_authors], fallback_stats
 
689
 
690
  return fig
691
 
692
+ # Fetch trending accounts with a loading spinner and progress bar
693
+ try:
694
+ with st.spinner("Loading top contributors..."):
695
+ # Add progress tracking
696
+ progress_text = st.empty()
697
+ progress_bar = st.progress(0)
698
+ progress_text.text("Fetching repository data...")
699
+ progress_bar.progress(0.3)
700
+
701
+ # Get the data
702
+ trending_accounts, top_owners_spaces, top_owners_models, user_stats = get_trending_accounts_with_commits(limit=100)
703
+
704
+ # Complete progress
705
+ progress_bar.progress(1.0)
706
+ progress_text.text("Loading complete!")
707
+ time.sleep(0.5)
708
+
709
+ # Clear progress indicators
710
+ progress_text.empty()
711
+ progress_bar.empty()
712
+ except Exception as e:
713
+ st.error(f"Error loading trending accounts: {str(e)}")
714
+ # Use fallback data
715
+ trending_accounts = ["ritvik77", "facebook", "google", "stabilityai", "Salesforce", "tiiuae", "bigscience"]
716
+ top_owners_spaces = [(author, 0) for author in trending_accounts]
717
+ top_owners_models = [(author, 0) for author in trending_accounts]
718
+ user_stats = [{"username": author, "models": 0, "spaces": 0, "datasets": 0, "estimated_commits": 0, "score": 0} for author in trending_accounts]
719
 
720
  # Sidebar
721
  with st.sidebar:
 
723
 
724
  # Create tabs for rankings
725
  tab1, tab2 = st.tabs([
726
+ "Top 100 by Commits",
727
+ "Space/Model Rankings"
728
  ])
729
 
730
  with tab1:
731
+ # Show combined trending accounts list by commits
732
+ st.markdown('<div class="subheader"><h3>🔥 Top 100 Contributors by Commits</h3></div>', unsafe_allow_html=True)
733
+ st.markdown('<p style="font-size: 0.9rem; color: #666; margin-bottom: 10px;">Ranked by estimated total commit count</p>', unsafe_allow_html=True)
734
 
735
  # Create a data frame for the table
736
  if user_stats:
 
748
 
749
  overall_data.append([
750
  f"{rank_display}{stat['username']}",
751
+ str(stat['estimated_commits']),
752
  str(stat['models']),
753
  str(stat['spaces']),
754
+ str(stat['datasets'])
755
  ])
756
 
757
  ranking_data_overall = pd.DataFrame(
758
  overall_data,
759
+ columns=["Contributor", "Est. Commits", "Models", "Spaces", "Datasets"]
760
  )
761
  ranking_data_overall.index = ranking_data_overall.index + 1 # Start index from 1 for ranking
762
 
 
765
  height=900, # Pixel height chosen so that roughly 30 rows are visible
766
  column_config={
767
  "Contributor": st.column_config.TextColumn("Contributor"),
768
+ "Est. Commits": st.column_config.TextColumn("Est. Commits"),
769
  "Models": st.column_config.TextColumn("Models"),
770
  "Spaces": st.column_config.TextColumn("Spaces"),
771
+ "Datasets": st.column_config.TextColumn("Datasets")
772
  },
773
  use_container_width=True,
774
  hide_index=False
775
  )
776
 
777
  with tab2:
778
+ # Show accounts sorted by Spaces count (within top 100 by commits)
779
+ st.markdown('<div class="subheader"><h3>🚀 Spaces Ranking (Top 100 Contributors)</h3></div>', unsafe_allow_html=True)
780
+ st.markdown('<p style="font-size: 0.9rem; color: #666; margin-bottom: 10px;">Ranked by spaces count among top 100 contributors by commits</p>', unsafe_allow_html=True)
781
 
782
  # Create a data frame for the Spaces table with medals for top 3
783
  if top_owners_spaces:
 
807
  hide_index=False
808
  )
809
 
810
+ # Display accounts sorted by Models count (within top 100 by commits)
811
+ st.markdown('<div class="subheader"><h3>🧠 Models Ranking (Top 100 Contributors)</h3></div>', unsafe_allow_html=True)
812
+ st.markdown('<p style="font-size: 0.9rem; color: #666; margin-bottom: 10px;">Ranked by models count among top 100 contributors by commits</p>', unsafe_allow_html=True)
813
 
814
  # Create a data frame for the Models table with medals for top 3
815
  if top_owners_models:
 
898
  st.markdown(f'<div style="background-color: #F3E5F5; padding: 20px; border-radius: 10px; border-left: 5px solid #9C27B0;">'
899
  f'<h3 style="color: #9C27B0;">About This Analysis</h3>'
900
  f'<p>This dashboard analyzes {username}\'s contributions to Hugging Face in {selected_year}, including models, datasets, and spaces.</p>'
901
+ f'<p style="font-style: italic; font-size: 12px;">* Overall rankings are based on commit count. Space/Model rankings are within top 100 contributors.</p>'
902
  f'</div>', unsafe_allow_html=True)
903
 
904
  with st.spinner(f"Fetching detailed contribution data for {username}..."):
 
916
 
917
  # Create a prominent ranking display
918
  st.markdown(f'<div style="background-color: #FFF8E1; padding: 20px; border-radius: 10px; border-left: 5px solid #FFC107; margin: 1rem 0;">'
919
+ f'<h2 style="color: #FFA000; text-align: center;">🏆 Ranked #{overall_rank} in Top 100 Contributors by Commits</h2>'
 
920
  f'</div>', unsafe_allow_html=True)
921
 
922
  # Find user in spaces ranking
 
1025
 
1026
  # Process repos in chunks
1027
  chunk_size = 5
 
1028
  all_commit_dates = []
1029
 
1030
  for i in range(0, len(repo_ids), chunk_size):
1031
  chunk = repo_ids[i:i + chunk_size]
1032
  with ThreadPoolExecutor(max_workers=min(5, len(chunk))) as executor:
1033
+ future_to_repo = {}
1034
+ for repo_id in chunk:
1035
+ future = executor.submit(fetch_commits_for_repo, repo_id, kind, username, selected_year)
1036
+ add_script_run_ctx(future)
1037
+ future_to_repo[future] = repo_id
1038
+
1039
  for future in as_completed(future_to_repo):
1040
  repo_commits, repo_count = future.result()
1041
  if repo_commits:
1042
  all_commit_dates.extend(repo_commits)
 
1043
 
1044
  # Update progress for all types
1045
  progress_per_type = 1.0 / len(types_to_fetch)
 
1048
  progress_bar.progress(overall_progress)
1049
 
1050
  commits_by_type[kind] = all_commit_dates
1051
+ commit_counts_by_type[kind] = len(all_commit_dates)
1052
 
1053
  except Exception as e:
1054
  st.warning(f"Error fetching {kind}s for {username}: {str(e)}")