openfree committed on
Commit
04927f0
·
verified ·
1 Parent(s): 59ca104

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +96 -128
app.py CHANGED
@@ -10,6 +10,7 @@ import time
10
  import requests
11
  from collections import Counter
12
  import numpy as np
 
13
 
14
  st.set_page_config(page_title="HF Contributions", layout="wide", initial_sidebar_state="expanded")
15
 
@@ -213,9 +214,9 @@ def get_user_commit_stats(username):
213
  "score": 0
214
  }
215
 
216
- # Enhanced function to get trending accounts with commit-based ranking
217
  @st.cache_data(ttl=3600) # Cache for 1 hour
218
- def get_trending_accounts_with_commits(limit=100):
219
  try:
220
  # First, get top accounts by model/space count
221
  spaces_response = requests.get("https://huggingface.co/api/spaces",
@@ -264,30 +265,23 @@ def get_trending_accounts_with_commits(limit=100):
264
  for owner, _ in top_model_owners[:100]:
265
  unique_users.add(owner)
266
 
267
- # Create progress bar for fetching commit stats
268
- progress_text = st.empty()
269
- progress_bar = st.progress(0)
270
- progress_text.text(f"Analyzing top contributors... (0/{len(unique_users)})")
271
-
272
- # Fetch commit stats for all unique users
273
  user_stats = []
274
- with ThreadPoolExecutor(max_workers=5) as executor:
275
- future_to_user = {executor.submit(get_user_commit_stats, user): user for user in unique_users}
276
- completed = 0
277
- for future in as_completed(future_to_user):
278
- stats = future.result()
279
- if stats["score"] > 0: # Only include users with some activity
280
- user_stats.append(stats)
281
- completed += 1
282
- progress = completed / len(unique_users)
283
- progress_bar.progress(progress)
284
- progress_text.text(f"Analyzing top contributors... ({completed}/{len(unique_users)})")
285
-
286
- # Clear progress indicators
287
- progress_text.empty()
288
- progress_bar.empty()
289
 
290
- # Sort by score (commits only) for overall ranking
291
  user_stats.sort(key=lambda x: x["score"], reverse=True)
292
 
293
  # Extract rankings
@@ -358,10 +352,12 @@ def get_commit_events(username, kind=None, selected_year=None):
358
  for i in range(0, len(repo_ids), chunk_size):
359
  chunk = repo_ids[i:i + chunk_size]
360
  with ThreadPoolExecutor(max_workers=min(5, len(chunk))) as executor:
361
- future_to_repo = {
362
- executor.submit(fetch_commits_for_repo, repo_id, k, username, selected_year): repo_id
363
- for repo_id in chunk
364
- }
 
 
365
  for future in as_completed(future_to_repo):
366
  repo_commits, repo_count = future.result()
367
  if repo_commits: # Only extend if we got commits
@@ -692,84 +688,9 @@ def simulate_follower_data(username, spaces_count, models_count, total_commits):
692
 
693
  return fig
694
 
695
- # Function to create ranking position visualization
696
- def create_ranking_chart(username, overall_rank, spaces_rank, models_rank):
697
- if not (overall_rank or spaces_rank or models_rank):
698
- return None
699
-
700
- # Create a horizontal bar chart for rankings with improved styling
701
- fig, ax = plt.subplots(figsize=(12, 5), facecolor='#F8F9FA')
702
-
703
- categories = []
704
- positions = []
705
- colors = []
706
- rank_values = []
707
-
708
- if overall_rank:
709
- categories.append('Overall')
710
- positions.append(101 - overall_rank) # Invert rank for visualization (higher is better)
711
- colors.append('#673AB7')
712
- rank_values.append(overall_rank)
713
-
714
- if spaces_rank:
715
- categories.append('Spaces')
716
- positions.append(101 - spaces_rank)
717
- colors.append('#2196F3')
718
- rank_values.append(spaces_rank)
719
-
720
- if models_rank:
721
- categories.append('Models')
722
- positions.append(101 - models_rank)
723
- colors.append('#FF9800')
724
- rank_values.append(models_rank)
725
-
726
- # Create horizontal bars with enhanced styling
727
- bars = ax.barh(categories, positions, color=colors, alpha=0.8, height=0.6,
728
- edgecolor='white', linewidth=1.5)
729
-
730
- # Add rank values as text with improved styling
731
- for i, bar in enumerate(bars):
732
- ax.text(bar.get_width() + 2, bar.get_y() + bar.get_height()/2,
733
- f'Rank #{rank_values[i]}', va='center', fontsize=12,
734
- fontweight='bold', color=colors[i])
735
-
736
- # Set chart properties with enhanced styling
737
- ax.set_xlim(0, 105)
738
- ax.set_title(f"Ranking Positions for {username} (Top 100)", fontsize=18, pad=20, fontweight='bold')
739
- ax.set_xlabel("Percentile (higher is better)", fontsize=14, labelpad=10)
740
-
741
- # Add explanatory text
742
- ax.text(50, -0.6, "← Lower rank (higher number) | Higher rank (lower number) →",
743
- ha='center', va='center', fontsize=10, fontweight='bold', color='#666666')
744
-
745
- # Add a vertical line at 90th percentile to highlight top 10 with improved styling
746
- ax.axvline(x=90, color='#FF5252', linestyle='--', alpha=0.7, linewidth=2)
747
- ax.text(92, len(categories)/2, 'Top 10', color='#D32F2F', fontsize=12,
748
- rotation=90, va='center', fontweight='bold')
749
-
750
- # Style the chart borders and background
751
- ax.spines['top'].set_visible(False)
752
- ax.spines['right'].set_visible(False)
753
- ax.spines['left'].set_linewidth(0.5)
754
- ax.spines['bottom'].set_linewidth(0.5)
755
-
756
- # Adjust tick parameters for better look
757
- ax.tick_params(axis='x', labelsize=12)
758
- ax.tick_params(axis='y', labelsize=14, pad=5)
759
-
760
- # Add grid for better readability
761
- ax.grid(axis='x', linestyle='--', alpha=0.5, color='#CCCCCC')
762
- ax.set_axisbelow(True) # Grid lines behind bars
763
-
764
- # Invert x-axis to show ranking position more intuitively
765
- ax.invert_xaxis()
766
-
767
- plt.tight_layout()
768
- return fig
769
-
770
  # Fetch trending accounts with a loading spinner (do this once at the beginning)
771
- with st.spinner("Loading and analyzing top contributors... This may take a few moments."):
772
- trending_accounts, top_owners_spaces, top_owners_models, user_stats = get_trending_accounts_with_commits(limit=100)
773
 
774
  # Sidebar
775
  with st.sidebar:
@@ -777,14 +698,14 @@ with st.sidebar:
777
 
778
  # Create tabs for rankings
779
  tab1, tab2 = st.tabs([
780
- "Top 100 Overall",
781
- "Top Spaces & Models"
782
  ])
783
 
784
  with tab1:
785
- # Show combined trending accounts list with commit-based ranking
786
- st.markdown('<div class="subheader"><h3>🔥 Top 100 Contributors by Commits</h3></div>', unsafe_allow_html=True)
787
- st.markdown('<p style="font-size: 0.9rem; color: #666; margin-bottom: 10px;">Ranked by total commit count</p>', unsafe_allow_html=True)
788
 
789
  # Create a data frame for the table
790
  if user_stats:
@@ -802,15 +723,14 @@ with st.sidebar:
802
 
803
  overall_data.append([
804
  f"{rank_display}{stat['username']}",
805
- str(stat['estimated_commits']),
806
  str(stat['models']),
807
  str(stat['spaces']),
808
- str(stat['datasets'])
809
  ])
810
 
811
  ranking_data_overall = pd.DataFrame(
812
  overall_data,
813
- columns=["Contributor", "Total Commits", "Models", "Spaces", "Datasets"]
814
  )
815
  ranking_data_overall.index = ranking_data_overall.index + 1 # Start index from 1 for ranking
816
 
@@ -819,10 +739,9 @@ with st.sidebar:
819
  height=900, # 약 30행 정도 보이도록 픽셀 단위 높이 설정 (pixel height chosen so roughly 30 rows are visible)
820
  column_config={
821
  "Contributor": st.column_config.TextColumn("Contributor"),
822
- "Total Commits": st.column_config.TextColumn("Total Commits"),
823
  "Models": st.column_config.TextColumn("Models"),
824
  "Spaces": st.column_config.TextColumn("Spaces"),
825
- "Datasets": st.column_config.TextColumn("Datasets")
826
  },
827
  use_container_width=True,
828
  hide_index=False
@@ -939,11 +858,9 @@ if username:
939
  # Create a header card with contributor info
940
  header_col1, header_col2 = st.columns([1, 2])
941
  with header_col1:
942
- commits_display = f"Est. Commits: {user_stat['estimated_commits']}" if user_stat else "Est. Commits: N/A"
943
  st.markdown(f'<div style="background-color: #E3F2FD; padding: 20px; border-radius: 10px; border-left: 5px solid #1E88E5;">'
944
  f'<h2 style="color: #1E88E5;">👤 {username}</h2>'
945
  f'<p style="font-size: 16px;">Analyzing contributions for {selected_year}</p>'
946
- f'<p style="font-size: 14px; font-weight: bold;">{commits_display}</p>'
947
  f'<p><a href="https://huggingface.co/{username}" target="_blank" style="color: #1E88E5; font-weight: bold;">View Profile</a></p>'
948
  f'</div>', unsafe_allow_html=True)
949
 
@@ -952,7 +869,7 @@ if username:
952
  st.markdown(f'<div style="background-color: #F3E5F5; padding: 20px; border-radius: 10px; border-left: 5px solid #9C27B0;">'
953
  f'<h3 style="color: #9C27B0;">About This Analysis</h3>'
954
  f'<p>This dashboard analyzes {username}\'s contributions to Hugging Face in {selected_year}, including models, datasets, and spaces.</p>'
955
- f'<p style="font-style: italic; font-size: 12px;">* Overall rankings are based on total commit count. Space/Model rankings are based on repository count.</p>'
956
  f'</div>', unsafe_allow_html=True)
957
 
958
  with st.spinner(f"Fetching detailed contribution data for {username}..."):
@@ -970,7 +887,8 @@ if username:
970
 
971
  # Create a prominent ranking display
972
  st.markdown(f'<div style="background-color: #FFF8E1; padding: 20px; border-radius: 10px; border-left: 5px solid #FFC107; margin: 1rem 0;">'
973
- f'<h2 style="color: #FFA000; text-align: center;">🏆 Ranked #{overall_rank} in Top Contributors</h2>'
 
974
  f'</div>', unsafe_allow_html=True)
975
 
976
  # Find user in spaces ranking
@@ -987,10 +905,56 @@ if username:
987
  models_count = count
988
  break
989
 
990
- # Display ranking visualization
991
- rank_chart = create_ranking_chart(username, overall_rank, spaces_rank, models_rank)
992
- if rank_chart:
993
- st.pyplot(rank_chart)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
994
 
995
  # Create a dictionary to store commits by type
996
  commits_by_type = {}
@@ -1163,12 +1127,16 @@ if username:
1163
  f'</ul>', unsafe_allow_html=True)
1164
 
1165
  # Add ranking context if available
1166
- if overall_rank:
1167
- percentile = 100 - overall_rank
1168
  st.markdown(f'<div style="margin-top: 20px;">'
1169
- f'<h3 style="color: #1E88E5; border-bottom: 1px solid #E0E0E0; padding-bottom: 10px;">Ranking Analysis</h3>'
1170
- f'<ul style="list-style-type: none; padding-left: 5px;">'
1171
- f'<li style="margin: 15px 0; font-size: 16px;">🏆 <strong>Overall Ranking:</strong> #{overall_rank} (Top {percentile}% of contributors)</li>', unsafe_allow_html=True)
 
 
 
 
 
1172
 
1173
  badge_html = '<div style="margin: 20px 0;">'
1174
 
 
10
  import requests
11
  from collections import Counter
12
  import numpy as np
13
+ from streamlit.runtime.scriptrunner import add_script_run_ctx
14
 
15
  st.set_page_config(page_title="HF Contributions", layout="wide", initial_sidebar_state="expanded")
16
 
 
214
  "score": 0
215
  }
216
 
217
+ # Enhanced function to get trending accounts (simplified without commit fetching)
218
  @st.cache_data(ttl=3600) # Cache for 1 hour
219
+ def get_trending_accounts_simple(limit=100):
220
  try:
221
  # First, get top accounts by model/space count
222
  spaces_response = requests.get("https://huggingface.co/api/spaces",
 
265
  for owner, _ in top_model_owners[:100]:
266
  unique_users.add(owner)
267
 
268
+ # For now, use a simple ranking based on total repos
269
+ # (We'll fetch commits only for the selected user)
 
 
 
 
270
  user_stats = []
271
+ for user in unique_users:
272
+ models_count = next((count for owner, count in top_model_owners if owner == user), 0)
273
+ spaces_count = next((count for owner, count in top_space_owners if owner == user), 0)
274
+
275
+ user_stats.append({
276
+ "username": user,
277
+ "models": models_count,
278
+ "spaces": spaces_count,
279
+ "datasets": 0, # We'll skip datasets for initial ranking
280
+ "estimated_commits": 0, # Will be calculated later for selected user
281
+ "score": models_count + spaces_count # Temporary score for initial display
282
+ })
 
 
 
283
 
284
+ # Sort by total repo count for initial display
285
  user_stats.sort(key=lambda x: x["score"], reverse=True)
286
 
287
  # Extract rankings
 
352
  for i in range(0, len(repo_ids), chunk_size):
353
  chunk = repo_ids[i:i + chunk_size]
354
  with ThreadPoolExecutor(max_workers=min(5, len(chunk))) as executor:
355
+ future_to_repo = {}
356
+ for repo_id in chunk:
357
+ future = executor.submit(fetch_commits_for_repo, repo_id, k, username, selected_year)
358
+ add_script_run_ctx(future)
359
+ future_to_repo[future] = repo_id
360
+
361
  for future in as_completed(future_to_repo):
362
  repo_commits, repo_count = future.result()
363
  if repo_commits: # Only extend if we got commits
 
688
 
689
  return fig
690
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
691
  # Fetch trending accounts with a loading spinner (do this once at the beginning)
692
+ with st.spinner("Loading top contributors..."):
693
+ trending_accounts, top_owners_spaces, top_owners_models, user_stats = get_trending_accounts_simple(limit=100)
694
 
695
  # Sidebar
696
  with st.sidebar:
 
698
 
699
  # Create tabs for rankings
700
  tab1, tab2 = st.tabs([
701
+ "Top Contributors",
702
+ "Repository Rankings"
703
  ])
704
 
705
  with tab1:
706
+ # Show combined trending accounts list
707
+ st.markdown('<div class="subheader"><h3>🔥 Top 100 Contributors</h3></div>', unsafe_allow_html=True)
708
+ st.markdown('<p style="font-size: 0.9rem; color: #666; margin-bottom: 10px;">Initial ranking by total repositories. Select a user to see commit-based analysis.</p>', unsafe_allow_html=True)
709
 
710
  # Create a data frame for the table
711
  if user_stats:
 
723
 
724
  overall_data.append([
725
  f"{rank_display}{stat['username']}",
 
726
  str(stat['models']),
727
  str(stat['spaces']),
728
+ str(stat['models'] + stat['spaces']) # Total repos
729
  ])
730
 
731
  ranking_data_overall = pd.DataFrame(
732
  overall_data,
733
+ columns=["Contributor", "Models", "Spaces", "Total Repos"]
734
  )
735
  ranking_data_overall.index = ranking_data_overall.index + 1 # Start index from 1 for ranking
736
 
 
739
  height=900, # 약 30행 정도 보이도록 픽셀 단위 높이 설정 (pixel height chosen so roughly 30 rows are visible)
740
  column_config={
741
  "Contributor": st.column_config.TextColumn("Contributor"),
 
742
  "Models": st.column_config.TextColumn("Models"),
743
  "Spaces": st.column_config.TextColumn("Spaces"),
744
+ "Total Repos": st.column_config.TextColumn("Total Repos")
745
  },
746
  use_container_width=True,
747
  hide_index=False
 
858
  # Create a header card with contributor info
859
  header_col1, header_col2 = st.columns([1, 2])
860
  with header_col1:
 
861
  st.markdown(f'<div style="background-color: #E3F2FD; padding: 20px; border-radius: 10px; border-left: 5px solid #1E88E5;">'
862
  f'<h2 style="color: #1E88E5;">👤 {username}</h2>'
863
  f'<p style="font-size: 16px;">Analyzing contributions for {selected_year}</p>'
 
864
  f'<p><a href="https://huggingface.co/{username}" target="_blank" style="color: #1E88E5; font-weight: bold;">View Profile</a></p>'
865
  f'</div>', unsafe_allow_html=True)
866
 
 
869
  st.markdown(f'<div style="background-color: #F3E5F5; padding: 20px; border-radius: 10px; border-left: 5px solid #9C27B0;">'
870
  f'<h3 style="color: #9C27B0;">About This Analysis</h3>'
871
  f'<p>This dashboard analyzes {username}\'s contributions to Hugging Face in {selected_year}, including models, datasets, and spaces.</p>'
872
+ f'<p style="font-style: italic; font-size: 12px;">* Detailed commit analysis will be calculated after selection.</p>'
873
  f'</div>', unsafe_allow_html=True)
874
 
875
  with st.spinner(f"Fetching detailed contribution data for {username}..."):
 
887
 
888
  # Create a prominent ranking display
889
  st.markdown(f'<div style="background-color: #FFF8E1; padding: 20px; border-radius: 10px; border-left: 5px solid #FFC107; margin: 1rem 0;">'
890
+ f'<h2 style="color: #FFA000; text-align: center;">📊 Featured Contributor</h2>'
891
+ f'<p style="text-align: center; font-size: 16px;">Analyzing detailed commit statistics...</p>'
892
  f'</div>', unsafe_allow_html=True)
893
 
894
  # Find user in spaces ranking
 
905
  models_count = count
906
  break
907
 
908
+ # Display ranking visualization only if user has rankings
909
+ if spaces_rank or models_rank:
910
+ # Create custom ranking chart for spaces and models only
911
+ fig, ax = plt.subplots(figsize=(12, 5), facecolor='#F8F9FA')
912
+
913
+ categories = []
914
+ positions = []
915
+ colors = []
916
+ rank_values = []
917
+
918
+ if spaces_rank:
919
+ categories.append('Spaces')
920
+ positions.append(101 - spaces_rank)
921
+ colors.append('#2196F3')
922
+ rank_values.append(spaces_rank)
923
+
924
+ if models_rank:
925
+ categories.append('Models')
926
+ positions.append(101 - models_rank)
927
+ colors.append('#FF9800')
928
+ rank_values.append(models_rank)
929
+
930
+ if categories: # Only create chart if there are rankings
931
+ bars = ax.barh(categories, positions, color=colors, alpha=0.8, height=0.6,
932
+ edgecolor='white', linewidth=1.5)
933
+
934
+ for i, bar in enumerate(bars):
935
+ ax.text(bar.get_width() + 2, bar.get_y() + bar.get_height()/2,
936
+ f'Rank #{rank_values[i]}', va='center', fontsize=12,
937
+ fontweight='bold', color=colors[i])
938
+
939
+ ax.set_xlim(0, 105)
940
+ ax.set_title(f"Repository Rankings for {username} (Top 100)", fontsize=18, pad=20, fontweight='bold')
941
+ ax.set_xlabel("Percentile (higher is better)", fontsize=14, labelpad=10)
942
+
943
+ ax.spines['top'].set_visible(False)
944
+ ax.spines['right'].set_visible(False)
945
+ ax.spines['left'].set_linewidth(0.5)
946
+ ax.spines['bottom'].set_linewidth(0.5)
947
+
948
+ ax.tick_params(axis='x', labelsize=12)
949
+ ax.tick_params(axis='y', labelsize=14, pad=5)
950
+
951
+ ax.grid(axis='x', linestyle='--', alpha=0.5, color='#CCCCCC')
952
+ ax.set_axisbelow(True)
953
+
954
+ ax.invert_xaxis()
955
+
956
+ plt.tight_layout()
957
+ st.pyplot(fig)
958
 
959
  # Create a dictionary to store commits by type
960
  commits_by_type = {}
 
1127
  f'</ul>', unsafe_allow_html=True)
1128
 
1129
  # Add ranking context if available
1130
+ if spaces_rank or models_rank:
 
1131
  st.markdown(f'<div style="margin-top: 20px;">'
1132
+ f'<h3 style="color: #1E88E5; border-bottom: 1px solid #E0E0E0; padding-bottom: 10px;">Repository Rankings</h3>'
1133
+ f'<ul style="list-style-type: none; padding-left: 5px;">', unsafe_allow_html=True)
1134
+
1135
+ if spaces_rank:
1136
+ st.markdown(f'<li style="margin: 15px 0; font-size: 16px;">🚀 <strong>Spaces Ranking:</strong> #{spaces_rank} with {spaces_count} spaces</li>', unsafe_allow_html=True)
1137
+
1138
+ if models_rank:
1139
+ st.markdown(f'<li style="margin: 15px 0; font-size: 16px;">🧠 <strong>Models Ranking:</strong> #{models_rank} with {models_count} models</li>', unsafe_allow_html=True)
1140
 
1141
  badge_html = '<div style="margin: 20px 0;">'
1142