Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -10,6 +10,7 @@ import time
|
|
10 |
import requests
|
11 |
from collections import Counter
|
12 |
import numpy as np
|
|
|
13 |
|
14 |
st.set_page_config(page_title="HF Contributions", layout="wide", initial_sidebar_state="expanded")
|
15 |
|
@@ -213,9 +214,9 @@ def get_user_commit_stats(username):
|
|
213 |
"score": 0
|
214 |
}
|
215 |
|
216 |
-
# Enhanced function to get trending accounts
|
217 |
@st.cache_data(ttl=3600) # Cache for 1 hour
|
218 |
-
def
|
219 |
try:
|
220 |
# First, get top accounts by model/space count
|
221 |
spaces_response = requests.get("https://huggingface.co/api/spaces",
|
@@ -264,30 +265,23 @@ def get_trending_accounts_with_commits(limit=100):
|
|
264 |
for owner, _ in top_model_owners[:100]:
|
265 |
unique_users.add(owner)
|
266 |
|
267 |
-
#
|
268 |
-
|
269 |
-
progress_bar = st.progress(0)
|
270 |
-
progress_text.text(f"Analyzing top contributors... (0/{len(unique_users)})")
|
271 |
-
|
272 |
-
# Fetch commit stats for all unique users
|
273 |
user_stats = []
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
# Clear progress indicators
|
287 |
-
progress_text.empty()
|
288 |
-
progress_bar.empty()
|
289 |
|
290 |
-
# Sort by
|
291 |
user_stats.sort(key=lambda x: x["score"], reverse=True)
|
292 |
|
293 |
# Extract rankings
|
@@ -358,10 +352,12 @@ def get_commit_events(username, kind=None, selected_year=None):
|
|
358 |
for i in range(0, len(repo_ids), chunk_size):
|
359 |
chunk = repo_ids[i:i + chunk_size]
|
360 |
with ThreadPoolExecutor(max_workers=min(5, len(chunk))) as executor:
|
361 |
-
future_to_repo = {
|
362 |
-
|
363 |
-
|
364 |
-
|
|
|
|
|
365 |
for future in as_completed(future_to_repo):
|
366 |
repo_commits, repo_count = future.result()
|
367 |
if repo_commits: # Only extend if we got commits
|
@@ -692,84 +688,9 @@ def simulate_follower_data(username, spaces_count, models_count, total_commits):
|
|
692 |
|
693 |
return fig
|
694 |
|
695 |
-
# Function to create ranking position visualization
|
696 |
-
def create_ranking_chart(username, overall_rank, spaces_rank, models_rank):
|
697 |
-
if not (overall_rank or spaces_rank or models_rank):
|
698 |
-
return None
|
699 |
-
|
700 |
-
# Create a horizontal bar chart for rankings with improved styling
|
701 |
-
fig, ax = plt.subplots(figsize=(12, 5), facecolor='#F8F9FA')
|
702 |
-
|
703 |
-
categories = []
|
704 |
-
positions = []
|
705 |
-
colors = []
|
706 |
-
rank_values = []
|
707 |
-
|
708 |
-
if overall_rank:
|
709 |
-
categories.append('Overall')
|
710 |
-
positions.append(101 - overall_rank) # Invert rank for visualization (higher is better)
|
711 |
-
colors.append('#673AB7')
|
712 |
-
rank_values.append(overall_rank)
|
713 |
-
|
714 |
-
if spaces_rank:
|
715 |
-
categories.append('Spaces')
|
716 |
-
positions.append(101 - spaces_rank)
|
717 |
-
colors.append('#2196F3')
|
718 |
-
rank_values.append(spaces_rank)
|
719 |
-
|
720 |
-
if models_rank:
|
721 |
-
categories.append('Models')
|
722 |
-
positions.append(101 - models_rank)
|
723 |
-
colors.append('#FF9800')
|
724 |
-
rank_values.append(models_rank)
|
725 |
-
|
726 |
-
# Create horizontal bars with enhanced styling
|
727 |
-
bars = ax.barh(categories, positions, color=colors, alpha=0.8, height=0.6,
|
728 |
-
edgecolor='white', linewidth=1.5)
|
729 |
-
|
730 |
-
# Add rank values as text with improved styling
|
731 |
-
for i, bar in enumerate(bars):
|
732 |
-
ax.text(bar.get_width() + 2, bar.get_y() + bar.get_height()/2,
|
733 |
-
f'Rank #{rank_values[i]}', va='center', fontsize=12,
|
734 |
-
fontweight='bold', color=colors[i])
|
735 |
-
|
736 |
-
# Set chart properties with enhanced styling
|
737 |
-
ax.set_xlim(0, 105)
|
738 |
-
ax.set_title(f"Ranking Positions for {username} (Top 100)", fontsize=18, pad=20, fontweight='bold')
|
739 |
-
ax.set_xlabel("Percentile (higher is better)", fontsize=14, labelpad=10)
|
740 |
-
|
741 |
-
# Add explanatory text
|
742 |
-
ax.text(50, -0.6, "← Lower rank (higher number) | Higher rank (lower number) →",
|
743 |
-
ha='center', va='center', fontsize=10, fontweight='bold', color='#666666')
|
744 |
-
|
745 |
-
# Add a vertical line at 90th percentile to highlight top 10 with improved styling
|
746 |
-
ax.axvline(x=90, color='#FF5252', linestyle='--', alpha=0.7, linewidth=2)
|
747 |
-
ax.text(92, len(categories)/2, 'Top 10', color='#D32F2F', fontsize=12,
|
748 |
-
rotation=90, va='center', fontweight='bold')
|
749 |
-
|
750 |
-
# Style the chart borders and background
|
751 |
-
ax.spines['top'].set_visible(False)
|
752 |
-
ax.spines['right'].set_visible(False)
|
753 |
-
ax.spines['left'].set_linewidth(0.5)
|
754 |
-
ax.spines['bottom'].set_linewidth(0.5)
|
755 |
-
|
756 |
-
# Adjust tick parameters for better look
|
757 |
-
ax.tick_params(axis='x', labelsize=12)
|
758 |
-
ax.tick_params(axis='y', labelsize=14, pad=5)
|
759 |
-
|
760 |
-
# Add grid for better readability
|
761 |
-
ax.grid(axis='x', linestyle='--', alpha=0.5, color='#CCCCCC')
|
762 |
-
ax.set_axisbelow(True) # Grid lines behind bars
|
763 |
-
|
764 |
-
# Invert x-axis to show ranking position more intuitively
|
765 |
-
ax.invert_xaxis()
|
766 |
-
|
767 |
-
plt.tight_layout()
|
768 |
-
return fig
|
769 |
-
|
770 |
# Fetch trending accounts with a loading spinner (do this once at the beginning)
|
771 |
-
with st.spinner("Loading
|
772 |
-
trending_accounts, top_owners_spaces, top_owners_models, user_stats =
|
773 |
|
774 |
# Sidebar
|
775 |
with st.sidebar:
|
@@ -777,14 +698,14 @@ with st.sidebar:
|
|
777 |
|
778 |
# Create tabs for rankings
|
779 |
tab1, tab2 = st.tabs([
|
780 |
-
"Top
|
781 |
-
"
|
782 |
])
|
783 |
|
784 |
with tab1:
|
785 |
-
# Show combined trending accounts list
|
786 |
-
st.markdown('<div class="subheader"><h3>🔥 Top 100 Contributors
|
787 |
-
st.markdown('<p style="font-size: 0.9rem; color: #666; margin-bottom: 10px;">
|
788 |
|
789 |
# Create a data frame for the table
|
790 |
if user_stats:
|
@@ -802,15 +723,14 @@ with st.sidebar:
|
|
802 |
|
803 |
overall_data.append([
|
804 |
f"{rank_display}{stat['username']}",
|
805 |
-
str(stat['estimated_commits']),
|
806 |
str(stat['models']),
|
807 |
str(stat['spaces']),
|
808 |
-
str(stat['
|
809 |
])
|
810 |
|
811 |
ranking_data_overall = pd.DataFrame(
|
812 |
overall_data,
|
813 |
-
columns=["Contributor", "
|
814 |
)
|
815 |
ranking_data_overall.index = ranking_data_overall.index + 1 # Start index from 1 for ranking
|
816 |
|
@@ -819,10 +739,9 @@ with st.sidebar:
|
|
819 |
height=900, # Pixel height chosen so that roughly 30 rows are visible
|
820 |
column_config={
|
821 |
"Contributor": st.column_config.TextColumn("Contributor"),
|
822 |
-
"Total Commits": st.column_config.TextColumn("Total Commits"),
|
823 |
"Models": st.column_config.TextColumn("Models"),
|
824 |
"Spaces": st.column_config.TextColumn("Spaces"),
|
825 |
-
"
|
826 |
},
|
827 |
use_container_width=True,
|
828 |
hide_index=False
|
@@ -939,11 +858,9 @@ if username:
|
|
939 |
# Create a header card with contributor info
|
940 |
header_col1, header_col2 = st.columns([1, 2])
|
941 |
with header_col1:
|
942 |
-
commits_display = f"Est. Commits: {user_stat['estimated_commits']}" if user_stat else "Est. Commits: N/A"
|
943 |
st.markdown(f'<div style="background-color: #E3F2FD; padding: 20px; border-radius: 10px; border-left: 5px solid #1E88E5;">'
|
944 |
f'<h2 style="color: #1E88E5;">👤 {username}</h2>'
|
945 |
f'<p style="font-size: 16px;">Analyzing contributions for {selected_year}</p>'
|
946 |
-
f'<p style="font-size: 14px; font-weight: bold;">{commits_display}</p>'
|
947 |
f'<p><a href="https://huggingface.co/{username}" target="_blank" style="color: #1E88E5; font-weight: bold;">View Profile</a></p>'
|
948 |
f'</div>', unsafe_allow_html=True)
|
949 |
|
@@ -952,7 +869,7 @@ if username:
|
|
952 |
st.markdown(f'<div style="background-color: #F3E5F5; padding: 20px; border-radius: 10px; border-left: 5px solid #9C27B0;">'
|
953 |
f'<h3 style="color: #9C27B0;">About This Analysis</h3>'
|
954 |
f'<p>This dashboard analyzes {username}\'s contributions to Hugging Face in {selected_year}, including models, datasets, and spaces.</p>'
|
955 |
-
f'<p style="font-style: italic; font-size: 12px;">*
|
956 |
f'</div>', unsafe_allow_html=True)
|
957 |
|
958 |
with st.spinner(f"Fetching detailed contribution data for {username}..."):
|
@@ -970,7 +887,8 @@ if username:
|
|
970 |
|
971 |
# Create a prominent ranking display
|
972 |
st.markdown(f'<div style="background-color: #FFF8E1; padding: 20px; border-radius: 10px; border-left: 5px solid #FFC107; margin: 1rem 0;">'
|
973 |
-
f'<h2 style="color: #FFA000; text-align: center;"
|
|
|
974 |
f'</div>', unsafe_allow_html=True)
|
975 |
|
976 |
# Find user in spaces ranking
|
@@ -987,10 +905,56 @@ if username:
|
|
987 |
models_count = count
|
988 |
break
|
989 |
|
990 |
-
# Display ranking visualization
|
991 |
-
|
992 |
-
|
993 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
994 |
|
995 |
# Create a dictionary to store commits by type
|
996 |
commits_by_type = {}
|
@@ -1163,12 +1127,16 @@ if username:
|
|
1163 |
f'</ul>', unsafe_allow_html=True)
|
1164 |
|
1165 |
# Add ranking context if available
|
1166 |
-
if
|
1167 |
-
percentile = 100 - overall_rank
|
1168 |
st.markdown(f'<div style="margin-top: 20px;">'
|
1169 |
-
f'<h3 style="color: #1E88E5; border-bottom: 1px solid #E0E0E0; padding-bottom: 10px;">
|
1170 |
-
f'<ul style="list-style-type: none; padding-left: 5px;">'
|
1171 |
-
|
|
|
|
|
|
|
|
|
|
|
1172 |
|
1173 |
badge_html = '<div style="margin: 20px 0;">'
|
1174 |
|
|
|
10 |
import requests
|
11 |
from collections import Counter
|
12 |
import numpy as np
|
13 |
+
from streamlit.runtime.scriptrunner import add_script_run_ctx
|
14 |
|
15 |
st.set_page_config(page_title="HF Contributions", layout="wide", initial_sidebar_state="expanded")
|
16 |
|
|
|
214 |
"score": 0
|
215 |
}
|
216 |
|
217 |
+
# Enhanced function to get trending accounts (simplified without commit fetching)
|
218 |
@st.cache_data(ttl=3600) # Cache for 1 hour
|
219 |
+
def get_trending_accounts_simple(limit=100):
|
220 |
try:
|
221 |
# First, get top accounts by model/space count
|
222 |
spaces_response = requests.get("https://huggingface.co/api/spaces",
|
|
|
265 |
for owner, _ in top_model_owners[:100]:
|
266 |
unique_users.add(owner)
|
267 |
|
268 |
+
# For now, use a simple ranking based on total repos
|
269 |
+
# (We'll fetch commits only for the selected user)
|
|
|
|
|
|
|
|
|
270 |
user_stats = []
|
271 |
+
for user in unique_users:
|
272 |
+
models_count = next((count for owner, count in top_model_owners if owner == user), 0)
|
273 |
+
spaces_count = next((count for owner, count in top_space_owners if owner == user), 0)
|
274 |
+
|
275 |
+
user_stats.append({
|
276 |
+
"username": user,
|
277 |
+
"models": models_count,
|
278 |
+
"spaces": spaces_count,
|
279 |
+
"datasets": 0, # We'll skip datasets for initial ranking
|
280 |
+
"estimated_commits": 0, # Will be calculated later for selected user
|
281 |
+
"score": models_count + spaces_count # Temporary score for initial display
|
282 |
+
})
|
|
|
|
|
|
|
283 |
|
284 |
+
# Sort by total repo count for initial display
|
285 |
user_stats.sort(key=lambda x: x["score"], reverse=True)
|
286 |
|
287 |
# Extract rankings
|
|
|
352 |
for i in range(0, len(repo_ids), chunk_size):
|
353 |
chunk = repo_ids[i:i + chunk_size]
|
354 |
with ThreadPoolExecutor(max_workers=min(5, len(chunk))) as executor:
|
355 |
+
future_to_repo = {}
|
356 |
+
for repo_id in chunk:
|
357 |
+
future = executor.submit(fetch_commits_for_repo, repo_id, k, username, selected_year)
|
358 |
+
add_script_run_ctx(future)
|
359 |
+
future_to_repo[future] = repo_id
|
360 |
+
|
361 |
for future in as_completed(future_to_repo):
|
362 |
repo_commits, repo_count = future.result()
|
363 |
if repo_commits: # Only extend if we got commits
|
|
|
688 |
|
689 |
return fig
|
690 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
691 |
# Fetch trending accounts with a loading spinner (do this once at the beginning)
|
692 |
+
with st.spinner("Loading top contributors..."):
|
693 |
+
trending_accounts, top_owners_spaces, top_owners_models, user_stats = get_trending_accounts_simple(limit=100)
|
694 |
|
695 |
# Sidebar
|
696 |
with st.sidebar:
|
|
|
698 |
|
699 |
# Create tabs for rankings
|
700 |
tab1, tab2 = st.tabs([
|
701 |
+
"Top Contributors",
|
702 |
+
"Repository Rankings"
|
703 |
])
|
704 |
|
705 |
with tab1:
|
706 |
+
# Show combined trending accounts list
|
707 |
+
st.markdown('<div class="subheader"><h3>🔥 Top 100 Contributors</h3></div>', unsafe_allow_html=True)
|
708 |
+
st.markdown('<p style="font-size: 0.9rem; color: #666; margin-bottom: 10px;">Initial ranking by total repositories. Select a user to see commit-based analysis.</p>', unsafe_allow_html=True)
|
709 |
|
710 |
# Create a data frame for the table
|
711 |
if user_stats:
|
|
|
723 |
|
724 |
overall_data.append([
|
725 |
f"{rank_display}{stat['username']}",
|
|
|
726 |
str(stat['models']),
|
727 |
str(stat['spaces']),
|
728 |
+
str(stat['models'] + stat['spaces']) # Total repos
|
729 |
])
|
730 |
|
731 |
ranking_data_overall = pd.DataFrame(
|
732 |
overall_data,
|
733 |
+
columns=["Contributor", "Models", "Spaces", "Total Repos"]
|
734 |
)
|
735 |
ranking_data_overall.index = ranking_data_overall.index + 1 # Start index from 1 for ranking
|
736 |
|
|
|
739 |
height=900, # Pixel height chosen so that roughly 30 rows are visible
|
740 |
column_config={
|
741 |
"Contributor": st.column_config.TextColumn("Contributor"),
|
|
|
742 |
"Models": st.column_config.TextColumn("Models"),
|
743 |
"Spaces": st.column_config.TextColumn("Spaces"),
|
744 |
+
"Total Repos": st.column_config.TextColumn("Total Repos")
|
745 |
},
|
746 |
use_container_width=True,
|
747 |
hide_index=False
|
|
|
858 |
# Create a header card with contributor info
|
859 |
header_col1, header_col2 = st.columns([1, 2])
|
860 |
with header_col1:
|
|
|
861 |
st.markdown(f'<div style="background-color: #E3F2FD; padding: 20px; border-radius: 10px; border-left: 5px solid #1E88E5;">'
|
862 |
f'<h2 style="color: #1E88E5;">👤 {username}</h2>'
|
863 |
f'<p style="font-size: 16px;">Analyzing contributions for {selected_year}</p>'
|
|
|
864 |
f'<p><a href="https://huggingface.co/{username}" target="_blank" style="color: #1E88E5; font-weight: bold;">View Profile</a></p>'
|
865 |
f'</div>', unsafe_allow_html=True)
|
866 |
|
|
|
869 |
st.markdown(f'<div style="background-color: #F3E5F5; padding: 20px; border-radius: 10px; border-left: 5px solid #9C27B0;">'
|
870 |
f'<h3 style="color: #9C27B0;">About This Analysis</h3>'
|
871 |
f'<p>This dashboard analyzes {username}\'s contributions to Hugging Face in {selected_year}, including models, datasets, and spaces.</p>'
|
872 |
+
f'<p style="font-style: italic; font-size: 12px;">* Detailed commit analysis will be calculated after selection.</p>'
|
873 |
f'</div>', unsafe_allow_html=True)
|
874 |
|
875 |
with st.spinner(f"Fetching detailed contribution data for {username}..."):
|
|
|
887 |
|
888 |
# Create a prominent ranking display
|
889 |
st.markdown(f'<div style="background-color: #FFF8E1; padding: 20px; border-radius: 10px; border-left: 5px solid #FFC107; margin: 1rem 0;">'
|
890 |
+
f'<h2 style="color: #FFA000; text-align: center;">π Featured Contributor</h2>'
|
891 |
+
f'<p style="text-align: center; font-size: 16px;">Analyzing detailed commit statistics...</p>'
|
892 |
f'</div>', unsafe_allow_html=True)
|
893 |
|
894 |
# Find user in spaces ranking
|
|
|
905 |
models_count = count
|
906 |
break
|
907 |
|
908 |
+
# Display ranking visualization only if user has rankings
|
909 |
+
if spaces_rank or models_rank:
|
910 |
+
# Create custom ranking chart for spaces and models only
|
911 |
+
fig, ax = plt.subplots(figsize=(12, 5), facecolor='#F8F9FA')
|
912 |
+
|
913 |
+
categories = []
|
914 |
+
positions = []
|
915 |
+
colors = []
|
916 |
+
rank_values = []
|
917 |
+
|
918 |
+
if spaces_rank:
|
919 |
+
categories.append('Spaces')
|
920 |
+
positions.append(101 - spaces_rank)
|
921 |
+
colors.append('#2196F3')
|
922 |
+
rank_values.append(spaces_rank)
|
923 |
+
|
924 |
+
if models_rank:
|
925 |
+
categories.append('Models')
|
926 |
+
positions.append(101 - models_rank)
|
927 |
+
colors.append('#FF9800')
|
928 |
+
rank_values.append(models_rank)
|
929 |
+
|
930 |
+
if categories: # Only create chart if there are rankings
|
931 |
+
bars = ax.barh(categories, positions, color=colors, alpha=0.8, height=0.6,
|
932 |
+
edgecolor='white', linewidth=1.5)
|
933 |
+
|
934 |
+
for i, bar in enumerate(bars):
|
935 |
+
ax.text(bar.get_width() + 2, bar.get_y() + bar.get_height()/2,
|
936 |
+
f'Rank #{rank_values[i]}', va='center', fontsize=12,
|
937 |
+
fontweight='bold', color=colors[i])
|
938 |
+
|
939 |
+
ax.set_xlim(0, 105)
|
940 |
+
ax.set_title(f"Repository Rankings for {username} (Top 100)", fontsize=18, pad=20, fontweight='bold')
|
941 |
+
ax.set_xlabel("Percentile (higher is better)", fontsize=14, labelpad=10)
|
942 |
+
|
943 |
+
ax.spines['top'].set_visible(False)
|
944 |
+
ax.spines['right'].set_visible(False)
|
945 |
+
ax.spines['left'].set_linewidth(0.5)
|
946 |
+
ax.spines['bottom'].set_linewidth(0.5)
|
947 |
+
|
948 |
+
ax.tick_params(axis='x', labelsize=12)
|
949 |
+
ax.tick_params(axis='y', labelsize=14, pad=5)
|
950 |
+
|
951 |
+
ax.grid(axis='x', linestyle='--', alpha=0.5, color='#CCCCCC')
|
952 |
+
ax.set_axisbelow(True)
|
953 |
+
|
954 |
+
ax.invert_xaxis()
|
955 |
+
|
956 |
+
plt.tight_layout()
|
957 |
+
st.pyplot(fig)
|
958 |
|
959 |
# Create a dictionary to store commits by type
|
960 |
commits_by_type = {}
|
|
|
1127 |
f'</ul>', unsafe_allow_html=True)
|
1128 |
|
1129 |
# Add ranking context if available
|
1130 |
+
if spaces_rank or models_rank:
|
|
|
1131 |
st.markdown(f'<div style="margin-top: 20px;">'
|
1132 |
+
f'<h3 style="color: #1E88E5; border-bottom: 1px solid #E0E0E0; padding-bottom: 10px;">Repository Rankings</h3>'
|
1133 |
+
f'<ul style="list-style-type: none; padding-left: 5px;">', unsafe_allow_html=True)
|
1134 |
+
|
1135 |
+
if spaces_rank:
|
1136 |
+
st.markdown(f'<li style="margin: 15px 0; font-size: 16px;">π <strong>Spaces Ranking:</strong> #{spaces_rank} with {spaces_count} spaces</li>', unsafe_allow_html=True)
|
1137 |
+
|
1138 |
+
if models_rank:
|
1139 |
+
st.markdown(f'<li style="margin: 15px 0; font-size: 16px;">🧠 <strong>Models Ranking:</strong> #{models_rank} with {models_count} models</li>', unsafe_allow_html=True)
|
1140 |
|
1141 |
badge_html = '<div style="margin: 20px 0;">'
|
1142 |
|