Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -214,9 +214,9 @@ def get_user_commit_stats(username):
|
|
214 |
"score": 0
|
215 |
}
|
216 |
|
217 |
-
# Enhanced function to get trending accounts
|
218 |
-
@st.cache_data(ttl=3600) # Cache for 1 hour
|
219 |
-
def
|
220 |
try:
|
221 |
# First, get top accounts by model/space count
|
222 |
spaces_response = requests.get("https://huggingface.co/api/spaces",
|
@@ -227,10 +227,9 @@ def get_trending_accounts_simple(limit=100):
|
|
227 |
timeout=30)
|
228 |
|
229 |
# Process spaces data
|
230 |
-
|
231 |
if spaces_response.status_code == 200:
|
232 |
spaces = spaces_response.json()
|
233 |
-
owner_counts_spaces = {}
|
234 |
for space in spaces:
|
235 |
if '/' in space.get('id', ''):
|
236 |
owner, _ = space.get('id', '').split('/', 1)
|
@@ -239,14 +238,11 @@ def get_trending_accounts_simple(limit=100):
|
|
239 |
|
240 |
if owner != 'None':
|
241 |
owner_counts_spaces[owner] = owner_counts_spaces.get(owner, 0) + 1
|
242 |
-
|
243 |
-
top_space_owners = sorted(owner_counts_spaces.items(), key=lambda x: x[1], reverse=True)[:limit]
|
244 |
|
245 |
# Process models data
|
246 |
-
|
247 |
if models_response.status_code == 200:
|
248 |
models = models_response.json()
|
249 |
-
owner_counts_models = {}
|
250 |
for model in models:
|
251 |
if '/' in model.get('id', ''):
|
252 |
owner, _ = model.get('id', '').split('/', 1)
|
@@ -255,51 +251,56 @@ def get_trending_accounts_simple(limit=100):
|
|
255 |
|
256 |
if owner != 'None':
|
257 |
owner_counts_models[owner] = owner_counts_models.get(owner, 0) + 1
|
258 |
-
|
259 |
-
top_model_owners = sorted(owner_counts_models.items(), key=lambda x: x[1], reverse=True)[:limit]
|
260 |
|
261 |
-
# Get
|
262 |
-
|
263 |
-
for owner,
|
264 |
-
|
265 |
-
for owner,
|
266 |
-
|
|
|
|
|
|
|
|
|
267 |
|
268 |
-
#
|
269 |
-
# (We'll fetch commits only for the selected user)
|
270 |
user_stats = []
|
271 |
-
for user in unique_users:
|
272 |
-
models_count = next((count for owner, count in top_model_owners if owner == user), 0)
|
273 |
-
spaces_count = next((count for owner, count in top_space_owners if owner == user), 0)
|
274 |
-
|
275 |
-
user_stats.append({
|
276 |
-
"username": user,
|
277 |
-
"models": models_count,
|
278 |
-
"spaces": spaces_count,
|
279 |
-
"datasets": 0, # We'll skip datasets for initial ranking
|
280 |
-
"estimated_commits": 0, # Will be calculated later for selected user
|
281 |
-
"score": models_count + spaces_count # Temporary score for initial display
|
282 |
-
})
|
283 |
|
284 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
285 |
user_stats.sort(key=lambda x: x["score"], reverse=True)
|
286 |
|
287 |
-
#
|
288 |
-
|
|
|
289 |
|
290 |
-
# Create
|
291 |
-
spaces_rank_data = [(stat["username"], stat["spaces"]) for stat in
|
292 |
-
spaces_rank_data.sort(key=lambda x: x[1], reverse=True)
|
293 |
-
spaces_rank_data = spaces_rank_data[:limit]
|
294 |
|
295 |
-
models_rank_data = [(stat["username"], stat["models"]) for stat in
|
296 |
-
models_rank_data.sort(key=lambda x: x[1], reverse=True)
|
297 |
-
models_rank_data = models_rank_data[:limit]
|
298 |
|
299 |
-
return trending_authors, spaces_rank_data, models_rank_data,
|
300 |
|
301 |
except Exception as e:
|
302 |
-
|
303 |
fallback_authors = ["ritvik77", "facebook", "google", "stabilityai", "Salesforce", "tiiuae", "bigscience"]
|
304 |
fallback_stats = [{"username": author, "models": 0, "spaces": 0, "datasets": 0, "estimated_commits": 0, "score": 0} for author in fallback_authors]
|
305 |
return fallback_authors, [(author, 0) for author in fallback_authors], [(author, 0) for author in fallback_authors], fallback_stats
|
@@ -688,9 +689,33 @@ def simulate_follower_data(username, spaces_count, models_count, total_commits):
|
|
688 |
|
689 |
return fig
|
690 |
|
691 |
-
# Fetch trending accounts with a loading spinner
|
692 |
-
|
693 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
694 |
|
695 |
# Sidebar
|
696 |
with st.sidebar:
|
@@ -698,14 +723,14 @@ with st.sidebar:
|
|
698 |
|
699 |
# Create tabs for rankings
|
700 |
tab1, tab2 = st.tabs([
|
701 |
-
"Top
|
702 |
-
"
|
703 |
])
|
704 |
|
705 |
with tab1:
|
706 |
-
# Show combined trending accounts list
|
707 |
-
st.markdown('<div class="subheader"><h3>🔥 Top 100 Contributors</h3></div>', unsafe_allow_html=True)
|
708 |
-
st.markdown('<p style="font-size: 0.9rem; color: #666; margin-bottom: 10px;">
|
709 |
|
710 |
# Create a data frame for the table
|
711 |
if user_stats:
|
@@ -723,14 +748,15 @@ with st.sidebar:
|
|
723 |
|
724 |
overall_data.append([
|
725 |
f"{rank_display}{stat['username']}",
|
|
|
726 |
str(stat['models']),
|
727 |
str(stat['spaces']),
|
728 |
-
str(stat['
|
729 |
])
|
730 |
|
731 |
ranking_data_overall = pd.DataFrame(
|
732 |
overall_data,
|
733 |
-
columns=["Contributor", "Models", "Spaces", "
|
734 |
)
|
735 |
ranking_data_overall.index = ranking_data_overall.index + 1 # Start index from 1 for ranking
|
736 |
|
@@ -739,17 +765,19 @@ with st.sidebar:
|
|
739 |
height=900, # Height in pixels, set so that roughly 30 rows are visible
|
740 |
column_config={
|
741 |
"Contributor": st.column_config.TextColumn("Contributor"),
|
|
|
742 |
"Models": st.column_config.TextColumn("Models"),
|
743 |
"Spaces": st.column_config.TextColumn("Spaces"),
|
744 |
-
"
|
745 |
},
|
746 |
use_container_width=True,
|
747 |
hide_index=False
|
748 |
)
|
749 |
|
750 |
with tab2:
|
751 |
-
# Show accounts sorted by Spaces count
|
752 |
-
st.markdown('<div class="subheader"><h3>๐
|
|
|
753 |
|
754 |
# Create a data frame for the Spaces table with medals for top 3
|
755 |
if top_owners_spaces:
|
@@ -779,8 +807,9 @@ with st.sidebar:
|
|
779 |
hide_index=False
|
780 |
)
|
781 |
|
782 |
-
# Display accounts sorted by Models count
|
783 |
-
st.markdown('<div class="subheader"><h3>๐ง
|
|
|
784 |
|
785 |
# Create a data frame for the Models table with medals for top 3
|
786 |
if top_owners_models:
|
@@ -869,7 +898,7 @@ if username:
|
|
869 |
st.markdown(f'<div style="background-color: #F3E5F5; padding: 20px; border-radius: 10px; border-left: 5px solid #9C27B0;">'
|
870 |
f'<h3 style="color: #9C27B0;">About This Analysis</h3>'
|
871 |
f'<p>This dashboard analyzes {username}\'s contributions to Hugging Face in {selected_year}, including models, datasets, and spaces.</p>'
|
872 |
-
f'<p style="font-style: italic; font-size: 12px;">*
|
873 |
f'</div>', unsafe_allow_html=True)
|
874 |
|
875 |
with st.spinner(f"Fetching detailed contribution data for {username}..."):
|
@@ -887,8 +916,7 @@ if username:
|
|
887 |
|
888 |
# Create a prominent ranking display
|
889 |
st.markdown(f'<div style="background-color: #FFF8E1; padding: 20px; border-radius: 10px; border-left: 5px solid #FFC107; margin: 1rem 0;">'
|
890 |
-
f'<h2 style="color: #FFA000; text-align: center;"
|
891 |
-
f'<p style="text-align: center; font-size: 16px;">Analyzing detailed commit statistics...</p>'
|
892 |
f'</div>', unsafe_allow_html=True)
|
893 |
|
894 |
# Find user in spaces ranking
|
@@ -997,21 +1025,21 @@ if username:
|
|
997 |
|
998 |
# Process repos in chunks
|
999 |
chunk_size = 5
|
1000 |
-
total_commits = 0
|
1001 |
all_commit_dates = []
|
1002 |
|
1003 |
for i in range(0, len(repo_ids), chunk_size):
|
1004 |
chunk = repo_ids[i:i + chunk_size]
|
1005 |
with ThreadPoolExecutor(max_workers=min(5, len(chunk))) as executor:
|
1006 |
-
future_to_repo = {
|
1007 |
-
|
1008 |
-
|
1009 |
-
|
|
|
|
|
1010 |
for future in as_completed(future_to_repo):
|
1011 |
repo_commits, repo_count = future.result()
|
1012 |
if repo_commits:
|
1013 |
all_commit_dates.extend(repo_commits)
|
1014 |
-
total_commits += repo_count
|
1015 |
|
1016 |
# Update progress for all types
|
1017 |
progress_per_type = 1.0 / len(types_to_fetch)
|
@@ -1020,7 +1048,7 @@ if username:
|
|
1020 |
progress_bar.progress(overall_progress)
|
1021 |
|
1022 |
commits_by_type[kind] = all_commit_dates
|
1023 |
-
commit_counts_by_type[kind] =
|
1024 |
|
1025 |
except Exception as e:
|
1026 |
st.warning(f"Error fetching {kind}s for {username}: {str(e)}")
|
|
|
214 |
"score": 0
|
215 |
}
|
216 |
|
217 |
+
# Enhanced function to get trending accounts with commit-based ranking for top 100
|
218 |
+
@st.cache_data(ttl=3600, show_spinner=False) # Cache for 1 hour
|
219 |
+
def get_trending_accounts_with_commits(limit=100):
|
220 |
try:
|
221 |
# First, get top accounts by model/space count
|
222 |
spaces_response = requests.get("https://huggingface.co/api/spaces",
|
|
|
227 |
timeout=30)
|
228 |
|
229 |
# Process spaces data
|
230 |
+
owner_counts_spaces = {}
|
231 |
if spaces_response.status_code == 200:
|
232 |
spaces = spaces_response.json()
|
|
|
233 |
for space in spaces:
|
234 |
if '/' in space.get('id', ''):
|
235 |
owner, _ = space.get('id', '').split('/', 1)
|
|
|
238 |
|
239 |
if owner != 'None':
|
240 |
owner_counts_spaces[owner] = owner_counts_spaces.get(owner, 0) + 1
|
|
|
|
|
241 |
|
242 |
# Process models data
|
243 |
+
owner_counts_models = {}
|
244 |
if models_response.status_code == 200:
|
245 |
models = models_response.json()
|
|
|
246 |
for model in models:
|
247 |
if '/' in model.get('id', ''):
|
248 |
owner, _ = model.get('id', '').split('/', 1)
|
|
|
251 |
|
252 |
if owner != 'None':
|
253 |
owner_counts_models[owner] = owner_counts_models.get(owner, 0) + 1
|
|
|
|
|
254 |
|
255 |
+
# Get top accounts by total repos (models + spaces)
|
256 |
+
top_accounts = {}
|
257 |
+
for owner, count in owner_counts_models.items():
|
258 |
+
top_accounts[owner] = count + owner_counts_spaces.get(owner, 0)
|
259 |
+
for owner, count in owner_counts_spaces.items():
|
260 |
+
if owner not in top_accounts:
|
261 |
+
top_accounts[owner] = count
|
262 |
+
|
263 |
+
# Sort and get top 200 accounts for commit analysis
|
264 |
+
sorted_accounts = sorted(top_accounts.items(), key=lambda x: x[1], reverse=True)[:200]
|
265 |
|
266 |
+
# Fetch commit stats for top accounts
|
|
|
267 |
user_stats = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
268 |
|
269 |
+
for username, _ in sorted_accounts:
|
270 |
+
try:
|
271 |
+
# Get commit stats
|
272 |
+
stats = get_user_commit_stats(username)
|
273 |
+
stats["models"] = owner_counts_models.get(username, 0)
|
274 |
+
stats["spaces"] = owner_counts_spaces.get(username, 0)
|
275 |
+
user_stats.append(stats)
|
276 |
+
except:
|
277 |
+
user_stats.append({
|
278 |
+
"username": username,
|
279 |
+
"models": owner_counts_models.get(username, 0),
|
280 |
+
"spaces": owner_counts_spaces.get(username, 0),
|
281 |
+
"datasets": 0,
|
282 |
+
"estimated_commits": 0,
|
283 |
+
"score": 0
|
284 |
+
})
|
285 |
+
|
286 |
+
# Sort by commits (score) for overall ranking
|
287 |
user_stats.sort(key=lambda x: x["score"], reverse=True)
|
288 |
|
289 |
+
# Get top 100 by commits
|
290 |
+
top_100_by_commits = user_stats[:limit]
|
291 |
+
trending_authors = [stat["username"] for stat in top_100_by_commits]
|
292 |
|
293 |
+
# Create rankings within top 100 - sorted by spaces and models count
|
294 |
+
spaces_rank_data = [(stat["username"], stat["spaces"]) for stat in top_100_by_commits if stat["spaces"] > 0]
|
295 |
+
spaces_rank_data.sort(key=lambda x: x[1], reverse=True)
|
|
|
296 |
|
297 |
+
models_rank_data = [(stat["username"], stat["models"]) for stat in top_100_by_commits if stat["models"] > 0]
|
298 |
+
models_rank_data.sort(key=lambda x: x[1], reverse=True)
|
|
|
299 |
|
300 |
+
return trending_authors, spaces_rank_data, models_rank_data, top_100_by_commits
|
301 |
|
302 |
except Exception as e:
|
303 |
+
# Return fallback data
|
304 |
fallback_authors = ["ritvik77", "facebook", "google", "stabilityai", "Salesforce", "tiiuae", "bigscience"]
|
305 |
fallback_stats = [{"username": author, "models": 0, "spaces": 0, "datasets": 0, "estimated_commits": 0, "score": 0} for author in fallback_authors]
|
306 |
return fallback_authors, [(author, 0) for author in fallback_authors], [(author, 0) for author in fallback_authors], fallback_stats
|
|
|
689 |
|
690 |
return fig
|
691 |
|
692 |
+
# Fetch trending accounts with a loading spinner and progress bar
|
693 |
+
try:
|
694 |
+
with st.spinner("Loading top contributors..."):
|
695 |
+
# Add progress tracking
|
696 |
+
progress_text = st.empty()
|
697 |
+
progress_bar = st.progress(0)
|
698 |
+
progress_text.text("Fetching repository data...")
|
699 |
+
progress_bar.progress(0.3)
|
700 |
+
|
701 |
+
# Get the data
|
702 |
+
trending_accounts, top_owners_spaces, top_owners_models, user_stats = get_trending_accounts_with_commits(limit=100)
|
703 |
+
|
704 |
+
# Complete progress
|
705 |
+
progress_bar.progress(1.0)
|
706 |
+
progress_text.text("Loading complete!")
|
707 |
+
time.sleep(0.5)
|
708 |
+
|
709 |
+
# Clear progress indicators
|
710 |
+
progress_text.empty()
|
711 |
+
progress_bar.empty()
|
712 |
+
except Exception as e:
|
713 |
+
st.error(f"Error loading trending accounts: {str(e)}")
|
714 |
+
# Use fallback data
|
715 |
+
trending_accounts = ["ritvik77", "facebook", "google", "stabilityai", "Salesforce", "tiiuae", "bigscience"]
|
716 |
+
top_owners_spaces = [(author, 0) for author in trending_accounts]
|
717 |
+
top_owners_models = [(author, 0) for author in trending_accounts]
|
718 |
+
user_stats = [{"username": author, "models": 0, "spaces": 0, "datasets": 0, "estimated_commits": 0, "score": 0} for author in trending_accounts]
|
719 |
|
720 |
# Sidebar
|
721 |
with st.sidebar:
|
|
|
723 |
|
724 |
# Create tabs for rankings
|
725 |
tab1, tab2 = st.tabs([
|
726 |
+
"Top 100 by Commits",
|
727 |
+
"Space/Model Rankings"
|
728 |
])
|
729 |
|
730 |
with tab1:
|
731 |
+
# Show combined trending accounts list by commits
|
732 |
+
st.markdown('<div class="subheader"><h3>🔥 Top 100 Contributors by Commits</h3></div>', unsafe_allow_html=True)
|
733 |
+
st.markdown('<p style="font-size: 0.9rem; color: #666; margin-bottom: 10px;">Ranked by estimated total commit count</p>', unsafe_allow_html=True)
|
734 |
|
735 |
# Create a data frame for the table
|
736 |
if user_stats:
|
|
|
748 |
|
749 |
overall_data.append([
|
750 |
f"{rank_display}{stat['username']}",
|
751 |
+
str(stat['estimated_commits']),
|
752 |
str(stat['models']),
|
753 |
str(stat['spaces']),
|
754 |
+
str(stat['datasets'])
|
755 |
])
|
756 |
|
757 |
ranking_data_overall = pd.DataFrame(
|
758 |
overall_data,
|
759 |
+
columns=["Contributor", "Est. Commits", "Models", "Spaces", "Datasets"]
|
760 |
)
|
761 |
ranking_data_overall.index = ranking_data_overall.index + 1 # Start index from 1 for ranking
|
762 |
|
|
|
765 |
height=900, # Height in pixels, set so that roughly 30 rows are visible
|
766 |
column_config={
|
767 |
"Contributor": st.column_config.TextColumn("Contributor"),
|
768 |
+
"Est. Commits": st.column_config.TextColumn("Est. Commits"),
|
769 |
"Models": st.column_config.TextColumn("Models"),
|
770 |
"Spaces": st.column_config.TextColumn("Spaces"),
|
771 |
+
"Datasets": st.column_config.TextColumn("Datasets")
|
772 |
},
|
773 |
use_container_width=True,
|
774 |
hide_index=False
|
775 |
)
|
776 |
|
777 |
with tab2:
|
778 |
+
# Show accounts sorted by Spaces count (within top 100 by commits)
|
779 |
+
st.markdown('<div class="subheader"><h3>🚀 Spaces Ranking (Top 100 Contributors)</h3></div>', unsafe_allow_html=True)
|
780 |
+
st.markdown('<p style="font-size: 0.9rem; color: #666; margin-bottom: 10px;">Ranked by spaces count among top 100 contributors by commits</p>', unsafe_allow_html=True)
|
781 |
|
782 |
# Create a data frame for the Spaces table with medals for top 3
|
783 |
if top_owners_spaces:
|
|
|
807 |
hide_index=False
|
808 |
)
|
809 |
|
810 |
+
# Display accounts sorted by Models count (within top 100 by commits)
|
811 |
+
st.markdown('<div class="subheader"><h3>🧠 Models Ranking (Top 100 Contributors)</h3></div>', unsafe_allow_html=True)
|
812 |
+
st.markdown('<p style="font-size: 0.9rem; color: #666; margin-bottom: 10px;">Ranked by models count among top 100 contributors by commits</p>', unsafe_allow_html=True)
|
813 |
|
814 |
# Create a data frame for the Models table with medals for top 3
|
815 |
if top_owners_models:
|
|
|
898 |
st.markdown(f'<div style="background-color: #F3E5F5; padding: 20px; border-radius: 10px; border-left: 5px solid #9C27B0;">'
|
899 |
f'<h3 style="color: #9C27B0;">About This Analysis</h3>'
|
900 |
f'<p>This dashboard analyzes {username}\'s contributions to Hugging Face in {selected_year}, including models, datasets, and spaces.</p>'
|
901 |
+
f'<p style="font-style: italic; font-size: 12px;">* Overall rankings are based on commit count. Space/Model rankings are within top 100 contributors.</p>'
|
902 |
f'</div>', unsafe_allow_html=True)
|
903 |
|
904 |
with st.spinner(f"Fetching detailed contribution data for {username}..."):
|
|
|
916 |
|
917 |
# Create a prominent ranking display
|
918 |
st.markdown(f'<div style="background-color: #FFF8E1; padding: 20px; border-radius: 10px; border-left: 5px solid #FFC107; margin: 1rem 0;">'
|
919 |
+
f'<h2 style="color: #FFA000; text-align: center;">🏆 Ranked #{overall_rank} in Top 100 Contributors by Commits</h2>'
|
|
|
920 |
f'</div>', unsafe_allow_html=True)
|
921 |
|
922 |
# Find user in spaces ranking
|
|
|
1025 |
|
1026 |
# Process repos in chunks
|
1027 |
chunk_size = 5
|
|
|
1028 |
all_commit_dates = []
|
1029 |
|
1030 |
for i in range(0, len(repo_ids), chunk_size):
|
1031 |
chunk = repo_ids[i:i + chunk_size]
|
1032 |
with ThreadPoolExecutor(max_workers=min(5, len(chunk))) as executor:
|
1033 |
+
future_to_repo = {}
|
1034 |
+
for repo_id in chunk:
|
1035 |
+
future = executor.submit(fetch_commits_for_repo, repo_id, kind, username, selected_year)
|
1036 |
+
add_script_run_ctx(future)
|
1037 |
+
future_to_repo[future] = repo_id
|
1038 |
+
|
1039 |
for future in as_completed(future_to_repo):
|
1040 |
repo_commits, repo_count = future.result()
|
1041 |
if repo_commits:
|
1042 |
all_commit_dates.extend(repo_commits)
|
|
|
1043 |
|
1044 |
# Update progress for all types
|
1045 |
progress_per_type = 1.0 / len(types_to_fetch)
|
|
|
1048 |
progress_bar.progress(overall_progress)
|
1049 |
|
1050 |
commits_by_type[kind] = all_commit_dates
|
1051 |
+
commit_counts_by_type[kind] = len(all_commit_dates)
|
1052 |
|
1053 |
except Exception as e:
|
1054 |
st.warning(f"Error fetching {kind}s for {username}: {str(e)}")
|