openfree committed on
Commit
f949063
·
verified ·
1 Parent(s): ccdd80f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +151 -76
app.py CHANGED
@@ -146,28 +146,93 @@ def cached_list_items(username, kind):
146
  return list(api.list_spaces(author=username))
147
  return []
148
 
149
- # Function to fetch trending accounts and create stats
150
- @lru_cache(maxsize=1)
151
- def get_trending_accounts(limit=100):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
  try:
153
- trending_data = {"spaces": [], "models": []}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
 
155
- # Get spaces for stats calculation
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
  spaces_response = requests.get("https://huggingface.co/api/spaces",
157
  params={"limit": 10000},
158
  timeout=30)
159
-
160
- # Get models for stats calculation
161
  models_response = requests.get("https://huggingface.co/api/models",
162
  params={"limit": 10000},
163
  timeout=30)
164
 
165
  # Process spaces data
166
- spaces_owners = []
167
  if spaces_response.status_code == 200:
168
  spaces = spaces_response.json()
169
-
170
- # Count spaces by owner
171
  owner_counts_spaces = {}
172
  for space in spaces:
173
  if '/' in space.get('id', ''):
@@ -178,17 +243,12 @@ def get_trending_accounts(limit=100):
178
  if owner != 'None':
179
  owner_counts_spaces[owner] = owner_counts_spaces.get(owner, 0) + 1
180
 
181
- # Get top owners by count for spaces
182
- top_owners_spaces = sorted(owner_counts_spaces.items(), key=lambda x: x[1], reverse=True)[:limit]
183
- trending_data["spaces"] = top_owners_spaces
184
- spaces_owners = [owner for owner, _ in top_owners_spaces]
185
 
186
  # Process models data
187
- models_owners = []
188
  if models_response.status_code == 200:
189
  models = models_response.json()
190
-
191
- # Count models by owner
192
  owner_counts_models = {}
193
  for model in models:
194
  if '/' in model.get('id', ''):
@@ -199,47 +259,55 @@ def get_trending_accounts(limit=100):
199
  if owner != 'None':
200
  owner_counts_models[owner] = owner_counts_models.get(owner, 0) + 1
201
 
202
- # Get top owners by count for models
203
- top_owners_models = sorted(owner_counts_models.items(), key=lambda x: x[1], reverse=True)[:limit]
204
- trending_data["models"] = top_owners_models
205
- models_owners = [owner for owner, _ in top_owners_models]
206
 
207
- # Combine rankings for overall trending based on appearance in both lists
208
- combined_score = {}
209
- for i, owner in enumerate(spaces_owners):
210
- if owner not in combined_score:
211
- combined_score[owner] = 0
212
- combined_score[owner] += (limit - i) # Higher rank gives more points
213
-
214
- for i, owner in enumerate(models_owners):
215
- if owner not in combined_score:
216
- combined_score[owner] = 0
217
- combined_score[owner] += (limit - i) # Higher rank gives more points
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
218
 
219
- # Sort by combined score
220
- sorted_combined = sorted(combined_score.items(), key=lambda x: x[1], reverse=True)[:limit]
221
- trending_authors = [owner for owner, _ in sorted_combined]
 
 
 
 
 
222
 
223
- return trending_authors, trending_data["spaces"], trending_data["models"]
224
  except Exception as e:
225
  st.error(f"Error fetching trending accounts: {str(e)}")
226
  fallback_authors = ["ritvik77", "facebook", "google", "stabilityai", "Salesforce", "tiiuae", "bigscience"]
227
- return fallback_authors, [(author, 0) for author in fallback_authors], [(author, 0) for author in fallback_authors]
228
-
229
- # Rate limiting
230
- class RateLimiter:
231
- def __init__(self, calls_per_second=10):
232
- self.calls_per_second = calls_per_second
233
- self.last_call = 0
234
-
235
- def wait(self):
236
- current_time = time.time()
237
- time_since_last_call = current_time - self.last_call
238
- if time_since_last_call < (1.0 / self.calls_per_second):
239
- time.sleep((1.0 / self.calls_per_second) - time_since_last_call)
240
- self.last_call = time.time()
241
-
242
- rate_limiter = RateLimiter()
243
 
244
  # Function to fetch commits for a repository (optimized)
245
  def fetch_commits_for_repo(repo_id, repo_type, username, selected_year):
@@ -699,32 +767,29 @@ def create_ranking_chart(username, overall_rank, spaces_rank, models_rank):
699
  return fig
700
 
701
  # Fetch trending accounts with a loading spinner (do this once at the beginning)
702
- with st.spinner("Loading trending accounts..."):
703
- trending_accounts, top_owners_spaces, top_owners_models = get_trending_accounts(limit=100)
704
 
705
  # Sidebar
706
  with st.sidebar:
707
  st.markdown('<h1 style="text-align: center; color: #1E88E5;">👤 Contributor</h1>', unsafe_allow_html=True)
708
 
709
- # Create tabs for Spaces and Models rankings - ONLY SHOWING FIRST TWO TABS
710
  tab1, tab2 = st.tabs([
711
  "Top 100 Overall",
712
  "Top Spaces & Models"
713
  ])
714
 
715
  with tab1:
716
- # Show combined trending accounts list
717
- st.markdown('<div class="subheader"><h3>🔥 Top 100 Contributors</h3></div>', unsafe_allow_html=True)
 
718
 
719
  # Create a data frame for the table
720
- if trending_accounts:
721
- # Create a mapping from username to Spaces and Models rankings
722
- spaces_rank = {owner: idx+1 for idx, (owner, _) in enumerate(top_owners_spaces)}
723
- models_rank = {owner: idx+1 for idx, (owner, _) in enumerate(top_owners_models)}
724
-
725
  # Create the overall ranking dataframe with trophies for top 3
726
  overall_data = []
727
- for idx, username in enumerate(trending_accounts[:100]):
728
  # Add trophy emojis for top 3
729
  rank_display = ""
730
  if idx == 0:
@@ -734,24 +799,29 @@ with st.sidebar:
734
  elif idx == 2:
735
  rank_display = "🏆 " # Bronze trophy for 3rd place
736
 
737
- # Use strings for all rankings to avoid type conversion issues
738
- spaces_position = str(spaces_rank.get(username, "-"))
739
- models_position = str(models_rank.get(username, "-"))
740
- overall_data.append([f"{rank_display}{username}", spaces_position, models_position])
 
 
 
741
 
742
  ranking_data_overall = pd.DataFrame(
743
  overall_data,
744
- columns=["Contributor", "Spaces Rank", "Models Rank"]
745
  )
746
  ranking_data_overall.index = ranking_data_overall.index + 1 # Start index from 1 for ranking
747
 
748
  st.dataframe(
749
  ranking_data_overall,
750
- height=900, # 약 30행 정도 보이도록 픽셀 단위 높이 설정 (필요에 따라 조정 가능)
751
  column_config={
752
  "Contributor": st.column_config.TextColumn("Contributor"),
753
- "Spaces Rank": st.column_config.TextColumn("Spaces Rank"),
754
- "Models Rank": st.column_config.TextColumn("Models Rank")
 
 
755
  },
756
  use_container_width=True,
757
  hide_index=False
@@ -776,7 +846,7 @@ with st.sidebar:
776
 
777
  spaces_data.append([f"{rank_display}{owner}", count])
778
 
779
- ranking_data_spaces = pd.DataFrame(spaces_data, columns=["Contributor", "Spaces Count(Top 500 positions)"])
780
  ranking_data_spaces.index = ranking_data_spaces.index + 1 # Start index from 1 for ranking
781
 
782
  st.dataframe(
@@ -807,7 +877,7 @@ with st.sidebar:
807
 
808
  models_data.append([f"{rank_display}{owner}", count])
809
 
810
- ranking_data_models = pd.DataFrame(models_data, columns=["Contributor", "Models Count(Top 500 positions)"])
811
  ranking_data_models.index = ranking_data_models.index + 1 # Start index from 1 for ranking
812
 
813
  st.dataframe(
@@ -862,12 +932,17 @@ with st.sidebar:
862
  st.markdown(f'<h1 style="text-align: center; color: #1E88E5; margin-bottom: 2rem;">🤗 Hugging Face Contributions</h1>', unsafe_allow_html=True)
863
 
864
  if username:
 
 
 
865
  # Create a header card with contributor info
866
  header_col1, header_col2 = st.columns([1, 2])
867
  with header_col1:
 
868
  st.markdown(f'<div style="background-color: #E3F2FD; padding: 20px; border-radius: 10px; border-left: 5px solid #1E88E5;">'
869
  f'<h2 style="color: #1E88E5;">👤 {username}</h2>'
870
  f'<p style="font-size: 16px;">Analyzing contributions for {selected_year}</p>'
 
871
  f'<p><a href="https://huggingface.co/{username}" target="_blank" style="color: #1E88E5; font-weight: bold;">View Profile</a></p>'
872
  f'</div>', unsafe_allow_html=True)
873
 
@@ -876,10 +951,10 @@ if username:
876
  st.markdown(f'<div style="background-color: #F3E5F5; padding: 20px; border-radius: 10px; border-left: 5px solid #9C27B0;">'
877
  f'<h3 style="color: #9C27B0;">About This Analysis</h3>'
878
  f'<p>This dashboard analyzes {username}\'s contributions to Hugging Face in {selected_year}, including models, datasets, and spaces.</p>'
879
- f'<p style="font-style: italic; font-size: 12px;">* Some metrics like follower growth are simulated for visualization purposes.</p>'
880
  f'</div>', unsafe_allow_html=True)
881
 
882
- with st.spinner(f"Fetching contribution data for {username}..."):
883
  # Initialize variables for tracking
884
  overall_rank = None
885
  spaces_rank = None
@@ -1177,4 +1252,4 @@ else:
1177
  f'<img src="https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.svg" style="width: 200px; margin-bottom: 30px;">'
1178
  f'<h2>Welcome to Hugging Face Contributions Dashboard</h2>'
1179
  f'<p style="font-size: 1.2rem;">Please select a contributor from the sidebar to view their activity.</p>'
1180
- f'</div>', unsafe_allow_html=True)
 
146
  return list(api.list_spaces(author=username))
147
  return []
148
 
149
+ # Rate limiting
150
class RateLimiter:
    """Throttle callers so consecutive ``wait()`` returns are spaced apart.

    The minimum spacing is ``1 / calls_per_second`` seconds. A single
    instance may be shared between threads: the check-sleep-update sequence
    runs under a lock, so concurrent callers are released one at a time
    instead of all observing the same ``last_call`` and passing together.

    Attributes:
        calls_per_second: Maximum number of ``wait()`` returns per second.
        last_call: ``time.monotonic()`` reading taken when ``wait()`` last
            returned (0 before the first call).
    """

    def __init__(self, calls_per_second=10):
        """Create a limiter.

        Args:
            calls_per_second: Allowed call rate; must be greater than zero.

        Raises:
            ValueError: If ``calls_per_second`` is zero or negative, which
                would otherwise surface later as a division by zero in
                ``wait()``.
        """
        # Imported locally because the file-level import block is not part
        # of this definition.
        import threading

        if calls_per_second <= 0:
            raise ValueError("calls_per_second must be positive")
        self.calls_per_second = calls_per_second
        self.last_call = 0
        self._lock = threading.Lock()

    def wait(self):
        """Block until at least one interval has passed since the last call."""
        min_interval = 1.0 / self.calls_per_second
        # The lock is held while sleeping on purpose: waiting callers queue
        # up behind it, which is what enforces the global rate.
        with self._lock:
            # A monotonic clock cannot jump backwards, so a system clock
            # adjustment can never produce an oversized sleep.
            remaining = min_interval - (time.monotonic() - self.last_call)
            if remaining > 0:
                time.sleep(remaining)
            self.last_call = time.monotonic()
161
+
162
+ rate_limiter = RateLimiter()
163
+
164
+ # Function to fetch quick commit stats for a user (optimized for ranking)
165
@st.cache_data(ttl=3600)  # Cache for 1 hour
def get_user_commit_stats(username):
    """Fetch repository counts and an estimated commit total for a user.

    For each repository kind ("model", "dataset", "space") the user's
    repositories are listed with ``cached_list_items`` and up to five of
    them are sampled with ``cached_list_commits``. The sampled commit count
    is extrapolated to all repositories of that kind, and the per-kind
    estimates are summed.

    Args:
        username: Account name passed to ``cached_list_items``.

    Returns:
        A dict with the keys ``"username"``, ``"models"``, ``"spaces"``,
        ``"datasets"``, ``"estimated_commits"`` and ``"score"``. A kind
        whose listing fails contributes zeros. If anything unexpected
        fails, every numeric field is 0.
    """
    max_samples = 5  # Check up to 5 repos per type

    try:
        total_commits = 0
        items_count = {"model": 0, "dataset": 0, "space": 0}

        for kind in ["model", "dataset", "space"]:
            try:
                items = cached_list_items(username, kind)
                items_count[kind] = len(items)

                # Sample a few repos to estimate commit activity.
                sampled_commits = 0
                fetched = 0
                for item in items[:max_samples]:
                    try:
                        rate_limiter.wait()
                        sampled_commits += len(cached_list_commits(item.id, kind))
                        fetched += 1
                    except Exception:
                        # Best effort: a repo whose history cannot be read
                        # is left out of the sample, not counted as zero.
                        continue

                # Extrapolate this kind on its own and then add it to the
                # running total. Scaling the running total itself would
                # also multiply the commits already counted for earlier
                # kinds by this kind's ratio.
                if fetched:
                    total_commits += int(sampled_commits * len(items) / fetched)
            except Exception:
                # Best effort: a kind that cannot be listed contributes 0.
                continue

        # Calculate contribution score
        # Weight: Models=3, Spaces=2, Datasets=1, Commits=0.1
        score = (items_count["model"] * 3 +
                 items_count["space"] * 2 +
                 items_count["dataset"] * 1 +
                 total_commits * 0.1)

        return {
            "username": username,
            "models": items_count["model"],
            "spaces": items_count["space"],
            "datasets": items_count["dataset"],
            "estimated_commits": total_commits,
            "score": score
        }
    except Exception:
        return {
            "username": username,
            "models": 0,
            "spaces": 0,
            "datasets": 0,
            "estimated_commits": 0,
            "score": 0
        }
219
+
220
+ # Enhanced function to get trending accounts with commit-based ranking
221
+ @st.cache_data(ttl=3600) # Cache for 1 hour
222
+ def get_trending_accounts_with_commits(limit=100):
223
+ try:
224
+ # First, get top accounts by model/space count
225
  spaces_response = requests.get("https://huggingface.co/api/spaces",
226
  params={"limit": 10000},
227
  timeout=30)
 
 
228
  models_response = requests.get("https://huggingface.co/api/models",
229
  params={"limit": 10000},
230
  timeout=30)
231
 
232
  # Process spaces data
233
+ top_space_owners = []
234
  if spaces_response.status_code == 200:
235
  spaces = spaces_response.json()
 
 
236
  owner_counts_spaces = {}
237
  for space in spaces:
238
  if '/' in space.get('id', ''):
 
243
  if owner != 'None':
244
  owner_counts_spaces[owner] = owner_counts_spaces.get(owner, 0) + 1
245
 
246
+ top_space_owners = sorted(owner_counts_spaces.items(), key=lambda x: x[1], reverse=True)[:limit]
 
 
 
247
 
248
  # Process models data
249
+ top_model_owners = []
250
  if models_response.status_code == 200:
251
  models = models_response.json()
 
 
252
  owner_counts_models = {}
253
  for model in models:
254
  if '/' in model.get('id', ''):
 
259
  if owner != 'None':
260
  owner_counts_models[owner] = owner_counts_models.get(owner, 0) + 1
261
 
262
+ top_model_owners = sorted(owner_counts_models.items(), key=lambda x: x[1], reverse=True)[:limit]
 
 
 
263
 
264
+ # Get unique users from top 100 of both lists
265
+ unique_users = set()
266
+ for owner, _ in top_space_owners[:100]:
267
+ unique_users.add(owner)
268
+ for owner, _ in top_model_owners[:100]:
269
+ unique_users.add(owner)
270
+
271
+ # Create progress bar for fetching commit stats
272
+ progress_text = st.empty()
273
+ progress_bar = st.progress(0)
274
+ progress_text.text(f"Analyzing top contributors... (0/{len(unique_users)})")
275
+
276
+ # Fetch commit stats for all unique users
277
+ user_stats = []
278
+ with ThreadPoolExecutor(max_workers=5) as executor:
279
+ future_to_user = {executor.submit(get_user_commit_stats, user): user for user in unique_users}
280
+ completed = 0
281
+ for future in as_completed(future_to_user):
282
+ stats = future.result()
283
+ if stats["score"] > 0: # Only include users with some activity
284
+ user_stats.append(stats)
285
+ completed += 1
286
+ progress = completed / len(unique_users)
287
+ progress_bar.progress(progress)
288
+ progress_text.text(f"Analyzing top contributors... ({completed}/{len(unique_users)})")
289
+
290
+ # Clear progress indicators
291
+ progress_text.empty()
292
+ progress_bar.empty()
293
+
294
+ # Sort by score (combination of commits and repo counts)
295
+ user_stats.sort(key=lambda x: x["score"], reverse=True)
296
 
297
+ # Extract rankings
298
+ trending_authors = [stat["username"] for stat in user_stats[:limit]]
299
+
300
+ # Create detailed rankings for display
301
+ spaces_rank_data = [(stat["username"], stat["spaces"]) for stat in user_stats if stat["spaces"] > 0][:limit]
302
+ models_rank_data = [(stat["username"], stat["models"]) for stat in user_stats if stat["models"] > 0][:limit]
303
+
304
+ return trending_authors, spaces_rank_data, models_rank_data, user_stats[:limit]
305
 
 
306
  except Exception as e:
307
  st.error(f"Error fetching trending accounts: {str(e)}")
308
  fallback_authors = ["ritvik77", "facebook", "google", "stabilityai", "Salesforce", "tiiuae", "bigscience"]
309
+ fallback_stats = [{"username": author, "models": 0, "spaces": 0, "datasets": 0, "estimated_commits": 0, "score": 0} for author in fallback_authors]
310
+ return fallback_authors, [(author, 0) for author in fallback_authors], [(author, 0) for author in fallback_authors], fallback_stats
 
 
 
 
 
 
 
 
 
 
 
 
 
 
311
 
312
  # Function to fetch commits for a repository (optimized)
313
  def fetch_commits_for_repo(repo_id, repo_type, username, selected_year):
 
767
  return fig
768
 
769
  # Fetch trending accounts with a loading spinner (do this once at the beginning)
770
+ with st.spinner("Loading and analyzing top contributors... This may take a few moments."):
771
+ trending_accounts, top_owners_spaces, top_owners_models, user_stats = get_trending_accounts_with_commits(limit=100)
772
 
773
  # Sidebar
774
  with st.sidebar:
775
  st.markdown('<h1 style="text-align: center; color: #1E88E5;">👤 Contributor</h1>', unsafe_allow_html=True)
776
 
777
+ # Create tabs for rankings
778
  tab1, tab2 = st.tabs([
779
  "Top 100 Overall",
780
  "Top Spaces & Models"
781
  ])
782
 
783
  with tab1:
784
+ # Show combined trending accounts list with commit-based ranking
785
+ st.markdown('<div class="subheader"><h3>🔥 Top 100 Contributors by Score</h3></div>', unsafe_allow_html=True)
786
+ st.markdown('<p style="font-size: 0.9rem; color: #666; margin-bottom: 10px;">Ranked by contribution score (Models×3 + Spaces×2 + Datasets×1 + Commits×0.1)</p>', unsafe_allow_html=True)
787
 
788
  # Create a data frame for the table
789
+ if user_stats:
 
 
 
 
790
  # Create the overall ranking dataframe with trophies for top 3
791
  overall_data = []
792
+ for idx, stat in enumerate(user_stats[:100]):
793
  # Add trophy emojis for top 3
794
  rank_display = ""
795
  if idx == 0:
 
799
  elif idx == 2:
800
  rank_display = "🏆 " # Bronze trophy for 3rd place
801
 
802
+ overall_data.append([
803
+ f"{rank_display}{stat['username']}",
804
+ f"{stat['score']:.1f}",
805
+ str(stat['estimated_commits']),
806
+ str(stat['models']),
807
+ str(stat['spaces'])
808
+ ])
809
 
810
  ranking_data_overall = pd.DataFrame(
811
  overall_data,
812
+ columns=["Contributor", "Score", "Est. Commits", "Models", "Spaces"]
813
  )
814
  ranking_data_overall.index = ranking_data_overall.index + 1 # Start index from 1 for ranking
815
 
816
  st.dataframe(
817
  ranking_data_overall,
818
+ height=900, # 약 30행 정도 보이도록 픽셀 단위 높이 설정
819
  column_config={
820
  "Contributor": st.column_config.TextColumn("Contributor"),
821
+ "Score": st.column_config.TextColumn("Score"),
822
+ "Est. Commits": st.column_config.TextColumn("Est. Commits"),
823
+ "Models": st.column_config.TextColumn("Models"),
824
+ "Spaces": st.column_config.TextColumn("Spaces")
825
  },
826
  use_container_width=True,
827
  hide_index=False
 
846
 
847
  spaces_data.append([f"{rank_display}{owner}", count])
848
 
849
+ ranking_data_spaces = pd.DataFrame(spaces_data, columns=["Contributor", "Spaces Count"])
850
  ranking_data_spaces.index = ranking_data_spaces.index + 1 # Start index from 1 for ranking
851
 
852
  st.dataframe(
 
877
 
878
  models_data.append([f"{rank_display}{owner}", count])
879
 
880
+ ranking_data_models = pd.DataFrame(models_data, columns=["Contributor", "Models Count"])
881
  ranking_data_models.index = ranking_data_models.index + 1 # Start index from 1 for ranking
882
 
883
  st.dataframe(
 
932
  st.markdown(f'<h1 style="text-align: center; color: #1E88E5; margin-bottom: 2rem;">🤗 Hugging Face Contributions</h1>', unsafe_allow_html=True)
933
 
934
  if username:
935
+ # Find user's stats in the pre-calculated data
936
+ user_stat = next((stat for stat in user_stats if stat["username"] == username), None)
937
+
938
  # Create a header card with contributor info
939
  header_col1, header_col2 = st.columns([1, 2])
940
  with header_col1:
941
+ score_display = f"Score: {user_stat['score']:.1f}" if user_stat else "Score: N/A"
942
  st.markdown(f'<div style="background-color: #E3F2FD; padding: 20px; border-radius: 10px; border-left: 5px solid #1E88E5;">'
943
  f'<h2 style="color: #1E88E5;">👤 {username}</h2>'
944
  f'<p style="font-size: 16px;">Analyzing contributions for {selected_year}</p>'
945
+ f'<p style="font-size: 14px; font-weight: bold;">{score_display}</p>'
946
  f'<p><a href="https://huggingface.co/{username}" target="_blank" style="color: #1E88E5; font-weight: bold;">View Profile</a></p>'
947
  f'</div>', unsafe_allow_html=True)
948
 
 
951
  st.markdown(f'<div style="background-color: #F3E5F5; padding: 20px; border-radius: 10px; border-left: 5px solid #9C27B0;">'
952
  f'<h3 style="color: #9C27B0;">About This Analysis</h3>'
953
  f'<p>This dashboard analyzes {username}\'s contributions to Hugging Face in {selected_year}, including models, datasets, and spaces.</p>'
954
+ f'<p style="font-style: italic; font-size: 12px;">* Rankings are based on contribution scores combining repos and commit activity.</p>'
955
  f'</div>', unsafe_allow_html=True)
956
 
957
+ with st.spinner(f"Fetching detailed contribution data for {username}..."):
958
  # Initialize variables for tracking
959
  overall_rank = None
960
  spaces_rank = None
 
1252
  f'<img src="https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.svg" style="width: 200px; margin-bottom: 30px;">'
1253
  f'<h2>Welcome to Hugging Face Contributions Dashboard</h2>'
1254
  f'<p style="font-size: 1.2rem;">Please select a contributor from the sidebar to view their activity.</p>'
1255
+ f'</div>', unsafe_allow_html=True)