Commits-Leaderboard

Running

App Files Community

openfree committed on May 28

Commit

ce3d342

verified ·

1 Parent(s): 04927f0

Update app.py

Browse files

Files changed (1) hide show

app.py +96 -68

app.py CHANGED Viewed

@@ -214,9 +214,9 @@ def get_user_commit_stats(username):
             "score": 0
         }
-# Enhanced function to get trending accounts (simplified without commit fetching)
-@st.cache_data(ttl=3600)  # Cache for 1 hour
-def get_trending_accounts_simple(limit=100):
     try:
         # First, get top accounts by model/space count
         spaces_response = requests.get("https://huggingface.co/api/spaces",
@@ -227,10 +227,9 @@ def get_trending_accounts_simple(limit=100):
                                       timeout=30)
         # Process spaces data
-        top_space_owners = []
         if spaces_response.status_code == 200:
             spaces = spaces_response.json()
-            owner_counts_spaces = {}
             for space in spaces:
                 if '/' in space.get('id', ''):
                     owner, _ = space.get('id', '').split('/', 1)
@@ -239,14 +238,11 @@ def get_trending_accounts_simple(limit=100):
                 if owner != 'None':
                     owner_counts_spaces[owner] = owner_counts_spaces.get(owner, 0) + 1
-            top_space_owners = sorted(owner_counts_spaces.items(), key=lambda x: x[1], reverse=True)[:limit]
         # Process models data
-        top_model_owners = []
         if models_response.status_code == 200:
             models = models_response.json()
-            owner_counts_models = {}
             for model in models:
                 if '/' in model.get('id', ''):
                     owner, _ = model.get('id', '').split('/', 1)
@@ -255,51 +251,56 @@ def get_trending_accounts_simple(limit=100):
                 if owner != 'None':
                     owner_counts_models[owner] = owner_counts_models.get(owner, 0) + 1
-            top_model_owners = sorted(owner_counts_models.items(), key=lambda x: x[1], reverse=True)[:limit]
-        # Get unique users from top 100 of both lists
-        unique_users = set()
-        for owner, _ in top_space_owners[:100]:
-            unique_users.add(owner)
-        for owner, _ in top_model_owners[:100]:
-            unique_users.add(owner)
-        # For now, use a simple ranking based on total repos
-        # (We'll fetch commits only for the selected user)
         user_stats = []
-        for user in unique_users:
-            models_count = next((count for owner, count in top_model_owners if owner == user), 0)
-            spaces_count = next((count for owner, count in top_space_owners if owner == user), 0)
-            user_stats.append({
-                "username": user,
-                "models": models_count,
-                "spaces": spaces_count,
-                "datasets": 0,  # We'll skip datasets for initial ranking
-                "estimated_commits": 0,  # Will be calculated later for selected user
-                "score": models_count + spaces_count  # Temporary score for initial display
-            })
-        # Sort by total repo count for initial display
         user_stats.sort(key=lambda x: x["score"], reverse=True)
-        # Extract rankings
-        trending_authors = [stat["username"] for stat in user_stats[:limit]]
-        # Create separate rankings sorted by spaces and models count
-        spaces_rank_data = [(stat["username"], stat["spaces"]) for stat in user_stats if stat["spaces"] > 0]
-        spaces_rank_data.sort(key=lambda x: x[1], reverse=True)  # Sort by spaces count
-        spaces_rank_data = spaces_rank_data[:limit]
-        models_rank_data = [(stat["username"], stat["models"]) for stat in user_stats if stat["models"] > 0]
-        models_rank_data.sort(key=lambda x: x[1], reverse=True)  # Sort by models count
-        models_rank_data = models_rank_data[:limit]
-        return trending_authors, spaces_rank_data, models_rank_data, user_stats[:limit]
     except Exception as e:
-        st.error(f"Error fetching trending accounts: {str(e)}")
         fallback_authors = ["ritvik77", "facebook", "google", "stabilityai", "Salesforce", "tiiuae", "bigscience"]
         fallback_stats = [{"username": author, "models": 0, "spaces": 0, "datasets": 0, "estimated_commits": 0, "score": 0} for author in fallback_authors]
         return fallback_authors, [(author, 0) for author in fallback_authors], [(author, 0) for author in fallback_authors], fallback_stats
@@ -688,9 +689,33 @@ def simulate_follower_data(username, spaces_count, models_count, total_commits):
     return fig
-# Fetch trending accounts with a loading spinner (do this once at the beginning)
-with st.spinner("Loading top contributors..."):
-    trending_accounts, top_owners_spaces, top_owners_models, user_stats = get_trending_accounts_simple(limit=100)
 # Sidebar
 with st.sidebar:
@@ -698,14 +723,14 @@ with st.sidebar:
     # Create tabs for rankings
     tab1, tab2 = st.tabs([
-        "Top Contributors",
-        "Repository Rankings"
     ])
     with tab1:
-        # Show combined trending accounts list
-        st.markdown('<div class="subheader"><h3>🔥 Top 100 Contributors</h3></div>', unsafe_allow_html=True)
-        st.markdown('<p style="font-size: 0.9rem; color: #666; margin-bottom: 10px;">Initial ranking by total repositories. Select a user to see commit-based analysis.</p>', unsafe_allow_html=True)
         # Create a data frame for the table
         if user_stats:
@@ -723,14 +748,15 @@ with st.sidebar:
                 overall_data.append([
                     f"{rank_display}{stat['username']}",
                     str(stat['models']),
                     str(stat['spaces']),
-                    str(stat['models'] + stat['spaces'])  # Total repos
                 ])
             ranking_data_overall = pd.DataFrame(
                 overall_data,
-                columns=["Contributor", "Models", "Spaces", "Total Repos"]
             )
             ranking_data_overall.index = ranking_data_overall.index + 1  # Start index from 1 for ranking
@@ -739,17 +765,19 @@ with st.sidebar:
                 height=900,  # 약 30행 정도 보이도록 픽셀 단위 높이 설정
                 column_config={
                     "Contributor": st.column_config.TextColumn("Contributor"),
                     "Models": st.column_config.TextColumn("Models"),
                     "Spaces": st.column_config.TextColumn("Spaces"),
-                    "Total Repos": st.column_config.TextColumn("Total Repos")
                 },
                 use_container_width=True,
                 hide_index=False
             )
     with tab2:
-        # Show accounts sorted by Spaces count
-        st.markdown('<div class="subheader"><h3>🚀 Top 50 by Spaces Count</h3></div>', unsafe_allow_html=True)
         # Create a data frame for the Spaces table with medals for top 3
         if top_owners_spaces:
@@ -779,8 +807,9 @@ with st.sidebar:
                 hide_index=False
             )
-        # Display accounts sorted by Models count
-        st.markdown('<div class="subheader"><h3>🧠 Top 50 by Models Count</h3></div>', unsafe_allow_html=True)
         # Create a data frame for the Models table with medals for top 3
         if top_owners_models:
@@ -869,7 +898,7 @@ if username:
         st.markdown(f'<div style="background-color: #F3E5F5; padding: 20px; border-radius: 10px; border-left: 5px solid #9C27B0;">'
                   f'<h3 style="color: #9C27B0;">About This Analysis</h3>'
                   f'<p>This dashboard analyzes {username}\'s contributions to Hugging Face in {selected_year}, including models, datasets, and spaces.</p>'
-                  f'<p style="font-style: italic; font-size: 12px;">* Detailed commit analysis will be calculated after selection.</p>'
                   f'</div>', unsafe_allow_html=True)
     with st.spinner(f"Fetching detailed contribution data for {username}..."):
@@ -887,8 +916,7 @@ if username:
             # Create a prominent ranking display
             st.markdown(f'<div style="background-color: #FFF8E1; padding: 20px; border-radius: 10px; border-left: 5px solid #FFC107; margin: 1rem 0;">'
-                      f'<h2 style="color: #FFA000; text-align: center;">📊 Featured Contributor</h2>'
-                      f'<p style="text-align: center; font-size: 16px;">Analyzing detailed commit statistics...</p>'
                       f'</div>', unsafe_allow_html=True)
             # Find user in spaces ranking
@@ -997,21 +1025,21 @@ if username:
                 # Process repos in chunks
                 chunk_size = 5
-                total_commits = 0
                 all_commit_dates = []
                 for i in range(0, len(repo_ids), chunk_size):
                     chunk = repo_ids[i:i + chunk_size]
                     with ThreadPoolExecutor(max_workers=min(5, len(chunk))) as executor:
-                        future_to_repo = {
-                            executor.submit(fetch_commits_for_repo, repo_id, kind, username, selected_year): repo_id
-                            for repo_id in chunk
-                        }
                         for future in as_completed(future_to_repo):
                             repo_commits, repo_count = future.result()
                             if repo_commits:
                                 all_commit_dates.extend(repo_commits)
-                                total_commits += repo_count
                     # Update progress for all types
                     progress_per_type = 1.0 / len(types_to_fetch)
@@ -1020,7 +1048,7 @@ if username:
                     progress_bar.progress(overall_progress)
                 commits_by_type[kind] = all_commit_dates
-                commit_counts_by_type[kind] = total_commits
             except Exception as e:
                 st.warning(f"Error fetching {kind}s for {username}: {str(e)}")

             "score": 0
         }
+# Enhanced function to get trending accounts with commit-based ranking for top 100
+@st.cache_data(ttl=3600, show_spinner=False)  # Cache for 1 hour
+def get_trending_accounts_with_commits(limit=100):
     try:
         # First, get top accounts by model/space count
         spaces_response = requests.get("https://huggingface.co/api/spaces",
                                       timeout=30)
         # Process spaces data
+        owner_counts_spaces = {}
         if spaces_response.status_code == 200:
             spaces = spaces_response.json()
             for space in spaces:
                 if '/' in space.get('id', ''):
                     owner, _ = space.get('id', '').split('/', 1)
                 if owner != 'None':
                     owner_counts_spaces[owner] = owner_counts_spaces.get(owner, 0) + 1
         # Process models data
+        owner_counts_models = {}
         if models_response.status_code == 200:
             models = models_response.json()
             for model in models:
                 if '/' in model.get('id', ''):
                     owner, _ = model.get('id', '').split('/', 1)
                 if owner != 'None':
                     owner_counts_models[owner] = owner_counts_models.get(owner, 0) + 1
+        # Get top accounts by total repos (models + spaces)
+        top_accounts = {}
+        for owner, count in owner_counts_models.items():
+            top_accounts[owner] = count + owner_counts_spaces.get(owner, 0)
+        for owner, count in owner_counts_spaces.items():
+            if owner not in top_accounts:
+                top_accounts[owner] = count
+        # Sort and get top 200 accounts for commit analysis
+        sorted_accounts = sorted(top_accounts.items(), key=lambda x: x[1], reverse=True)[:200]
+        # Fetch commit stats for top accounts
         user_stats = []
+        for username, _ in sorted_accounts:
+            try:
+                # Get commit stats
+                stats = get_user_commit_stats(username)
+                stats["models"] = owner_counts_models.get(username, 0)
+                stats["spaces"] = owner_counts_spaces.get(username, 0)
+                user_stats.append(stats)
+            except:
+                user_stats.append({
+                    "username": username,
+                    "models": owner_counts_models.get(username, 0),
+                    "spaces": owner_counts_spaces.get(username, 0),
+                    "datasets": 0,
+                    "estimated_commits": 0,
+                    "score": 0
+                })
+        # Sort by commits (score) for overall ranking
         user_stats.sort(key=lambda x: x["score"], reverse=True)
+        # Get top 100 by commits
+        top_100_by_commits = user_stats[:limit]
+        trending_authors = [stat["username"] for stat in top_100_by_commits]
+        # Create rankings within top 100 - sorted by spaces and models count
+        spaces_rank_data = [(stat["username"], stat["spaces"]) for stat in top_100_by_commits if stat["spaces"] > 0]
+        spaces_rank_data.sort(key=lambda x: x[1], reverse=True)
+        models_rank_data = [(stat["username"], stat["models"]) for stat in top_100_by_commits if stat["models"] > 0]
+        models_rank_data.sort(key=lambda x: x[1], reverse=True)
+        return trending_authors, spaces_rank_data, models_rank_data, top_100_by_commits
     except Exception as e:
+        # Return fallback data
         fallback_authors = ["ritvik77", "facebook", "google", "stabilityai", "Salesforce", "tiiuae", "bigscience"]
         fallback_stats = [{"username": author, "models": 0, "spaces": 0, "datasets": 0, "estimated_commits": 0, "score": 0} for author in fallback_authors]
         return fallback_authors, [(author, 0) for author in fallback_authors], [(author, 0) for author in fallback_authors], fallback_stats
     return fig
+# Fetch trending accounts with a loading spinner and progress bar
+try:
+    with st.spinner("Loading top contributors..."):
+        # Add progress tracking
+        progress_text = st.empty()
+        progress_bar = st.progress(0)
+        progress_text.text("Fetching repository data...")
+        progress_bar.progress(0.3)
+        # Get the data
+        trending_accounts, top_owners_spaces, top_owners_models, user_stats = get_trending_accounts_with_commits(limit=100)
+        # Complete progress
+        progress_bar.progress(1.0)
+        progress_text.text("Loading complete!")
+        time.sleep(0.5)
+        # Clear progress indicators
+        progress_text.empty()
+        progress_bar.empty()
+except Exception as e:
+    st.error(f"Error loading trending accounts: {str(e)}")
+    # Use fallback data
+    trending_accounts = ["ritvik77", "facebook", "google", "stabilityai", "Salesforce", "tiiuae", "bigscience"]
+    top_owners_spaces = [(author, 0) for author in trending_accounts]
+    top_owners_models = [(author, 0) for author in trending_accounts]
+    user_stats = [{"username": author, "models": 0, "spaces": 0, "datasets": 0, "estimated_commits": 0, "score": 0} for author in trending_accounts]
 # Sidebar
 with st.sidebar:
     # Create tabs for rankings
     tab1, tab2 = st.tabs([
+        "Top 100 by Commits",
+        "Space/Model Rankings"
     ])
     with tab1:
+        # Show combined trending accounts list by commits
+        st.markdown('<div class="subheader"><h3>🔥 Top 100 Contributors by Commits</h3></div>', unsafe_allow_html=True)
+        st.markdown('<p style="font-size: 0.9rem; color: #666; margin-bottom: 10px;">Ranked by estimated total commit count</p>', unsafe_allow_html=True)
         # Create a data frame for the table
         if user_stats:
                 overall_data.append([
                     f"{rank_display}{stat['username']}",
+                    str(stat['estimated_commits']),
                     str(stat['models']),
                     str(stat['spaces']),
+                    str(stat['datasets'])
                 ])
             ranking_data_overall = pd.DataFrame(
                 overall_data,
+                columns=["Contributor", "Est. Commits", "Models", "Spaces", "Datasets"]
             )
             ranking_data_overall.index = ranking_data_overall.index + 1  # Start index from 1 for ranking
                 height=900,  # 약 30행 정도 보이도록 픽셀 단위 높이 설정
                 column_config={
                     "Contributor": st.column_config.TextColumn("Contributor"),
+                    "Est. Commits": st.column_config.TextColumn("Est. Commits"),
                     "Models": st.column_config.TextColumn("Models"),
                     "Spaces": st.column_config.TextColumn("Spaces"),
+                    "Datasets": st.column_config.TextColumn("Datasets")
                 },
                 use_container_width=True,
                 hide_index=False
             )
     with tab2:
+        # Show accounts sorted by Spaces count (within top 100 by commits)
+        st.markdown('<div class="subheader"><h3>🚀 Spaces Ranking (Top 100 Contributors)</h3></div>', unsafe_allow_html=True)
+        st.markdown('<p style="font-size: 0.9rem; color: #666; margin-bottom: 10px;">Ranked by spaces count among top 100 contributors by commits</p>', unsafe_allow_html=True)
         # Create a data frame for the Spaces table with medals for top 3
         if top_owners_spaces:
                 hide_index=False
             )
+        # Display accounts sorted by Models count (within top 100 by commits)
+        st.markdown('<div class="subheader"><h3>🧠 Models Ranking (Top 100 Contributors)</h3></div>', unsafe_allow_html=True)
+        st.markdown('<p style="font-size: 0.9rem; color: #666; margin-bottom: 10px;">Ranked by models count among top 100 contributors by commits</p>', unsafe_allow_html=True)
         # Create a data frame for the Models table with medals for top 3
         if top_owners_models:
         st.markdown(f'<div style="background-color: #F3E5F5; padding: 20px; border-radius: 10px; border-left: 5px solid #9C27B0;">'
                   f'<h3 style="color: #9C27B0;">About This Analysis</h3>'
                   f'<p>This dashboard analyzes {username}\'s contributions to Hugging Face in {selected_year}, including models, datasets, and spaces.</p>'
+                  f'<p style="font-style: italic; font-size: 12px;">* Overall rankings are based on commit count. Space/Model rankings are within top 100 contributors.</p>'
                   f'</div>', unsafe_allow_html=True)
     with st.spinner(f"Fetching detailed contribution data for {username}..."):
             # Create a prominent ranking display
             st.markdown(f'<div style="background-color: #FFF8E1; padding: 20px; border-radius: 10px; border-left: 5px solid #FFC107; margin: 1rem 0;">'
+                      f'<h2 style="color: #FFA000; text-align: center;">🏆 Ranked #{overall_rank} in Top 100 Contributors by Commits</h2>'
                       f'</div>', unsafe_allow_html=True)
             # Find user in spaces ranking
                 # Process repos in chunks
                 chunk_size = 5
                 all_commit_dates = []
                 for i in range(0, len(repo_ids), chunk_size):
                     chunk = repo_ids[i:i + chunk_size]
                     with ThreadPoolExecutor(max_workers=min(5, len(chunk))) as executor:
+                        future_to_repo = {}
+                        for repo_id in chunk:
+                            future = executor.submit(fetch_commits_for_repo, repo_id, kind, username, selected_year)
+                            add_script_run_ctx(future)
+                            future_to_repo[future] = repo_id
                         for future in as_completed(future_to_repo):
                             repo_commits, repo_count = future.result()
                             if repo_commits:
                                 all_commit_dates.extend(repo_commits)
                     # Update progress for all types
                     progress_per_type = 1.0 / len(types_to_fetch)
                     progress_bar.progress(overall_progress)
                 commits_by_type[kind] = all_commit_dates
+                commit_counts_by_type[kind] = len(all_commit_dates)
             except Exception as e:
                 st.warning(f"Error fetching {kind}s for {username}: {str(e)}")