Commits-Leaderboard

Running

App Files Files Community

openfree committed 9 days ago

Commit

f949063

verified ·

1 Parent(s): ccdd80f

Update app.py

Browse files

Files changed (1) hide show

app.py +151 -76

app.py CHANGED Viewed

@@ -146,28 +146,93 @@ def cached_list_items(username, kind):
         return list(api.list_spaces(author=username))
     return []
-# Function to fetch trending accounts and create stats
-@lru_cache(maxsize=1)
-def get_trending_accounts(limit=100):
     try:
-        trending_data = {"spaces": [], "models": []}
-        # Get spaces for stats calculation
         spaces_response = requests.get("https://huggingface.co/api/spaces",
                                       params={"limit": 10000},
                                       timeout=30)
-        # Get models for stats calculation
         models_response = requests.get("https://huggingface.co/api/models",
                                       params={"limit": 10000},
                                       timeout=30)
         # Process spaces data
-        spaces_owners = []
         if spaces_response.status_code == 200:
             spaces = spaces_response.json()
-            # Count spaces by owner
             owner_counts_spaces = {}
             for space in spaces:
                 if '/' in space.get('id', ''):
@@ -178,17 +243,12 @@ def get_trending_accounts(limit=100):
                 if owner != 'None':
                     owner_counts_spaces[owner] = owner_counts_spaces.get(owner, 0) + 1
-            # Get top owners by count for spaces
-            top_owners_spaces = sorted(owner_counts_spaces.items(), key=lambda x: x[1], reverse=True)[:limit]
-            trending_data["spaces"] = top_owners_spaces
-            spaces_owners = [owner for owner, _ in top_owners_spaces]
         # Process models data
-        models_owners = []
         if models_response.status_code == 200:
             models = models_response.json()
-            # Count models by owner
             owner_counts_models = {}
             for model in models:
                 if '/' in model.get('id', ''):
@@ -199,47 +259,55 @@ def get_trending_accounts(limit=100):
                 if owner != 'None':
                     owner_counts_models[owner] = owner_counts_models.get(owner, 0) + 1
-            # Get top owners by count for models
-            top_owners_models = sorted(owner_counts_models.items(), key=lambda x: x[1], reverse=True)[:limit]
-            trending_data["models"] = top_owners_models
-            models_owners = [owner for owner, _ in top_owners_models]
-        # Combine rankings for overall trending based on appearance in both lists
-        combined_score = {}
-        for i, owner in enumerate(spaces_owners):
-            if owner not in combined_score:
-                combined_score[owner] = 0
-            combined_score[owner] += (limit - i)  # Higher rank gives more points
-        for i, owner in enumerate(models_owners):
-            if owner not in combined_score:
-                combined_score[owner] = 0
-            combined_score[owner] += (limit - i)  # Higher rank gives more points
-        # Sort by combined score
-        sorted_combined = sorted(combined_score.items(), key=lambda x: x[1], reverse=True)[:limit]
-        trending_authors = [owner for owner, _ in sorted_combined]
-        return trending_authors, trending_data["spaces"], trending_data["models"]
     except Exception as e:
         st.error(f"Error fetching trending accounts: {str(e)}")
         fallback_authors = ["ritvik77", "facebook", "google", "stabilityai", "Salesforce", "tiiuae", "bigscience"]
-        return fallback_authors, [(author, 0) for author in fallback_authors], [(author, 0) for author in fallback_authors]
-# Rate limiting
-class RateLimiter:
-    def __init__(self, calls_per_second=10):
-        self.calls_per_second = calls_per_second
-        self.last_call = 0
-    def wait(self):
-        current_time = time.time()
-        time_since_last_call = current_time - self.last_call
-        if time_since_last_call < (1.0 / self.calls_per_second):
-            time.sleep((1.0 / self.calls_per_second) - time_since_last_call)
-        self.last_call = time.time()
-rate_limiter = RateLimiter()
 # Function to fetch commits for a repository (optimized)
 def fetch_commits_for_repo(repo_id, repo_type, username, selected_year):
@@ -699,32 +767,29 @@ def create_ranking_chart(username, overall_rank, spaces_rank, models_rank):
     return fig
 # Fetch trending accounts with a loading spinner (do this once at the beginning)
-with st.spinner("Loading trending accounts..."):
-    trending_accounts, top_owners_spaces, top_owners_models = get_trending_accounts(limit=100)
 # Sidebar
 with st.sidebar:
     st.markdown('<h1 style="text-align: center; color: #1E88E5;">👤 Contributor</h1>', unsafe_allow_html=True)
-    # Create tabs for Spaces and Models rankings - ONLY SHOWING FIRST TWO TABS
     tab1, tab2 = st.tabs([
         "Top 100 Overall",
         "Top Spaces & Models"
     ])
     with tab1:
-        # Show combined trending accounts list
-        st.markdown('<div class="subheader"><h3>🔥 Top 100 Contributors</h3></div>', unsafe_allow_html=True)
         # Create a data frame for the table
-        if trending_accounts:
-            # Create a mapping from username to Spaces and Models rankings
-            spaces_rank = {owner: idx+1 for idx, (owner, _) in enumerate(top_owners_spaces)}
-            models_rank = {owner: idx+1 for idx, (owner, _) in enumerate(top_owners_models)}
             # Create the overall ranking dataframe with trophies for top 3
             overall_data = []
-            for idx, username in enumerate(trending_accounts[:100]):
                 # Add trophy emojis for top 3
                 rank_display = ""
                 if idx == 0:
@@ -734,24 +799,29 @@ with st.sidebar:
                 elif idx == 2:
                     rank_display = "🏆 "  # Bronze trophy for 3rd place
-                # Use strings for all rankings to avoid type conversion issues
-                spaces_position = str(spaces_rank.get(username, "-"))
-                models_position = str(models_rank.get(username, "-"))
-                overall_data.append([f"{rank_display}{username}", spaces_position, models_position])
             ranking_data_overall = pd.DataFrame(
                 overall_data,
-                columns=["Contributor", "Spaces Rank", "Models Rank"]
             )
             ranking_data_overall.index = ranking_data_overall.index + 1  # Start index from 1 for ranking
             st.dataframe(
                 ranking_data_overall,
-                height=900,  # 약 30행 정도 보이도록 픽셀 단위 높이 설정 (필요에 따라 조정 가능)
                 column_config={
                     "Contributor": st.column_config.TextColumn("Contributor"),
-                    "Spaces Rank": st.column_config.TextColumn("Spaces Rank"),
-                    "Models Rank": st.column_config.TextColumn("Models Rank")
                 },
                 use_container_width=True,
                 hide_index=False
@@ -776,7 +846,7 @@ with st.sidebar:
                 spaces_data.append([f"{rank_display}{owner}", count])
-            ranking_data_spaces = pd.DataFrame(spaces_data, columns=["Contributor", "Spaces Count(Top 500 positions)"])
             ranking_data_spaces.index = ranking_data_spaces.index + 1  # Start index from 1 for ranking
             st.dataframe(
@@ -807,7 +877,7 @@ with st.sidebar:
                 models_data.append([f"{rank_display}{owner}", count])
-            ranking_data_models = pd.DataFrame(models_data, columns=["Contributor", "Models Count(Top 500 positions)"])
             ranking_data_models.index = ranking_data_models.index + 1  # Start index from 1 for ranking
             st.dataframe(
@@ -862,12 +932,17 @@ with st.sidebar:
 st.markdown(f'<h1 style="text-align: center; color: #1E88E5; margin-bottom: 2rem;">🤗 Hugging Face Contributions</h1>', unsafe_allow_html=True)
 if username:
     # Create a header card with contributor info
     header_col1, header_col2 = st.columns([1, 2])
     with header_col1:
         st.markdown(f'<div style="background-color: #E3F2FD; padding: 20px; border-radius: 10px; border-left: 5px solid #1E88E5;">'
                   f'<h2 style="color: #1E88E5;">👤 {username}</h2>'
                   f'<p style="font-size: 16px;">Analyzing contributions for {selected_year}</p>'
                   f'<p><a href="https://huggingface.co/{username}" target="_blank" style="color: #1E88E5; font-weight: bold;">View Profile</a></p>'
                   f'</div>', unsafe_allow_html=True)
@@ -876,10 +951,10 @@ if username:
         st.markdown(f'<div style="background-color: #F3E5F5; padding: 20px; border-radius: 10px; border-left: 5px solid #9C27B0;">'
                   f'<h3 style="color: #9C27B0;">About This Analysis</h3>'
                   f'<p>This dashboard analyzes {username}\'s contributions to Hugging Face in {selected_year}, including models, datasets, and spaces.</p>'
-                  f'<p style="font-style: italic; font-size: 12px;">* Some metrics like follower growth are simulated for visualization purposes.</p>'
                   f'</div>', unsafe_allow_html=True)
-    with st.spinner(f"Fetching contribution data for {username}..."):
         # Initialize variables for tracking
         overall_rank = None
         spaces_rank = None
@@ -1177,4 +1252,4 @@ else:
               f'<img src="https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.svg" style="width: 200px; margin-bottom: 30px;">'
               f'<h2>Welcome to Hugging Face Contributions Dashboard</h2>'
               f'<p style="font-size: 1.2rem;">Please select a contributor from the sidebar to view their activity.</p>'
-              f'</div>', unsafe_allow_html=True)

         return list(api.list_spaces(author=username))
     return []
# Rate limiting
class RateLimiter:
    """Throttle callers to at most ``calls_per_second`` calls per second.

    ``wait()`` blocks until at least ``1 / calls_per_second`` seconds have
    passed since the previous call was released.  The module-level
    ``rate_limiter`` instance is used from worker threads, so the
    check-sleep-update sequence is guarded by a lock; without it several
    threads could pass the check at once and exceed the configured rate.
    """

    def __init__(self, calls_per_second=10):
        """Create a limiter.

        Args:
            calls_per_second: Maximum number of calls allowed per second.
                Must be greater than zero.

        Raises:
            ValueError: If ``calls_per_second`` is not positive (it is used
                as a divisor when computing the minimum interval).
        """
        # Local import so this block does not depend on a file-level import.
        import threading

        if calls_per_second <= 0:
            raise ValueError("calls_per_second must be greater than 0")
        self.calls_per_second = calls_per_second
        # Monotonic timestamp of the most recent released call (0 = never).
        self.last_call = 0
        self._lock = threading.Lock()

    def wait(self):
        """Block until the next call is allowed, then record its time."""
        min_interval = 1.0 / self.calls_per_second
        # Sleeping while holding the lock is intentional: it serialises
        # callers so each one is spaced at least ``min_interval`` apart.
        with self._lock:
            # The monotonic clock is immune to wall-clock adjustments, which
            # could otherwise produce a negative or very large elapsed time.
            remaining = min_interval - (time.monotonic() - self.last_call)
            if remaining > 0:
                time.sleep(remaining)
            self.last_call = time.monotonic()


rate_limiter = RateLimiter()
# Function to fetch quick commit stats for a user (optimized for ranking)
@st.cache_data(ttl=3600)  # Cache for 1 hour
def get_user_commit_stats(username):
    """Return repo counts, an estimated commit total and a score for a user.

    For each repo kind (model, dataset, space) the user's repos are listed
    and the commits of up to five of them are counted.  When the user has
    more repos of that kind than were sampled, the sampled count is scaled
    up proportionally.  The extrapolation is done per kind, so one kind's
    repo count never rescales commits already estimated for another kind.

    Args:
        username: Hugging Face account name to analyse.

    Returns:
        dict with keys ``username``, ``models``, ``spaces``, ``datasets``,
        ``estimated_commits`` and ``score``.  The score is
        ``models*3 + spaces*2 + datasets*1 + estimated_commits*0.1``.
        Lookups are best effort: a kind whose listing fails contributes
        zero, and a sampled repo whose commits cannot be fetched counts as
        zero commits.
    """
    sample_limit = 5  # Check up to 5 repos per type
    try:
        total_commits = 0
        items_count = {"model": 0, "dataset": 0, "space": 0}

        for kind in ["model", "dataset", "space"]:
            try:
                items = cached_list_items(username, kind)
                items_count[kind] = len(items)

                # Sample a few repos to estimate commit activity
                sample_items = items[:sample_limit]
                if not sample_items:
                    continue

                sampled_commits = 0
                for item in sample_items:
                    try:
                        rate_limiter.wait()
                        sampled_commits += len(cached_list_commits(item.id, kind))
                    except Exception:
                        # Best effort: an unreadable repo counts as 0 commits.
                        pass

                # Extrapolate from this kind's sample only; the running
                # total already holds the estimates for earlier kinds.
                kind_commits = sampled_commits
                if len(sample_items) < len(items):
                    kind_commits = int(sampled_commits * len(items) / len(sample_items))
                total_commits += kind_commits
            except Exception:
                # Best effort: skip a kind that cannot be listed/processed.
                continue

        # Calculate contribution score
        # Weight: Models=3, Spaces=2, Datasets=1, Commits=0.1
        score = (items_count["model"] * 3 +
                 items_count["space"] * 2 +
                 items_count["dataset"] * 1 +
                 total_commits * 0.1)

        return {
            "username": username,
            "models": items_count["model"],
            "spaces": items_count["space"],
            "datasets": items_count["dataset"],
            "estimated_commits": total_commits,
            "score": score
        }
    except Exception:
        # Fall back to an all-zero record so ranking code can still run.
        return {
            "username": username,
            "models": 0,
            "spaces": 0,
            "datasets": 0,
            "estimated_commits": 0,
            "score": 0
        }
+# Enhanced function to get trending accounts with commit-based ranking
+@st.cache_data(ttl=3600)  # Cache for 1 hour
+def get_trending_accounts_with_commits(limit=100):
+    try:
+        # First, get top accounts by model/space count
         spaces_response = requests.get("https://huggingface.co/api/spaces",
                                       params={"limit": 10000},
                                       timeout=30)
         models_response = requests.get("https://huggingface.co/api/models",
                                       params={"limit": 10000},
                                       timeout=30)
         # Process spaces data
+        top_space_owners = []
         if spaces_response.status_code == 200:
             spaces = spaces_response.json()
             owner_counts_spaces = {}
             for space in spaces:
                 if '/' in space.get('id', ''):
                 if owner != 'None':
                     owner_counts_spaces[owner] = owner_counts_spaces.get(owner, 0) + 1
+            top_space_owners = sorted(owner_counts_spaces.items(), key=lambda x: x[1], reverse=True)[:limit]
         # Process models data
+        top_model_owners = []
         if models_response.status_code == 200:
             models = models_response.json()
             owner_counts_models = {}
             for model in models:
                 if '/' in model.get('id', ''):
                 if owner != 'None':
                     owner_counts_models[owner] = owner_counts_models.get(owner, 0) + 1
+            top_model_owners = sorted(owner_counts_models.items(), key=lambda x: x[1], reverse=True)[:limit]
+        # Get unique users from top 100 of both lists
+        unique_users = set()
+        for owner, _ in top_space_owners[:100]:
+            unique_users.add(owner)
+        for owner, _ in top_model_owners[:100]:
+            unique_users.add(owner)
+        # Create progress bar for fetching commit stats
+        progress_text = st.empty()
+        progress_bar = st.progress(0)
+        progress_text.text(f"Analyzing top contributors... (0/{len(unique_users)})")
+        # Fetch commit stats for all unique users
+        user_stats = []
+        with ThreadPoolExecutor(max_workers=5) as executor:
+            future_to_user = {executor.submit(get_user_commit_stats, user): user for user in unique_users}
+            completed = 0
+            for future in as_completed(future_to_user):
+                stats = future.result()
+                if stats["score"] > 0:  # Only include users with some activity
+                    user_stats.append(stats)
+                completed += 1
+                progress = completed / len(unique_users)
+                progress_bar.progress(progress)
+                progress_text.text(f"Analyzing top contributors... ({completed}/{len(unique_users)})")
+        # Clear progress indicators
+        progress_text.empty()
+        progress_bar.empty()
+        # Sort by score (combination of commits and repo counts)
+        user_stats.sort(key=lambda x: x["score"], reverse=True)
+        # Extract rankings
+        trending_authors = [stat["username"] for stat in user_stats[:limit]]
+        # Create detailed rankings for display
+        spaces_rank_data = [(stat["username"], stat["spaces"]) for stat in user_stats if stat["spaces"] > 0][:limit]
+        models_rank_data = [(stat["username"], stat["models"]) for stat in user_stats if stat["models"] > 0][:limit]
+        return trending_authors, spaces_rank_data, models_rank_data, user_stats[:limit]
     except Exception as e:
         st.error(f"Error fetching trending accounts: {str(e)}")
         fallback_authors = ["ritvik77", "facebook", "google", "stabilityai", "Salesforce", "tiiuae", "bigscience"]
+        fallback_stats = [{"username": author, "models": 0, "spaces": 0, "datasets": 0, "estimated_commits": 0, "score": 0} for author in fallback_authors]
+        return fallback_authors, [(author, 0) for author in fallback_authors], [(author, 0) for author in fallback_authors], fallback_stats
 # Function to fetch commits for a repository (optimized)
 def fetch_commits_for_repo(repo_id, repo_type, username, selected_year):
     return fig
 # Fetch trending accounts with a loading spinner (do this once at the beginning)
+with st.spinner("Loading and analyzing top contributors... This may take a few moments."):
+    trending_accounts, top_owners_spaces, top_owners_models, user_stats = get_trending_accounts_with_commits(limit=100)
 # Sidebar
 with st.sidebar:
     st.markdown('<h1 style="text-align: center; color: #1E88E5;">👤 Contributor</h1>', unsafe_allow_html=True)
+    # Create tabs for rankings
     tab1, tab2 = st.tabs([
         "Top 100 Overall",
         "Top Spaces & Models"
     ])
     with tab1:
+        # Show combined trending accounts list with commit-based ranking
+        st.markdown('<div class="subheader"><h3>🔥 Top 100 Contributors by Score</h3></div>', unsafe_allow_html=True)
+        st.markdown('<p style="font-size: 0.9rem; color: #666; margin-bottom: 10px;">Ranked by contribution score (Models×3 + Spaces×2 + Datasets×1 + Commits×0.1)</p>', unsafe_allow_html=True)
         # Create a data frame for the table
+        if user_stats:
             # Create the overall ranking dataframe with trophies for top 3
             overall_data = []
+            for idx, stat in enumerate(user_stats[:100]):
                 # Add trophy emojis for top 3
                 rank_display = ""
                 if idx == 0:
                 elif idx == 2:
                     rank_display = "🏆 "  # Bronze trophy for 3rd place
+                overall_data.append([
+                    f"{rank_display}{stat['username']}",
+                    f"{stat['score']:.1f}",
+                    str(stat['estimated_commits']),
+                    str(stat['models']),
+                    str(stat['spaces'])
+                ])
             ranking_data_overall = pd.DataFrame(
                 overall_data,
+                columns=["Contributor", "Score", "Est. Commits", "Models", "Spaces"]
             )
             ranking_data_overall.index = ranking_data_overall.index + 1  # Start index from 1 for ranking
             st.dataframe(
                 ranking_data_overall,
+                height=900,  # 약 30행 정도 보이도록 픽셀 단위 높이 설정
                 column_config={
                     "Contributor": st.column_config.TextColumn("Contributor"),
+                    "Score": st.column_config.TextColumn("Score"),
+                    "Est. Commits": st.column_config.TextColumn("Est. Commits"),
+                    "Models": st.column_config.TextColumn("Models"),
+                    "Spaces": st.column_config.TextColumn("Spaces")
                 },
                 use_container_width=True,
                 hide_index=False
                 spaces_data.append([f"{rank_display}{owner}", count])
+            ranking_data_spaces = pd.DataFrame(spaces_data, columns=["Contributor", "Spaces Count"])
             ranking_data_spaces.index = ranking_data_spaces.index + 1  # Start index from 1 for ranking
             st.dataframe(
                 models_data.append([f"{rank_display}{owner}", count])
+            ranking_data_models = pd.DataFrame(models_data, columns=["Contributor", "Models Count"])
             ranking_data_models.index = ranking_data_models.index + 1  # Start index from 1 for ranking
             st.dataframe(
 st.markdown(f'<h1 style="text-align: center; color: #1E88E5; margin-bottom: 2rem;">🤗 Hugging Face Contributions</h1>', unsafe_allow_html=True)
 if username:
+    # Find user's stats in the pre-calculated data
+    user_stat = next((stat for stat in user_stats if stat["username"] == username), None)
     # Create a header card with contributor info
     header_col1, header_col2 = st.columns([1, 2])
     with header_col1:
+        score_display = f"Score: {user_stat['score']:.1f}" if user_stat else "Score: N/A"
         st.markdown(f'<div style="background-color: #E3F2FD; padding: 20px; border-radius: 10px; border-left: 5px solid #1E88E5;">'
                   f'<h2 style="color: #1E88E5;">👤 {username}</h2>'
                   f'<p style="font-size: 16px;">Analyzing contributions for {selected_year}</p>'
+                  f'<p style="font-size: 14px; font-weight: bold;">{score_display}</p>'
                   f'<p><a href="https://huggingface.co/{username}" target="_blank" style="color: #1E88E5; font-weight: bold;">View Profile</a></p>'
                   f'</div>', unsafe_allow_html=True)
         st.markdown(f'<div style="background-color: #F3E5F5; padding: 20px; border-radius: 10px; border-left: 5px solid #9C27B0;">'
                   f'<h3 style="color: #9C27B0;">About This Analysis</h3>'
                   f'<p>This dashboard analyzes {username}\'s contributions to Hugging Face in {selected_year}, including models, datasets, and spaces.</p>'
+                  f'<p style="font-style: italic; font-size: 12px;">* Rankings are based on contribution scores combining repos and commit activity.</p>'
                   f'</div>', unsafe_allow_html=True)
+    with st.spinner(f"Fetching detailed contribution data for {username}..."):
         # Initialize variables for tracking
         overall_rank = None
         spaces_rank = None
               f'<img src="https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.svg" style="width: 200px; margin-bottom: 30px;">'
               f'<h2>Welcome to Hugging Face Contributions Dashboard</h2>'
               f'<p style="font-size: 1.2rem;">Please select a contributor from the sidebar to view their activity.</p>'
+              f'</div>', unsafe_allow_html=True)