Spaces:
Running
Running
import threading
import time
from collections import Counter
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime
from functools import lru_cache

import matplotlib.pyplot as plt
import pandas as pd
import requests
import seaborn as sns
import streamlit as st
from huggingface_hub import HfApi
st.set_page_config(
    page_title="HF Contributions",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Pin the sidebar to 40% of the viewport width through injected CSS.
st.markdown(
    """
<style>
[data-testid="stSidebar"] {
min-width: 40vw !important;
max-width: 40vw !important;
}
</style>
""",
    unsafe_allow_html=True,
)

# Hugging Face Hub client shared by the module-level helpers.
api = HfApi()
# Cache for API responses
@lru_cache(maxsize=1000)
def cached_repo_info(repo_id, repo_type):
    """Return Hub metadata for one repository, memoized per argument pair.

    Args:
        repo_id: Full repository id, e.g. ``"owner/name"``.
        repo_type: Repository type understood by ``HfApi.repo_info``.

    Returns:
        Whatever ``api.repo_info`` returns for the repository. The same
        object is handed back on repeated calls, so treat it as read-only.

    Raises:
        Any exception raised by ``api.repo_info``; failures are not cached.
    """
    return api.repo_info(repo_id=repo_id, repo_type=repo_type)
@lru_cache(maxsize=1000)
def cached_list_commits(repo_id, repo_type):
    """Return the commit history of one repository as a list, memoized.

    Args:
        repo_id: Full repository id, e.g. ``"owner/name"``.
        repo_type: Repository type understood by ``HfApi.list_repo_commits``.

    Returns:
        A list built from ``api.list_repo_commits``. The list is shared
        between callers of the same arguments, so do not mutate it.

    Raises:
        Any exception raised by ``api.list_repo_commits``; failures are
        not cached.
    """
    return list(api.list_repo_commits(repo_id=repo_id, repo_type=repo_type))
@lru_cache(maxsize=256)
def cached_list_items(username, kind):
    """Return all repositories of one kind authored by ``username``, memoized.

    Args:
        username: Hub user or organisation name passed as ``author``.
        kind: ``"model"``, ``"dataset"`` or ``"space"``.

    Returns:
        A list of the objects yielded by the matching ``api.list_*`` call,
        or an empty list for an unrecognised ``kind``. The list is shared
        between callers of the same arguments, so do not mutate it.

    Raises:
        Any exception raised by the underlying Hub call; failures are not
        cached.
    """
    if kind == "model":
        lister = api.list_models
    elif kind == "dataset":
        lister = api.list_datasets
    elif kind == "space":
        lister = api.list_spaces
    else:
        return []
    return list(lister(author=username))
def _rank_owners(items, limit):
    """Return the ``limit`` most frequent owners in ``items`` as (owner, count) pairs."""
    counts = Counter()
    for item in items:
        repo_id = item.get('id') or ''
        if '/' in repo_id:
            owner = repo_id.split('/', 1)[0]
        else:
            owner = item.get('owner')
        # Skip missing/empty owners; the literal 'None' was already excluded
        # by the previous implementation and stays excluded.
        if not isinstance(owner, str) or not owner or owner == 'None':
            continue
        counts[owner] += 1
    # Ties keep first-seen order, same as a stable descending sort.
    return counts.most_common(limit)


# Function to fetch trending accounts and create stats
def get_trending_accounts(limit=100):
    """Rank Hub owners by how many listed spaces and models they own.

    Queries the public ``/api/spaces`` and ``/api/models`` endpoints, counts
    repositories per owner, and combines both rankings by awarding
    ``limit - position`` points for each list an owner appears in.

    Args:
        limit: Maximum number of owners kept in each ranking.

    Returns:
        A tuple ``(trending_authors, top_spaces, top_models)`` where
        ``trending_authors`` is a list of owner names ordered by combined
        score and the other two are lists of ``(owner, count)`` pairs.
        An endpoint that does not answer with HTTP 200 contributes an empty
        ranking. On any exception an error is shown via ``st.error`` and a
        hard-coded fallback list (with zero counts) is returned instead.
    """
    try:
        endpoints = {
            "spaces": "https://huggingface.co/api/spaces",
            "models": "https://huggingface.co/api/models",
        }
        trending_data = {}
        for kind, url in endpoints.items():
            response = requests.get(url, params={"limit": 10000}, timeout=30)
            if response.status_code == 200:
                trending_data[kind] = _rank_owners(response.json(), limit)
            else:
                trending_data[kind] = []

        # Combine rankings: a higher position in either list gives more points.
        combined_score = Counter()
        for ranked in (trending_data["spaces"], trending_data["models"]):
            for position, (owner, _) in enumerate(ranked):
                combined_score[owner] += limit - position

        trending_authors = [owner for owner, _ in combined_score.most_common(limit)]
        return trending_authors, trending_data["spaces"], trending_data["models"]
    except Exception as e:
        st.error(f"Error fetching trending accounts: {str(e)}")
        fallback_authors = ["ritvik77", "facebook", "google", "stabilityai", "Salesforce", "tiiuae", "bigscience"]
        return fallback_authors, [(author, 0) for author in fallback_authors], [(author, 0) for author in fallback_authors]
# Rate limiting
class RateLimiter:
    """Space out calls so that at most ``calls_per_second`` start per second.

    ``wait`` is serialised with a lock because the module-level instance is
    called from thread-pool workers; without it concurrent callers would all
    read the same ``last_call`` and proceed at once.
    """

    def __init__(self, calls_per_second=10):
        """Create a limiter.

        Args:
            calls_per_second: Maximum call rate. Zero or a negative value
                disables throttling.
        """
        self.calls_per_second = calls_per_second
        # Monotonic timestamp of the previous call; -inf means "never called".
        self.last_call = float("-inf")
        self._lock = threading.Lock()

    def wait(self):
        """Block until the minimum interval since the previous call has passed."""
        if self.calls_per_second <= 0:
            return
        min_interval = 1.0 / self.calls_per_second
        with self._lock:
            # A monotonic clock is immune to wall-clock adjustments.
            elapsed = time.monotonic() - self.last_call
            if elapsed < min_interval:
                time.sleep(min_interval - elapsed)
            self.last_call = time.monotonic()


rate_limiter = RateLimiter()
# Function to fetch commits for a repository (optimized)
def fetch_commits_for_repo(repo_id, repo_type, username, selected_year):
    """Collect the dates of one repository's activity in ``selected_year``.

    The repository's creation date is recorded as one entry, followed by one
    entry per commit returned by ``cached_list_commits``.
    NOTE(review): if the commit history also contains the creating commit,
    that day is counted twice - confirm whether this is intended.

    Args:
        repo_id: Full repository id.
        repo_type: ``"model"``, ``"dataset"`` or ``"space"``.
        username: Unused here; kept for call-site compatibility.
        selected_year: Calendar year to keep.

    Returns:
        ``(commit_dates, commit_count)`` where ``commit_dates`` is a list of
        ``datetime.date`` and ``commit_count`` is its length. Private or
        gated repositories, and any failure, yield ``([], 0)``.
    """
    try:
        rate_limiter.wait()
        # Skip private/gated repos upfront
        repo_info = cached_repo_info(repo_id, repo_type)
        if repo_info.private or getattr(repo_info, 'gated', False):
            # Always return an int count so callers can sum it.
            return [], 0

        commit_dates = []
        initial_commit_date = pd.to_datetime(repo_info.created_at).tz_localize(None).date()
        if initial_commit_date.year == selected_year:
            commit_dates.append(initial_commit_date)

        for commit in cached_list_commits(repo_id, repo_type):
            commit_date = pd.to_datetime(commit.created_at).tz_localize(None).date()
            if commit_date.year == selected_year:
                commit_dates.append(commit_date)

        return commit_dates, len(commit_dates)
    except Exception:
        # Best effort: one unreadable repository must not abort the whole scan.
        return [], 0
# Function to get commit events for a user (optimized)
def get_commit_events(username, kind=None, selected_year=None):
    """Gather the distinct commit dates of a user's repositories.

    Args:
        username: Hub user or organisation name.
        kind: A single repository kind, or ``None`` for models, datasets
            and spaces.
        selected_year: Year forwarded to ``fetch_commits_for_repo``.

    Returns:
        ``(df, items_with_type)``: a DataFrame with a ``date`` column holding
        each date once, and a list of ``(item, kind)`` pairs for every
        repository listed. Errors per kind are reported with ``st.warning``.
    """
    batch_size = 5  # repositories fetched concurrently per batch
    collected_dates = []
    typed_items = []

    for repo_kind in ([kind] if kind else ["model", "dataset", "space"]):
        try:
            listing = cached_list_items(username, repo_kind)
            typed_items.extend((entry, repo_kind) for entry in listing)
            ids = [entry.id for entry in listing]

            for offset in range(0, len(ids), batch_size):
                batch = ids[offset:offset + batch_size]
                with ThreadPoolExecutor(max_workers=min(5, len(batch))) as pool:
                    pending = {
                        pool.submit(fetch_commits_for_repo, rid, repo_kind, username, selected_year): rid
                        for rid in batch
                    }
                    for finished in as_completed(pending):
                        dates, _count = finished.result()
                        if dates:
                            collected_dates.extend(dates)
        except Exception as e:
            st.warning(f"Error fetching {repo_kind}s for {username}: {str(e)}")

    frame = pd.DataFrame(collected_dates, columns=["date"])
    if not frame.empty:
        frame = frame.drop_duplicates()
    return frame, typed_items
# Calendar heatmap function (optimized)
def make_calendar_heatmap(df, title, year):
    """Render a GitHub-style calendar heatmap of per-day counts in Streamlit.

    Args:
        df: DataFrame with a ``date`` column, one row per event. It is not
            modified.
        title: Plot title; also used (lower-cased) in the "nothing found"
            message.
        year: Calendar year to lay out.

    Returns:
        None. The figure is drawn with ``st.pyplot`` and then closed.
    """
    if df.empty:
        st.info(f"No {title.lower()} found for {year}.")
        return

    from matplotlib.colors import ListedColormap, BoundaryNorm

    # Count events per day on a private Series so the caller's frame is untouched.
    days = pd.to_datetime(df["date"]).dt.normalize()
    per_day = days.groupby(days).size()

    start = pd.Timestamp(f"{year}-01-01")
    end = pd.Timestamp(f"{year}-12-31")
    all_days = pd.date_range(start=start, end=end)
    heatmap_data = pd.DataFrame({
        "date": all_days,
        "count": per_day.reindex(all_days, fill_value=0).to_numpy(),
    })

    # Shift by the weekday of Jan 1 so every column is a Monday-to-Sunday
    # week; otherwise rows and columns disagree about which day is which.
    first_dow = start.dayofweek
    heatmap_data["dow"] = heatmap_data["date"].dt.dayofweek
    heatmap_data["week"] = ((heatmap_data["date"] - start).dt.days + first_dow) // 7

    # Cells outside the year (padding in the first/last week) become 0.
    pivot = heatmap_data.pivot(index="dow", columns="week", values="count").fillna(0)

    month_starts = pd.date_range(start, end, freq="MS")
    month_labels = month_starts.strftime("%b")
    month_positions = [((month - start).days + first_dow) // 7 for month in month_starts]

    colors = ['#ebedf0', '#9be9a8', '#40c463', '#30a14e', '#216e39']  # GitHub-style green colors
    bounds = [0, 1, 3, 11, 31, float('inf')]  # Boundaries for color transitions
    cmap = ListedColormap(colors)
    norm = BoundaryNorm(bounds, cmap.N)

    fig, ax = plt.subplots(figsize=(12, 1.2))
    sns.heatmap(pivot.astype(int), ax=ax, cmap=cmap, norm=norm, linewidths=0.5, linecolor="white",
                square=True, cbar=False, yticklabels=["M", "T", "W", "T", "F", "S", "S"])
    ax.set_title(f"{title}", fontsize=12, pad=10)
    ax.set_xlabel("")
    ax.set_ylabel("")
    ax.set_xticks(month_positions)
    ax.set_xticklabels(month_labels, fontsize=8)
    ax.set_yticklabels(ax.get_yticklabels(), rotation=0, fontsize=8)
    st.pyplot(fig)
    # Release the figure; pyplot otherwise keeps it registered across reruns.
    plt.close(fig)
# Function to create a fancy contribution radar chart
def create_contribution_radar(username, models_count, spaces_count, datasets_count, commits_count):
    """Build a four-axis radar chart of a contributor's repository and commit counts.

    Each value is scaled against a fixed per-axis ceiling and clipped at 1.0;
    the raw value is printed next to its point.

    Args:
        username: Name shown in the chart title.
        models_count: Number of models.
        spaces_count: Number of spaces.
        datasets_count: Number of datasets.
        commits_count: Number of commits, plotted on the 'Activity' axis.

    Returns:
        The matplotlib figure. The caller is responsible for showing/closing it.
    """
    categories = ['Models', 'Spaces', 'Datasets', 'Activity']
    values = [models_count, spaces_count, datasets_count, commits_count]
    max_vals = [100, 100, 50, 500]  # Ceiling used to normalise each axis
    normalized = [min(v / m, 1.0) for v, m in zip(values, max_vals)]

    angles = np.linspace(0, 2 * np.pi, len(categories), endpoint=False).tolist()
    # Repeat the first point so the outline closes.
    angles += angles[:1]
    normalized += normalized[:1]

    fig, ax = plt.subplots(figsize=(6, 6), subplot_kw={'polar': True})
    ax.set_theta_offset(np.pi / 2)
    ax.set_theta_direction(-1)
    ax.set_thetagrids(np.degrees(angles[:-1]), categories)

    ax.fill(angles, normalized, color='#4CAF50', alpha=0.25)
    ax.plot(angles, normalized, color='#4CAF50', linewidth=2)

    # zip stops after the four real values, skipping the closing duplicate.
    for angle, radius, val in zip(angles, normalized, values):
        ax.text(angle, radius + 0.05, str(val),
                ha='center', va='center', fontsize=10,
                fontweight='bold')

    ax.set_title(f"{username}'s Contribution Profile", fontsize=15, pad=20)
    return fig
# Function to create contribution distribution pie chart
def create_contribution_pie(model_commits, dataset_commits, space_commits):
    """Build a pie chart of commit counts split by repository type.

    Types whose count is not positive are left out.

    Returns:
        The matplotlib figure, or ``None`` when no type has a positive count.
    """
    slices = [
        ('Models', model_commits, '#FF9800'),
        ('Datasets', dataset_commits, '#2196F3'),
        ('Spaces', space_commits, '#4CAF50'),
    ]
    visible = [entry for entry in slices if entry[1] > 0]
    if not visible:
        return None  # No data to show

    names = [name for name, _, _ in visible]
    amounts = [amount for _, amount, _ in visible]
    palette = [color for _, _, color in visible]

    fig, ax = plt.subplots(figsize=(6, 6))
    ax.pie(
        amounts,
        labels=names,
        colors=palette,
        autopct='%1.1f%%',
        startangle=90,
        shadow=True,
        explode=[0.05] * len(amounts),  # pull every slice out slightly
    )
    ax.axis('equal')  # keep the pie circular
    ax.set_title('Distribution of Contributions by Type', fontsize=15)
    return fig
# Function to create monthly activity chart
def create_monthly_activity(df, year):
    """Build a bar chart of events per calendar month.

    Args:
        df: DataFrame with a ``date`` column, one row per event. It is not
            modified.
        year: Year shown in the title and used to order the twelve months.

    Returns:
        The matplotlib figure, or ``None`` when ``df`` is empty.
    """
    if df.empty:
        return None

    # Work on derived Series only, so no columns are added to the caller's frame.
    month_names = pd.to_datetime(df['date']).dt.strftime('%b')
    month_order = pd.date_range(start=f'{year}-01-01', end=f'{year}-12-31', freq='MS').strftime('%b')
    monthly_counts = month_names.value_counts().reindex(month_order, fill_value=0)

    fig, ax = plt.subplots(figsize=(12, 5))
    months = monthly_counts.index
    counts = monthly_counts.values
    bars = ax.bar(months, counts, color='#2196F3')

    # Highlight the month with most activity
    if counts.max() > 0:
        bars[counts.argmax()].set_color('#FF5722')

    ax.set_title(f'Monthly Activity in {year}', fontsize=15)
    ax.set_xlabel('Month', fontsize=12)
    ax.set_ylabel('Number of Contributions', fontsize=12)

    # Value labels above non-empty bars
    for i, count in enumerate(counts):
        if count > 0:
            ax.text(i, count + 0.5, str(int(count)), ha='center', fontsize=10)

    ax.grid(axis='y', linestyle='--', alpha=0.7)
    plt.xticks(rotation=45)
    plt.tight_layout()
    return fig
# Function to render follower growth simulation
def simulate_follower_data(username, spaces_count, models_count, total_commits):
    """Plot a made-up weekly follower curve derived from contribution counts.

    The numbers are random and purely illustrative: the curve starts at half
    of a contribution-weighted estimate, grows by a random 0-10% per week,
    and its last point is overwritten with the estimate itself.

    Returns:
        The matplotlib figure.
    """
    import numpy as np
    from datetime import timedelta

    weighted = spaces_count * 2 + models_count * 3 + total_commits / 10
    base_followers = max(10, int(weighted / 6))

    # Weekly timestamps covering the last 365 days.
    end_date = datetime.now()
    start_date = end_date - timedelta(days=365)
    dates = pd.date_range(start=start_date, end=end_date, freq='W')

    followers = []
    level = base_followers / 2
    for _ in range(len(dates)):
        level = level * (1 + (np.random.random() * 0.1))
        followers.append(int(level))
    # Force the final point to the estimate.
    followers[-1] = base_followers

    fig, ax = plt.subplots(figsize=(12, 5))
    ax.plot(dates, followers, marker='o', linestyle='-', color='#9C27B0', markersize=5)
    ax.set_title(f"Estimated Follower Growth for {username}", fontsize=15)
    ax.set_xlabel("Date", fontsize=12)
    ax.set_ylabel("Followers", fontsize=12)
    ax.grid(True, linestyle='--', alpha=0.7)
    plt.xticks(rotation=45)
    plt.tight_layout()
    return fig
# Function to create ranking position visualization
def create_ranking_chart(username, overall_rank, spaces_rank, models_rank):
    """Build a horizontal bar chart of a contributor's ranking positions.

    Each available rank is drawn as a bar of length ``101 - rank`` and
    labelled with the rank itself; falsy ranks are omitted.

    Returns:
        The matplotlib figure, or ``None`` when no rank is available.
    """
    if not any((overall_rank, spaces_rank, models_rank)):
        return None

    candidates = [
        ('Overall', overall_rank, '#673AB7'),
        ('Spaces', spaces_rank, '#2196F3'),
        ('Models', models_rank, '#FF9800'),
    ]
    shown = [entry for entry in candidates if entry[1]]

    fig, ax = plt.subplots(figsize=(10, 4))
    categories = [label for label, _, _ in shown]
    positions = [101 - rank for _, rank, _ in shown]  # longer bar = better rank
    colors = [color for _, _, color in shown]
    bars = ax.barh(categories, positions, color=colors, alpha=0.7)

    # Print the actual rank next to each bar.
    for bar, (_, rank_val, _) in zip(bars, shown):
        ax.text(bar.get_width() + 1, bar.get_y() + bar.get_height()/2,
                f'Rank #{rank_val}', va='center', fontsize=10, fontweight='bold')

    ax.set_xlim(0, 100)
    ax.set_title(f"Ranking Positions for {username} (Top 100)", fontsize=15)
    ax.set_xlabel("Percentile (higher is better)", fontsize=12)

    # Marker line separating the top 10 from the rest.
    ax.axvline(x=90, color='red', linestyle='--', alpha=0.5)
    ax.text(91, 0.5, 'Top 10', color='red', fontsize=10, rotation=90, va='center')

    ax.invert_xaxis()
    plt.tight_layout()
    return fig
# Import additional libraries for advanced visualizations | |
import numpy as np | |
# Fetch trending accounts with a loading spinner (do this once at the beginning)
with st.spinner("Loading trending accounts..."):
    trending_accounts, top_owners_spaces, top_owners_models = get_trending_accounts(limit=100)


def _render_ranking_section(owner_counts, kind_label, colormap):
    """Render one ranking table plus its top-30 bar chart expander.

    Args:
        owner_counts: List of ``(owner, count)`` pairs, best first.
        kind_label: ``"Spaces"`` or ``"Models"``; used in every heading/label.
        colormap: Matplotlib colormap callable used to shade the bars.
    """
    count_column = f"{kind_label} Count"
    st.markdown(f"### {kind_label} Contributors Ranking")
    if owner_counts:
        table = pd.DataFrame(owner_counts[:100], columns=["Contributor", count_column])
        table.index = table.index + 1  # Start index from 1 for ranking
        st.dataframe(
            table,
            column_config={
                "Contributor": st.column_config.TextColumn("Contributor"),
                count_column: st.column_config.NumberColumn(
                    f"{kind_label} Count (based on top 500 {kind_label.lower()})", format="%d"
                ),
            },
            use_container_width=True,
            hide_index=False,
        )
    with st.expander(f"View Top 30 {kind_label} Contributors Chart"):
        if owner_counts:
            chart_data = pd.DataFrame(owner_counts[:30], columns=["Owner", count_column])
            fig, ax = plt.subplots(figsize=(10, 8))
            bars = ax.barh(chart_data["Owner"], chart_data[count_column])
            # Add color gradient to bars
            for i, bar in enumerate(bars):
                bar.set_color(colormap(i / len(bars)))
            ax.set_title(f"Top 30 Contributors by Number of {kind_label}")
            ax.set_xlabel(f"Number of {kind_label}")
            fig.tight_layout()
            st.pyplot(fig)
            # Release the figure so it does not accumulate across reruns.
            plt.close(fig)


# Sidebar
with st.sidebar:
    st.title("π€ Contributor")
    tab1, tab2 = st.tabs([
        "Top 100 Overall Contributors",
        "Top 100 by Spaces & Models",
    ])

    with tab1:
        st.subheader("π₯ Top 100 Overall Contributors")
        st.markdown("### Combined Contributors Ranking")
        if trending_accounts:
            # 1-based position of every owner in the per-type rankings.
            spaces_position_by_owner = {owner: idx + 1 for idx, (owner, _) in enumerate(top_owners_spaces)}
            models_position_by_owner = {owner: idx + 1 for idx, (owner, _) in enumerate(top_owners_models)}
            # Positions are rendered as strings so "-" can mark "not ranked".
            overall_data = [
                [
                    owner,
                    str(spaces_position_by_owner.get(owner, "-")),
                    str(models_position_by_owner.get(owner, "-")),
                ]
                for owner in trending_accounts[:100]
            ]
            ranking_data_overall = pd.DataFrame(
                overall_data,
                columns=["Contributor", "Spaces Rank", "Models Rank"],
            )
            ranking_data_overall.index = ranking_data_overall.index + 1  # Start index from 1 for ranking
            st.dataframe(
                ranking_data_overall,
                column_config={
                    "Contributor": st.column_config.TextColumn("Contributor"),
                    "Spaces Rank": st.column_config.TextColumn("Spaces Rank (top 100)"),
                    "Models Rank": st.column_config.TextColumn("Models Rank (top 100)"),
                },
                use_container_width=True,
                hide_index=False,
            )

    with tab2:
        st.subheader("π Top 100 by Spaces & Models")
        _render_ranking_section(top_owners_spaces, "Spaces", plt.cm.viridis)
        # A different colormap distinguishes the models chart from the spaces one.
        _render_ranking_section(top_owners_models, "Models", plt.cm.plasma)

    # Display trending accounts selection dropdown
    st.subheader("Select Contributor")
    selected_trending = st.selectbox(
        "Select trending account",
        options=trending_accounts[:100],  # Limit to top 100
        index=0 if trending_accounts else None,
        key="trending_selectbox",
    )

    # Custom account input option
    st.markdown("<div style='text-align: center; margin: 10px 0;'>OR</div>", unsafe_allow_html=True)
    custom = st.text_input("Enter username/org", label_visibility="collapsed")

    # Typed name wins over the dropdown; "facebook" is the last-resort default.
    username = custom.strip() or selected_trending or "facebook"

    # Year selection
    st.subheader("ποΈ Time Period")
    year_options = list(range(datetime.now().year, 2017, -1))
    selected_year = st.selectbox("Select Year", options=year_options)

    # Additional options for customization
    st.subheader("βοΈ Display Options")
    show_models = st.checkbox("Show Models", value=True)
    show_datasets = st.checkbox("Show Datasets", value=True)
    show_spaces = st.checkbox("Show Spaces", value=True)
# Main Content
st.title("π€ Hugging Face Contributions")


def _show_figure(fig):
    """Render a matplotlib figure in Streamlit, then close it to free memory."""
    st.pyplot(fig)
    plt.close(fig)


def _collect_commits(username, kind, selected_year):
    """Scan every repository of one kind and return (repo_count, dates, commit_total)."""
    repo_ids = [item.id for item in cached_list_items(username, kind)]
    st.info(f"Found {len(repo_ids)} {kind}s for {username}")

    chunk_size = 5  # repositories fetched concurrently per batch
    commit_dates = []
    commit_total = 0
    progress_bar = st.progress(0)
    for i in range(0, len(repo_ids), chunk_size):
        chunk = repo_ids[i:i + chunk_size]
        with ThreadPoolExecutor(max_workers=min(5, len(chunk))) as executor:
            futures = [
                executor.submit(fetch_commits_for_repo, repo_id, kind, username, selected_year)
                for repo_id in chunk
            ]
            for future in as_completed(futures):
                repo_commits, repo_count = future.result()
                if repo_commits:
                    commit_dates.extend(repo_commits)
                    commit_total += repo_count
        progress_bar.progress(min(1.0, (i + len(chunk)) / max(1, len(repo_ids))))
    progress_bar.progress(1.0)
    return len(repo_ids), commit_dates, commit_total


if username:
    with st.spinner(f"Fetching commit data for {username}..."):
        overall_rank = None
        spaces_rank = None
        models_rank = None
        spaces_count = 0
        models_count = 0
        datasets_count = 0

        # Ranking details are shown only for accounts in the overall top 100.
        top_accounts = trending_accounts[:100]
        if username in top_accounts:
            overall_rank = top_accounts.index(username) + 1
            st.success(f"π {username} is ranked #{overall_rank} in the top trending contributors!")

            for i, (owner, count) in enumerate(top_owners_spaces):
                if owner == username:
                    spaces_rank = i + 1
                    spaces_count = count
                    st.info(f"π Spaces Ranking: #{spaces_rank} with {count} spaces")
                    break

            for i, (owner, count) in enumerate(top_owners_models):
                if owner == username:
                    models_rank = i + 1
                    models_count = count
                    st.info(f"π§ Models Ranking: #{models_rank} with {count} models")
                    break

            combined_info = []
            if spaces_rank and spaces_rank <= 100:
                combined_info.append(f"Spaces: #{spaces_rank}")
            if models_rank and models_rank <= 100:
                combined_info.append(f"Models: #{models_rank}")
            if combined_info:
                st.success(f"Combined Rankings (Top 100): {', '.join(combined_info)}")

            rank_chart = create_ranking_chart(username, overall_rank, spaces_rank, models_rank)
            if rank_chart:
                _show_figure(rank_chart)

        # Determine which types to fetch based on checkboxes
        types_to_fetch = [
            kind
            for kind, enabled in (("model", show_models), ("dataset", show_datasets), ("space", show_spaces))
            if enabled
        ]
        if not types_to_fetch:
            st.warning("Please select at least one content type to display (Models, Datasets, or Spaces)")
            st.stop()

        commits_by_type = {}
        commit_counts_by_type = {}
        repo_counts_by_type = {}
        for kind in types_to_fetch:
            try:
                repo_total, kind_dates, kind_commits = _collect_commits(username, kind, selected_year)
                repo_counts_by_type[kind] = repo_total
            except Exception as e:
                st.warning(f"Error fetching {kind}s for {username}: {str(e)}")
                kind_dates, kind_commits = [], 0
            commits_by_type[kind] = kind_dates
            commit_counts_by_type[kind] = kind_commits

        # Prefer the freshly listed repository counts; when a type was not
        # fetched (or failed) keep the count taken from the ranking, if any.
        models_count = repo_counts_by_type.get("model", models_count)
        datasets_count = repo_counts_by_type.get("dataset", datasets_count)
        spaces_count = repo_counts_by_type.get("space", spaces_count)

        total_commits = sum(commit_counts_by_type.values())
        st.subheader(f"{username}'s Activity in {selected_year}")

        # Profile information
        profile_col1, profile_col2 = st.columns([1, 3])
        with profile_col1:
            st.info(f"Profile: {username}")
            st.metric("Total Commits", total_commits)
            for owner, count in top_owners_spaces:
                if owner.lower() == username.lower():
                    st.metric("Spaces Count", count)
                    break
            st.markdown(f"[View Profile on Hugging Face](https://huggingface.co/{username})")
        with profile_col2:
            _show_figure(
                create_contribution_radar(username, models_count, spaces_count, datasets_count, total_commits)
            )

        # One row per commit. Dates are deliberately NOT de-duplicated: the
        # charts count rows per day/month, so collapsing duplicates would
        # cap every day at 1 and disagree with the commit totals above.
        all_commits = [day for kind_dates in commits_by_type.values() for day in kind_dates]
        all_df = pd.DataFrame(all_commits, columns=["date"])

        # Monthly activity chart (a copy is passed so all_df stays pristine)
        st.subheader(f"Monthly Activity Pattern ({selected_year})")
        monthly_fig = create_monthly_activity(all_df.copy(), selected_year)
        if monthly_fig:
            _show_figure(monthly_fig)
        else:
            st.info(f"No activity data available for {username} in {selected_year}")

        # Calendar heatmap for all commits
        st.subheader(f"Contribution Calendar ({selected_year})")
        make_calendar_heatmap(all_df.copy(), "All Commits", selected_year)

        # Contribution distribution pie chart
        st.subheader("Contribution Distribution by Type")
        pie_chart = create_contribution_pie(
            commit_counts_by_type.get("model", 0),
            commit_counts_by_type.get("dataset", 0),
            commit_counts_by_type.get("space", 0),
        )
        if pie_chart:
            _show_figure(pie_chart)
        else:
            st.info("No contribution data available to show distribution")

        # Follower growth simulation
        st.subheader("Follower Growth Simulation")
        st.caption("Based on contribution metrics - for visualization purposes only")
        _show_figure(simulate_follower_data(username, spaces_count, models_count, total_commits))

        # Add analysis message
        if total_commits > 0:
            st.subheader("π Analytics Summary")
            monthly_df = pd.DataFrame(all_commits, columns=["date"])
            monthly_df['date'] = pd.to_datetime(monthly_df['date'])
            monthly_df['month'] = monthly_df['date'].dt.month
            if not monthly_df.empty:
                month_counts = monthly_df['month'].value_counts()
                most_active_month = int(month_counts.idxmax())
                # Any year works here; only the month name is used.
                month_name = datetime(2020, most_active_month, 1).strftime('%B')
                st.markdown("\n".join([
                    "",
                    f"### Activity Analysis for {username}",
                    f"- **Total Activity**: {total_commits} contributions in {selected_year}",
                    f"- **Most Active Month**: {month_name} with {month_counts.max()} contributions",
                    f"- **Repository Breakdown**: {models_count} Models, {spaces_count} Spaces, {datasets_count} Datasets",
                    "",
                ]))

            # Add ranking context if available
            if overall_rank:
                # State the rank against the size of the ranked list; the old
                # "Top {100 - rank}%" wording read "Top 99%" for rank #1.
                st.markdown("\n".join([
                    "",
                    "### Ranking Analysis",
                    f"- **Overall Ranking**: #{overall_rank} (out of the top {len(top_accounts)} contributors)",
                    "",
                ]))
                if spaces_rank and spaces_rank <= 10:
                    st.markdown(f"- π **Elite Spaces Contributor**: Top 10 ({spaces_rank}) in Spaces contributions")
                elif spaces_rank and spaces_rank <= 30:
                    st.markdown(f"- β¨ **Outstanding Spaces Contributor**: Top 30 ({spaces_rank}) in Spaces contributions")
                if models_rank and models_rank <= 10:
                    st.markdown(f"- π **Elite Models Contributor**: Top 10 ({models_rank}) in Models contributions")
                elif models_rank and models_rank <= 30:
                    st.markdown(f"- β¨ **Outstanding Models Contributor**: Top 30 ({models_rank}) in Models contributions")

        # Metrics and heatmaps for each selected type
        st.subheader("Detailed Category Analysis")
        kind_labels = {
            "model": ("π§ ", "Models"),
            "dataset": ("π¦", "Datasets"),
            "space": ("π", "Spaces"),
        }
        cols = st.columns(len(types_to_fetch))
        for col, kind in zip(cols, types_to_fetch):
            emoji, label = kind_labels[kind]
            with col:
                try:
                    # Reuse the count gathered above; list again only if that failed.
                    total = repo_counts_by_type.get(kind)
                    if total is None:
                        total = len(cached_list_items(username, kind))
                    df_kind = pd.DataFrame(commits_by_type.get(kind, []), columns=["date"])
                    st.metric(f"{emoji} {label}", total)
                    st.metric(f"Commits in {selected_year}", commit_counts_by_type.get(kind, 0))
                    make_calendar_heatmap(df_kind, f"{label} Commits", selected_year)
                except Exception as e:
                    st.warning(f"Error processing {label}: {str(e)}")
                    st.metric(f"{emoji} {label}", 0)
                    st.metric(f"Commits in {selected_year}", 0)
                    make_calendar_heatmap(pd.DataFrame(), f"{label} Commits", selected_year)
else:
    st.info("Please select an account from the sidebar to view contributions.")