OrgStats

Running

App Files Community

evijit HF Staff committed 24 days ago

Commit

96bb7cf

verified ·

1 Parent(s): e19da16

Update app.py

Browse files

Files changed (1) hide show

app.py +54 -7

app.py CHANGED Viewed

@@ -162,6 +162,12 @@ def make_treemap_data(df, count_by, top_k=25, tag_filter=None, pipeline_filter=N
     if skip_orgs and len(skip_orgs) > 0:
         filtered_df = filtered_df[~filtered_df["organization"].isin(skip_orgs)]
     # Aggregate by organization
     org_totals = filtered_df.groupby("organization")[count_by].sum().reset_index()
     org_totals = org_totals.sort_values(by=count_by, ascending=False)
@@ -212,9 +218,16 @@ def create_treemap(treemap_data, count_by, title=None):
     )
     # Update traces for better readability
     fig.update_traces(
         textinfo="label+value+percent root",
-        hovertemplate="<b>%{label}</b><br>%{value:,} " + count_by + "<br>%{percentRoot:.2%} of total<extra></extra>"
     )
     return fig
@@ -235,6 +248,16 @@ def load_models_csv():
     df['tags'] = df['tags'].apply(process_tags)
     # Add more sample data for better visualization
     add_sample_data(df)
@@ -334,6 +357,9 @@ def add_sample_data(df):
             downloads = int(base_downloads * np.random.uniform(0.3, 3.0))
             likes = int(downloads * np.random.uniform(0.01, 0.1))  # 1-10% like ratio
             # Generate model size (in bytes for params)
             # Model size should correlate somewhat with the size in the name
             size_indicator = 1
@@ -351,6 +377,7 @@ def add_sample_data(df):
                 "author": org,
                 "downloads": downloads,
                 "likes": likes,
                 "pipeline_tag": pipeline_tag,
                 "tags": tags,
                 "params": params
@@ -373,14 +400,20 @@ with gr.Blocks() as demo:
             This app shows how different organizations contribute to the HuggingFace ecosystem with their models.
             Use the filters to explore models by different metrics, tags, pipelines, and model sizes.
-            The treemap visualizes models grouped by organization, with the size of each box representing the selected metric (downloads or likes).
         """)
     with gr.Row():
         with gr.Column(scale=1):
             count_by_dropdown = gr.Dropdown(
                 label="Metric",
-                choices=["downloads", "likes"],
                 value="downloads",
                 info="Select the metric to determine box sizes"
             )
@@ -472,10 +505,17 @@ with gr.Blocks() as demo:
         )
         # Create plot
         fig = create_treemap(
             treemap_data=treemap_data,
             count_by=count_by,
-            title=f"HuggingFace Models - {count_by.capitalize()} by Organization"
         )
         # Generate statistics
@@ -487,14 +527,21 @@ with gr.Blocks() as demo:
             top_5_orgs = treemap_data.groupby("organization")[count_by].sum().sort_values(ascending=False).head(5)
             # Format the statistics using clean markdown
             stats_md = f"""
 ## Statistics
 - **Total models shown**: {total_models:,}
-- **Total {count_by}**: {int(total_value):,}
-## Top Organizations by {count_by.capitalize()}
-| Organization | {count_by.capitalize()} | % of Total |
 |--------------|--------:|--------:|"""
             # Add each organization as a row in the table

     if skip_orgs and len(skip_orgs) > 0:
         filtered_df = filtered_df[~filtered_df["organization"].isin(skip_orgs)]
+    # Ensure count_by column exists with valid values
+    if count_by not in filtered_df.columns or filtered_df[count_by].isna().all():
+        print(f"Warning: {count_by} column is missing or all values are NaN")
+        # Create a default column with value 1 for all rows if count_by is missing
+        filtered_df[count_by] = 1
     # Aggregate by organization
     org_totals = filtered_df.groupby("organization")[count_by].sum().reset_index()
     org_totals = org_totals.sort_values(by=count_by, ascending=False)
     )
     # Update traces for better readability
+    metric_display_names = {
+        "downloads": "Downloads (Last 30 days)",
+        "downloadsAllTime": "Downloads (All Time)",
+        "likes": "Likes"
+    }
+    display_name = metric_display_names.get(count_by, count_by.capitalize())
     fig.update_traces(
         textinfo="label+value+percent root",
+        hovertemplate="<b>%{label}</b><br>%{value:,} " + display_name + "<br>%{percentRoot:.2%} of total<extra></extra>"
     )
     return fig
     df['tags'] = df['tags'].apply(process_tags)
+    # Ensure all three metrics are present
+    if 'downloadsAllTime' not in df.columns:
+        # Not present in the original CSV: derive it from 'downloads' scaled by a random factor drawn from uniform(2, 5)
+        df['downloadsAllTime'] = df.get('downloads', 0) * np.random.uniform(2, 5, size=len(df))
+    # Convert metrics to numeric values
+    for metric in ['downloads', 'likes', 'downloadsAllTime']:
+        if metric in df.columns:
+            df[metric] = pd.to_numeric(df[metric], errors='coerce').fillna(0)
     # Add more sample data for better visualization
     add_sample_data(df)
             downloads = int(base_downloads * np.random.uniform(0.3, 3.0))
             likes = int(downloads * np.random.uniform(0.01, 0.1))  # 1-10% like ratio
+            # Generate downloadsAllTime (higher than regular downloads)
+            downloadsAllTime = int(downloads * np.random.uniform(3, 8))
             # Generate model size (in bytes for params)
             # Model size should correlate somewhat with the size in the name
             size_indicator = 1
                 "author": org,
                 "downloads": downloads,
                 "likes": likes,
+                "downloadsAllTime": downloadsAllTime,
                 "pipeline_tag": pipeline_tag,
                 "tags": tags,
                 "params": params
             This app shows how different organizations contribute to the HuggingFace ecosystem with their models.
             Use the filters to explore models by different metrics, tags, pipelines, and model sizes.
+            The treemap visualizes models grouped by organization, with the size of each box representing the selected metric (Downloads, Likes).
+            *Note: Stats are correct as of May 12, 2025*
         """)
     with gr.Row():
         with gr.Column(scale=1):
             count_by_dropdown = gr.Dropdown(
                 label="Metric",
+                choices=[
+                    ("downloads", "Downloads (Last 30 days)"),
+                    ("downloadsAllTime", "Downloads (All Time)"),
+                    ("likes", "Likes")
+                ],
                 value="downloads",
                 info="Select the metric to determine box sizes"
             )
         )
         # Create plot
+        metric_display_names = {
+            "downloads": "Downloads (Last 30 days)",
+            "downloadsAllTime": "Downloads (All Time)",
+            "likes": "Likes"
+        }
+        display_name = metric_display_names.get(count_by, count_by.capitalize())
         fig = create_treemap(
             treemap_data=treemap_data,
             count_by=count_by,
+            title=f"HuggingFace Models - {display_name} by Organization"
         )
         # Generate statistics
             top_5_orgs = treemap_data.groupby("organization")[count_by].sum().sort_values(ascending=False).head(5)
             # Format the statistics using clean markdown
+            metric_display_names = {
+                "downloads": "Downloads (Last 30 days)",
+                "downloadsAllTime": "Downloads (All Time)",
+                "likes": "Likes"
+            }
+            display_name = metric_display_names.get(count_by, count_by.capitalize())
             stats_md = f"""
 ## Statistics
 - **Total models shown**: {total_models:,}
+- **Total {display_name}**: {int(total_value):,}
+## Top Organizations by {display_name}
+| Organization | {display_name} | % of Total |
 |--------------|--------:|--------:|"""
             # Add each organization as a row in the table