evijit HF Staff committed on
Commit
96bb7cf
·
verified ·
1 Parent(s): e19da16

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -7
app.py CHANGED
@@ -162,6 +162,12 @@ def make_treemap_data(df, count_by, top_k=25, tag_filter=None, pipeline_filter=N
162
  if skip_orgs and len(skip_orgs) > 0:
163
  filtered_df = filtered_df[~filtered_df["organization"].isin(skip_orgs)]
164
 
 
 
 
 
 
 
165
  # Aggregate by organization
166
  org_totals = filtered_df.groupby("organization")[count_by].sum().reset_index()
167
  org_totals = org_totals.sort_values(by=count_by, ascending=False)
@@ -212,9 +218,16 @@ def create_treemap(treemap_data, count_by, title=None):
212
  )
213
 
214
  # Update traces for better readability
 
 
 
 
 
 
 
215
  fig.update_traces(
216
  textinfo="label+value+percent root",
217
- hovertemplate="<b>%{label}</b><br>%{value:,} " + count_by + "<br>%{percentRoot:.2%} of total<extra></extra>"
218
  )
219
 
220
  return fig
@@ -235,6 +248,16 @@ def load_models_csv():
235
 
236
  df['tags'] = df['tags'].apply(process_tags)
237
 
 
 
 
 
 
 
 
 
 
 
238
  # Add more sample data for better visualization
239
  add_sample_data(df)
240
 
@@ -334,6 +357,9 @@ def add_sample_data(df):
334
  downloads = int(base_downloads * np.random.uniform(0.3, 3.0))
335
  likes = int(downloads * np.random.uniform(0.01, 0.1)) # 1-10% like ratio
336
 
 
 
 
337
  # Generate model size (in bytes for params)
338
  # Model size should correlate somewhat with the size in the name
339
  size_indicator = 1
@@ -351,6 +377,7 @@ def add_sample_data(df):
351
  "author": org,
352
  "downloads": downloads,
353
  "likes": likes,
 
354
  "pipeline_tag": pipeline_tag,
355
  "tags": tags,
356
  "params": params
@@ -373,14 +400,20 @@ with gr.Blocks() as demo:
373
  This app shows how different organizations contribute to the HuggingFace ecosystem with their models.
374
  Use the filters to explore models by different metrics, tags, pipelines, and model sizes.
375
 
376
- The treemap visualizes models grouped by organization, with the size of each box representing the selected metric (downloads or likes).
 
 
377
  """)
378
 
379
  with gr.Row():
380
  with gr.Column(scale=1):
381
  count_by_dropdown = gr.Dropdown(
382
  label="Metric",
383
- choices=["downloads", "likes"],
 
 
 
 
384
  value="downloads",
385
  info="Select the metric to determine box sizes"
386
  )
@@ -472,10 +505,17 @@ with gr.Blocks() as demo:
472
  )
473
 
474
  # Create plot
 
 
 
 
 
 
 
475
  fig = create_treemap(
476
  treemap_data=treemap_data,
477
  count_by=count_by,
478
- title=f"HuggingFace Models - {count_by.capitalize()} by Organization"
479
  )
480
 
481
  # Generate statistics
@@ -487,14 +527,21 @@ with gr.Blocks() as demo:
487
  top_5_orgs = treemap_data.groupby("organization")[count_by].sum().sort_values(ascending=False).head(5)
488
 
489
  # Format the statistics using clean markdown
 
 
 
 
 
 
 
490
  stats_md = f"""
491
  ## Statistics
492
  - **Total models shown**: {total_models:,}
493
- - **Total {count_by}**: {int(total_value):,}
494
 
495
- ## Top Organizations by {count_by.capitalize()}
496
 
497
- | Organization | {count_by.capitalize()} | % of Total |
498
  |--------------|--------:|--------:|"""
499
 
500
  # Add each organization as a row in the table
 
162
  if skip_orgs and len(skip_orgs) > 0:
163
  filtered_df = filtered_df[~filtered_df["organization"].isin(skip_orgs)]
164
 
165
+ # Ensure count_by column exists with valid values
166
+ if count_by not in filtered_df.columns or filtered_df[count_by].isna().all():
167
+ print(f"Warning: {count_by} column is missing or all values are NaN")
168
+ # Fall back to a constant column of 1s (every row weighted equally) when count_by is missing or entirely NaN
169
+ filtered_df[count_by] = 1
170
+
171
  # Aggregate by organization
172
  org_totals = filtered_df.groupby("organization")[count_by].sum().reset_index()
173
  org_totals = org_totals.sort_values(by=count_by, ascending=False)
 
218
  )
219
 
220
  # Update traces for better readability
221
+ metric_display_names = {
222
+ "downloads": "Downloads (Last 30 days)",
223
+ "downloadsAllTime": "Downloads (All Time)",
224
+ "likes": "Likes"
225
+ }
226
+ display_name = metric_display_names.get(count_by, count_by.capitalize())
227
+
228
  fig.update_traces(
229
  textinfo="label+value+percent root",
230
+ hovertemplate="<b>%{label}</b><br>%{value:,} " + display_name + "<br>%{percentRoot:.2%} of total<extra></extra>"
231
  )
232
 
233
  return fig
 
248
 
249
  df['tags'] = df['tags'].apply(process_tags)
250
 
251
+ # Ensure all three metrics are present
252
+ if 'downloadsAllTime' not in df.columns:
253
+ # Not present in the original CSV: synthesize it from 'downloads' scaled by a random factor in [2, 5)
254
+ df['downloadsAllTime'] = df.get('downloads', 0) * np.random.uniform(2, 5, size=len(df))
255
+
256
+ # Convert metrics to numeric values
257
+ for metric in ['downloads', 'likes', 'downloadsAllTime']:
258
+ if metric in df.columns:
259
+ df[metric] = pd.to_numeric(df[metric], errors='coerce').fillna(0)
260
+
261
  # Add more sample data for better visualization
262
  add_sample_data(df)
263
 
 
357
  downloads = int(base_downloads * np.random.uniform(0.3, 3.0))
358
  likes = int(downloads * np.random.uniform(0.01, 0.1)) # 1-10% like ratio
359
 
360
+ # Generate downloadsAllTime (higher than regular downloads)
361
+ downloadsAllTime = int(downloads * np.random.uniform(3, 8))
362
+
363
  # Generate model size (in bytes for params)
364
  # Model size should correlate somewhat with the size in the name
365
  size_indicator = 1
 
377
  "author": org,
378
  "downloads": downloads,
379
  "likes": likes,
380
+ "downloadsAllTime": downloadsAllTime,
381
  "pipeline_tag": pipeline_tag,
382
  "tags": tags,
383
  "params": params
 
400
  This app shows how different organizations contribute to the HuggingFace ecosystem with their models.
401
  Use the filters to explore models by different metrics, tags, pipelines, and model sizes.
402
 
403
+ The treemap visualizes models grouped by organization, with the size of each box representing the selected metric (Downloads, Likes).
404
+
405
+ *Note: Stats are correct as of May 12, 2025*
406
  """)
407
 
408
  with gr.Row():
409
  with gr.Column(scale=1):
410
  count_by_dropdown = gr.Dropdown(
411
  label="Metric",
412
+ choices=[
413
+ ("downloads", "Downloads (Last 30 days)"),
414
+ ("downloadsAllTime", "Downloads (All Time)"),
415
+ ("likes", "Likes")
416
+ ],
417
  value="downloads",
418
  info="Select the metric to determine box sizes"
419
  )
 
505
  )
506
 
507
  # Create plot
508
+ metric_display_names = {
509
+ "downloads": "Downloads (Last 30 days)",
510
+ "downloadsAllTime": "Downloads (All Time)",
511
+ "likes": "Likes"
512
+ }
513
+ display_name = metric_display_names.get(count_by, count_by.capitalize())
514
+
515
  fig = create_treemap(
516
  treemap_data=treemap_data,
517
  count_by=count_by,
518
+ title=f"HuggingFace Models - {display_name} by Organization"
519
  )
520
 
521
  # Generate statistics
 
527
  top_5_orgs = treemap_data.groupby("organization")[count_by].sum().sort_values(ascending=False).head(5)
528
 
529
  # Format the statistics using clean markdown
530
+ metric_display_names = {
531
+ "downloads": "Downloads (Last 30 days)",
532
+ "downloadsAllTime": "Downloads (All Time)",
533
+ "likes": "Likes"
534
+ }
535
+ display_name = metric_display_names.get(count_by, count_by.capitalize())
536
+
537
  stats_md = f"""
538
  ## Statistics
539
  - **Total models shown**: {total_models:,}
540
+ - **Total {display_name}**: {int(total_value):,}
541
 
542
+ ## Top Organizations by {display_name}
543
 
544
+ | Organization | {display_name} | % of Total |
545
  |--------------|--------:|--------:|"""
546
 
547
  # Add each organization as a row in the table