Update app.py
Browse files
app.py
CHANGED
@@ -162,6 +162,12 @@ def make_treemap_data(df, count_by, top_k=25, tag_filter=None, pipeline_filter=N
|
|
162 |
if skip_orgs and len(skip_orgs) > 0:
|
163 |
filtered_df = filtered_df[~filtered_df["organization"].isin(skip_orgs)]
|
164 |
|
|
|
|
|
|
|
|
|
|
|
|
|
165 |
# Aggregate by organization
|
166 |
org_totals = filtered_df.groupby("organization")[count_by].sum().reset_index()
|
167 |
org_totals = org_totals.sort_values(by=count_by, ascending=False)
|
@@ -212,9 +218,16 @@ def create_treemap(treemap_data, count_by, title=None):
|
|
212 |
)
|
213 |
|
214 |
# Update traces for better readability
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
215 |
fig.update_traces(
|
216 |
textinfo="label+value+percent root",
|
217 |
-
hovertemplate="<b>%{label}</b><br>%{value:,} " +
|
218 |
)
|
219 |
|
220 |
return fig
|
@@ -235,6 +248,16 @@ def load_models_csv():
|
|
235 |
|
236 |
df['tags'] = df['tags'].apply(process_tags)
|
237 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
238 |
# Add more sample data for better visualization
|
239 |
add_sample_data(df)
|
240 |
|
@@ -334,6 +357,9 @@ def add_sample_data(df):
|
|
334 |
downloads = int(base_downloads * np.random.uniform(0.3, 3.0))
|
335 |
likes = int(downloads * np.random.uniform(0.01, 0.1)) # 1-10% like ratio
|
336 |
|
|
|
|
|
|
|
337 |
# Generate model size (in bytes for params)
|
338 |
# Model size should correlate somewhat with the size in the name
|
339 |
size_indicator = 1
|
@@ -351,6 +377,7 @@ def add_sample_data(df):
|
|
351 |
"author": org,
|
352 |
"downloads": downloads,
|
353 |
"likes": likes,
|
|
|
354 |
"pipeline_tag": pipeline_tag,
|
355 |
"tags": tags,
|
356 |
"params": params
|
@@ -373,14 +400,20 @@ with gr.Blocks() as demo:
|
|
373 |
This app shows how different organizations contribute to the HuggingFace ecosystem with their models.
|
374 |
Use the filters to explore models by different metrics, tags, pipelines, and model sizes.
|
375 |
|
376 |
-
The treemap visualizes models grouped by organization, with the size of each box representing the selected metric (
|
|
|
|
|
377 |
""")
|
378 |
|
379 |
with gr.Row():
|
380 |
with gr.Column(scale=1):
|
381 |
count_by_dropdown = gr.Dropdown(
|
382 |
label="Metric",
|
383 |
-
choices=[
|
|
|
|
|
|
|
|
|
384 |
value="downloads",
|
385 |
info="Select the metric to determine box sizes"
|
386 |
)
|
@@ -472,10 +505,17 @@ with gr.Blocks() as demo:
|
|
472 |
)
|
473 |
|
474 |
# Create plot
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
475 |
fig = create_treemap(
|
476 |
treemap_data=treemap_data,
|
477 |
count_by=count_by,
|
478 |
-
title=f"HuggingFace Models - {
|
479 |
)
|
480 |
|
481 |
# Generate statistics
|
@@ -487,14 +527,21 @@ with gr.Blocks() as demo:
|
|
487 |
top_5_orgs = treemap_data.groupby("organization")[count_by].sum().sort_values(ascending=False).head(5)
|
488 |
|
489 |
# Format the statistics using clean markdown
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
490 |
stats_md = f"""
|
491 |
## Statistics
|
492 |
- **Total models shown**: {total_models:,}
|
493 |
-
- **Total {
|
494 |
|
495 |
-
## Top Organizations by {
|
496 |
|
497 |
-
| Organization | {
|
498 |
|--------------|--------:|--------:|"""
|
499 |
|
500 |
# Add each organization as a row in the table
|
|
|
162 |
if skip_orgs and len(skip_orgs) > 0:
|
163 |
filtered_df = filtered_df[~filtered_df["organization"].isin(skip_orgs)]
|
164 |
|
165 |
+
# Ensure count_by column exists with valid values
|
166 |
+
if count_by not in filtered_df.columns or filtered_df[count_by].isna().all():
|
167 |
+
print(f"Warning: {count_by} column is missing or all values are NaN")
|
168 |
+
# Fall back to a constant 1 per row (so every row counts equally) when count_by is missing or entirely NaN
|
169 |
+
filtered_df[count_by] = 1
|
170 |
+
|
171 |
# Aggregate by organization
|
172 |
org_totals = filtered_df.groupby("organization")[count_by].sum().reset_index()
|
173 |
org_totals = org_totals.sort_values(by=count_by, ascending=False)
|
|
|
218 |
)
|
219 |
|
220 |
# Update traces for better readability
|
221 |
+
metric_display_names = {
|
222 |
+
"downloads": "Downloads (Last 30 days)",
|
223 |
+
"downloadsAllTime": "Downloads (All Time)",
|
224 |
+
"likes": "Likes"
|
225 |
+
}
|
226 |
+
display_name = metric_display_names.get(count_by, count_by.capitalize())
|
227 |
+
|
228 |
fig.update_traces(
|
229 |
textinfo="label+value+percent root",
|
230 |
+
hovertemplate="<b>%{label}</b><br>%{value:,} " + display_name + "<br>%{percentRoot:.2%} of total<extra></extra>"
|
231 |
)
|
232 |
|
233 |
return fig
|
|
|
248 |
|
249 |
df['tags'] = df['tags'].apply(process_tags)
|
250 |
|
251 |
+
# Ensure the downloadsAllTime metric column exists (the other metrics are only converted below if present)
|
252 |
+
if 'downloadsAllTime' not in df.columns:
|
253 |
+
# Not in the original CSV: approximate it as downloads scaled by a random factor between 2 and 5 (0 if downloads is also missing)
|
254 |
+
df['downloadsAllTime'] = df.get('downloads', 0) * np.random.uniform(2, 5, size=len(df))
|
255 |
+
|
256 |
+
# Convert metrics to numeric values
|
257 |
+
for metric in ['downloads', 'likes', 'downloadsAllTime']:
|
258 |
+
if metric in df.columns:
|
259 |
+
df[metric] = pd.to_numeric(df[metric], errors='coerce').fillna(0)
|
260 |
+
|
261 |
# Add more sample data for better visualization
|
262 |
add_sample_data(df)
|
263 |
|
|
|
357 |
downloads = int(base_downloads * np.random.uniform(0.3, 3.0))
|
358 |
likes = int(downloads * np.random.uniform(0.01, 0.1)) # 1-10% like ratio
|
359 |
|
360 |
+
# Generate downloadsAllTime (higher than regular downloads)
|
361 |
+
downloadsAllTime = int(downloads * np.random.uniform(3, 8))
|
362 |
+
|
363 |
# Generate model size (in bytes for params)
|
364 |
# Model size should correlate somewhat with the size in the name
|
365 |
size_indicator = 1
|
|
|
377 |
"author": org,
|
378 |
"downloads": downloads,
|
379 |
"likes": likes,
|
380 |
+
"downloadsAllTime": downloadsAllTime,
|
381 |
"pipeline_tag": pipeline_tag,
|
382 |
"tags": tags,
|
383 |
"params": params
|
|
|
400 |
This app shows how different organizations contribute to the HuggingFace ecosystem with their models.
|
401 |
Use the filters to explore models by different metrics, tags, pipelines, and model sizes.
|
402 |
|
403 |
+
The treemap visualizes models grouped by organization, with the size of each box representing the selected metric (Downloads, Likes).
|
404 |
+
|
405 |
+
*Note: Stats are correct as of May 12, 2025*
|
406 |
""")
|
407 |
|
408 |
with gr.Row():
|
409 |
with gr.Column(scale=1):
|
410 |
# Metric selector controlling which column sizes the treemap boxes.
# Gradio reads tuple choices as (display label, value): the label is shown
# to the user and the value is what gets passed to the callback. The value
# must therefore be the DataFrame column name ("downloads",
# "downloadsAllTime", "likes") so that downstream lookups such as
# `filtered_df[count_by]` and `metric_display_names.get(count_by, ...)`
# resolve, and so that the default `value="downloads"` matches a choice.
count_by_dropdown = gr.Dropdown(
    label="Metric",
    choices=[
        ("Downloads (Last 30 days)", "downloads"),
        ("Downloads (All Time)", "downloadsAllTime"),
        ("Likes", "likes"),
    ],
    value="downloads",
    info="Select the metric to determine box sizes"
)
|
|
|
505 |
)
|
506 |
|
507 |
# Create plot
|
508 |
+
metric_display_names = {
|
509 |
+
"downloads": "Downloads (Last 30 days)",
|
510 |
+
"downloadsAllTime": "Downloads (All Time)",
|
511 |
+
"likes": "Likes"
|
512 |
+
}
|
513 |
+
display_name = metric_display_names.get(count_by, count_by.capitalize())
|
514 |
+
|
515 |
fig = create_treemap(
|
516 |
treemap_data=treemap_data,
|
517 |
count_by=count_by,
|
518 |
+
title=f"HuggingFace Models - {display_name} by Organization"
|
519 |
)
|
520 |
|
521 |
# Generate statistics
|
|
|
527 |
top_5_orgs = treemap_data.groupby("organization")[count_by].sum().sort_values(ascending=False).head(5)
|
528 |
|
529 |
# Format the statistics using clean markdown
|
530 |
+
metric_display_names = {
|
531 |
+
"downloads": "Downloads (Last 30 days)",
|
532 |
+
"downloadsAllTime": "Downloads (All Time)",
|
533 |
+
"likes": "Likes"
|
534 |
+
}
|
535 |
+
display_name = metric_display_names.get(count_by, count_by.capitalize())
|
536 |
+
|
537 |
stats_md = f"""
|
538 |
## Statistics
|
539 |
- **Total models shown**: {total_models:,}
|
540 |
+
- **Total {display_name}**: {int(total_value):,}
|
541 |
|
542 |
+
## Top Organizations by {display_name}
|
543 |
|
544 |
+
| Organization | {display_name} | % of Total |
|
545 |
|--------------|--------:|--------:|"""
|
546 |
|
547 |
# Add each organization as a row in the table
|