evijit (HF Staff) committed · verified
Commit 18ef775 · 1 Parent(s): 96bb7cf

Update app.py

Files changed (1): app.py (+8, -136)
app.py CHANGED
@@ -248,146 +248,18 @@ def load_models_csv():
 
     df['tags'] = df['tags'].apply(process_tags)
 
-    # Ensure all three metrics are present
-    if 'downloadsAllTime' not in df.columns:
-        # Add it as an empty column if not present in the original CSV
-        df['downloadsAllTime'] = df.get('downloads', 0) * np.random.uniform(2, 5, size=len(df))
-
-    # Convert metrics to numeric values
-    for metric in ['downloads', 'likes', 'downloadsAllTime']:
+    # Ensure all required metrics are present and convert to numeric
+    required_metrics = ['downloads', 'likes', 'downloadsAllTime']
+    for metric in required_metrics:
         if metric in df.columns:
             df[metric] = pd.to_numeric(df[metric], errors='coerce').fillna(0)
-
-    # Add more sample data for better visualization
-    add_sample_data(df)
+        else:
+            print(f"Warning: '{metric}' not found in CSV. Creating empty column.")
+            df[metric] = 0
 
     return df
 
-def add_sample_data(df):
-    """Add more sample data to make the visualization more interesting"""
-    # Top organizations to include
-    orgs = ['openai', 'meta', 'google', 'microsoft', 'anthropic', 'nvidia', 'huggingface',
-            'deepseek-ai', 'stability-ai', 'mistralai', 'cerebras', 'databricks', 'together',
-            'facebook', 'amazon', 'deepmind', 'cohere', 'bigscience', 'eleutherai']
-
-    # Common model name formats
-    model_name_patterns = [
-        "model-{size}-{version}",
-        "{prefix}-{size}b",
-        "{prefix}-{size}b-{variant}",
-        "llama-{size}b-{variant}",
-        "gpt-{variant}-{size}b",
-        "{prefix}-instruct-{size}b",
-        "{prefix}-chat-{size}b",
-        "{prefix}-coder-{size}b",
-        "stable-diffusion-{version}",
-        "whisper-{size}",
-        "bert-{size}-{variant}",
-        "roberta-{size}",
-        "t5-{size}",
-        "{prefix}-vision-{size}b"
-    ]
-
-    # Common name parts
-    prefixes = ["falcon", "llama", "mistral", "gpt", "phi", "gemma", "qwen", "yi", "mpt", "bloom"]
-    sizes = ["7", "13", "34", "70", "1", "3", "7b", "13b", "70b", "8b", "2b", "1b", "0.5b", "small", "base", "large", "huge"]
-    variants = ["chat", "instruct", "base", "v1.0", "v2", "beta", "turbo", "fast", "xl", "xxl"]
-
-    # Generate sample data
-    sample_data = []
-    for org_idx, org in enumerate(orgs):
-        # Create 5-10 models per organization
-        num_models = np.random.randint(5, 11)
-
-        for i in range(num_models):
-            # Create realistic model name
-            pattern = np.random.choice(model_name_patterns)
-            prefix = np.random.choice(prefixes)
-            size = np.random.choice(sizes)
-            version = f"v{np.random.randint(1, 4)}"
-            variant = np.random.choice(variants)
-
-            model_name = pattern.format(
-                prefix=prefix,
-                size=size,
-                version=version,
-                variant=variant
-            )
-
-            model_id = f"{org}/{model_name}"
-
-            # Select a realistic pipeline tag based on name
-            if "diffusion" in model_name or "image" in model_name:
-                pipeline_tag = np.random.choice(["text-to-image", "image-to-image", "image-segmentation"])
-            elif "whisper" in model_name or "speech" in model_name:
-                pipeline_tag = np.random.choice(["automatic-speech-recognition", "text-to-speech"])
-            elif "coder" in model_name or "code" in model_name:
-                pipeline_tag = "text-generation"
-            elif "bert" in model_name or "roberta" in model_name:
-                pipeline_tag = np.random.choice(["fill-mask", "text-classification", "token-classification"])
-            elif "vision" in model_name:
-                pipeline_tag = np.random.choice(["image-classification", "image-to-text", "visual-question-answering"])
-            else:
-                pipeline_tag = "text-generation"  # Most common
-
-            # Generate realistic tags
-            tags = [pipeline_tag]
-
-            if "text-generation" in pipeline_tag:
-                tags.extend(["language-model", "text", "gpt", "llm"])
-                if "instruct" in model_name:
-                    tags.append("instruction-following")
-                if "chat" in model_name:
-                    tags.append("chat")
-            elif "speech" in pipeline_tag:
-                tags.extend(["audio", "speech", "voice"])
-            elif "image" in pipeline_tag:
-                tags.extend(["vision", "image", "diffusion"])
-
-            # Add language tags
-            if np.random.random() < 0.8:  # 80% chance for English
-                tags.append("en")
-            if np.random.random() < 0.3:  # 30% chance for multilingual
-                tags.append("multilingual")
-
-            # Generate downloads and likes (weighted by org position for variety)
-            # Earlier orgs get more downloads to make the visualization interesting
-            popularity_factor = (len(orgs) - org_idx) / len(orgs)  # 1.0 to 0.0
-            base_downloads = 10000 * (10 ** (2 * popularity_factor))
-            downloads = int(base_downloads * np.random.uniform(0.3, 3.0))
-            likes = int(downloads * np.random.uniform(0.01, 0.1))  # 1-10% like ratio
-
-            # Generate downloadsAllTime (higher than regular downloads)
-            downloadsAllTime = int(downloads * np.random.uniform(3, 8))
-
-            # Generate model size (in bytes for params)
-            # Model size should correlate somewhat with the size in the name
-            size_indicator = 1
-            for s in ["70b", "13b", "7b", "3b", "2b", "1b", "large", "huge", "xl", "xxl"]:
-                if s in model_name.lower():
-                    size_indicator = float(s.replace("b", "")) if s[0].isdigit() else 3
-                    break
-
-            # Size in bytes
-            params = int(np.random.uniform(0.5, 2.0) * size_indicator * 1e9)
-
-            # Create model entry
-            model = {
-                "id": model_id,
-                "author": org,
-                "downloads": downloads,
-                "likes": likes,
-                "downloadsAllTime": downloadsAllTime,
-                "pipeline_tag": pipeline_tag,
-                "tags": tags,
-                "params": params
-            }
-
-            sample_data.append(model)
-
-    # Convert sample data to DataFrame and append to original
-    sample_df = pd.DataFrame(sample_data)
-    return pd.concat([df, sample_df], ignore_index=True)
+
 
 # Create Gradio interface
 with gr.Blocks() as demo:
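
Reviewer note (not part of the diff): a minimal standalone sketch of what the new metric-handling loop above does, run against a hypothetical toy DataFrame. The column names and the loop body come from the diff; the sample rows and values are invented for illustration only.

import pandas as pd

# Toy frame: 'likes' contains a non-numeric entry and 'downloadsAllTime' is absent,
# mimicking an incomplete models CSV.
df = pd.DataFrame({
    "id": ["org-a/model-x", "org-b/model-y"],
    "downloads": ["1200", "340"],
    "likes": [15, "n/a"],
})

required_metrics = ["downloads", "likes", "downloadsAllTime"]
for metric in required_metrics:
    if metric in df.columns:
        # Coerce to numeric: unparseable values become NaN, then 0.
        df[metric] = pd.to_numeric(df[metric], errors="coerce").fillna(0)
    else:
        # Missing metric columns are created as all-zero, matching the new else branch.
        print(f"Warning: '{metric}' not found in CSV. Creating empty column.")
        df[metric] = 0

print(df)
# downloads -> 1200, 340; likes -> 15.0, 0.0; downloadsAllTime -> 0, 0
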
@@ -535,7 +407,7 @@ with gr.Blocks() as demo:
     display_name = metric_display_names.get(count_by, count_by.capitalize())
 
     stats_md = f"""
-    ## Statistics
+    ## Statistics as of May 12, 2025
     - **Total models shown**: {total_models:,}
     - **Total {display_name}**: {int(total_value):,}
 
 
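
Reviewer note (not part of the diff): a small sketch of how the updated stats_md block renders. The heading text is the one added in this hunk; total_models, total_value, and display_name are hypothetical stand-ins for the app's computed values.

# Hypothetical values standing in for the app's aggregates.
total_models = 1523
total_value = 987654321.0
display_name = "Downloads (All Time)"  # assumed label, not taken from the diff

stats_md = f"""
## Statistics as of May 12, 2025
- **Total models shown**: {total_models:,}
- **Total {display_name}**: {int(total_value):,}
"""
print(stats_md)
# The ':,' format spec adds thousands separators: 1,523 and 987,654,321.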