evijit HF Staff committed on
Commit
51362b4
·
verified ·
1 Parent(s): 3043125

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -126
app.py CHANGED
@@ -235,133 +235,8 @@ def load_models_csv():
235
 
236
  df['tags'] = df['tags'].apply(process_tags)
237
 
238
- # Add more sample data for better visualization
239
- add_sample_data(df)
240
-
241
  return df
242
 
243
- def add_sample_data(df):
244
- """Add more sample data to make the visualization more interesting"""
245
- # Top organizations to include
246
- orgs = ['openai', 'meta', 'google', 'microsoft', 'anthropic', 'nvidia', 'huggingface',
247
- 'deepseek-ai', 'stability-ai', 'mistralai', 'cerebras', 'databricks', 'together',
248
- 'facebook', 'amazon', 'deepmind', 'cohere', 'bigscience', 'eleutherai']
249
-
250
- # Common model name formats
251
- model_name_patterns = [
252
- "model-{size}-{version}",
253
- "{prefix}-{size}b",
254
- "{prefix}-{size}b-{variant}",
255
- "llama-{size}b-{variant}",
256
- "gpt-{variant}-{size}b",
257
- "{prefix}-instruct-{size}b",
258
- "{prefix}-chat-{size}b",
259
- "{prefix}-coder-{size}b",
260
- "stable-diffusion-{version}",
261
- "whisper-{size}",
262
- "bert-{size}-{variant}",
263
- "roberta-{size}",
264
- "t5-{size}",
265
- "{prefix}-vision-{size}b"
266
- ]
267
-
268
- # Common name parts
269
- prefixes = ["falcon", "llama", "mistral", "gpt", "phi", "gemma", "qwen", "yi", "mpt", "bloom"]
270
- sizes = ["7", "13", "34", "70", "1", "3", "7b", "13b", "70b", "8b", "2b", "1b", "0.5b", "small", "base", "large", "huge"]
271
- variants = ["chat", "instruct", "base", "v1.0", "v2", "beta", "turbo", "fast", "xl", "xxl"]
272
-
273
- # Generate sample data
274
- sample_data = []
275
- for org_idx, org in enumerate(orgs):
276
- # Create 5-10 models per organization
277
- num_models = np.random.randint(5, 11)
278
-
279
- for i in range(num_models):
280
- # Create realistic model name
281
- pattern = np.random.choice(model_name_patterns)
282
- prefix = np.random.choice(prefixes)
283
- size = np.random.choice(sizes)
284
- version = f"v{np.random.randint(1, 4)}"
285
- variant = np.random.choice(variants)
286
-
287
- model_name = pattern.format(
288
- prefix=prefix,
289
- size=size,
290
- version=version,
291
- variant=variant
292
- )
293
-
294
- model_id = f"{org}/{model_name}"
295
-
296
- # Select a realistic pipeline tag based on name
297
- if "diffusion" in model_name or "image" in model_name:
298
- pipeline_tag = np.random.choice(["text-to-image", "image-to-image", "image-segmentation"])
299
- elif "whisper" in model_name or "speech" in model_name:
300
- pipeline_tag = np.random.choice(["automatic-speech-recognition", "text-to-speech"])
301
- elif "coder" in model_name or "code" in model_name:
302
- pipeline_tag = "text-generation"
303
- elif "bert" in model_name or "roberta" in model_name:
304
- pipeline_tag = np.random.choice(["fill-mask", "text-classification", "token-classification"])
305
- elif "vision" in model_name:
306
- pipeline_tag = np.random.choice(["image-classification", "image-to-text", "visual-question-answering"])
307
- else:
308
- pipeline_tag = "text-generation" # Most common
309
-
310
- # Generate realistic tags
311
- tags = [pipeline_tag]
312
-
313
- if "text-generation" in pipeline_tag:
314
- tags.extend(["language-model", "text", "gpt", "llm"])
315
- if "instruct" in model_name:
316
- tags.append("instruction-following")
317
- if "chat" in model_name:
318
- tags.append("chat")
319
- elif "speech" in pipeline_tag:
320
- tags.extend(["audio", "speech", "voice"])
321
- elif "image" in pipeline_tag:
322
- tags.extend(["vision", "image", "diffusion"])
323
-
324
- # Add language tags
325
- if np.random.random() < 0.8: # 80% chance for English
326
- tags.append("en")
327
- if np.random.random() < 0.3: # 30% chance for multilingual
328
- tags.append("multilingual")
329
-
330
- # Generate downloads and likes (weighted by org position for variety)
331
- # Earlier orgs get more downloads to make the visualization interesting
332
- popularity_factor = (len(orgs) - org_idx) / len(orgs) # 1.0 to 0.0
333
- base_downloads = 10000 * (10 ** (2 * popularity_factor))
334
- downloads = int(base_downloads * np.random.uniform(0.3, 3.0))
335
- likes = int(downloads * np.random.uniform(0.01, 0.1)) # 1-10% like ratio
336
-
337
- # Generate model size (in bytes for params)
338
- # Model size should correlate somewhat with the size in the name
339
- size_indicator = 1
340
- for s in ["70b", "13b", "7b", "3b", "2b", "1b", "large", "huge", "xl", "xxl"]:
341
- if s in model_name.lower():
342
- size_indicator = float(s.replace("b", "")) if s[0].isdigit() else 3
343
- break
344
-
345
- # Size in bytes
346
- params = int(np.random.uniform(0.5, 2.0) * size_indicator * 1e9)
347
-
348
- # Create model entry
349
- model = {
350
- "id": model_id,
351
- "author": org,
352
- "downloads": downloads,
353
- "likes": likes,
354
- "pipeline_tag": pipeline_tag,
355
- "tags": tags,
356
- "params": params
357
- }
358
-
359
- sample_data.append(model)
360
-
361
- # Convert sample data to DataFrame and append to original
362
- sample_df = pd.DataFrame(sample_data)
363
- return pd.concat([df, sample_df], ignore_index=True)
364
-
365
  # Create Gradio interface
366
  with gr.Blocks() as demo:
367
  models_data = gr.State() # To store loaded data
@@ -426,7 +301,7 @@ with gr.Blocks() as demo:
426
 
427
  skip_orgs_textbox = gr.Textbox(
428
  label="Organizations to Skip (comma-separated)",
429
- placeholder="e.g., openai, meta, huggingface",
430
  info="Enter names of organizations to exclude from the visualization"
431
  )
432
 
 
235
 
236
  df['tags'] = df['tags'].apply(process_tags)
237
 
 
 
 
238
  return df
239
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
240
  # Create Gradio interface
241
  with gr.Blocks() as demo:
242
  models_data = gr.State() # To store loaded data
 
301
 
302
  skip_orgs_textbox = gr.Textbox(
303
  label="Organizations to Skip (comma-separated)",
304
+ placeholder="e.g., TheBloke, MaziyarPanahi, unsloth, modularai, Gensyn, bartowski",
305
  info="Enter names of organizations to exclude from the visualization"
306
  )
307