Update app.py
app.py
CHANGED
@@ -248,146 +248,18 @@ def load_models_csv():
 
     df['tags'] = df['tags'].apply(process_tags)
 
-    # Ensure all
-
-
-    df['downloadsAllTime'] = df.get('downloads', 0) * np.random.uniform(2, 5, size=len(df))
-
-    # Convert metrics to numeric values
-    for metric in ['downloads', 'likes', 'downloadsAllTime']:
+    # Ensure all required metrics are present and convert to numeric
+    required_metrics = ['downloads', 'likes', 'downloadsAllTime']
+    for metric in required_metrics:
         if metric in df.columns:
             df[metric] = pd.to_numeric(df[metric], errors='coerce').fillna(0)
-
-
-
+        else:
+            print(f"Warning: '{metric}' not found in CSV. Creating empty column.")
+            df[metric] = 0
 
     return df
 
-
-    """Add more sample data to make the visualization more interesting"""
-    # Top organizations to include
-    orgs = ['openai', 'meta', 'google', 'microsoft', 'anthropic', 'nvidia', 'huggingface',
-            'deepseek-ai', 'stability-ai', 'mistralai', 'cerebras', 'databricks', 'together',
-            'facebook', 'amazon', 'deepmind', 'cohere', 'bigscience', 'eleutherai']
-
-    # Common model name formats
-    model_name_patterns = [
-        "model-{size}-{version}",
-        "{prefix}-{size}b",
-        "{prefix}-{size}b-{variant}",
-        "llama-{size}b-{variant}",
-        "gpt-{variant}-{size}b",
-        "{prefix}-instruct-{size}b",
-        "{prefix}-chat-{size}b",
-        "{prefix}-coder-{size}b",
-        "stable-diffusion-{version}",
-        "whisper-{size}",
-        "bert-{size}-{variant}",
-        "roberta-{size}",
-        "t5-{size}",
-        "{prefix}-vision-{size}b"
-    ]
-
-    # Common name parts
-    prefixes = ["falcon", "llama", "mistral", "gpt", "phi", "gemma", "qwen", "yi", "mpt", "bloom"]
-    sizes = ["7", "13", "34", "70", "1", "3", "7b", "13b", "70b", "8b", "2b", "1b", "0.5b", "small", "base", "large", "huge"]
-    variants = ["chat", "instruct", "base", "v1.0", "v2", "beta", "turbo", "fast", "xl", "xxl"]
-
-    # Generate sample data
-    sample_data = []
-    for org_idx, org in enumerate(orgs):
-        # Create 5-10 models per organization
-        num_models = np.random.randint(5, 11)
-
-        for i in range(num_models):
-            # Create realistic model name
-            pattern = np.random.choice(model_name_patterns)
-            prefix = np.random.choice(prefixes)
-            size = np.random.choice(sizes)
-            version = f"v{np.random.randint(1, 4)}"
-            variant = np.random.choice(variants)
-
-            model_name = pattern.format(
-                prefix=prefix,
-                size=size,
-                version=version,
-                variant=variant
-            )
-
-            model_id = f"{org}/{model_name}"
-
-            # Select a realistic pipeline tag based on name
-            if "diffusion" in model_name or "image" in model_name:
-                pipeline_tag = np.random.choice(["text-to-image", "image-to-image", "image-segmentation"])
-            elif "whisper" in model_name or "speech" in model_name:
-                pipeline_tag = np.random.choice(["automatic-speech-recognition", "text-to-speech"])
-            elif "coder" in model_name or "code" in model_name:
-                pipeline_tag = "text-generation"
-            elif "bert" in model_name or "roberta" in model_name:
-                pipeline_tag = np.random.choice(["fill-mask", "text-classification", "token-classification"])
-            elif "vision" in model_name:
-                pipeline_tag = np.random.choice(["image-classification", "image-to-text", "visual-question-answering"])
-            else:
-                pipeline_tag = "text-generation"  # Most common
-
-            # Generate realistic tags
-            tags = [pipeline_tag]
-
-            if "text-generation" in pipeline_tag:
-                tags.extend(["language-model", "text", "gpt", "llm"])
-                if "instruct" in model_name:
-                    tags.append("instruction-following")
-                if "chat" in model_name:
-                    tags.append("chat")
-            elif "speech" in pipeline_tag:
-                tags.extend(["audio", "speech", "voice"])
-            elif "image" in pipeline_tag:
-                tags.extend(["vision", "image", "diffusion"])
-
-            # Add language tags
-            if np.random.random() < 0.8:  # 80% chance for English
-                tags.append("en")
-            if np.random.random() < 0.3:  # 30% chance for multilingual
-                tags.append("multilingual")
-
-            # Generate downloads and likes (weighted by org position for variety)
-            # Earlier orgs get more downloads to make the visualization interesting
-            popularity_factor = (len(orgs) - org_idx) / len(orgs)  # 1.0 to 0.0
-            base_downloads = 10000 * (10 ** (2 * popularity_factor))
-            downloads = int(base_downloads * np.random.uniform(0.3, 3.0))
-            likes = int(downloads * np.random.uniform(0.01, 0.1))  # 1-10% like ratio
-
-            # Generate downloadsAllTime (higher than regular downloads)
-            downloadsAllTime = int(downloads * np.random.uniform(3, 8))
-
-            # Generate model size (in bytes for params)
-            # Model size should correlate somewhat with the size in the name
-            size_indicator = 1
-            for s in ["70b", "13b", "7b", "3b", "2b", "1b", "large", "huge", "xl", "xxl"]:
-                if s in model_name.lower():
-                    size_indicator = float(s.replace("b", "")) if s[0].isdigit() else 3
-                    break
-
-            # Size in bytes
-            params = int(np.random.uniform(0.5, 2.0) * size_indicator * 1e9)
-
-            # Create model entry
-            model = {
-                "id": model_id,
-                "author": org,
-                "downloads": downloads,
-                "likes": likes,
-                "downloadsAllTime": downloadsAllTime,
-                "pipeline_tag": pipeline_tag,
-                "tags": tags,
-                "params": params
-            }
-
-            sample_data.append(model)
-
-    # Convert sample data to DataFrame and append to original
-    sample_df = pd.DataFrame(sample_data)
-    return pd.concat([df, sample_df], ignore_index=True)
+
 
 # Create Gradio interface
 with gr.Blocks() as demo:
@@ -535,7 +407,7 @@ with gr.Blocks() as demo:
         display_name = metric_display_names.get(count_by, count_by.capitalize())
 
         stats_md = f"""
-        ## Statistics
+        ## Statistics as of May 12, 2025
         - **Total models shown**: {total_models:,}
         - **Total {display_name}**: {int(total_value):,}
 
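For reference, a minimal sketch of the metric handling the first hunk now applies in load_models_csv, assuming df is a pandas DataFrame read from the models CSV; the coerce_metrics helper and the sample frame are illustrative names, not code from app.py.

```python
import pandas as pd

def coerce_metrics(df: pd.DataFrame) -> pd.DataFrame:
    # Coerce the metric columns the app relies on to numeric values,
    # filling unparseable entries with 0 and creating missing columns.
    required_metrics = ['downloads', 'likes', 'downloadsAllTime']
    for metric in required_metrics:
        if metric in df.columns:
            df[metric] = pd.to_numeric(df[metric], errors='coerce').fillna(0)
        else:
            print(f"Warning: '{metric}' not found in CSV. Creating empty column.")
            df[metric] = 0
    return df

# Example: 'downloadsAllTime' is absent and 'downloads' holds a non-numeric entry.
frame = pd.DataFrame({'downloads': ['100', 'n/a'], 'likes': [5, 7]})
print(coerce_metrics(frame).dtypes)
```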