Update app.py
Browse files
app.py
CHANGED
@@ -235,133 +235,8 @@ def load_models_csv():
|
|
235 |
|
236 |
df['tags'] = df['tags'].apply(process_tags)
|
237 |
|
238 |
-
# Add more sample data for better visualization
|
239 |
-
add_sample_data(df)
|
240 |
-
|
241 |
return df
|
242 |
|
243 |
-
def add_sample_data(df, seed=None):
    """Return ``df`` extended with synthetic model rows for richer visualizations.

    For each organization in a fixed list, 5-10 fake models are generated with
    a plausible name, pipeline tag, tag list, download/like counts and a
    parameter count.

    The input frame is NOT modified: the rows are appended with ``pd.concat``
    and the combined frame is returned, so callers must use the return value.

    Args:
        df: DataFrame to extend. The generated rows use the columns ``id``,
            ``author``, ``downloads``, ``likes``, ``pipeline_tag``, ``tags``
            and ``params``.
        seed: Optional seed passed to ``np.random.default_rng``. Pass a fixed
            value to get reproducible sample data; ``None`` (the default)
            draws fresh entropy on every call.

    Returns:
        A new DataFrame holding the rows of ``df`` followed by the generated
        rows, with a fresh 0..n-1 index.
    """
    rng = np.random.default_rng(seed)

    def pick(options):
        """Return one uniformly chosen element of ``options`` (a plain Python object)."""
        return options[int(rng.integers(len(options)))]

    # Top organizations to include
    orgs = [
        'openai', 'meta', 'google', 'microsoft', 'anthropic', 'nvidia', 'huggingface',
        'deepseek-ai', 'stability-ai', 'mistralai', 'cerebras', 'databricks', 'together',
        'facebook', 'amazon', 'deepmind', 'cohere', 'bigscience', 'eleutherai',
    ]

    # Common model name formats
    model_name_patterns = [
        "model-{size}-{version}",
        "{prefix}-{size}b",
        "{prefix}-{size}b-{variant}",
        "llama-{size}b-{variant}",
        "gpt-{variant}-{size}b",
        "{prefix}-instruct-{size}b",
        "{prefix}-chat-{size}b",
        "{prefix}-coder-{size}b",
        "stable-diffusion-{version}",
        "whisper-{size}",
        "bert-{size}-{variant}",
        "roberta-{size}",
        "t5-{size}",
        "{prefix}-vision-{size}b",
    ]

    # Common name parts
    prefixes = ["falcon", "llama", "mistral", "gpt", "phi", "gemma", "qwen", "yi", "mpt", "bloom"]
    # Sizes are split by how the pattern renders them: "{size}b" patterns get a
    # bare number of billions (the pattern supplies the "b"), every other
    # pattern gets a size word. Mixing the two produced names such as
    # "llama-7bb-chat" or "falcon-smallb".
    billion_sizes = ["0.5", "1", "2", "3", "7", "8", "13", "34", "70"]
    named_sizes = ["small", "base", "large", "huge"]
    variants = ["chat", "instruct", "base", "v1.0", "v2", "beta", "turbo", "fast", "xl", "xxl"]

    sample_data = []
    for org_idx, org in enumerate(orgs):
        # Earlier orgs get more downloads to make the visualization interesting.
        # The factor runs from 1.0 (first org) down to 1/len(orgs) (last org).
        popularity_factor = (len(orgs) - org_idx) / len(orgs)
        base_downloads = 10000 * (10 ** (2 * popularity_factor))

        # Create 5-10 models per organization (upper bound of integers() is exclusive)
        num_models = int(rng.integers(5, 11))

        for _ in range(num_models):
            # Create realistic model name
            pattern = pick(model_name_patterns)
            uses_billions = "{size}b" in pattern
            prefix = pick(prefixes)
            size = pick(billion_sizes if uses_billions else named_sizes)
            version = f"v{int(rng.integers(1, 4))}"
            variant = pick(variants)

            # str.format ignores keyword arguments the pattern does not use.
            model_name = pattern.format(
                prefix=prefix,
                size=size,
                version=version,
                variant=variant,
            )
            model_id = f"{org}/{model_name}"

            # Select a realistic pipeline tag based on name
            if "diffusion" in model_name or "image" in model_name:
                pipeline_tag = pick(["text-to-image", "image-to-image", "image-segmentation"])
            elif "whisper" in model_name or "speech" in model_name:
                pipeline_tag = pick(["automatic-speech-recognition", "text-to-speech"])
            elif "coder" in model_name or "code" in model_name:
                pipeline_tag = "text-generation"
            elif "bert" in model_name or "roberta" in model_name:
                pipeline_tag = pick(["fill-mask", "text-classification", "token-classification"])
            elif "vision" in model_name:
                pipeline_tag = pick(["image-classification", "image-to-text", "visual-question-answering"])
            else:
                pipeline_tag = "text-generation"  # Most common

            # Generate realistic tags
            tags = [pipeline_tag]
            if "text-generation" in pipeline_tag:
                tags.extend(["language-model", "text", "gpt", "llm"])
                if "instruct" in model_name:
                    tags.append("instruction-following")
                if "chat" in model_name:
                    tags.append("chat")
            elif "speech" in pipeline_tag:
                tags.extend(["audio", "speech", "voice"])
            elif "image" in pipeline_tag:
                tags.extend(["vision", "image", "diffusion"])

            # Add language tags
            if rng.random() < 0.8:  # 80% chance for English
                tags.append("en")
            if rng.random() < 0.3:  # 30% chance for multilingual
                tags.append("multilingual")

            # Generate downloads and likes (weighted by org position for variety)
            downloads = int(base_downloads * rng.uniform(0.3, 3.0))
            likes = int(downloads * rng.uniform(0.01, 0.1))  # 1-10% like ratio

            # Parameter count should correlate with the size shown in the name.
            # A numeric size wins; otherwise "large"/"huge"/"xl"/"xxl" in the
            # name count as roughly 3B, and everything else as roughly 1B.
            if uses_billions:
                size_indicator = float(size)
            elif ("{size}" in pattern and size in ("large", "huge")) or (
                "{variant}" in pattern and variant in ("xl", "xxl")
            ):
                size_indicator = 3.0
            else:
                size_indicator = 1.0

            # Number of parameters (not bytes), jittered around the nominal size
            params = int(rng.uniform(0.5, 2.0) * size_indicator * 1e9)

            # Create model entry
            sample_data.append({
                "id": model_id,
                "author": org,
                "downloads": downloads,
                "likes": likes,
                "pipeline_tag": pipeline_tag,
                "tags": tags,
                "params": params,
            })

    # Convert sample data to DataFrame and append to original
    sample_df = pd.DataFrame(sample_data)
    return pd.concat([df, sample_df], ignore_index=True)
|
364 |
-
|
365 |
# Create Gradio interface
|
366 |
with gr.Blocks() as demo:
|
367 |
models_data = gr.State() # To store loaded data
|
@@ -426,7 +301,7 @@ with gr.Blocks() as demo:
|
|
426 |
|
427 |
skip_orgs_textbox = gr.Textbox(
|
428 |
label="Organizations to Skip (comma-separated)",
|
429 |
-
placeholder="e.g.,
|
430 |
info="Enter names of organizations to exclude from the visualization"
|
431 |
)
|
432 |
|
|
|
235 |
|
236 |
df['tags'] = df['tags'].apply(process_tags)
|
237 |
|
|
|
|
|
|
|
238 |
return df
|
239 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
240 |
# Create Gradio interface
|
241 |
with gr.Blocks() as demo:
|
242 |
models_data = gr.State() # To store loaded data
|
|
|
301 |
|
302 |
skip_orgs_textbox = gr.Textbox(
|
303 |
label="Organizations to Skip (comma-separated)",
|
304 |
+
placeholder="e.g., TheBloke, MaziyarPanahi, unsloth, modularai, Gensyn, bartowski",
|
305 |
info="Enter names of organizations to exclude from the visualization"
|
306 |
)
|
307 |
|