OrgStats

Running

App Files Files Community

evijit HF Staff committed 18 days ago

Commit

51362b4

verified ·

1 Parent(s): 3043125

Update app.py

Browse files

Files changed (1) hide show

app.py +1 -126

app.py CHANGED Viewed

@@ -235,133 +235,8 @@ def load_models_csv():
     df['tags'] = df['tags'].apply(process_tags)
-    # Add more sample data for better visualization
-    add_sample_data(df)
     return df
-def add_sample_data(df):
-    """Add more sample data to make the visualization more interesting"""
-    # Top organizations to include
-    orgs = ['openai', 'meta', 'google', 'microsoft', 'anthropic', 'nvidia', 'huggingface',
-            'deepseek-ai', 'stability-ai', 'mistralai', 'cerebras', 'databricks', 'together',
-            'facebook', 'amazon', 'deepmind', 'cohere', 'bigscience', 'eleutherai']
-    # Common model name formats
-    model_name_patterns = [
-        "model-{size}-{version}",
-        "{prefix}-{size}b",
-        "{prefix}-{size}b-{variant}",
-        "llama-{size}b-{variant}",
-        "gpt-{variant}-{size}b",
-        "{prefix}-instruct-{size}b",
-        "{prefix}-chat-{size}b",
-        "{prefix}-coder-{size}b",
-        "stable-diffusion-{version}",
-        "whisper-{size}",
-        "bert-{size}-{variant}",
-        "roberta-{size}",
-        "t5-{size}",
-        "{prefix}-vision-{size}b"
-    ]
-    # Common name parts
-    prefixes = ["falcon", "llama", "mistral", "gpt", "phi", "gemma", "qwen", "yi", "mpt", "bloom"]
-    sizes = ["7", "13", "34", "70", "1", "3", "7b", "13b", "70b", "8b", "2b", "1b", "0.5b", "small", "base", "large", "huge"]
-    variants = ["chat", "instruct", "base", "v1.0", "v2", "beta", "turbo", "fast", "xl", "xxl"]
-    # Generate sample data
-    sample_data = []
-    for org_idx, org in enumerate(orgs):
-        # Create 5-10 models per organization
-        num_models = np.random.randint(5, 11)
-        for i in range(num_models):
-            # Create realistic model name
-            pattern = np.random.choice(model_name_patterns)
-            prefix = np.random.choice(prefixes)
-            size = np.random.choice(sizes)
-            version = f"v{np.random.randint(1, 4)}"
-            variant = np.random.choice(variants)
-            model_name = pattern.format(
-                prefix=prefix,
-                size=size,
-                version=version,
-                variant=variant
-            )
-            model_id = f"{org}/{model_name}"
-            # Select a realistic pipeline tag based on name
-            if "diffusion" in model_name or "image" in model_name:
-                pipeline_tag = np.random.choice(["text-to-image", "image-to-image", "image-segmentation"])
-            elif "whisper" in model_name or "speech" in model_name:
-                pipeline_tag = np.random.choice(["automatic-speech-recognition", "text-to-speech"])
-            elif "coder" in model_name or "code" in model_name:
-                pipeline_tag = "text-generation"
-            elif "bert" in model_name or "roberta" in model_name:
-                pipeline_tag = np.random.choice(["fill-mask", "text-classification", "token-classification"])
-            elif "vision" in model_name:
-                pipeline_tag = np.random.choice(["image-classification", "image-to-text", "visual-question-answering"])
-            else:
-                pipeline_tag = "text-generation"  # Most common
-            # Generate realistic tags
-            tags = [pipeline_tag]
-            if "text-generation" in pipeline_tag:
-                tags.extend(["language-model", "text", "gpt", "llm"])
-                if "instruct" in model_name:
-                    tags.append("instruction-following")
-                if "chat" in model_name:
-                    tags.append("chat")
-            elif "speech" in pipeline_tag:
-                tags.extend(["audio", "speech", "voice"])
-            elif "image" in pipeline_tag:
-                tags.extend(["vision", "image", "diffusion"])
-            # Add language tags
-            if np.random.random() < 0.8:  # 80% chance for English
-                tags.append("en")
-            if np.random.random() < 0.3:  # 30% chance for multilingual
-                tags.append("multilingual")
-            # Generate downloads and likes (weighted by org position for variety)
-            # Earlier orgs get more downloads to make the visualization interesting
-            popularity_factor = (len(orgs) - org_idx) / len(orgs)  # 1.0 to 0.0
-            base_downloads = 10000 * (10 ** (2 * popularity_factor))
-            downloads = int(base_downloads * np.random.uniform(0.3, 3.0))
-            likes = int(downloads * np.random.uniform(0.01, 0.1))  # 1-10% like ratio
-            # Generate model size (in bytes for params)
-            # Model size should correlate somewhat with the size in the name
-            size_indicator = 1
-            for s in ["70b", "13b", "7b", "3b", "2b", "1b", "large", "huge", "xl", "xxl"]:
-                if s in model_name.lower():
-                    size_indicator = float(s.replace("b", "")) if s[0].isdigit() else 3
-                    break
-            # Size in bytes
-            params = int(np.random.uniform(0.5, 2.0) * size_indicator * 1e9)
-            # Create model entry
-            model = {
-                "id": model_id,
-                "author": org,
-                "downloads": downloads,
-                "likes": likes,
-                "pipeline_tag": pipeline_tag,
-                "tags": tags,
-                "params": params
-            }
-            sample_data.append(model)
-    # Convert sample data to DataFrame and append to original
-    sample_df = pd.DataFrame(sample_data)
-    return pd.concat([df, sample_df], ignore_index=True)
 # Create Gradio interface
 with gr.Blocks() as demo:
     models_data = gr.State()  # To store loaded data
@@ -426,7 +301,7 @@ with gr.Blocks() as demo:
             skip_orgs_textbox = gr.Textbox(
                 label="Organizations to Skip (comma-separated)",
-                placeholder="e.g., openai, meta, huggingface",
                 info="Enter names of organizations to exclude from the visualization"
             )

     df['tags'] = df['tags'].apply(process_tags)
     return df
 # Create Gradio interface
 with gr.Blocks() as demo:
     models_data = gr.State()  # To store loaded data
             skip_orgs_textbox = gr.Textbox(
                 label="Organizations to Skip (comma-separated)",
+                placeholder="e.g., TheBloke, MaziyarPanahi, unsloth, modularai, Gensyn, bartowski",
                 info="Enter names of organizations to exclude from the visualization"
             )