Spaces:

fdaudens
/

model-stats-search-keywords

Sleeping

File size: 4,082 Bytes

0da2f8d
d4fbda3
 
0da2f8d
 
d4fbda3
 
 
 
 
 
 
 
 
 
 
0da2f8d
d4fbda3
 
0da2f8d
d4fbda3
 
 
bd3fe94
 
 
 
d4fbda3
 
 
 
0da2f8d
d4fbda3
 
 
 
bd3fe94
 
 
 
 
 
 
 
d4fbda3
 
 
bd3fe94
 
d4fbda3
 
 
 
 
 
bd3fe94
 
 
 
 
 
 
d4fbda3
bd3fe94
d4fbda3
8c6fcfb
 
 
d4fbda3
 
 
bd3fe94
d4fbda3
 
 
 
 
 
 
 
 
 
 
0da2f8d
 
4a901f1
 
d4fbda3
bd3fe94
d4fbda3
 
 
 
 
 
 
 
8c6fcfb
 
 
 
 
d4fbda3
 
 
 
8c6fcfb
 
 
 
d4fbda3
 
 
 
 
 
 
 
 
0da2f8d

import tempfile
import csv
import pandas as pd
import gradio as gr
from huggingface_hub import HfApi
from pathlib import Path

def get_model_stats(search_term):
    """Collect download statistics for Hub models matching a search term.

    Lists models through ``HfApi.list_models``, writes one CSV row per model
    into a freshly created temporary directory, and accumulates the 30-day
    and all-time download totals.

    Args:
        search_term: Keyword forwarded as the ``search`` filter of
            ``HfApi.list_models``.

    Returns:
        A ``(df, status_message, csv_path)`` tuple where ``df`` is a pandas
        DataFrame with the columns "Model ID", "Downloads (30 days)" and
        "Downloads (All Time)", ``status_message`` is a three-line summary
        string, and ``csv_path`` is the path of the written CSV as ``str``.
    """
    api = HfApi()

    # The search term is free-form user input; characters such as "/" would
    # otherwise be interpreted as directory separators and make open() fail.
    safe_term = "".join(
        ch if ch.isalnum() or ch in "-_." else "_" for ch in str(search_term)
    )
    temp_dir = tempfile.mkdtemp()
    output_file = Path(temp_dir) / f"{safe_term}_models_alltime.csv"

    print(f"Fetching {search_term} models with download statistics...")
    models_generator = api.list_models(
        search=search_term,
        expand=["downloads", "downloadsAllTime"],  # Get both 30-day and all-time downloads
        sort="_id"  # Sort by ID to avoid timeout issues
    )

    columns = ["Model ID", "Downloads (30 days)", "Downloads (All Time)"]
    rows = []
    total_30day_downloads = 0
    total_alltime_downloads = 0

    with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
        csv_writer = csv.writer(csvfile)
        csv_writer.writerow(columns)

        for model in models_generator:
            # The attribute can be present but set to None, in which case the
            # getattr default does not apply; "or 0" covers both cases.
            downloads_30day = getattr(model, 'downloads', None) or 0
            downloads_alltime = getattr(model, 'downloads_all_time', None) or 0

            total_30day_downloads += downloads_30day
            total_alltime_downloads += downloads_alltime

            row = [getattr(model, 'id', "Unknown"), downloads_30day, downloads_alltime]
            csv_writer.writerow(row)
            rows.append(row)

    model_count = len(rows)

    # Build the table from the collected rows instead of re-parsing the CSV:
    # pd.read_csv would convert ID strings such as "NA" or "null" into NaN.
    df = pd.DataFrame(rows, columns=columns)

    status_message = (
        f"Found {model_count} models for search term '{search_term}'\n"
        f"Total 30-day downloads: {total_30day_downloads:,}\n"
        f"Total all-time downloads: {total_alltime_downloads:,}"
    )

    return df, status_message, str(output_file)

def format_model_link(model_id):
    """Return an HTML anchor that links a model ID to its Hugging Face page.

    The ID is percent-encoded for the URL (keeping "/" so the usual
    ``owner/name`` form is unchanged) and HTML-escaped for the visible text,
    so unexpected characters cannot break out of the attribute or the tag.

    Args:
        model_id: Model identifier, typically ``owner/name``. Non-string
            values are converted with ``str``.

    Returns:
        A string of the form
        ``<a href="https://huggingface.co/<id>" target="_blank"><id></a>``.
    """
    import html
    from urllib.parse import quote

    text = str(model_id)
    href = "https://huggingface.co/" + quote(text, safe="/")
    return f'<a href="{href}" target="_blank">{html.escape(text)}</a>'

# Build the Gradio interface: a search box, a results table with a status
# box, and a button that exposes the generated CSV for download.
with gr.Blocks(title="Hugging Face Model Statistics") as demo:
    gr.Markdown("# Hugging Face Model Statistics")
    gr.Markdown("Enter a search term to find model statistics from Hugging Face Hub")

    with gr.Row():
        search_input = gr.Textbox(
            label="Search Term",
            placeholder="Enter a model name or keyword (e.g., 'gemma', 'llama')",
            value="gemma"
        )
        search_button = gr.Button("Search")

    with gr.Row():
        output_table = gr.Dataframe(
            headers=["Model ID", "Downloads (30 days)", "Downloads (All Time)"],
            # The first column holds <a> markup produced by format_model_link,
            # so it is declared as "html" to be rendered as a link rather than
            # shown as literal text.
            datatype=["html", "number", "number"],
            label="Model Statistics",
            wrap=True
        )
        status_message = gr.Textbox(label="Status", lines=3)  # Three lines: count + two totals

    with gr.Row():
        download_button = gr.Button("Download CSV")
        # Hidden until a CSV path is available (see reveal_csv below).
        csv_file = gr.File(label="CSV File", visible=False)

    # Holds the path of the CSV written by the most recent search.
    csv_path = gr.State()

    def process_results(df, status, csv_path):
        """Turn the "Model ID" column into clickable links; pass the rest through."""
        df['Model ID'] = df['Model ID'].apply(format_model_link)
        return df, status, csv_path

    def reveal_csv(path):
        """Load the stored CSV path into the file component and make it visible.

        The file component is created hidden, so setting only its value would
        leave nothing for the user to click on.
        """
        return gr.update(value=path, visible=path is not None)

    # First fetch the raw statistics, then post-process the table for display.
    search_button.click(
        fn=get_model_stats,
        inputs=search_input,
        outputs=[output_table, status_message, csv_path]
    ).then(
        fn=process_results,
        inputs=[output_table, status_message, csv_path],
        outputs=[output_table, status_message, csv_path]
    )

    download_button.click(
        fn=reveal_csv,
        inputs=csv_path,
        outputs=csv_file
    )

if __name__ == "__main__":
    demo.launch()