File size: 4,178 Bytes
0da2f8d
d4fbda3
 
0da2f8d
 
d4fbda3
 
 
 
 
 
 
 
 
 
 
0da2f8d
d4fbda3
 
0da2f8d
d4fbda3
 
 
bd3fe94
 
 
 
d4fbda3
 
 
 
0da2f8d
d4fbda3
 
 
 
bd3fe94
 
 
 
 
 
 
 
d4fbda3
 
 
bd3fe94
 
d4fbda3
 
 
 
 
 
bd3fe94
 
 
 
 
 
 
d4fbda3
bd3fe94
d4fbda3
 
 
 
bd3fe94
d4fbda3
 
 
 
 
 
 
 
 
 
5b391e3
 
 
 
 
 
 
 
d4fbda3
 
 
 
 
 
 
 
8c6fcfb
5b391e3
 
 
 
 
 
 
 
8c6fcfb
 
d4fbda3
 
 
 
8c6fcfb
 
 
 
d4fbda3
 
 
 
 
 
 
 
 
0da2f8d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import tempfile
import csv
import pandas as pd
import gradio as gr
from huggingface_hub import HfApi
from pathlib import Path

def get_model_stats(search_term):
    # Initialize the API
    api = HfApi()
    
    # Create a temporary file for the CSV
    temp_dir = tempfile.mkdtemp()
    output_file = Path(temp_dir) / f"{search_term}_models_alltime.csv"
    
    # Get the generator of models with the working sort parameter
    print(f"Fetching {search_term} models with download statistics...")
    models_generator = api.list_models(
        search=search_term, 
        expand=["downloads", "downloadsAllTime"],  # Get both 30-day and all-time downloads
        sort="_id"  # Sort by ID to avoid timeout issues
    )
    
    # Initialize counters for total downloads
    total_30day_downloads = 0
    total_alltime_downloads = 0
    
    # Create and write to CSV
    with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
        csv_writer = csv.writer(csvfile)
        # Write header
        csv_writer.writerow(["Model ID", "Downloads (30 days)", "Downloads (All Time)"])
        
        # Process models
        model_count = 0
        for model in models_generator:
            # Get download counts
            downloads_30day = getattr(model, 'downloads', 0)
            downloads_alltime = getattr(model, 'downloads_all_time', 0)
            
            # Add to totals
            total_30day_downloads += downloads_30day
            total_alltime_downloads += downloads_alltime
            
            # Write to CSV
            csv_writer.writerow([
                getattr(model, 'id', "Unknown"),
                downloads_30day,
                downloads_alltime
            ])
            model_count += 1
    
    # Read the CSV file into a pandas DataFrame
    df = pd.read_csv(output_file)
    
    # Create status message with total downloads
    status_message = (
        f"Found {model_count} models for search term '{search_term}'\n"
        f"Total 30-day downloads: {total_30day_downloads:,}\n"
        f"Total all-time downloads: {total_alltime_downloads:,}"
    )
    
    # Return both the DataFrame, status message, and the CSV file path
    return df, status_message, str(output_file)

# Create the Gradio interface
with gr.Blocks(title="Hugging Face Model Statistics") as demo:
    gr.Markdown("# Hugging Face Model Statistics")
    gr.Markdown("Enter a search term to find model statistics from Hugging Face Hub")
    
    with gr.Row():
        search_input = gr.Textbox(
            label="Search Term",
            placeholder="Enter a model name or keyword (e.g., 'gemma', 'llama')",
            value="gemma"
        )
        search_button = gr.Button("Search")
    
    with gr.Row():
        with gr.Column():
            output_table = gr.Dataframe(
                headers=["Model ID", "Downloads (30 days)", "Downloads (All Time)"],
                datatype=["str", "number", "number"],
                label="Model Statistics",
                wrap=True
            )
            status_message = gr.Textbox(label="Status", lines=3)
    
    with gr.Row():
        download_button = gr.Button("Download CSV")
        csv_file = gr.File(label="CSV File", visible=False)
    
    # Store the CSV file path in a state
    csv_path = gr.State()
    
    def process_results(df, status, csv_path):
        # Create HTML links for each model
        html_links = []
        for model_id in df['Model ID']:
            html_links.append(f'<a href="https://huggingface.co/{model_id}" target="_blank">{model_id}</a>')
        
        # Update the DataFrame with HTML links
        df['Model ID'] = html_links
        
        return df, status, csv_path
    
    search_button.click(
        fn=get_model_stats,
        inputs=search_input,
        outputs=[output_table, status_message, csv_path]
    ).then(
        fn=process_results,
        inputs=[output_table, status_message, csv_path],
        outputs=[output_table, status_message, csv_path]
    )
    
    download_button.click(
        fn=lambda x: x,
        inputs=csv_path,
        outputs=csv_file
    )

if __name__ == "__main__":
    demo.launch()