orionweller committed on
Commit
de87f7e
Β·
verified Β·
1 Parent(s): c37921c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +235 -35
app.py CHANGED
@@ -1,51 +1,251 @@
1
  import gradio as gr
2
- from transformers import AutoModel
3
  import torch
 
 
4
 
5
def count_parameters(model_path):
    """Return total and trainable parameter counts for a Hub model.

    Args:
        model_path: Hub repo id or local path accepted by ``from_pretrained``.

    Returns:
        A formatted summary string, or an error message on failure
        (this function never raises to the caller).
    """
    try:
        # Load model on CPU.
        # NOTE(review): trust_remote_code=True executes code shipped with the
        # repo -- acceptable for an interactive demo, but worth flagging.
        model = AutoModel.from_pretrained(model_path, device_map="cpu", trust_remote_code=True)

        # BUG FIX: the original iterated named_parameters() twice (once for
        # trainable, once for total). One pass with two dedup maps keyed by
        # data_ptr() gives the same counts; tied weights (shared storage)
        # are counted exactly once.
        total_seen = {}
        trainable_seen = {}
        for name, p in model.named_parameters():
            ptr = p.data_ptr()
            total_seen[ptr] = p.numel()
            if p.requires_grad:
                trainable_seen[ptr] = p.numel()

        total_params = sum(total_seen.values())
        trainable_params = sum(trainable_seen.values())

        # Format numbers with commas for readability
        return f"""
Total Parameters: {total_params:,}
Trainable Parameters: {trainable_params:,}
"""
    except Exception as e:
        return f"Error loading model: {str(e)}"
32
-
33
# Build the Gradio interface: one textbox in, one textbox out.
_model_box = gr.Textbox(
    label="Enter Hugging Face Model Path",
    placeholder="e.g., bert-base-uncased",
)
_result_box = gr.Textbox(label="Parameter Count")
_example_models = [
    ["bert-base-uncased"],
    ["gpt2"],
    ["roberta-base"],
]

demo = gr.Interface(
    fn=count_parameters,
    inputs=_model_box,
    outputs=_result_box,
    title="Hugging Face Model Parameter Counter",
    description="Enter a Hugging Face model path to see its parameter count.",
    examples=_example_models,
)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
# Start the Gradio server only when run as a script (not on import).
if __name__ == "__main__":
    demo.launch()
 
1
  import gradio as gr
2
+ from transformers import AutoModel, AutoConfig
3
  import torch
4
+ import json
5
+ from collections import defaultdict, OrderedDict
6
 
7
def analyze_model_parameters(model_path, show_layer_details=False):
    """Load a Hugging Face model and report a detailed parameter breakdown.

    Args:
        model_path: Hub repo id or local path accepted by ``from_pretrained``.
        show_layer_details: When True, append a parameter-by-parameter
            breakdown per layer group to the summary.

    Returns:
        A human-readable analysis string, or an error message if the model
        cannot be loaded (this function never raises to the caller).
    """
    try:
        # Load the configuration first so model type / hidden size can be
        # reported even when they are not exposed on the model object.
        config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)

        # Load model on CPU.
        # NOTE(review): trust_remote_code=True executes code shipped with the
        # repo -- acceptable for an interactive demo, but worth flagging.
        model = AutoModel.from_pretrained(model_path, device_map="cpu", trust_remote_code=True)

        # Initialize counters
        total_params = 0
        trainable_params = 0
        embedding_params = 0
        non_embedding_params = 0

        # data_ptr() identifies the underlying storage; tied weights (e.g.
        # input/output embeddings) share a pointer and are counted once.
        unique_params = {}
        param_details = []
        layer_breakdown = defaultdict(lambda: {'total': 0, 'trainable': 0, 'params': []})

        # Embedding layer patterns (common names for embedding layers)
        embedding_patterns = [
            'embeddings', 'embed', 'wte', 'wpe', 'word_embedding',
            'position_embedding', 'token_embedding', 'embed_tokens',
            'embed_positions', 'embed_layer_norm'
        ]

        def is_embedding_param(name):
            # Heuristic: does the parameter name look like an embedding?
            name_lower = name.lower()
            return any(pattern in name_lower for pattern in embedding_patterns)

        def get_layer_name(param_name):
            """Extract layer information from parameter name"""
            parts = param_name.split('.')
            if len(parts) >= 2:
                # Numbered transformer blocks: ...layer.<n>... / ...layers.<n>...
                if 'layer' in parts or 'layers' in parts:
                    for i, part in enumerate(parts):
                        if part in ['layer', 'layers'] and i + 1 < len(parts):
                            try:
                                layer_num = int(parts[i + 1])
                                return f"Layer {layer_num}"
                            except ValueError:
                                pass
                # Coarser buckets for everything else.
                if 'encoder' in parts:
                    return "Encoder"
                elif 'decoder' in parts:
                    return "Decoder"
                elif any(emb in param_name.lower() for emb in embedding_patterns):
                    return "Embeddings"
                elif 'classifier' in param_name.lower() or 'head' in param_name.lower():
                    return "Classification Head"
                elif 'pooler' in param_name.lower():
                    return "Pooler"
                elif 'ln' in param_name.lower() or 'norm' in param_name.lower():
                    return "Layer Norm"
            return "Other"

        # Analyze all parameters
        for name, param in model.named_parameters():
            param_size = param.numel()
            is_trainable = param.requires_grad
            is_embedding = is_embedding_param(name)
            layer_name = get_layer_name(name)

            ptr = param.data_ptr()
            # BUG FIX: compute the shared flag BEFORE registering this pointer.
            # The old check (`ptr in [p['ptr'] for p in param_details ...]`)
            # was O(n^2) and, sitting inside the dedup branch, could never be
            # True -- weight sharing was never actually reported.
            is_shared = ptr in unique_params

            if not is_shared:
                unique_params[ptr] = {
                    'name': name,
                    'size': param_size,
                    'trainable': is_trainable,
                    'embedding': is_embedding,
                    'layer': layer_name,
                    'shape': list(param.shape)
                }

                # Count each underlying tensor exactly once (weight tying).
                total_params += param_size
                if is_trainable:
                    trainable_params += param_size
                if is_embedding:
                    embedding_params += param_size
                else:
                    non_embedding_params += param_size

                # Add to layer breakdown (unique tensors only).
                layer_breakdown[layer_name]['total'] += param_size
                if is_trainable:
                    layer_breakdown[layer_name]['trainable'] += param_size
                layer_breakdown[layer_name]['params'].append({
                    'name': name,
                    'shape': list(param.shape),
                    'size': param_size,
                    'trainable': is_trainable
                })

            # Record every occurrence so tied duplicates stay visible.
            param_details.append({
                'name': name,
                'shape': list(param.shape),
                'size': param_size,
                'trainable': is_trainable,
                'embedding': is_embedding,
                'layer': layer_name,
                'shared': is_shared,
                'ptr': ptr
            })

        # BUG FIX: guard against zero-parameter models before dividing.
        denom = total_params or 1

        # Format the summary
        summary = f"""
🔍 **MODEL ANALYSIS: {model_path}**

📊 **PARAMETER SUMMARY**
├── Total Parameters: {total_params:,}
├── Trainable Parameters: {trainable_params:,}
├── Non-trainable Parameters: {total_params - trainable_params:,}
└── Trainable Percentage: {(trainable_params/denom*100):.1f}%

🧠 **PARAMETER BREAKDOWN**
├── Embedding Parameters: {embedding_params:,} ({embedding_params/denom*100:.1f}%)
└── Non-embedding Parameters: {non_embedding_params:,} ({non_embedding_params/denom*100:.1f}%)

📋 **MODEL INFO**
├── Model Type: {config.model_type if hasattr(config, 'model_type') else 'Unknown'}
├── Architecture: {config.architectures[0] if hasattr(config, 'architectures') and config.architectures else 'Unknown'}
└── Hidden Size: {getattr(config, 'hidden_size', 'Unknown')}
"""

        # BUG FIX: define sorted_layers unconditionally -- the detailed
        # section below used it even when layer_breakdown was empty,
        # which raised NameError.
        sorted_layers = sorted(layer_breakdown.items(), key=lambda x: (
            0 if x[0] == "Embeddings" else
            1 if x[0].startswith("Layer") else
            2 if x[0] == "Layer Norm" else
            3 if x[0] == "Pooler" else
            4 if x[0] == "Classification Head" else 5
        ))

        # Add layer breakdown summary
        if sorted_layers:
            summary += "\n🏗️ **LAYER BREAKDOWN SUMMARY**\n"
            for layer_name, info in sorted_layers:
                percentage = info['total'] / denom * 100
                summary += f"├── {layer_name}: {info['total']:,} params ({percentage:.1f}%)\n"

        # Detailed layer breakdown if requested
        layer_details = ""
        if show_layer_details:
            layer_details = "\n" + "="*60 + "\n"
            layer_details += "🔍 **DETAILED LAYER-BY-LAYER BREAKDOWN**\n"
            layer_details += "="*60 + "\n"

            for layer_name, info in sorted_layers:
                layer_details += f"\n📍 **{layer_name.upper()}**\n"
                layer_details += f"  Total: {info['total']:,} | Trainable: {info['trainable']:,}\n"
                layer_details += "  Parameters:\n"

                for param_info in info['params']:
                    trainable_mark = "✓" if param_info['trainable'] else "✗"
                    layer_details += f"    {trainable_mark} {param_info['name']}: {param_info['shape']} → {param_info['size']:,}\n"

        return summary + layer_details

    except Exception as e:
        return f"❌ **Error loading model:** {str(e)}\n\nPlease check that the model path is correct and the model is accessible."
173
+
174
def count_parameters_basic(model_path):
    """Summary-only analysis (no per-parameter layer listing)."""
    # show_layer_details defaults to False, so the flag can be omitted.
    return analyze_model_parameters(model_path)
177
+
178
def count_parameters_detailed(model_path):
    """Full analysis including the per-parameter layer breakdown."""
    report = analyze_model_parameters(model_path, show_layer_details=True)
    return report
181
+
182
# Build the Gradio Blocks UI; `demo` is launched by the __main__ guard below.
with gr.Blocks(title="🤗 Advanced HuggingFace Model Parameter Analyzer", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 🤗 Advanced HuggingFace Model Parameter Analyzer

    Enter any HuggingFace model path to get detailed parameter analysis including:
    - **Total & trainable parameter counts**
    - **Embedding vs non-embedding breakdown**
    - **Layer-by-layer analysis**
    - **Weight sharing detection**
    """)

    # Input on the left (wider), action buttons on the right.
    with gr.Row():
        with gr.Column(scale=2):
            model_path_box = gr.Textbox(
                label="🔍 HuggingFace Model Path",
                placeholder="e.g., bert-base-uncased, gpt2, microsoft/DialoGPT-medium",
                value="bert-base-uncased"
            )

        with gr.Column(scale=1):
            basic_btn = gr.Button("📊 Analyze Model", variant="primary")
            deep_btn = gr.Button("🔍 Detailed Analysis", variant="secondary")

    analysis_box = gr.Textbox(
        label="📋 Analysis Results",
        lines=20,
        max_lines=50,
        show_copy_button=True
    )

    # Wire each button to the matching analysis depth.
    basic_btn.click(fn=count_parameters_basic, inputs=model_path_box, outputs=analysis_box)
    deep_btn.click(fn=count_parameters_detailed, inputs=model_path_box, outputs=analysis_box)

    # One-click example models.
    gr.Examples(
        examples=[
            ["bert-base-uncased"],
            ["gpt2"],
            ["roberta-base"],
            ["distilbert-base-uncased"],
            ["microsoft/DialoGPT-medium"],
            ["facebook/bart-base"],
            ["t5-small"],
            ["google/flan-t5-small"]
        ],
        inputs=model_path_box,
        label="🎯 Example Models"
    )

    gr.Markdown("""
    ### 📝 Notes:
    - **Weight tying detection**: Automatically handles shared parameters (e.g., input/output embeddings)
    - **Layer categorization**: Groups parameters by transformer layers, embeddings, etc.
    - **Detailed analysis**: Click "Detailed Analysis" for parameter-by-parameter breakdown
    - **Model compatibility**: Works with most HuggingFace transformer models
    """)
249
 
250
# Start the Gradio server only when run as a script (not on import).
if __name__ == "__main__":
    demo.launch()