Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,51 +1,251 @@
|
|
1 |
import gradio as gr
|
2 |
-
from transformers import AutoModel
|
3 |
import torch
|
|
|
|
|
4 |
|
5 |
-
def
|
6 |
try:
|
|
|
|
|
|
|
7 |
# Load model on CPU
|
8 |
model = AutoModel.from_pretrained(model_path, device_map="cpu", trust_remote_code=True)
|
9 |
|
10 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
unique_params = {}
|
12 |
-
|
13 |
-
|
14 |
-
unique_params[p.data_ptr()] = (name, p.numel())
|
15 |
|
16 |
-
|
|
|
|
|
|
|
|
|
|
|
17 |
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
|
23 |
-
|
24 |
|
25 |
-
# Format numbers with commas for readability
|
26 |
-
return f"""
|
27 |
-
Total Parameters: {total_params:,}
|
28 |
-
Trainable Parameters: {trainable_params:,}
|
29 |
-
"""
|
30 |
except Exception as e:
|
31 |
-
return f"Error loading model
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
)
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
|
50 |
if __name__ == "__main__":
|
51 |
demo.launch()
|
|
|
1 |
import gradio as gr
|
2 |
+
from transformers import AutoModel, AutoConfig
|
3 |
import torch
|
4 |
+
import json
|
5 |
+
from collections import defaultdict, OrderedDict
|
6 |
|
7 |
+
# Substrings that mark a parameter name as belonging to an embedding module.
_EMBEDDING_PATTERNS = (
    'embeddings', 'embed', 'wte', 'wpe', 'word_embedding',
    'position_embedding', 'token_embedding', 'embed_tokens',
    'embed_positions', 'embed_layer_norm',
)

# Dotted-name components that are followed by a block index, e.g.
# "encoder.layer.3...." (BERT), "h.3...." (GPT-2), "encoder.block.3...." (T5).
_LAYER_CONTAINERS = frozenset({'layer', 'layers', 'h', 'block', 'blocks'})

# Display rank of the named buckets; numbered layers get rank 1 and every
# other bucket ("Encoder", "Decoder", "Other") gets rank 5.
_BUCKET_RANK = {
    "Embeddings": 0,
    "Layer Norm": 2,
    "Pooler": 3,
    "Classification Head": 4,
}


def _is_embedding_param(name):
    """Return True when *name* contains one of the embedding substrings."""
    lowered = name.lower()
    return any(pattern in lowered for pattern in _EMBEDDING_PATTERNS)


def _get_layer_name(param_name):
    """Map a dotted parameter name to the bucket it is reported under."""
    parts = param_name.split('.')
    if len(parts) < 2:
        return "Other"

    # The first "<container>.<index>" pair wins, so T5's
    # "block.3.layer.0" is reported as block 3, not sub-layer 0.
    for index, part in enumerate(parts[:-1]):
        following = parts[index + 1]
        if part in _LAYER_CONTAINERS and following.isdecimal():
            return f"Layer {int(following)}"

    lowered = param_name.lower()
    if 'encoder' in parts:
        return "Encoder"
    if 'decoder' in parts:
        return "Decoder"
    if _is_embedding_param(param_name):
        return "Embeddings"
    if 'classifier' in lowered or 'head' in lowered:
        return "Classification Head"
    if 'pooler' in lowered:
        return "Pooler"
    if 'ln' in lowered or 'norm' in lowered:
        return "Layer Norm"
    return "Other"


def _layer_sort_key(item):
    """Sort key for ``(bucket_name, info)`` pairs in the layer breakdown."""
    label = item[0]
    # Named buckets are checked first: "Layer Norm" also starts with "Layer"
    # and must not be mistaken for a numbered layer.
    if label in _BUCKET_RANK:
        return (_BUCKET_RANK[label], 0)
    prefix, _, number = label.partition(' ')
    if prefix == "Layer" and number.isdecimal():
        return (1, int(number))
    return (5, 0)


def _percent(part, whole):
    """Return ``part / whole`` as a percentage, or 0.0 when *whole* is 0."""
    return part / whole * 100 if whole else 0.0


def analyze_model_parameters(model_path, show_layer_details=False):
    """Load a HuggingFace model on CPU and describe its parameters as text.

    Args:
        model_path: Model identifier or path passed to
            ``AutoConfig.from_pretrained`` and ``AutoModel.from_pretrained``.
        show_layer_details: When true, append a per-parameter listing
            (name, shape, size, trainable flag, shared flag) for each bucket.

    Returns:
        A report string with total / trainable counts, the embedding vs
        non-embedding split, a few config fields and a per-bucket summary.
        If anything raises, an error-message string is returned instead;
        the function itself does not raise.
    """
    # The whole body is one boundary: the UI callback must always get a string.
    try:
        # NOTE(security): trust_remote_code=True lets the referenced repository
        # execute its own Python code, and model_path comes straight from user
        # input. Kept as in the original; review before exposing publicly.
        config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
        model = AutoModel.from_pretrained(model_path, device_map="cpu", trust_remote_code=True)

        total_params = 0
        trainable_params = 0
        embedding_params = 0
        non_embedding_params = 0

        seen_ptrs = set()
        layer_breakdown = defaultdict(lambda: {'total': 0, 'trainable': 0, 'params': []})

        # remove_duplicate=False also yields the aliases of tied weights, so
        # they can be listed as shared; totals only count the first occurrence.
        for name, param in model.named_parameters(remove_duplicate=False):
            param_size = param.numel()
            is_trainable = param.requires_grad
            layer_name = _get_layer_name(name)

            ptr = param.data_ptr()
            # Empty tensors are never reported as shared: their pointers are
            # not a reliable identity.
            is_shared = param_size > 0 and ptr in seen_ptrs

            if not is_shared:
                seen_ptrs.add(ptr)
                total_params += param_size
                if is_trainable:
                    trainable_params += param_size
                if _is_embedding_param(name):
                    embedding_params += param_size
                else:
                    non_embedding_params += param_size

                layer_breakdown[layer_name]['total'] += param_size
                if is_trainable:
                    layer_breakdown[layer_name]['trainable'] += param_size

            layer_breakdown[layer_name]['params'].append({
                'name': name,
                'shape': list(param.shape),
                'size': param_size,
                'trainable': is_trainable,
                'shared': is_shared,
            })

        architectures = getattr(config, 'architectures', None)
        architecture = architectures[0] if architectures else 'Unknown'

        lines = [
            "",
            f"**MODEL ANALYSIS: {model_path}**",
            "",
            "**PARAMETER SUMMARY**",
            f"├── Total Parameters: {total_params:,}",
            f"├── Trainable Parameters: {trainable_params:,}",
            f"├── Non-trainable Parameters: {total_params - trainable_params:,}",
            f"└── Trainable Percentage: {_percent(trainable_params, total_params):.1f}%",
            "",
            "**PARAMETER BREAKDOWN**",
            f"├── Embedding Parameters: {embedding_params:,} "
            f"({_percent(embedding_params, total_params):.1f}%)",
            f"└── Non-embedding Parameters: {non_embedding_params:,} "
            f"({_percent(non_embedding_params, total_params):.1f}%)",
            "",
            "**MODEL INFO**",
            f"├── Model Type: {getattr(config, 'model_type', 'Unknown')}",
            f"├── Architecture: {architecture}",
            f"└── Hidden Size: {getattr(config, 'hidden_size', 'Unknown')}",
        ]

        # Always bound, so the detailed view below can rely on it.
        sorted_layers = sorted(layer_breakdown.items(), key=_layer_sort_key)

        if sorted_layers:
            lines += ["", "**LAYER BREAKDOWN SUMMARY**"]
            lines += [
                f"├── {layer_name}: {info['total']:,} params "
                f"({_percent(info['total'], total_params):.1f}%)"
                for layer_name, info in sorted_layers
            ]

        if show_layer_details:
            lines += [
                "",
                "=" * 60,
                "**DETAILED LAYER-BY-LAYER BREAKDOWN**",
                "=" * 60,
            ]
            for layer_name, info in sorted_layers:
                lines += [
                    "",
                    f"**{layer_name.upper()}**",
                    f"  Total: {info['total']:,} | Trainable: {info['trainable']:,}",
                    "  Parameters:",
                ]
                for param_info in info['params']:
                    trainable_mark = "✓" if param_info['trainable'] else "✗"
                    shared_note = " (shared)" if param_info['shared'] else ""
                    lines.append(
                        f"    {trainable_mark} {param_info['name']}: "
                        f"{param_info['shape']} → {param_info['size']:,}{shared_note}"
                    )

        return "\n".join(lines) + "\n"

    except Exception as e:
        return f"**Error loading model:** {e}\n\nPlease check that the model path is correct and the model is accessible."
|
173 |
+
|
174 |
+
def count_parameters_basic(model_path):
    """Return the summary report for *model_path*, without the per-parameter listing."""
    include_details = False
    return analyze_model_parameters(model_path, show_layer_details=include_details)
|
177 |
+
|
178 |
+
def count_parameters_detailed(model_path):
    """Return the report for *model_path*, including the per-parameter listing."""
    include_details = True
    return analyze_model_parameters(model_path, show_layer_details=include_details)
|
181 |
+
|
182 |
+
# Build the Gradio UI: one model-path input, two action buttons, and a single
# results box that both buttons write to.
with gr.Blocks(title="Advanced HuggingFace Model Parameter Analyzer", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # Advanced HuggingFace Model Parameter Analyzer

    Enter any HuggingFace model path to get detailed parameter analysis including:
    - **Total & trainable parameter counts**
    - **Embedding vs non-embedding breakdown**
    - **Layer-by-layer analysis**
    - **Weight sharing detection**
    """)

    with gr.Row():
        with gr.Column(scale=2):
            model_input = gr.Textbox(
                label="HuggingFace Model Path",
                placeholder="e.g., bert-base-uncased, gpt2, microsoft/DialoGPT-medium",
                value="bert-base-uncased"
            )

        with gr.Column(scale=1):
            analyze_btn = gr.Button("Analyze Model", variant="primary")
            detailed_btn = gr.Button("Detailed Analysis", variant="secondary")

    output_text = gr.Textbox(
        label="Analysis Results",
        lines=20,
        max_lines=50,
        show_copy_button=True
    )

    # Event handlers: both buttons read the same input and overwrite the same output.
    analyze_btn.click(
        fn=count_parameters_basic,
        inputs=model_input,
        outputs=output_text
    )

    detailed_btn.click(
        fn=count_parameters_detailed,
        inputs=model_input,
        outputs=output_text
    )

    # Example models: clicking one fills the model-path textbox.
    gr.Examples(
        examples=[
            ["bert-base-uncased"],
            ["gpt2"],
            ["roberta-base"],
            ["distilbert-base-uncased"],
            ["microsoft/DialoGPT-medium"],
            ["facebook/bart-base"],
            ["t5-small"],
            ["google/flan-t5-small"]
        ],
        inputs=model_input,
        label="Example Models"
    )

    gr.Markdown("""
    ### Notes:
    - **Weight tying detection**: Automatically handles shared parameters (e.g., input/output embeddings)
    - **Layer categorization**: Groups parameters by transformer layers, embeddings, etc.
    - **Detailed analysis**: Click "Detailed Analysis" for parameter-by-parameter breakdown
    - **Model compatibility**: Works with most HuggingFace transformer models
    """)

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
|