import gradio as gr
from transformers import AutoConfig # Required for Hugging Face integration
from calc_params import calc_params # Import calc_params from the new file
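# calc_params is expected to take the positional arguments wired up in the
# "Parameter Calculation" tab below and to return a display string for the
# result textbox (see the calc_param_button.click() call near the end of this file).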
# ---- Helper Functions ---- #
def get_hf_model_args(hf_model_name_or_path):
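    """Fetch the config for a Hugging Face model (Hub repo or local path) and
    extract the fields this app needs; missing keys come back as None."""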
    try:
        config = AutoConfig.from_pretrained(hf_model_name_or_path, trust_remote_code=True).to_dict()
    except Exception as e:
        raise gr.Error(f"Error fetching Hugging Face model: {str(e)}")
    # Extract the relevant values from the config.
    num_layers = config.get("num_hidden_layers")
    hidden_size = config.get("hidden_size")
    num_attention_heads = config.get("num_attention_heads")
    vocab_size = config.get("vocab_size")
    sequence_length = config.get("max_position_embeddings")
    return {
        "num_layers": num_layers,
        "hidden_size": hidden_size,
        "num_attention_heads": num_attention_heads,
        "vocab_size": vocab_size,
        "sequence_length": sequence_length,
    }
# ---- Update Gradio inputs with Hugging Face model config ---- #
def update_from_hf_model(hf_model_name_or_path):
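    """Return gr.update() values that populate the form fields from a fetched
    model config; the trailing empty string clears the result textbox."""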
    model_params = get_hf_model_args(hf_model_name_or_path)
    return (
        gr.update(value=model_params["num_layers"]),
        gr.update(value=model_params["hidden_size"]),
        gr.update(value=model_params["num_attention_heads"]),
        gr.update(value=model_params["vocab_size"]),
        gr.update(value=model_params["sequence_length"]),
        "",
    )
# ---- Memory Calculation ---- #
def calc_mem(hf_model_name_or_path, num_gpus, tensor_parallel_size, pipeline_parallel_size,
             batch_size_per_gpu, sequence_length, vocab_size, hidden_size, num_attention_heads,
             num_layers, ffn_expansion_factor, is_mixed_precision, misc_mem_gib):
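    """Estimate per-GPU memory (GiB) for the model weights plus a fixed
    miscellaneous overhead.

    Only parameter memory is modeled: batch_size_per_gpu and
    num_attention_heads are accepted but unused, and gradient, optimizer,
    and activation memory are not included in the estimate.
    """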
    model_params = get_hf_model_args(hf_model_name_or_path) if hf_model_name_or_path else None
    # Values fetched from the Hub take precedence over the form fields.
    if model_params:
        num_layers = model_params["num_layers"] or num_layers
        hidden_size = model_params["hidden_size"] or hidden_size
        num_attention_heads = model_params["num_attention_heads"] or num_attention_heads
        vocab_size = model_params["vocab_size"] or vocab_size
        sequence_length = model_params["sequence_length"] or sequence_length

    # Data-parallel degree. Weights are replicated across data-parallel ranks,
    # so it does not appear in the per-GPU model-memory estimate below.
    dp_degree = num_gpus / (tensor_parallel_size * pipeline_parallel_size)

    # Parameter counts: input/output embeddings, learned positional embeddings,
    # LayerNorms, attention blocks, and MLP blocks.
    embed_params = 2 * vocab_size * hidden_size
    positional_params = hidden_size * sequence_length
    ln_params = 8 * hidden_size * num_layers + (2 * hidden_size)
    attention_params = int(2 * (1 + ffn_expansion_factor) * num_layers * hidden_size * hidden_size)
    mlp_params = ffn_expansion_factor * num_layers * hidden_size * hidden_size
    total_params = embed_params + positional_params + ln_params + attention_params + mlp_params

    # 2 bytes per parameter for fp16/bf16, 4 bytes for fp32.
    bytes_per_param = 2 if is_mixed_precision else 4

    # Tensor and pipeline parallelism shard the weights across GPUs.
    model_mem = total_params * bytes_per_param
    per_gpu_mem_gib = (model_mem / (tensor_parallel_size * pipeline_parallel_size)) / 1024**3 + misc_mem_gib

    return f"Per-GPU Memory Required for Training: {per_gpu_mem_gib:.2f} GiB"
# ---- Gradio Interface ---- #
with gr.Blocks() as demo:
    with gr.Tabs():
        # Memory Calculation Tab
        with gr.TabItem("Memory Calculation"):
            hf_model_name_or_path = gr.Textbox(
                label="HuggingFace Model Name or Path",
                info="Name of the HuggingFace Hub repository or the local file path for it"
            )
            num_gpus = gr.Number(
                label="Number of GPUs",
                value=1,
                info="Number of GPUs used for training"
            )
            tensor_parallel_size = gr.Number(
                label="Tensor Parallel Size",
                value=1,
                info="Tensor parallel degree (1 if not used)"
            )
            pipeline_parallel_size = gr.Number(
                label="Pipeline Parallel Size",
                value=1,
                info="Pipeline parallel degree (1 if not used)"
            )
            batch_size_per_gpu = gr.Number(
                label="Batch Size per GPU",
                value=8,
                info="Batch size per GPU"
            )
            sequence_length = gr.Number(
                label="Sequence Length",
                value=2048,
                info="Sequence length used for training"
            )
            vocab_size = gr.Number(
                label="Vocab Size",
                value=51200,
                info="How many tokens are in the embedding layer"
            )
            hidden_size = gr.Number(
                label="Hidden Size",
                value=6144,
                info="Dimension of the model's hidden size"
            )
            num_attention_heads = gr.Number(
                label="Number of Attention Heads",
                value=64,
                info="Number of attention heads used in the model"
            )
            num_layers = gr.Number(
                label="Number of Layers",
                value=44,
                info="Number of transformer layers used in the model"
            )
            ffn_expansion_factor = gr.Number(
                label="FFN Expansion Factor",
                value=4,
                info="How much the MLP hidden size expands"
            )
            is_mixed_precision = gr.Checkbox(
                label="Mixed Precision",
                value=True,
                info="Whether mixed precision is enabled"
            )
            misc_mem_gib = gr.Number(
                label="Miscellaneous Memory Overhead (GiB)",
                value=5,
                info="Miscellaneous memory overhead per GPU from DL frameworks, communication libraries, etc."
            )
            memory_result = gr.Textbox(label="Memory Calculation Result", interactive=False)
            calc_memory_button = gr.Button("Calculate Memory")
            calc_memory_button.click(
                calc_mem,
                inputs=[
                    hf_model_name_or_path, num_gpus, tensor_parallel_size, pipeline_parallel_size,
                    batch_size_per_gpu, sequence_length, vocab_size, hidden_size,
                    num_attention_heads, num_layers, ffn_expansion_factor, is_mixed_precision,
                    misc_mem_gib
                ],
                outputs=memory_result
            )
            # Auto-fill the form fields (and clear any stale result) when the model path changes.
            hf_model_name_or_path.change(
                fn=update_from_hf_model,
                inputs=[hf_model_name_or_path],
                outputs=[num_layers, hidden_size, num_attention_heads, vocab_size, sequence_length, memory_result]
            )
        # Parameter Calculation Tab
        with gr.TabItem("Parameter Calculation"):
            # These component variables intentionally shadow the Memory Calculation
            # tab's; each tab owns its own set of form fields.
            hf_model_name_or_path = gr.Textbox(
                label="HuggingFace Model Name or Path",
                info="Name of the HuggingFace Hub repository or the local file path for it"
            )
            vocab_size = gr.Number(
                label="Vocab Size",
                value=51200,
                info="How many tokens are in the embedding layer"
            )
            tied_embeddings = gr.Checkbox(
                label="Tied Embeddings",
                value=False,
                info="Whether embeddings are tied (shared between input and output)"
            )
            hidden_size = gr.Number(
                label="Hidden Size",
                value=6144,
                info="Dimension of the model's hidden size"
            )
            sequence_length = gr.Number(
                label="Sequence Length",
                value=2048,
                info="Sequence length used for training"
            )
            num_layers = gr.Number(
                label="Number of Layers",
                value=44,
                info="Number of transformer layers used in the model"
            )
            ffn_expansion_factor = gr.Number(
                label="FFN Expansion Factor",
                value=4,
                info="How much the MLP hidden size expands"
            )
            num_mlp_linears = gr.Number(
                label="Number of Linear Layers per MLP Block",
                value=2,
                info="How many linear layers per MLP block"
            )
            kv_size_ratio = gr.Number(
                label="KV Size Ratio",
                value=1.0,
                info="Ratio of key/value heads to total query heads: 1.0 for MHA, 1/num_attention_heads for MQA"
            )
            with gr.Accordion("MoE Parameters", open=False):
                moe = gr.Checkbox(
                    label="MoE",
                    value=False,
                    info="Whether the model is MoE"
                )
                num_experts = gr.Number(
                    label="Number of Experts",
                    value=8,
                    info="Number of experts for MoE"
                )
                expert_interval = gr.Number(
                    label="Expert Interval",
                    value=1,
                    info="Expert interval for MoE"
                )
                topk = gr.Number(
                    label="Top k Routing",
                    value=1,
                    info="Top k routing for MoE"
                )
            param_result = gr.Textbox(label="Parameter Calculation Result", interactive=False)
            calc_param_button = gr.Button("Calculate Parameters")
            calc_param_button.click(
                calc_params,
                inputs=[
                    vocab_size, tied_embeddings, hidden_size, sequence_length, num_layers, moe,
                    num_experts, expert_interval, topk, ffn_expansion_factor, num_mlp_linears,
                    kv_size_ratio
                ],
                outputs=param_result
            )
            # Auto-fill this tab's fields when the model path changes. update_from_hf_model
            # returns six values, so six outputs must be listed: num_attention_heads refers
            # to the Memory Calculation tab's component (this tab defines none), and
            # param_result receives the empty string that clears stale results.
            hf_model_name_or_path.change(
                fn=update_from_hf_model,
                inputs=[hf_model_name_or_path],
                outputs=[num_layers, hidden_size, num_attention_heads, vocab_size, sequence_length, param_result]
            )
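# Assuming stock Gradio behavior: `python app.py` serves the UI locally
# (http://127.0.0.1:7860 by default); pass share=True to demo.launch() for a
# temporary public link.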
demo.launch()