File size: 6,845 Bytes
8abcf2d ef8c30b 834b1c6 8abcf2d ef8c30b d65669a bf52501 d65669a 5d70faf bf52501 5d70faf 3951475 bf52501 3951475 dcb01bb bf52501 dcb01bb d65669a dcb01bb ef8c30b 4483569 dcb01bb 5d70faf dcb01bb d65669a dcb01bb 3951475 4483569 8abcf2d 4483569 ee7c71e dcb01bb 8abcf2d 3951475 8abcf2d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 |
import gradio as gr
from transformers import AutoConfig # Required for Hugging Face integration
from calc_params import calc_params # Import calc_params from the new file
# ---- Helper Functions ---- #
def get_hf_model_args(hf_model_name_or_path):
    """Fetch a Hugging Face model config and extract its architecture sizes.

    Args:
        hf_model_name_or_path: Model id or local path handed to
            ``AutoConfig.from_pretrained``.

    Returns:
        A dict with the keys ``num_layers``, ``hidden_size``,
        ``num_attention_heads``, ``vocab_size`` and ``sequence_length``.
        A value is ``None`` when the config defines none of the field names
        known for it.

    Raises:
        gr.Error: If the config cannot be loaded, whatever the reason.
    """
    # Serialized configs do not share one naming scheme: GPT-2 style configs
    # use n_layer / n_embd / n_head / n_positions and T5 style configs use
    # num_layers / d_model / num_heads. Each value is therefore looked up
    # under several aliases; the first alias holding a non-None value wins.
    field_aliases = {
        "num_layers": ("num_hidden_layers", "n_layer", "num_layers"),
        "hidden_size": ("hidden_size", "n_embd", "d_model"),
        "num_attention_heads": ("num_attention_heads", "n_head", "num_heads"),
        "vocab_size": ("vocab_size",),
        "sequence_length": ("max_position_embeddings", "n_positions"),
    }

    try:
        # NOTE(security): trust_remote_code=True allows the referenced
        # repository to run its own Python code while the config is loaded.
        # The name comes straight from a user-editable textbox, so review
        # whether this app should keep that enabled.
        config = AutoConfig.from_pretrained(
            hf_model_name_or_path, trust_remote_code=True
        ).to_dict()
    except Exception as e:
        # Surface every failure (bad name, network, gated repo, ...) as a
        # Gradio error popup, keeping the original exception as the cause.
        raise gr.Error(f"Error fetching Hugging Face model: {str(e)}") from e

    def first_defined(keys):
        # Return the first non-None config value among `keys`, else None.
        return next(
            (config[key] for key in keys if config.get(key) is not None),
            None,
        )

    return {name: first_defined(keys) for name, keys in field_aliases.items()}
# ---- Update Gradio inputs with Hugging Face model config ---- #
def update_from_hf_model(hf_model_name_or_path):
    """Build Gradio updates that copy a Hugging Face config into the form.

    Args:
        hf_model_name_or_path: Model id or local path typed by the user.

    Returns:
        A 6-tuple: updates for the number-of-layers, hidden-size,
        attention-heads, vocab-size and sequence-length inputs (in that
        order), followed by the new value for the result textbox.

    Raises:
        gr.Error: Propagated from ``get_hf_model_args`` when the config
            cannot be loaded.
    """
    field_order = (
        "num_layers",
        "hidden_size",
        "num_attention_heads",
        "vocab_size",
        "sequence_length",
    )

    model_name = (hf_model_name_or_path or "").strip()
    if not model_name:
        # The textbox was cleared: there is nothing to look up, so leave
        # every output (including the result box) exactly as it is instead
        # of attempting a fetch that can only fail.
        return tuple(gr.update() for _ in range(len(field_order) + 1))

    model_params = get_hf_model_args(model_name)

    def field_update(key):
        # A field missing from the config must not blank the input; the
        # user's current value stays so later calculations can still use it.
        value = model_params[key]
        return gr.update() if value is None else gr.update(value=value)

    # The trailing "" clears any previously displayed result.
    return (*(field_update(key) for key in field_order), "")
# ---- Memory Calculation ---- #
def calc_mem(
    hf_model_name_or_path,
    num_gpus,
    tensor_parallel_size,
    pipeline_parallel_size,
    batch_size_per_gpu,
    sequence_length,
    vocab_size,
    hidden_size,
    num_attention_heads,
    num_layers,
    ffn_expansion_factor,
    is_mixed_precision,
    misc_mem_gib,
    kv_size_ratio=1.0,
    num_mlp_linears=2,
):
    """Estimate the per-GPU memory taken by the model weights.

    The parameter count is the sum of embedding, positional-embedding,
    layer-norm, attention and MLP parameters. It is converted to bytes,
    split evenly across the tensor/pipeline model-parallel shards, and the
    fixed overhead ``misc_mem_gib`` is added. Gradients, optimizer state and
    activations are not part of this estimate.

    Args:
        hf_model_name_or_path: Optional model id/path. When non-empty, the
            sizes found in its config override the matching arguments.
        num_gpus: Accepted for interface compatibility; not used.
        tensor_parallel_size: Tensor-parallel degree (must be > 0).
        pipeline_parallel_size: Pipeline-parallel degree (must be > 0).
        batch_size_per_gpu: Accepted for interface compatibility; not used.
        sequence_length: Number of positions in the positional embedding.
        vocab_size: Vocabulary size.
        hidden_size: Model hidden dimension.
        num_attention_heads: Accepted for interface compatibility; not used.
        num_layers: Number of transformer layers.
        ffn_expansion_factor: MLP width as a multiple of ``hidden_size``.
        is_mixed_precision: If true, 2 bytes per parameter, otherwise 4.
        misc_mem_gib: Fixed per-GPU overhead in GiB added to the result.
        kv_size_ratio: Size of the key/value projections relative to the
            query projection (1.0 for standard multi-head attention).
        num_mlp_linears: Number of linear layers in each MLP block.

    Returns:
        A human-readable string with the estimate in GiB (two decimals).

    Raises:
        gr.Error: If a required numeric input is blank, if a parallel size
            is not positive, or if the Hugging Face config cannot be loaded.
    """
    model_params = get_hf_model_args(hf_model_name_or_path) if hf_model_name_or_path else None
    if model_params:
        # Values from the config win; fall back to the form input when the
        # config does not define a field.
        num_layers = model_params["num_layers"] or num_layers
        hidden_size = model_params["hidden_size"] or hidden_size
        num_attention_heads = model_params["num_attention_heads"] or num_attention_heads
        vocab_size = model_params["vocab_size"] or vocab_size
        sequence_length = model_params["sequence_length"] or sequence_length

    # A cleared numeric input arrives as None; report it by name instead of
    # failing with an opaque TypeError in the arithmetic below.
    required = {
        "Tensor Parallel Size": tensor_parallel_size,
        "Pipeline Parallel Size": pipeline_parallel_size,
        "Sequence Length": sequence_length,
        "Vocab Size": vocab_size,
        "Hidden Size": hidden_size,
        "Number of Layers": num_layers,
        "FFN Expansion Factor": ffn_expansion_factor,
        "Misc Memory Overhead (GiB)": misc_mem_gib,
    }
    missing = [label for label, value in required.items() if value is None]
    if missing:
        raise gr.Error("Missing value for: " + ", ".join(missing))

    if tensor_parallel_size <= 0 or pipeline_parallel_size <= 0:
        raise gr.Error("Tensor and pipeline parallel sizes must be positive.")
    model_shards = tensor_parallel_size * pipeline_parallel_size

    layer_hidden_sq = num_layers * hidden_size * hidden_size

    # Input embedding plus an untied output projection.
    embed_params = 2 * vocab_size * hidden_size
    positional_params = hidden_size * sequence_length
    ln_params = 8 * hidden_size * num_layers + (2 * hidden_size)
    # Query + output projections (2 * h^2) and key + value projections
    # (2 * kv_size_ratio * h^2) per layer. The FFN expansion factor does not
    # belong in this term.
    attention_params = int(2 * (1 + kv_size_ratio) * layer_hidden_sq)
    # Each of the num_mlp_linears layers holds ffn_expansion_factor * h^2
    # weights per transformer layer.
    mlp_params = num_mlp_linears * ffn_expansion_factor * layer_hidden_sq

    total_params = embed_params + positional_params + ln_params + attention_params + mlp_params

    bytes_per_param = 2 if is_mixed_precision else 4
    model_mem = total_params * bytes_per_param

    # Weights are divided across the model-parallel shards; the overhead is
    # charged to every GPU in full.
    per_gpu_mem_gib = (model_mem / model_shards) / 1024**3 + misc_mem_gib
    return f"Per-GPU Memory Required for Training: {per_gpu_mem_gib:.2f} GiB"
# ---- Gradio Interface ---- #
def _update_param_tab_from_hf_model(hf_model_name_or_path):
    """Adapt update_from_hf_model to the Parameter Calculation tab.

    update_from_hf_model returns six values (layers, hidden size, attention
    heads, vocab size, sequence length, cleared result). The Parameter
    Calculation tab has no attention-heads input, so that value is dropped
    and the remaining five line up with the tab's outputs.
    """
    layers, hidden, _heads, vocab, seq_len, cleared_result = update_from_hf_model(
        hf_model_name_or_path
    )
    return layers, hidden, vocab, seq_len, cleared_result


with gr.Blocks() as demo:
    with gr.Tabs():
        # Memory Calculation Tab
        with gr.TabItem("Memory Calculation"):
            hf_model_name_or_path = gr.Textbox(label="HuggingFace Model Name or Path (optional)", value="")
            num_gpus = gr.Number(label="Number of GPUs", value=1)
            tensor_parallel_size = gr.Number(label="Tensor Parallel Size", value=1)
            pipeline_parallel_size = gr.Number(label="Pipeline Parallel Size", value=1)
            batch_size_per_gpu = gr.Number(label="Batch Size per GPU", value=8)
            sequence_length = gr.Number(label="Sequence Length", value=2048)
            vocab_size = gr.Number(label="Vocab Size", value=51200)
            hidden_size = gr.Number(label="Hidden Size", value=6144)
            num_attention_heads = gr.Number(label="Number of Attention Heads", value=64)
            num_layers = gr.Number(label="Number of Layers", value=44)
            ffn_expansion_factor = gr.Number(label="FFN Expansion Factor", value=4)
            is_mixed_precision = gr.Checkbox(label="Mixed Precision", value=True)
            misc_mem_gib = gr.Number(label="Misc Memory Overhead (GiB)", value=5)

            memory_result = gr.Textbox(label="Memory Calculation Result", interactive=False)
            calc_memory_button = gr.Button("Calculate Memory")
            calc_memory_button.click(
                calc_mem,
                inputs=[
                    hf_model_name_or_path,
                    num_gpus,
                    tensor_parallel_size,
                    pipeline_parallel_size,
                    batch_size_per_gpu,
                    sequence_length,
                    vocab_size,
                    hidden_size,
                    num_attention_heads,
                    num_layers,
                    ffn_expansion_factor,
                    is_mixed_precision,
                    misc_mem_gib,
                ],
                outputs=memory_result,
            )

            # Typing a model name fills the size fields from its config and
            # clears the previous result (six outputs, matching the six
            # values update_from_hf_model returns).
            hf_model_name_or_path.change(
                fn=update_from_hf_model,
                inputs=[hf_model_name_or_path],
                outputs=[num_layers, hidden_size, num_attention_heads, vocab_size, sequence_length, memory_result],
            )

        # Parameter Calculation Tab
        # Components here carry a param_ prefix so they cannot be confused
        # with the Memory Calculation tab's components of the same meaning.
        with gr.TabItem("Parameter Calculation"):
            param_hf_model_name_or_path = gr.Textbox(label="HuggingFace Model Name or Path (optional)", value="")
            param_vocab_size = gr.Number(label="Vocab Size", value=51200)
            tied_embeddings = gr.Checkbox(label="Tied Embeddings", value=False)
            param_hidden_size = gr.Number(label="Hidden Size", value=6144)
            param_sequence_length = gr.Number(label="Sequence Length", value=2048)
            param_num_layers = gr.Number(label="Number of Layers", value=44)
            param_ffn_expansion_factor = gr.Number(label="FFN Expansion Factor", value=4)
            num_mlp_linears = gr.Number(label="Number of Linear Layers per MLP Block", value=2)
            kv_size_ratio = gr.Number(label="KV Size Ratio", value=1.0)

            with gr.Accordion("MoE Parameters", open=False):
                moe = gr.Checkbox(label="MoE", value=False)
                num_experts = gr.Number(label="Number of Experts", value=8)
                expert_interval = gr.Number(label="Expert Interval", value=1)
                topk = gr.Number(label="Top k Routing", value=1)

            param_result = gr.Textbox(label="Parameter Calculation Result", interactive=False)
            calc_param_button = gr.Button("Calculate Parameters")
            calc_param_button.click(
                calc_params,
                inputs=[
                    param_vocab_size,
                    tied_embeddings,
                    param_hidden_size,
                    param_sequence_length,
                    param_num_layers,
                    moe,
                    num_experts,
                    expert_interval,
                    topk,
                    param_ffn_expansion_factor,
                    num_mlp_linears,
                    kv_size_ratio,
                ],
                outputs=param_result,
            )

            # This tab has no attention-heads field, so the adapter drops
            # that value; the outputs are this tab's own components and its
            # own result box.
            param_hf_model_name_or_path.change(
                fn=_update_param_tab_from_hf_model,
                inputs=[param_hf_model_name_or_path],
                outputs=[param_num_layers, param_hidden_size, param_vocab_size, param_sequence_length, param_result],
            )

demo.launch()
|