mtasic85 committed on
Commit
da80ae1
·
1 Parent(s): 1386dd6

grokadamw.GrokAdamW

Browse files
Files changed (1) hide show
  1. scripts/pretrain-core-model.yaml +1 -1
scripts/pretrain-core-model.yaml CHANGED
@@ -18,7 +18,7 @@ model_config:
18
  bias: False
19
  norm_class_name: "RMSNorm"
20
  mlp_class_name: "LLaMAMLP"
21
- intermediate_size: 1792
22
  norm_eps: 1e-5
23
  rope_base: 500000
24
  rope_adjustments:
 
18
  bias: False
19
  norm_class_name: "RMSNorm"
20
  mlp_class_name: "LLaMAMLP"
21
+ intermediate_size: 2688 # n_embd * 5.25
22
  norm_eps: 1e-5
23
  rope_base: 500000
24
  rope_adjustments: