reaperdoesntknow committed on
Commit
7d2b348
·
verified ·
1 Parent(s): 3398b2e

Upload MoAMetricLM

Browse files
Files changed (2) hide show
  1. config.json +4 -4
  2. pytorch_model.bin +2 -2
config.json CHANGED
@@ -18,7 +18,7 @@
18
  "eos_token_id": 151643,
19
  "ff_mult": 4,
20
  "ffn_hidden": 2048,
21
- "head_feature_heads": 16,
22
  "layer_scale_init_value": 0.0001,
23
  "learn_alpha": true,
24
  "learn_radius": true,
@@ -37,16 +37,16 @@
37
  "origin_init_scale": 0.0,
38
  "pad_token_id": 151643,
39
  "proj_drop": 0.1,
40
- "r_basis": 16,
41
  "radius_init": 3.0,
42
  "router_bias_heads": 4,
43
  "router_dropout": 0.1,
44
  "router_hidden": 2048,
45
  "router_init_temperature": 2.0,
46
- "router_temperature": 1.0,
47
  "router_topk": 3,
48
  "shared_kv_ratio": 0.55,
49
- "theta_base": 10000.0,
50
  "ti_reg_samples": 0,
51
  "ti_reg_weight": 0.0,
52
  "transformers_version": "4.56.1",
 
18
  "eos_token_id": 151643,
19
  "ff_mult": 4,
20
  "ffn_hidden": 2048,
21
+ "head_feature_heads": 32,
22
  "layer_scale_init_value": 0.0001,
23
  "learn_alpha": true,
24
  "learn_radius": true,
 
37
  "origin_init_scale": 0.0,
38
  "pad_token_id": 151643,
39
  "proj_drop": 0.1,
40
+ "r_basis": 4,
41
  "radius_init": 3.0,
42
  "router_bias_heads": 4,
43
  "router_dropout": 0.1,
44
  "router_hidden": 2048,
45
  "router_init_temperature": 2.0,
46
+ "router_temperature": 2.0,
47
  "router_topk": 3,
48
  "shared_kv_ratio": 0.55,
49
+ "theta_base": 100000.0,
50
  "ti_reg_samples": 0,
51
  "ti_reg_weight": 0.0,
52
  "transformers_version": "4.56.1",
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:07c407460eb610ed97b9a73145bfed1f5110c72cf9edaf602dced9d281894a8b
3
- size 1711254331
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4eab1a92049e5fe16817ed4c13ff98ec452f76e5fbc5af6be3df94f9a812184
3
+ size 1712299451