QscQ committed
Commit c63f001 · verified · 1 Parent(s): 55a7282

Update config.json

Files changed (1): config.json (+8, -12)
config.json CHANGED
@@ -1,6 +1,6 @@
 {
   "architectures": [
-    "MiniMaxText01ForCausalLM"
+    "MiniMaxForCausalLM"
   ],
   "attention_dropout": 0.0,
   "layer_types": [
@@ -85,10 +85,6 @@
     "linear_attention",
     "full_attention"
   ],
-  "auto_map": {
-    "AutoConfig": "configuration_minimax_text_01.MiniMaxText01Config",
-    "AutoModelForCausalLM": "modeling_minimax_text_01.MiniMaxText01ForCausalLM"
-  },
   "bos_token_id": null,
   "eos_token_id": null,
   "head_dim": 128,
@@ -96,14 +92,14 @@
   "hidden_size": 6144,
   "initializer_range": 0.02,
   "intermediate_size": 9216,
-  "layernorm_full_attention_alpha": 3.5565588200778455,
-  "layernorm_full_attention_beta": 1.0,
-  "layernorm_linear_attention_alpha": 3.5565588200778455,
-  "layernorm_linear_attention_beta": 1.0,
-  "layernorm_mlp_alpha": 3.5565588200778455,
-  "layernorm_mlp_beta": 1.0,
+  "full_attn_alpha_factor": 3.5565588200778455,
+  "full_attn_beta_factor": 1.0,
+  "linear_attn_alpha_factor": 3.5565588200778455,
+  "linear_attn_beta_factor": 1.0,
+  "mlp_alpha_factor": 3.5565588200778455,
+  "mlp_beta_factor": 1.0,
   "max_position_embeddings": 10240000,
-  "model_type": "minimax_text_01",
+  "model_type": "minimax",
   "num_attention_heads": 64,
   "num_experts_per_tok": 2,
   "num_hidden_layers": 80,