Kaguya-19 committed
Commit af37836 · 1 Parent(s): 974817f
README.md CHANGED
@@ -2,6 +2,16 @@
 license: apache-2.0
 ---
 
+## MiniCPM4 Series
+- [MiniCPM4-0.5B](https://huggingface.co/openbmb/MiniCPM4-0.5B)
+- [MiniCPM4-8B](https://huggingface.co/openbmb/MiniCPM4-8B)
+- [MiniCPM4-8B-Eagle-FRSpec](https://huggingface.co/openbmb/MiniCPM4-8B-Eagle-FRSpec)
+- [MiniCPM4-8B-Eagle-FRSpec-QAT](https://huggingface.co/openbmb/MiniCPM4-8B-Eagle-FRSpec-QAT)
+- [BitCPM4-0.5B](https://huggingface.co/openbmb/BitCPM4-0.5B)
+- [BitCPM4-1B](https://huggingface.co/openbmb/BitCPM4-1B)
+- [MiniCPM4-Survey](https://huggingface.co/openbmb/MiniCPM4-Survey): **<-- you are here**
+- [MiniCPM4-MCP](https://huggingface.co/openbmb/MiniCPM4-MCP)
+
 ## News
 
 * [2025-06-05] 🚀🚀🚀 We have open-sourced **MiniCPM4-Survey**, a model built upon MiniCPM4-8B that is capable of generating trustworthy, long-form survey papers while maintaining competitive performance relative to significantly larger models.
config.json CHANGED
@@ -1,19 +1,18 @@
 {
+  "_name_or_path": "openbmb/MiniCPM4-8B",
   "architectures": [
     "MiniCPMForCausalLM"
   ],
-  "attention_bias": false,
-  "attention_dropout": 0.0,
   "auto_map": {
     "AutoConfig": "configuration_minicpm.MiniCPMConfig",
-    "AutoModel": "modeling_minicpm.MiniCPMForCausalLM",
+    "AutoModel": "modeling_minicpm.MiniCPMModel",
     "AutoModelForCausalLM": "modeling_minicpm.MiniCPMForCausalLM",
     "AutoModelForSeq2SeqLM": "modeling_minicpm.MiniCPMForCausalLM",
     "AutoModelForSequenceClassification": "modeling_minicpm.MiniCPMForSequenceClassification"
   },
   "bos_token_id": 1,
-  "dim_model_base": 256,
-  "eos_token_id": 73440,
+  "eos_token_id": [2, 73440],
+  "pad_token_id": 2,
   "hidden_act": "silu",
   "hidden_size": 4096,
   "initializer_range": 0.1,
@@ -23,151 +22,19 @@
   "num_attention_heads": 32,
   "num_hidden_layers": 32,
   "num_key_value_heads": 2,
-  "pad_token_id": 73440,
-  "pretraining_tp": 1,
   "rms_norm_eps": 1e-06,
   "rope_scaling": {
-    "long_factor": [
-      0.9977997200264581,
-      1.014658295992452,
-      1.0349680404997148,
-      1.059429246056193,
-      1.0888815016813513,
-      1.1243301355211495,
-      1.166977103606075,
-      1.2182568066927284,
-      1.2798772354275727,
-      1.3538666751582975,
-      1.4426259039919596,
-      1.5489853358570191,
-      1.6762658237220625,
-      1.8283407612492941,
-      2.0096956085876183,
-      2.225478927469756,
-      2.481536379650452,
-      2.784415934557119,
-      3.1413289096347365,
-      3.560047844772632,
-      4.048719380066383,
-      4.615569542115128,
-      5.2684819496549835,
-      6.014438591970396,
-      6.858830049237097,
-      7.804668263503327,
-      8.851768731513417,
-      9.99600492938444,
-      11.228766118181639,
-      12.536757560834843,
-      13.902257701387796,
-      15.303885189125953,
-      16.717837610115794,
-      18.119465097853947,
-      19.484965238406907,
-      20.792956681060105,
-      22.02571786985731,
-      23.16995406772833,
-      24.217054535738416,
-      25.16289275000465,
-      26.007284207271347,
-      26.753240849586767,
-      27.40615325712662,
-      27.973003419175363,
-      28.461674954469114,
-      28.880393889607006,
-      29.237306864684626,
-      29.540186419591297,
-      29.79624387177199,
-      30.01202719065413,
-      30.193382037992453,
-      30.34545697551969,
-      30.47273746338473,
-      30.579096895249787,
-      30.66785612408345,
-      30.741845563814174,
-      30.80346599254902,
-      30.85474569563567,
-      30.897392663720595,
-      30.932841297560394,
-      30.962293553185553,
-      30.986754758742034,
-      31.007064503249293,
-      31.02392307921529
-    ],
-    "original_max_position_embeddings": 32768,
     "rope_type": "longrope",
-    "short_factor": [
-      0.9977997200264581,
-      1.014658295992452,
-      1.0349680404997148,
-      1.059429246056193,
-      1.0888815016813513,
-      1.1243301355211495,
-      1.166977103606075,
-      1.2182568066927284,
-      1.2798772354275727,
-      1.3538666751582975,
-      1.4426259039919596,
-      1.5489853358570191,
-      1.6762658237220625,
-      1.8283407612492941,
-      2.0096956085876183,
-      2.225478927469756,
-      2.481536379650452,
-      2.784415934557119,
-      3.1413289096347365,
-      3.560047844772632,
-      4.048719380066383,
-      4.615569542115128,
-      5.2684819496549835,
-      6.014438591970396,
-      6.858830049237097,
-      7.804668263503327,
-      8.851768731513417,
-      9.99600492938444,
-      11.228766118181639,
-      12.536757560834843,
-      13.902257701387796,
-      15.303885189125953,
-      16.717837610115794,
-      18.119465097853947,
-      19.484965238406907,
-      20.792956681060105,
-      22.02571786985731,
-      23.16995406772833,
-      24.217054535738416,
-      25.16289275000465,
-      26.007284207271347,
-      26.753240849586767,
-      27.40615325712662,
-      27.973003419175363,
-      28.461674954469114,
-      28.880393889607006,
-      29.237306864684626,
-      29.540186419591297,
-      29.79624387177199,
-      30.01202719065413,
-      30.193382037992453,
-      30.34545697551969,
-      30.47273746338473,
-      30.579096895249787,
-      30.66785612408345,
-      30.741845563814174,
-      30.80346599254902,
-      30.85474569563567,
-      30.897392663720595,
-      30.932841297560394,
-      30.962293553185553,
-      30.986754758742034,
-      31.007064503249293,
-      31.02392307921529
-    ]
+    "long_factor": [0.9977997200264581, 1.014658295992452, 1.0349680404997148, 1.059429246056193, 1.0888815016813513, 1.1243301355211495, 1.166977103606075, 1.2182568066927284, 1.2798772354275727, 1.3538666751582975, 1.4426259039919596, 1.5489853358570191, 1.6762658237220625, 1.8283407612492941, 2.0096956085876183, 2.225478927469756, 2.481536379650452, 2.784415934557119, 3.1413289096347365, 3.560047844772632, 4.048719380066383, 4.615569542115128, 5.2684819496549835, 6.014438591970396, 6.858830049237097, 7.804668263503327, 8.851768731513417, 9.99600492938444, 11.228766118181639, 12.536757560834843, 13.902257701387796, 15.303885189125953, 16.717837610115794, 18.119465097853947, 19.484965238406907, 20.792956681060105, 22.02571786985731, 23.16995406772833, 24.217054535738416, 25.16289275000465, 26.007284207271347, 26.753240849586767, 27.40615325712662, 27.973003419175363, 28.461674954469114, 28.880393889607006, 29.237306864684626, 29.540186419591297, 29.79624387177199, 30.01202719065413, 30.193382037992453, 30.34545697551969, 30.47273746338473, 30.579096895249787, 30.66785612408345, 30.741845563814174, 30.80346599254902, 30.85474569563567, 30.897392663720595, 30.932841297560394, 30.962293553185553, 30.986754758742034, 31.007064503249293, 31.02392307921529],
+    "short_factor": [0.9977997200264581, 1.014658295992452, 1.0349680404997148, 1.059429246056193, 1.0888815016813513, 1.1243301355211495, 1.166977103606075, 1.2182568066927284, 1.2798772354275727, 1.3538666751582975, 1.4426259039919596, 1.5489853358570191, 1.6762658237220625, 1.8283407612492941, 2.0096956085876183, 2.225478927469756, 2.481536379650452, 2.784415934557119, 3.1413289096347365, 3.560047844772632, 4.048719380066383, 4.615569542115128, 5.2684819496549835, 6.014438591970396, 6.858830049237097, 7.804668263503327, 8.851768731513417, 9.99600492938444, 11.228766118181639, 12.536757560834843, 13.902257701387796, 15.303885189125953, 16.717837610115794, 18.119465097853947, 19.484965238406907, 20.792956681060105, 22.02571786985731, 23.16995406772833, 24.217054535738416, 25.16289275000465, 26.007284207271347, 26.753240849586767, 27.40615325712662, 27.973003419175363, 28.461674954469114, 28.880393889607006, 29.237306864684626, 29.540186419591297, 29.79624387177199, 30.01202719065413, 30.193382037992453, 30.34545697551969, 30.47273746338473, 30.579096895249787, 30.66785612408345, 30.741845563814174, 30.80346599254902, 30.85474569563567, 30.897392663720595, 30.932841297560394, 30.962293553185553, 30.986754758742034, 31.007064503249293, 31.02392307921529],
+    "original_max_position_embeddings": 32768
   },
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.46.3",
+  "use_cache": true,
+  "vocab_size": 73448,
   "rope_theta": 10000.0,
-  "scale_depth": 1.4,
   "scale_emb": 12,
-  "tie_word_embeddings": false,
-  "torch_dtype": "bfloat16",
-  "transformers_version": "4.51.3",
-  "use_cache": false,
-  "vocab_size": 73448
-}
+  "scale_depth": 1.4,
+  "tie_word_embeddings": false
+}
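Net effect: the config now inherits its identity from openbmb/MiniCPM4-8B, points `AutoModel` at the bare `MiniCPMModel`, reflows the LongRoPE factors onto single lines, and declares two stop tokens instead of one. A minimal sketch of checking the result through the `transformers` API, assuming access to the openbmb/MiniCPM4-Survey checkpoint:

```python
from transformers import AutoConfig

# trust_remote_code fetches configuration_minicpm.py from the repo so the
# custom MiniCPMConfig class is used instead of a stock config.
config = AutoConfig.from_pretrained("openbmb/MiniCPM4-Survey", trust_remote_code=True)

# Generation now stops on either id 2 or <|im_end|> (id 73440),
# and padding uses id 2 instead of 73440.
print(config.eos_token_id)  # [2, 73440]
print(config.pad_token_id)  # 2
print(config.rope_scaling["rope_type"])  # "longrope"
```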
 
 
 
configuration_minicpm.py CHANGED
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
 #
 # This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
@@ -22,7 +21,6 @@
 from transformers.configuration_utils import PretrainedConfig
 from transformers.utils import logging
 
-
 logger = logging.get_logger(__name__)
 
 MINICPM_PRETRAINED_CONFIG_ARCHIVE_MAP = {}
@@ -111,8 +109,8 @@ class MiniCPMConfig(PretrainedConfig):
     >>> configuration = model.config
     ```"""
 
-    model_type = "minicpm"
-    keys_to_ignore_at_inference = ["past_key_values"]
+    model_type = 'minicpm'
+    keys_to_ignore_at_inference = ['past_key_values']
 
     def __init__(
         self,
@@ -122,7 +120,7 @@ class MiniCPMConfig(PretrainedConfig):
         num_hidden_layers=32,
         num_attention_heads=32,
         num_key_value_heads=None,
-        hidden_act="silu",
+        hidden_act='silu',
         max_position_embeddings=2048,
         initializer_range=0.02,
         rms_norm_eps=1e-6,
@@ -139,8 +137,9 @@
         scale_emb=1,
         dim_model_base=1,
         scale_depth=1,
-        **kwargs,
-    ):
+        sparse_config=None,
+        **kwargs):
+
         self.vocab_size = vocab_size
         self.max_position_embeddings = max_position_embeddings
         self.hidden_size = hidden_size
@@ -167,6 +166,9 @@
         self.dim_model_base = dim_model_base
         self.scale_depth = scale_depth
 
+        # sparse config
+        self.sparse_config = sparse_config
+
         super().__init__(
             pad_token_id=pad_token_id,
             bos_token_id=bos_token_id,
@@ -176,7 +178,7 @@
         )
         try:
             import flash_attn
-            self._attn_implementation = "flash_attention_2"
+            self._attn_implementation = 'flash_attention_2'
         except:
             pass
 
@@ -189,12 +191,12 @@
 
         if not isinstance(self.rope_scaling, dict) or len(self.rope_scaling) != 2:
             raise ValueError(
-                "`rope_scaling` must be a dictionary with with two fields, `type` and `factor`, "
-                f"got {self.rope_scaling}"
+                '`rope_scaling` must be a dictionary with with two fields, `type` and `factor`, '
+                f'got {self.rope_scaling}'
             )
-        rope_scaling_type = self.rope_scaling.get("type", None)
-        rope_scaling_factor = self.rope_scaling.get("factor", None)
-        if rope_scaling_type is None or rope_scaling_type not in ["linear", "dynamic"]:
+        rope_scaling_type = self.rope_scaling.get('type', None)
+        rope_scaling_factor = self.rope_scaling.get('factor', None)
+        if rope_scaling_type is None or rope_scaling_type not in ['linear', 'dynamic']:
             raise ValueError(
                 f"`rope_scaling`'s type field must be one of ['linear', 'dynamic'], got {rope_scaling_type}"
             )
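The functional change in this file is the new `sparse_config` argument, stored verbatim on the config for the rewritten `modeling_minicpm.py` to interpret; the rest is quote-style churn. A hypothetical construction sketch follows; the dict keys are illustrative assumptions, since the commit does not document the expected schema:

```python
# Assumes configuration_minicpm.py from this repo is importable.
from configuration_minicpm import MiniCPMConfig

config = MiniCPMConfig(
    hidden_size=4096,
    num_hidden_layers=32,
    num_attention_heads=32,
    num_key_value_heads=2,
    # Stored as-is on the config object; these keys are hypothetical
    # placeholders, not a documented schema.
    sparse_config={"kernel_size": 32, "block_size": 64},
)
print(config.sparse_config)  # {'kernel_size': 32, 'block_size': 64}
```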
generation_config.json CHANGED
@@ -1,8 +1,12 @@
 {
-  "_from_model_config": true,
   "bos_token_id": 1,
-  "eos_token_id": 73440,
-  "pad_token_id": 73440,
-  "transformers_version": "4.51.3",
-  "use_cache": false
+  "do_sample": true,
+  "eos_token_id": [
+    2,
+    73440
+  ],
+  "pad_token_id": 2,
+  "temperature": 0.8,
+  "top_p": 0.8,
+  "transformers_version": "4.46.1"
 }
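With `do_sample`, `temperature` and `top_p` now baked into generation_config.json, a plain `generate()` call samples by default instead of greedy-decoding. A sketch, under the assumption that the checkpoint loads via `trust_remote_code`:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

path = "openbmb/MiniCPM4-Survey"
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    path, torch_dtype=torch.bfloat16, trust_remote_code=True
)

inputs = tokenizer("Speculative decoding for LLM inference", return_tensors="pt")
# No sampling arguments needed: do_sample=True, temperature=0.8 and
# top_p=0.8 come from generation_config.json, and decoding stops on
# either eos id (2 or 73440).
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```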
modeling_minicpm.py ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json CHANGED
@@ -23,13 +23,6 @@
     "rstrip": false,
     "single_word": false
   },
-  "pad_token": {
-    "content": "<|im_end|>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
   "unk_token": {
     "content": "<unk>",
     "lstrip": false,
tokenizer_config.json CHANGED
@@ -103,17 +103,14 @@
     "<|fim_suffix|>"
   ],
   "bos_token": "<s>",
-  "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ '<|im_start|>system\n' + system_message + '<|im_end|>\n' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|im_start|>user\n' + content + '<|im_end|>\n<|im_start|>assistant\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|im_end|>' }}{% endif %}{% endfor %}",
+  "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
   "clean_up_tokenization_spaces": false,
   "eos_token": "<|im_end|>",
-  "extra_special_tokens": {},
   "legacy": true,
   "model_max_length": 1000000000000000019884624838656,
-  "pad_token": "<|im_end|>",
-  "padding_side": "right",
+  "pad_token": null,
   "sp_model_kwargs": {},
   "spaces_between_special_tokens": false,
-  "split_special_tokens": false,
   "tokenizer_class": "LlamaTokenizer",
   "unk_token": "<unk>",
   "use_default_system_prompt": false