Transformers
undfined committed on
Commit
f498593
·
verified ·
1 Parent(s): b303991

Fix decoder

Browse files
added_tokens.json CHANGED
@@ -1,7 +1,6 @@
1
  {
2
- "<|endofprompt|>": 180021,
3
  "<|endoftext|>": 180000,
4
- "<|extra_id_0|>": 180002,
5
  "<|extra_id_10|>": 180020,
6
  "<|extra_id_1|>": 180011,
7
  "<|extra_id_2|>": 180012,
@@ -12,13 +11,13 @@
12
  "<|extra_id_7|>": 180017,
13
  "<|extra_id_8|>": 180018,
14
  "<|extra_id_9|>": 180019,
15
- "<|fim_middle|>": 180004,
16
- "<|fim_prefix|>": 180003,
17
- "<|fim_suffix|>": 180005,
18
- "<|im_end|>": 180010,
19
- "<|im_start|>": 180009,
20
  "<|pad|>": 180001,
21
- "|||EMAIL_ADDRESS|||": 180007,
22
- "|||IP_ADDRESS|||": 180008,
23
- "|||PHONE_NUMBER|||": 180006
24
  }
 
1
  {
 
2
  "<|endoftext|>": 180000,
3
+ "<|extra_id_0|>": 180010,
4
  "<|extra_id_10|>": 180020,
5
  "<|extra_id_1|>": 180011,
6
  "<|extra_id_2|>": 180012,
 
11
  "<|extra_id_7|>": 180017,
12
  "<|extra_id_8|>": 180018,
13
  "<|extra_id_9|>": 180019,
14
+ "<|fim_middle|>": 180003,
15
+ "<|fim_prefix|>": 180002,
16
+ "<|fim_suffix|>": 180004,
17
+ "<|im_end|>": 180009,
18
+ "<|im_start|>": 180008,
19
  "<|pad|>": 180001,
20
+ "|||EMAIL_ADDRESS|||": 180006,
21
+ "|||IP_ADDRESS|||": 180007,
22
+ "|||PHONE_NUMBER|||": 180005
23
  }
special_tokens_map.json CHANGED
@@ -1,57 +1,51 @@
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "bos_token": {
3
  "content": "<|endoftext|>",
4
  "lstrip": false,
5
  "normalized": false,
6
- "single_word": false,
7
- "special": true
8
  },
9
  "eos_token": {
10
  "content": "<|endoftext|>",
11
  "lstrip": false,
12
  "normalized": false,
13
  "rstrip": false,
14
- "single_word": false,
15
- "special": true
16
  },
17
  "pad_token": {
18
  "content": "<|pad|>",
19
  "lstrip": false,
20
  "normalized": false,
21
  "rstrip": false,
22
- "single_word": false,
23
- "special": true
24
  },
25
  "unk_token": {
26
  "content": "<|endoftext|>",
27
  "lstrip": false,
28
  "normalized": false,
29
  "rstrip": false,
30
- "single_word": false,
31
- "special": true
32
- },
33
- "additional_special_tokens": [
34
- "<|extra_id_0|>",
35
- "<|endoftext|>",
36
- "<|fim_prefix|>",
37
- "<|fim_middle|>",
38
- "<|fim_suffix|>",
39
- "|||PHONE_NUMBER|||",
40
- "|||EMAIL_ADDRESS|||",
41
- "|||IP_ADDRESS|||",
42
- "<|im_start|>",
43
- "<|im_end|>",
44
- "<|extra_id_1|>",
45
- "<|extra_id_2|>",
46
- "<|extra_id_3|>",
47
- "<|extra_id_4|>",
48
- "<|extra_id_5|>",
49
- "<|extra_id_6|>",
50
- "<|extra_id_7|>",
51
- "<|extra_id_8|>",
52
- "<|extra_id_9|>",
53
- "<|extra_id_10|>",
54
- "<|endofprompt|>",
55
- "<|pad|>"
56
- ]
57
  }
 
1
  {
2
+ "additional_special_tokens": [
3
+ "<|fim_prefix|>",
4
+ "<|fim_middle|>",
5
+ "<|fim_suffix|>",
6
+ "|||PHONE_NUMBER|||",
7
+ "|||EMAIL_ADDRESS|||",
8
+ "|||IP_ADDRESS|||",
9
+ "<|im_start|>",
10
+ "<|im_end|>",
11
+ "<|extra_id_0|>",
12
+ "<|extra_id_1|>",
13
+ "<|extra_id_2|>",
14
+ "<|extra_id_3|>",
15
+ "<|extra_id_4|>",
16
+ "<|extra_id_5|>",
17
+ "<|extra_id_6|>",
18
+ "<|extra_id_7|>",
19
+ "<|extra_id_8|>",
20
+ "<|extra_id_9|>",
21
+ "<|extra_id_10|>"
22
+ ],
23
  "bos_token": {
24
  "content": "<|endoftext|>",
25
  "lstrip": false,
26
  "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
  },
30
  "eos_token": {
31
  "content": "<|endoftext|>",
32
  "lstrip": false,
33
  "normalized": false,
34
  "rstrip": false,
35
+ "single_word": false
 
36
  },
37
  "pad_token": {
38
  "content": "<|pad|>",
39
  "lstrip": false,
40
  "normalized": false,
41
  "rstrip": false,
42
+ "single_word": false
 
43
  },
44
  "unk_token": {
45
  "content": "<|endoftext|>",
46
  "lstrip": false,
47
  "normalized": false,
48
  "rstrip": false,
49
+ "single_word": false
50
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  }
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be7041eb9d5f95ffaf1ce82ffa97c9f8bd85b0883127b182a6ca16a595a2738b
3
- size 13487219
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19f403d793a7ba69e4f49a40e7d592a5d4d1ac4f4e7224f18bc608ef268db68b
3
+ size 13491334
tokenizer_config.json CHANGED
@@ -18,7 +18,7 @@
18
  "special": true
19
  },
20
  "180002": {
21
- "content": "<|extra_id_0|>",
22
  "lstrip": false,
23
  "normalized": true,
24
  "rstrip": false,
@@ -26,7 +26,7 @@
26
  "special": false
27
  },
28
  "180003": {
29
- "content": "<|fim_prefix|>",
30
  "lstrip": false,
31
  "normalized": true,
32
  "rstrip": false,
@@ -34,7 +34,7 @@
34
  "special": false
35
  },
36
  "180004": {
37
- "content": "<|fim_middle|>",
38
  "lstrip": false,
39
  "normalized": true,
40
  "rstrip": false,
@@ -42,7 +42,7 @@
42
  "special": false
43
  },
44
  "180005": {
45
- "content": "<|fim_suffix|>",
46
  "lstrip": false,
47
  "normalized": true,
48
  "rstrip": false,
@@ -50,7 +50,7 @@
50
  "special": false
51
  },
52
  "180006": {
53
- "content": "|||PHONE_NUMBER|||",
54
  "lstrip": false,
55
  "normalized": true,
56
  "rstrip": false,
@@ -58,7 +58,7 @@
58
  "special": false
59
  },
60
  "180007": {
61
- "content": "|||EMAIL_ADDRESS|||",
62
  "lstrip": false,
63
  "normalized": true,
64
  "rstrip": false,
@@ -66,7 +66,7 @@
66
  "special": false
67
  },
68
  "180008": {
69
- "content": "|||IP_ADDRESS|||",
70
  "lstrip": false,
71
  "normalized": true,
72
  "rstrip": false,
@@ -74,7 +74,7 @@
74
  "special": false
75
  },
76
  "180009": {
77
- "content": "<|im_start|>",
78
  "lstrip": false,
79
  "normalized": true,
80
  "rstrip": false,
@@ -82,7 +82,7 @@
82
  "special": false
83
  },
84
  "180010": {
85
- "content": "<|im_end|>",
86
  "lstrip": false,
87
  "normalized": true,
88
  "rstrip": false,
@@ -168,19 +168,9 @@
168
  "rstrip": false,
169
  "single_word": false,
170
  "special": false
171
- },
172
- "180021": {
173
- "content": "<|endofprompt|>",
174
- "lstrip": false,
175
- "normalized": true,
176
- "rstrip": false,
177
- "single_word": false,
178
- "special": false
179
  }
180
  },
181
  "additional_special_tokens": [
182
- "<|extra_id_0|>",
183
- "<|endoftext|>",
184
  "<|fim_prefix|>",
185
  "<|fim_middle|>",
186
  "<|fim_suffix|>",
@@ -189,6 +179,7 @@
189
  "|||IP_ADDRESS|||",
190
  "<|im_start|>",
191
  "<|im_end|>",
 
192
  "<|extra_id_1|>",
193
  "<|extra_id_2|>",
194
  "<|extra_id_3|>",
@@ -198,9 +189,7 @@
198
  "<|extra_id_7|>",
199
  "<|extra_id_8|>",
200
  "<|extra_id_9|>",
201
- "<|extra_id_10|>",
202
- "<|endofprompt|>",
203
- "<|pad|>"
204
  ],
205
  "bos_token": "<|endoftext|>",
206
  "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
 
18
  "special": true
19
  },
20
  "180002": {
21
+ "content": "<|fim_prefix|>",
22
  "lstrip": false,
23
  "normalized": true,
24
  "rstrip": false,
 
26
  "special": false
27
  },
28
  "180003": {
29
+ "content": "<|fim_middle|>",
30
  "lstrip": false,
31
  "normalized": true,
32
  "rstrip": false,
 
34
  "special": false
35
  },
36
  "180004": {
37
+ "content": "<|fim_suffix|>",
38
  "lstrip": false,
39
  "normalized": true,
40
  "rstrip": false,
 
42
  "special": false
43
  },
44
  "180005": {
45
+ "content": "|||PHONE_NUMBER|||",
46
  "lstrip": false,
47
  "normalized": true,
48
  "rstrip": false,
 
50
  "special": false
51
  },
52
  "180006": {
53
+ "content": "|||EMAIL_ADDRESS|||",
54
  "lstrip": false,
55
  "normalized": true,
56
  "rstrip": false,
 
58
  "special": false
59
  },
60
  "180007": {
61
+ "content": "|||IP_ADDRESS|||",
62
  "lstrip": false,
63
  "normalized": true,
64
  "rstrip": false,
 
66
  "special": false
67
  },
68
  "180008": {
69
+ "content": "<|im_start|>",
70
  "lstrip": false,
71
  "normalized": true,
72
  "rstrip": false,
 
74
  "special": false
75
  },
76
  "180009": {
77
+ "content": "<|im_end|>",
78
  "lstrip": false,
79
  "normalized": true,
80
  "rstrip": false,
 
82
  "special": false
83
  },
84
  "180010": {
85
+ "content": "<|extra_id_0|>",
86
  "lstrip": false,
87
  "normalized": true,
88
  "rstrip": false,
 
168
  "rstrip": false,
169
  "single_word": false,
170
  "special": false
 
 
 
 
 
 
 
 
171
  }
172
  },
173
  "additional_special_tokens": [
 
 
174
  "<|fim_prefix|>",
175
  "<|fim_middle|>",
176
  "<|fim_suffix|>",
 
179
  "|||IP_ADDRESS|||",
180
  "<|im_start|>",
181
  "<|im_end|>",
182
+ "<|extra_id_0|>",
183
  "<|extra_id_1|>",
184
  "<|extra_id_2|>",
185
  "<|extra_id_3|>",
 
189
  "<|extra_id_7|>",
190
  "<|extra_id_8|>",
191
  "<|extra_id_9|>",
192
+ "<|extra_id_10|>"
 
 
193
  ],
194
  "bos_token": "<|endoftext|>",
195
  "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",