Transformers
undfined committed on
Commit
41ca44d
·
verified ·
1 Parent(s): cb21551

Added tokens again

Browse files
Files changed (2) hide show
  1. special_tokens_map.json +27 -30
  2. tokenizer.json +2 -2
special_tokens_map.json CHANGED
@@ -1,54 +1,51 @@
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "bos_token": {
3
  "content": "<|endoftext|>",
4
  "lstrip": false,
5
  "normalized": false,
6
- "single_word": false,
7
- "special": true
8
  },
9
  "eos_token": {
10
  "content": "<|endoftext|>",
11
  "lstrip": false,
12
  "normalized": false,
13
  "rstrip": false,
14
- "single_word": false,
15
- "special": true
16
  },
17
  "pad_token": {
18
  "content": "<|pad|>",
19
  "lstrip": false,
20
  "normalized": false,
21
  "rstrip": false,
22
- "single_word": false,
23
- "special": true
24
  },
25
  "unk_token": {
26
  "content": "<|endoftext|>",
27
  "lstrip": false,
28
  "normalized": false,
29
  "rstrip": false,
30
- "single_word": false,
31
- "special": true
32
- },
33
- "additional_special_tokens": [
34
- "<|fim_prefix|>",
35
- "<|fim_middle|>",
36
- "<|fim_suffix|>",
37
- "|||PHONE_NUMBER|||",
38
- "|||EMAIL_ADDRESS|||",
39
- "|||IP_ADDRESS|||",
40
- "<|im_start|>",
41
- "<|im_end|>",
42
- "<|extra_id_0|>",
43
- "<|extra_id_1|>",
44
- "<|extra_id_2|>",
45
- "<|extra_id_3|>",
46
- "<|extra_id_4|>",
47
- "<|extra_id_5|>",
48
- "<|extra_id_6|>",
49
- "<|extra_id_7|>",
50
- "<|extra_id_8|>",
51
- "<|extra_id_9|>",
52
- "<|extra_id_10|>"
53
- ]
54
  }
 
1
  {
2
+ "additional_special_tokens": [
3
+ "<|fim_prefix|>",
4
+ "<|fim_middle|>",
5
+ "<|fim_suffix|>",
6
+ "|||PHONE_NUMBER|||",
7
+ "|||EMAIL_ADDRESS|||",
8
+ "|||IP_ADDRESS|||",
9
+ "<|im_start|>",
10
+ "<|im_end|>",
11
+ "<|extra_id_0|>",
12
+ "<|extra_id_1|>",
13
+ "<|extra_id_2|>",
14
+ "<|extra_id_3|>",
15
+ "<|extra_id_4|>",
16
+ "<|extra_id_5|>",
17
+ "<|extra_id_6|>",
18
+ "<|extra_id_7|>",
19
+ "<|extra_id_8|>",
20
+ "<|extra_id_9|>",
21
+ "<|extra_id_10|>"
22
+ ],
23
  "bos_token": {
24
  "content": "<|endoftext|>",
25
  "lstrip": false,
26
  "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
  },
30
  "eos_token": {
31
  "content": "<|endoftext|>",
32
  "lstrip": false,
33
  "normalized": false,
34
  "rstrip": false,
35
+ "single_word": false
 
36
  },
37
  "pad_token": {
38
  "content": "<|pad|>",
39
  "lstrip": false,
40
  "normalized": false,
41
  "rstrip": false,
42
+ "single_word": false
 
43
  },
44
  "unk_token": {
45
  "content": "<|endoftext|>",
46
  "lstrip": false,
47
  "normalized": false,
48
  "rstrip": false,
49
+ "single_word": false
50
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  }
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be7041eb9d5f95ffaf1ce82ffa97c9f8bd85b0883127b182a6ca16a595a2738b
3
- size 13487219
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19f403d793a7ba69e4f49a40e7d592a5d4d1ac4f4e7224f18bc608ef268db68b
3
+ size 13491334