Spacyzipa committed on
Commit
c2cfcc2
·
1 Parent(s): 1923ba2

Training done

Browse files
added_tokens.json CHANGED
@@ -24,6 +24,8 @@
24
  "</s_hsn_code>": 57580,
25
  "</s_iban>": 57638,
26
  "</s_iec>": 57606,
 
 
27
  "</s_invoice_currency>": 57598,
28
  "</s_invoice_date>": 57534,
29
  "</s_invoice_no>": 57532,
@@ -88,6 +90,8 @@
88
  "<s_iban>": 57637,
89
  "<s_iec>": 57605,
90
  "<s_iitcdip>": 57523,
 
 
91
  "<s_invoice_currency>": 57597,
92
  "<s_invoice_date>": 57533,
93
  "<s_invoice_no>": 57531,
 
24
  "</s_hsn_code>": 57580,
25
  "</s_iban>": 57638,
26
  "</s_iec>": 57606,
27
+ "</s_invoice >": 57651,
28
+ "</s_invoice>": 57653,
29
  "</s_invoice_currency>": 57598,
30
  "</s_invoice_date>": 57534,
31
  "</s_invoice_no>": 57532,
 
90
  "<s_iban>": 57637,
91
  "<s_iec>": 57605,
92
  "<s_iitcdip>": 57523,
93
+ "<s_invoice >": 57650,
94
+ "<s_invoice>": 57652,
95
  "<s_invoice_currency>": 57597,
96
  "<s_invoice_date>": 57533,
97
  "<s_invoice_no>": 57531,
special_tokens_map.json CHANGED
@@ -3,9 +3,27 @@
3
  "<s_iitcdip>",
4
  "<s_synthdog>"
5
  ],
6
- "bos_token": "<s>",
7
- "cls_token": "<s>",
8
- "eos_token": "</s>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  "mask_token": {
10
  "content": "<mask>",
11
  "lstrip": true,
@@ -13,7 +31,25 @@
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
- "pad_token": "<pad>",
17
- "sep_token": "</s>",
18
- "unk_token": "<unk>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  }
 
3
  "<s_iitcdip>",
4
  "<s_synthdog>"
5
  ],
6
+ "bos_token": {
7
+ "content": "<s>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false
12
+ },
13
+ "cls_token": {
14
+ "content": "<s>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ },
20
+ "eos_token": {
21
+ "content": "</s>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false
26
+ },
27
  "mask_token": {
28
  "content": "<mask>",
29
  "lstrip": true,
 
31
  "rstrip": false,
32
  "single_word": false
33
  },
34
+ "pad_token": {
35
+ "content": "<pad>",
36
+ "lstrip": false,
37
+ "normalized": false,
38
+ "rstrip": false,
39
+ "single_word": false
40
+ },
41
+ "sep_token": {
42
+ "content": "</s>",
43
+ "lstrip": false,
44
+ "normalized": false,
45
+ "rstrip": false,
46
+ "single_word": false
47
+ },
48
+ "unk_token": {
49
+ "content": "<unk>",
50
+ "lstrip": false,
51
+ "normalized": false,
52
+ "rstrip": false,
53
+ "single_word": false
54
+ }
55
  }
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1063,6 +1063,38 @@
1063
  "rstrip": false,
1064
  "single_word": false,
1065
  "special": false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1066
  }
1067
  },
1068
  "additional_special_tokens": [
@@ -1074,11 +1106,18 @@
1074
  "cls_token": "<s>",
1075
  "eos_token": "</s>",
1076
  "mask_token": "<mask>",
 
1077
  "model_max_length": 1000000000000000019884624838656,
 
1078
  "pad_token": "<pad>",
 
 
1079
  "processor_class": "DonutProcessor",
1080
  "sep_token": "</s>",
1081
  "sp_model_kwargs": {},
 
1082
  "tokenizer_class": "XLMRobertaTokenizer",
 
 
1083
  "unk_token": "<unk>"
1084
  }
 
1063
  "rstrip": false,
1064
  "single_word": false,
1065
  "special": false
1066
+ },
1067
+ "57650": {
1068
+ "content": "<s_invoice >",
1069
+ "lstrip": false,
1070
+ "normalized": true,
1071
+ "rstrip": false,
1072
+ "single_word": false,
1073
+ "special": false
1074
+ },
1075
+ "57651": {
1076
+ "content": "</s_invoice >",
1077
+ "lstrip": false,
1078
+ "normalized": true,
1079
+ "rstrip": false,
1080
+ "single_word": false,
1081
+ "special": false
1082
+ },
1083
+ "57652": {
1084
+ "content": "<s_invoice>",
1085
+ "lstrip": false,
1086
+ "normalized": true,
1087
+ "rstrip": false,
1088
+ "single_word": false,
1089
+ "special": false
1090
+ },
1091
+ "57653": {
1092
+ "content": "</s_invoice>",
1093
+ "lstrip": false,
1094
+ "normalized": true,
1095
+ "rstrip": false,
1096
+ "single_word": false,
1097
+ "special": false
1098
  }
1099
  },
1100
  "additional_special_tokens": [
 
1106
  "cls_token": "<s>",
1107
  "eos_token": "</s>",
1108
  "mask_token": "<mask>",
1109
+ "max_length": 768,
1110
  "model_max_length": 1000000000000000019884624838656,
1111
+ "pad_to_multiple_of": null,
1112
  "pad_token": "<pad>",
1113
+ "pad_token_type_id": 0,
1114
+ "padding_side": "right",
1115
  "processor_class": "DonutProcessor",
1116
  "sep_token": "</s>",
1117
  "sp_model_kwargs": {},
1118
+ "stride": 0,
1119
  "tokenizer_class": "XLMRobertaTokenizer",
1120
+ "truncation_side": "right",
1121
+ "truncation_strategy": "longest_first",
1122
  "unk_token": "<unk>"
1123
  }