Tingquan committed on
Commit
c69621d
·
verified ·
1 Parent(s): 7be6f39

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ model_state.pdparams filter=lfs diff=lfs merge=lfs -text
added_tokens.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "<|endoftext|>": 151643,
3
+ "<|im_start|>": 151644,
4
+ "<|im_end|>": 151645,
5
+ "<|object_ref_start|>": 151646,
6
+ "<|object_ref_end|>": 151647,
7
+ "<|box_start|>": 151648,
8
+ "<|box_end|>": 151649,
9
+ "<|quad_start|>": 151650,
10
+ "<|quad_end|>": 151651,
11
+ "<|vision_start|>": 151652,
12
+ "<|vision_end|>": 151653,
13
+ "<|vision_pad|>": 151654,
14
+ "<|image_pad|>": 151655,
15
+ "<|video_pad|>": 151656
16
+ }
config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "mode": "paddle",
3
+ "Global": {
4
+ "model_name": "PP-DocBee-2B"
5
+ }
6
+ }
generation_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "decode_strategy": "sampling",
4
+ "do_sample": true,
5
+ "eos_token_id": [
6
+ 151645,
7
+ 151643
8
+ ],
9
+ "pad_token_id": 151643,
10
+ "repetition_penalty": 1.05,
11
+ "temperature": 0.1,
12
+ "top_k": 1,
13
+ "top_p": 0.001
14
+ }
inference.yml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ mode: paddle
2
+ Global:
3
+ model_name: PP-DocBee-2B
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model_state.pdparams ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:692977ef9abd3dccb7d73b8b8aeffb3ec4cdfdf0ff1bdfd2f41f76ef55262595
3
+ size 4418043117
special_tokens_map.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eos_token": {
3
+ "content": "<|im_end|>",
4
+ "single_word": false,
5
+ "lstrip": false,
6
+ "rstrip": false,
7
+ "normalized": false,
8
+ "special": true
9
+ },
10
+ "unk_token": {
11
+ "content": "<|im_end|>",
12
+ "single_word": false,
13
+ "lstrip": false,
14
+ "rstrip": false,
15
+ "normalized": false,
16
+ "special": true
17
+ },
18
+ "pad_token": {
19
+ "content": "<|endoftext|>",
20
+ "single_word": false,
21
+ "lstrip": false,
22
+ "rstrip": false,
23
+ "normalized": false,
24
+ "special": true
25
+ },
26
+ "additional_special_tokens": [
27
+ "<|im_start|>",
28
+ "<|im_end|>",
29
+ "<|object_ref_start|>",
30
+ "<|object_ref_end|>",
31
+ "<|box_start|>",
32
+ "<|box_end|>",
33
+ "<|quad_start|>",
34
+ "<|quad_end|>",
35
+ "<|vision_start|>",
36
+ "<|vision_end|>",
37
+ "<|vision_pad|>",
38
+ "<|image_pad|>",
39
+ "<|video_pad|>"
40
+ ]
41
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "errors": "replace",
3
+ "unk_token": null,
4
+ "bos_token": null,
5
+ "eos_token": "<|im_end|>",
6
+ "pad_token": "<|endoftext|>",
7
+ "clean_up_tokenization_spaces": false,
8
+ "split_special_tokens": false,
9
+ "add_prefix_space": false,
10
+ "added_tokens_decoder": {
11
+ "151643": {
12
+ "content": "<|endoftext|>",
13
+ "single_word": false,
14
+ "lstrip": false,
15
+ "rstrip": false,
16
+ "normalized": false,
17
+ "special": true
18
+ },
19
+ "151644": {
20
+ "content": "<|im_start|>",
21
+ "single_word": false,
22
+ "lstrip": false,
23
+ "rstrip": false,
24
+ "normalized": false,
25
+ "special": true
26
+ },
27
+ "151645": {
28
+ "content": "<|im_end|>",
29
+ "single_word": false,
30
+ "lstrip": false,
31
+ "rstrip": false,
32
+ "normalized": false,
33
+ "special": true
34
+ },
35
+ "151646": {
36
+ "content": "<|object_ref_start|>",
37
+ "single_word": false,
38
+ "lstrip": false,
39
+ "rstrip": false,
40
+ "normalized": false,
41
+ "special": true
42
+ },
43
+ "151647": {
44
+ "content": "<|object_ref_end|>",
45
+ "single_word": false,
46
+ "lstrip": false,
47
+ "rstrip": false,
48
+ "normalized": false,
49
+ "special": true
50
+ },
51
+ "151648": {
52
+ "content": "<|box_start|>",
53
+ "single_word": false,
54
+ "lstrip": false,
55
+ "rstrip": false,
56
+ "normalized": false,
57
+ "special": true
58
+ },
59
+ "151649": {
60
+ "content": "<|box_end|>",
61
+ "single_word": false,
62
+ "lstrip": false,
63
+ "rstrip": false,
64
+ "normalized": false,
65
+ "special": true
66
+ },
67
+ "151650": {
68
+ "content": "<|quad_start|>",
69
+ "single_word": false,
70
+ "lstrip": false,
71
+ "rstrip": false,
72
+ "normalized": false,
73
+ "special": true
74
+ },
75
+ "151651": {
76
+ "content": "<|quad_end|>",
77
+ "single_word": false,
78
+ "lstrip": false,
79
+ "rstrip": false,
80
+ "normalized": false,
81
+ "special": true
82
+ },
83
+ "151652": {
84
+ "content": "<|vision_start|>",
85
+ "single_word": false,
86
+ "lstrip": false,
87
+ "rstrip": false,
88
+ "normalized": false,
89
+ "special": true
90
+ },
91
+ "151653": {
92
+ "content": "<|vision_end|>",
93
+ "single_word": false,
94
+ "lstrip": false,
95
+ "rstrip": false,
96
+ "normalized": false,
97
+ "special": true
98
+ },
99
+ "151654": {
100
+ "content": "<|vision_pad|>",
101
+ "single_word": false,
102
+ "lstrip": false,
103
+ "rstrip": false,
104
+ "normalized": false,
105
+ "special": true
106
+ },
107
+ "151655": {
108
+ "content": "<|image_pad|>",
109
+ "single_word": false,
110
+ "lstrip": false,
111
+ "rstrip": false,
112
+ "normalized": false,
113
+ "special": true
114
+ },
115
+ "151656": {
116
+ "content": "<|video_pad|>",
117
+ "single_word": false,
118
+ "lstrip": false,
119
+ "rstrip": false,
120
+ "normalized": false,
121
+ "special": true
122
+ }
123
+ },
124
+ "additional_special_tokens": [
125
+ "<|im_start|>",
126
+ "<|im_end|>",
127
+ "<|object_ref_start|>",
128
+ "<|object_ref_end|>",
129
+ "<|box_start|>",
130
+ "<|box_end|>",
131
+ "<|quad_start|>",
132
+ "<|quad_end|>",
133
+ "<|vision_start|>",
134
+ "<|vision_end|>",
135
+ "<|vision_pad|>",
136
+ "<|image_pad|>",
137
+ "<|video_pad|>"
138
+ ],
139
+ "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
140
+ "padding_side": "left",
141
+ "model_max_length": 32768,
142
+ "tokenizer_class": "Qwen2Tokenizer",
143
+ "added_tokens_file": null,
144
+ "special_tokens_map_file": null,
145
+ "chat_template_file": null
146
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff