Commit 2a6d098 (verified) · Parent: 8f71a0d
qgallouedec (HF Staff) committed: Upload Qwen2_5_VLForConditionalGeneration

Files changed (3):
  1. config.json             +81 -11
  2. generation_config.json   +2  -0
  3. model.safetensors        +2  -2
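The commit hash above can be used to pin downstream code to exactly this upload. A minimal sketch, assuming a recent transformers release and a placeholder repository id (the actual repo name is not shown on this page):

# Hypothetical usage sketch, not part of the commit: load the checkpoint
# pinned to this exact revision. The repo id below is a placeholder.
from transformers import Qwen2_5_VLForConditionalGeneration

model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    "<namespace>/<repo-name>",  # placeholder: substitute the actual repository
    revision="2a6d098",         # short commit hash from this page; the full sha also works
)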
config.json CHANGED
@@ -2,26 +2,87 @@
   "architectures": [
     "Qwen2_5_VLForConditionalGeneration"
   ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 151643,
+  "eos_token_id": 151645,
+  "hidden_act": "silu",
+  "hidden_size": 2048,
   "image_token_id": 151655,
+  "initializer_range": 0.02,
+  "intermediate_size": 11008,
+  "max_position_embeddings": 128000,
+  "max_window_layers": 70,
   "model_type": "qwen2_5_vl",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 2,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": {
+    "mrope_section": [
+      2
+    ],
+    "rope_type": "default",
+    "type": "default"
+  },
+  "rope_theta": 1000000.0,
+  "sliding_window": 32768,
   "text_config": {
+    "architectures": [
+      "Qwen2_5_VLForConditionalGeneration"
+    ],
     "attention_dropout": 0.0,
+    "bos_token_id": 151643,
+    "eos_token_id": 151645,
     "hidden_act": "silu",
     "hidden_size": 16,
     "image_token_id": null,
     "initializer_range": 0.02,
-    "intermediate_size": 32,
+    "intermediate_size": 11008,
     "layer_types": [
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
       "full_attention",
       "full_attention"
     ],
-    "max_position_embeddings": 32768,
-    "max_window_layers": 80,
+    "max_position_embeddings": 128000,
+    "max_window_layers": 70,
     "model_type": "qwen2_5_vl_text",
     "num_attention_heads": 4,
     "num_hidden_layers": 2,
     "num_key_value_heads": 2,
-    "rms_norm_eps": 1e-05,
+    "rms_norm_eps": 1e-06,
     "rope_scaling": {
       "mrope_section": [
         2
@@ -31,17 +92,23 @@
     },
     "rope_theta": 1000000.0,
     "sliding_window": null,
+    "tie_word_embeddings": true,
+    "torch_dtype": "bfloat16",
     "use_cache": true,
     "use_sliding_window": false,
     "video_token_id": null,
-    "vocab_size": 151665
+    "vision_end_token_id": 151653,
+    "vision_start_token_id": 151652,
+    "vision_token_id": 151654,
+    "vocab_size": 151936
   },
-  "torch_dtype": "float32",
+  "torch_dtype": "bfloat16",
   "transformers_version": "4.56.0.dev0",
+  "use_cache": true,
+  "use_sliding_window": false,
   "video_token_id": 151656,
   "vision_config": {
-    "depth": 4,
-    "embed_dim": 64,
+    "depth": 32,
     "fullatt_block_indexes": [
       7,
       15,
@@ -51,21 +118,24 @@
     "hidden_act": "silu",
     "hidden_size": 16,
     "in_channels": 3,
+    "in_chans": 3,
     "initializer_range": 0.02,
-    "intermediate_size": 32,
+    "intermediate_size": 3420,
     "model_type": "qwen2_5_vl",
     "num_attention_heads": 4,
     "num_heads": 16,
     "num_hidden_layers": 2,
+    "num_key_value_heads": 2,
     "out_hidden_size": 16,
     "patch_size": 14,
     "spatial_merge_size": 2,
+    "spatial_patch_size": 14,
     "temporal_patch_size": 2,
-    "tokens_per_second": 4,
+    "tokens_per_second": 2,
     "window_size": 112
   },
   "vision_end_token_id": 151653,
   "vision_start_token_id": 151652,
   "vision_token_id": 151654,
-  "vocab_size": 151665
+  "vocab_size": 151936
 }
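After this change, the text and vision hyperparameters live in nested text_config / vision_config blocks, with several values also mirrored at the top level. A minimal sketch of inspecting the new layout, assuming a recent transformers version and a placeholder repo id:

# Illustrative sketch, not part of the commit: load the updated config and
# read a few of the values changed in this diff. The repo id is a placeholder.
from transformers import AutoConfig

config = AutoConfig.from_pretrained("<namespace>/<repo-name>")  # placeholder

print(config.model_type)               # "qwen2_5_vl"
print(config.text_config.hidden_size)  # 16 (tiny, test-sized text tower)
print(config.text_config.vocab_size)   # 151936 (was 151665 before this commit)
print(config.vision_config.depth)      # 32 (was 4)
print(config.torch_dtype)              # bfloat16 (was float32)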
generation_config.json CHANGED
@@ -1,4 +1,6 @@
 {
   "_from_model_config": true,
+  "bos_token_id": 151643,
+  "eos_token_id": 151645,
   "transformers_version": "4.56.0.dev0"
 }
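The two added fields map directly onto transformers' GenerationConfig; a minimal sketch reproducing the new file locally, using the values from the diff above:

# Illustrative sketch, not part of the commit: build the equivalent
# generation config in code with the token ids added in this diff.
from transformers import GenerationConfig

gen_config = GenerationConfig(
    bos_token_id=151643,  # added in this commit
    eos_token_id=151645,  # added in this commit
)
print(gen_config.bos_token_id, gen_config.eos_token_id)  # 151643 151645
# gen_config.save_pretrained("some_dir")  # would write a generation_config.json like the one above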
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a31c03e2817b19f8096a13b8f0cb39a136fa0260fb48bc9f036e0f678cf57271
-size 19580256
+oid sha256:d8d012752bba2a00066a50891c7c7f85215ed31293a0491b9e87c415b1150b59
+size 18086192
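model.safetensors is stored through Git LFS, so the diff only shows the pointer file: the sha256 oid and the byte size changed with the new weights. A minimal sketch (standard library only) for checking a downloaded copy against the new pointer; the local path is a placeholder:

# Illustrative sketch, not part of the commit: verify a downloaded
# model.safetensors against the oid/size recorded in the LFS pointer above.
import hashlib
import os

def verify_lfs_object(path: str, expected_oid: str, expected_size: int) -> bool:
    """Compare a local file's sha256 digest and byte size to an LFS pointer."""
    sha256 = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            sha256.update(chunk)
    return (
        sha256.hexdigest() == expected_oid
        and os.path.getsize(path) == expected_size
    )

# Values from the new pointer in this commit; "model.safetensors" is a placeholder path.
print(verify_lfs_object(
    "model.safetensors",
    "d8d012752bba2a00066a50891c7c7f85215ed31293a0491b9e87c415b1150b59",
    18086192,
))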