qgallouedec (HF Staff) committed · verified
Commit ca78849 · 1 Parent(s): c76a4ec

Upload Qwen2VLForConditionalGeneration

Files changed (3)
  1. config.json +71 -9
  2. generation_config.json +2 -0
  3. model.safetensors +2 -2
config.json CHANGED
@@ -2,26 +2,79 @@
   "architectures": [
     "Qwen2VLForConditionalGeneration"
   ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 151643,
+  "eos_token_id": 151645,
+  "hidden_act": "silu",
+  "hidden_size": 1536,
   "image_token_id": 151655,
+  "initializer_range": 0.02,
+  "intermediate_size": 8960,
+  "max_position_embeddings": 32768,
+  "max_window_layers": 28,
   "model_type": "qwen2_vl",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 28,
+  "num_key_value_heads": 2,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": {
+    "mrope_section": [
+      2
+    ],
+    "rope_type": "default",
+    "type": "default"
+  },
+  "rope_theta": 1000000.0,
+  "sliding_window": 32768,
   "text_config": {
+    "architectures": [
+      "Qwen2VLForConditionalGeneration"
+    ],
     "attention_dropout": 0.0,
+    "bos_token_id": 151643,
+    "eos_token_id": 151645,
     "hidden_act": "silu",
     "hidden_size": 16,
     "image_token_id": null,
     "initializer_range": 0.02,
-    "intermediate_size": 32,
+    "intermediate_size": 8960,
     "layer_types": [
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
       "full_attention",
       "full_attention"
     ],
     "max_position_embeddings": 32768,
-    "max_window_layers": 80,
+    "max_window_layers": 28,
     "model_type": "qwen2_vl_text",
     "num_attention_heads": 4,
     "num_hidden_layers": 2,
     "num_key_value_heads": 2,
-    "rms_norm_eps": 1e-05,
+    "rms_norm_eps": 1e-06,
     "rope_scaling": {
       "mrope_section": [
         2
@@ -31,33 +84,42 @@
     },
     "rope_theta": 1000000.0,
     "sliding_window": null,
+    "tie_word_embeddings": true,
+    "torch_dtype": "bfloat16",
     "use_cache": true,
     "use_sliding_window": false,
     "video_token_id": null,
-    "vocab_size": 151657
+    "vision_end_token_id": 151653,
+    "vision_start_token_id": 151652,
+    "vision_token_id": 151654,
+    "vocab_size": 151936
   },
-  "torch_dtype": "float32",
+  "torch_dtype": "bfloat16",
   "transformers_version": "4.56.0.dev0",
+  "use_cache": true,
+  "use_sliding_window": false,
   "video_token_id": 151656,
   "vision_config": {
-    "depth": 4,
-    "embed_dim": 64,
+    "depth": 32,
+    "embed_dim": 1280,
     "hidden_act": "quick_gelu",
     "hidden_size": 16,
     "in_channels": 3,
+    "in_chans": 3,
     "initializer_range": 0.02,
-    "intermediate_size": 32,
     "mlp_ratio": 4,
     "model_type": "qwen2_vl",
     "num_attention_heads": 4,
     "num_heads": 16,
     "num_hidden_layers": 2,
+    "num_key_value_heads": 2,
     "patch_size": 14,
     "spatial_merge_size": 2,
+    "spatial_patch_size": 14,
     "temporal_patch_size": 2
   },
   "vision_end_token_id": 151653,
   "vision_start_token_id": 151652,
   "vision_token_id": 151654,
-  "vocab_size": 151657
+  "vocab_size": 151936
 }
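For reference, a minimal sketch of how the reorganized config could be inspected with transformers (a build matching the pinned transformers_version 4.56.0.dev0 is assumed). The repository id org/tiny-qwen2-vl is a placeholder; this commit page does not name the repo.

```python
from transformers import AutoConfig

# Placeholder repo id (assumption): substitute the actual repository.
config = AutoConfig.from_pretrained("org/tiny-qwen2-vl")

# After this commit the text and vision settings live in nested sub-configs,
# mirroring the new config.json layout above.
print(config.model_type)                 # "qwen2_vl"
print(config.text_config.hidden_size)    # 16 (tiny test-sized text tower)
print(config.text_config.vocab_size)     # 151936
print(config.vision_config.depth)        # 32
print(config.vision_config.num_heads)    # 16
```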
generation_config.json CHANGED
@@ -1,4 +1,6 @@
 {
   "_from_model_config": true,
+  "bos_token_id": 151643,
+  "eos_token_id": 151645,
   "transformers_version": "4.56.0.dev0"
 }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a85c07e6b1f619c68b900b1701ae5a3816d280543c4e706c3903d026a77a4ea5
3
- size 20820208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4bc0214f634265ef3038644272509cc4c7124a09cac9ea18e33e5a9ad9452aa
3
+ size 1321603552
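A minimal loading sketch under the same assumptions (placeholder repo id, transformers build compatible with the pinned 4.56.0.dev0). The dtype argument follows the new "torch_dtype": "bfloat16" entry, and the two new generation_config.json fields should then surface on model.generation_config.

```python
import torch
from transformers import Qwen2VLForConditionalGeneration

model = Qwen2VLForConditionalGeneration.from_pretrained(
    "org/tiny-qwen2-vl",         # placeholder repo id (assumption)
    torch_dtype=torch.bfloat16,  # matches "torch_dtype": "bfloat16" in config.json
)

# generation_config.json now pins the special tokens explicitly.
print(model.generation_config.bos_token_id)  # 151643
print(model.generation_config.eos_token_id)  # 151645
```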