hf-internal-testing
/

tiny-random-VisionEncoderDecoderModel-donutswin-mbart

@@ -7,6 +7,7 @@
     "activation_dropout": 0.0,
     "activation_function": "gelu",
     "add_cross_attention": true,
     "architectures": null,
     "attention_dropout": 0.0,
     "bad_words_ids": null,
@@ -17,7 +18,7 @@
     "cross_attention_hidden_size": null,
     "d_model": 16,
     "decoder_attention_heads": 2,
-    "decoder_ffn_dim": 32,
     "decoder_layerdrop": 0.0,
     "decoder_layers": 1,
     "decoder_start_token_id": null,
@@ -26,7 +27,7 @@
     "dropout": 0.1,
     "early_stopping": false,
     "encoder_attention_heads": 2,
-    "encoder_ffn_dim": 32,
     "encoder_layerdrop": 0.0,
     "encoder_layers": 1,
     "encoder_no_repeat_ngram_size": 0,
@@ -67,7 +68,7 @@
     "repetition_penalty": 1.0,
     "return_dict": true,
     "return_dict_in_generate": false,
-    "scale_embedding": false,
     "sep_token_id": null,
     "suppress_tokens": null,
     "task_specific_params": null,
@@ -97,6 +98,8 @@
     "cross_attention_hidden_size": null,
     "decoder_start_token_id": null,
     "depths": [
       2
     ],
     "diversity_penalty": 0.0,
@@ -112,15 +115,12 @@
     "forced_eos_token_id": null,
     "hidden_act": "gelu",
     "hidden_dropout_prob": 0.0,
-    "hidden_size": 96,
     "id2label": {
       "0": "LABEL_0",
       "1": "LABEL_1"
     },
-    "image_size": [
-      2560,
-      1920
-    ],
     "initializer_range": 0.02,
     "is_decoder": false,
     "is_encoder_decoder": false,
@@ -139,15 +139,18 @@
     "num_beams": 1,
     "num_channels": 3,
     "num_heads": [
-      1
     ],
-    "num_layers": 1,
     "num_return_sequences": 1,
     "output_attentions": false,
     "output_hidden_states": false,
     "output_scores": false,
     "pad_token_id": null,
     "patch_size": 4,
     "prefix": null,
     "problem_type": null,
     "pruned_heads": {},
@@ -171,11 +174,11 @@
     "typical_p": 1.0,
     "use_absolute_embeddings": false,
     "use_bfloat16": false,
-    "window_size": 7
   },
   "is_encoder_decoder": true,
   "model_type": "vision-encoder-decoder",
   "tie_word_embeddings": false,
   "torch_dtype": "float32",
-  "transformers_version": "4.43.4"
 }

     "activation_dropout": 0.0,
     "activation_function": "gelu",
     "add_cross_attention": true,
+    "add_final_layer_norm": true,
     "architectures": null,
     "attention_dropout": 0.0,
     "bad_words_ids": null,
     "cross_attention_hidden_size": null,
     "d_model": 16,
     "decoder_attention_heads": 2,
+    "decoder_ffn_dim": 64,
     "decoder_layerdrop": 0.0,
     "decoder_layers": 1,
     "decoder_start_token_id": null,
     "dropout": 0.1,
     "early_stopping": false,
     "encoder_attention_heads": 2,
+    "encoder_ffn_dim": 64,
     "encoder_layerdrop": 0.0,
     "encoder_layers": 1,
     "encoder_no_repeat_ngram_size": 0,
     "repetition_penalty": 1.0,
     "return_dict": true,
     "return_dict_in_generate": false,
+    "scale_embedding": true,
     "sep_token_id": null,
     "suppress_tokens": null,
     "task_specific_params": null,
     "cross_attention_hidden_size": null,
     "decoder_start_token_id": null,
     "depths": [
+      2,
+      4,
       2
     ],
     "diversity_penalty": 0.0,
     "forced_eos_token_id": null,
     "hidden_act": "gelu",
     "hidden_dropout_prob": 0.0,
+    "hidden_size": 384,
     "id2label": {
       "0": "LABEL_0",
       "1": "LABEL_1"
     },
+    "image_size": 224,
     "initializer_range": 0.02,
     "is_decoder": false,
     "is_encoder_decoder": false,
     "num_beams": 1,
     "num_channels": 3,
     "num_heads": [
+      1,
+      2,
+      4
     ],
+    "num_layers": 3,
     "num_return_sequences": 1,
     "output_attentions": false,
     "output_hidden_states": false,
     "output_scores": false,
     "pad_token_id": null,
     "patch_size": 4,
+    "path_norm": true,
     "prefix": null,
     "problem_type": null,
     "pruned_heads": {},
     "typical_p": 1.0,
     "use_absolute_embeddings": false,
     "use_bfloat16": false,
+    "window_size": 10
   },
   "is_encoder_decoder": true,
   "model_type": "vision-encoder-decoder",
   "tie_word_embeddings": false,
   "torch_dtype": "float32",
+  "transformers_version": "4.44.2"
 }

generation_config.json CHANGED Viewed

@@ -4,5 +4,5 @@
   "eos_token_id": 2,
   "forced_eos_token_id": 2,
   "pad_token_id": 1,
-  "transformers_version": "4.43.4"
 }

   "eos_token_id": 2,
   "forced_eos_token_id": 2,
   "pad_token_id": 1,
+  "transformers_version": "4.44.2"
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c6ba495d7635b3331bd3e7d86d416d2d4cc15ff3015e296fa644b8d8d5e3db8d
-size 4673200

 version https://git-lfs.github.com/spec/v1
+oid sha256:c706f1405873012f46817ff5fd32e98f4e4b621fe07ef379c7472b4717a809cd
+size 28133944