HongyuJia committed on
Commit 3e6cbc8
0 Parent(s):

Init commit

Files changed (34)
  1. .gitattributes +38 -0
  2. README.md +233 -0
  3. ckpt/magi/24B_base/inference_weight/model-00001-of-00006.safetensors +3 -0
  4. ckpt/magi/24B_base/inference_weight/model-00002-of-00006.safetensors +3 -0
  5. ckpt/magi/24B_base/inference_weight/model-00003-of-00006.safetensors +3 -0
  6. ckpt/magi/24B_base/inference_weight/model-00004-of-00006.safetensors +3 -0
  7. ckpt/magi/24B_base/inference_weight/model-00005-of-00006.safetensors +3 -0
  8. ckpt/magi/24B_base/inference_weight/model-00006-of-00006.safetensors +3 -0
  9. ckpt/magi/24B_base/inference_weight/model.safetensors.index.json +0 -0
  10. ckpt/magi/24B_distill/inference_weight.distill/model-00001-of-00006.safetensors +3 -0
  11. ckpt/magi/24B_distill/inference_weight.distill/model-00002-of-00006.safetensors +3 -0
  12. ckpt/magi/24B_distill/inference_weight.distill/model-00003-of-00006.safetensors +3 -0
  13. ckpt/magi/24B_distill/inference_weight.distill/model-00004-of-00006.safetensors +3 -0
  14. ckpt/magi/24B_distill/inference_weight.distill/model-00005-of-00006.safetensors +3 -0
  15. ckpt/magi/24B_distill/inference_weight.distill/model-00006-of-00006.safetensors +3 -0
  16. ckpt/magi/24B_distill/inference_weight.distill/model.safetensors.index.json +0 -0
  17. ckpt/magi/24B_distill_quant/inference_weight.fp8.distill/model-00001-of-00003.safetensors +3 -0
  18. ckpt/magi/24B_distill_quant/inference_weight.fp8.distill/model-00002-of-00003.safetensors +3 -0
  19. ckpt/magi/24B_distill_quant/inference_weight.fp8.distill/model-00003-of-00003.safetensors +3 -0
  20. ckpt/magi/24B_distill_quant/inference_weight.fp8.distill/model.safetensors.index.json +0 -0
  21. ckpt/t5/t5-v1_1-xxl/config.json +31 -0
  22. ckpt/t5/t5-v1_1-xxl/pytorch_model-00001-of-00002.bin +3 -0
  23. ckpt/t5/t5-v1_1-xxl/pytorch_model-00002-of-00002.bin +3 -0
  24. ckpt/t5/t5-v1_1-xxl/pytorch_model.bin.index.json +227 -0
  25. ckpt/t5/t5-v1_1-xxl/special_tokens_map.json +1 -0
  26. ckpt/t5/t5-v1_1-xxl/spiece.model +3 -0
  27. ckpt/t5/t5-v1_1-xxl/t5-v1_1-xxl/pytorch_model-00001-of-00002.bin +3 -0
  28. ckpt/t5/t5-v1_1-xxl/tokenizer_config.json +1 -0
  29. ckpt/vae/config.json +22 -0
  30. ckpt/vae/diffusion_pytorch_model.safetensors +3 -0
  31. figures/algorithm.png +3 -0
  32. figures/dit_architecture.png +3 -0
  33. figures/inhouse_human_evaluation.png +3 -0
  34. figures/logo_black.png +0 -0
.gitattributes ADDED
@@ -0,0 +1,38 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
+ figures/algorithm.png filter=lfs diff=lfs merge=lfs -text
+ figures/dit_architecture.png filter=lfs diff=lfs merge=lfs -text
+ figures/inhouse_human_evaluation.png filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,233 @@
+ ---
+ license: apache-2.0
+ language:
+ - en
+ pipeline_tag: image-to-video
+ library_name: MAGI-1
+ ---
+
+ ![magi-logo](figures/logo_black.png)
+
+
+ -----
+
+ <p align="center" style="line-height: 1;">
+ <a href="https://static.magi.world/static/files/MAGI_1.pdf" target="_blank" style="margin: 2px;">
+ <img alt="paper" src="https://img.shields.io/badge/Paper-arXiv-B31B1B?logo=arxiv" style="display: inline-block; vertical-align: middle;">
+ </a>
+ <a href="https://sand.ai" target="_blank" style="margin: 2px;">
+ <img alt="blog" src="https://img.shields.io/badge/Sand%20AI-Homepage-333333.svg?logo=data:image/svg%2bxml;base64,PHN2ZyB3aWR0aD0iODAwIiBoZWlnaHQ9IjgwMCIgdmlld0JveD0iMCAwIDgwMCA4MDAiIGZpbGw9Im5vbmUiIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyI+CjxwYXRoIGZpbGwtcnVsZT0iZXZlbm9kZCIgY2xpcC1ydWxlPSJldmVub2RkIiBkPSJNMjI3IDIyNS4wODVDMjI3IDIwMi4zMDMgMjI3IDE5MC45MTIgMjMxLjQzNyAxODIuMjExQzIzNS4zMzkgMTc0LjU1NyAyNDEuNTY2IDE2OC4zMzQgMjQ5LjIyNiAxNjQuNDM0QzI1Ny45MzMgMTYwIDI2OS4zMzIgMTYwIDI5Mi4xMjkgMTYwSDUwNy44NzFDNTA5LjI5NSAxNjAgNTEwLjY3NiAxNjAgNTEyLjAxNCAxNjAuMDAxQzUzMi4wODIgMTYwLjAxNyA1NDIuNjExIDE2MC4yNzcgNTUwLjc3NCAxNjQuNDM0QzU1OC40MzQgMTY4LjMzNCA1NjQuNjYxIDE3NC41NTcgNTY4LjU2MyAxODIuMjExQzU3MyAxOTAuOTEyIDU3MyAyMDIuMzAzIDU3MyAyMjUuMDg1VjI1Ni41NThDNTczIDI5MS4zMTkgNTczIDMwOC43IDU2NS4wMzUgMzIzLjI3OUM1NTguNzU2IDMzNC43NzIgNTQzLjU2NSAzNDYuMTEgNTIzLjA3OCAzNTkuNjA1QzUxNC42NzQgMzY1LjE0MSA1MTAuNDcyIDM2Ny45MDkgNTA1LjYzOSAzNjcuOTM2QzUwMC44MDYgMzY3Ljk2NCA0OTYuNTAzIDM2NS4yIDQ4Ny44OTYgMzU5LjY3MUw0ODcuODk2IDM1OS42N0w0NjYuNDY5IDM0NS45MDVDNDU2Ljg3NSAzMzkuNzQyIDQ1Mi4wNzggMzM2LjY2IDQ1Mi4wNzggMzMyLjIxOEM0NTIuMDc4IDMyNy43NzcgNDU2Ljg3NSAzMjQuNjk1IDQ2Ni40NjkgMzE4LjUzMUw1MjYuNzgyIDI3OS43ODVDNTM1LjI5MSAyNzQuMzE5IDU0MC40MzUgMjY0LjkwMyA1NDAuNDM1IDI1NC43OTRDNTQwLjQzNSAyMzguMzg2IDUyNy4xMjUgMjI1LjA4NSA1MTAuNzA1IDIyNS4wODVIMjg5LjI5NUMyNzIuODc1IDIyNS4wODUgMjU5LjU2NSAyMzguMzg2IDI1OS41NjUgMjU0Ljc5NEMyNTkuNTY1IDI2NC45MDMgMjY0LjcwOSAyNzQuMzE5IDI3My4yMTggMjc5Ljc4NUw1MTMuMTggNDMzLjk0MUM1NDIuNDQxIDQ1Mi43MzggNTU3LjA3MSA0NjIuMTM3IDU2NS4wMzUgNDc2LjcxNkM1NzMgNDkxLjI5NCA1NzMgNTA4LjY3NSA1NzMgNTQzLjQzNlY1NzQuOTE1QzU3MyA1OTcuNjk3IDU3MyA2MDkuMDg4IDU2OC41NjMgNjE3Ljc4OUM1NjQuNjYxIDYyNS40NDQgNTU4LjQzNCA2MzEuNjY2IDU1MC43NzQgNjM1LjU2NkM1NDIuMDY3IDY0MCA1MzAuNjY4IDY0MCA1MDcuODcxIDY0MEgyOTIuMTI5QzI2OS4zMzIgNjQwIDI1Ny45MzMgNjQwIDI0OS4yMjYgNjM1LjU2NkMyNDEuNTY2IDYzMS42NjYgMjM1LjMzOSA2MjUuNDQ0IDIzMS40MzcgNjE3Ljc4OUMyMjcgNjA5LjA4OCAyMjcgNTk3LjY5NyAyMjcgNTc0LjkxNVY1NDMuNDM2QzIyNyA1MDguNjc1IDIyNyA0OTEuMjk0IDIzNC45NjUgNDc2LjcxNkMyNDEuMjQ0IDQ2NS4yMjIgMjU2LjQzMyA0NTMuODg2IDI3Ni45MTggNDQwLjM5MkMyODUuMzIyIDQzNC44NTYgMjg5LjUyNSA0MzIuMDg4IDI5NC4zNTcgNDMyLjA2QzI5OS4xOSA0MzIuMDMyIDMwMy40OTQgNDM0Ljc5NyAzMTIuMSA0NDAuMzI2TDMzMy41MjcgNDU0LjA5MUMzNDMuMTIyIDQ2MC4yNTQgMzQ3LjkxOSA0NjMuMzM2IDM0Ny45MTkgNDY3Ljc3OEMzNDcuOTE5IDQ3Mi4yMiAzNDMuMTIyIDQ3NS4zMDEgMzMzLjUyOCA0ODEuNDY1TDMzMy41MjcgNDgxLjQ2NUwyNzMuMjIgNTIwLjIwOEMyNjQuNzA5IDUyNS42NzUgMjU5LjU2NSA1MzUuMDkxIDI1OS41NjUgNTQ1LjIwMkMyNTkuNTY1IDU2MS42MTIgMjcyLjg3NyA1NzQuOTE1IDI4OS4yOTkgNTc0LjkxNUg1MTAuNzAxQzUyNy4xMjMgNTc0LjkxNSA1NDAuNDM1IDU2MS42MTIgNTQwLjQzNSA1NDUuMjAyQzU0MC40MzUgNTM1LjA5MSA1MzUuMjkxIDUyNS42NzUgNTI2Ljc4IDUyMC4yMDhMMjg2LjgyIDM2Ni4wNTNDMjU3LjU2IDM0Ny4yNTYgMjQyLjkyOSAzMzcuODU3IDIzNC45NjUgMzIzLjI3OUMyMjcgMzA4LjcgMjI3IDI5MS4zMTkgMjI3IDI1Ni41NThWMjI1LjA4NVoiIGZpbGw9IiNGRkZGRkYiLz4KPC9zdmc+Cg==" style="display: inline-block; vertical-align: middle;">
+ </a>
+ <a href="https://magi.sand.ai" target="_blank" style="margin: 2px;">
+ <img alt="product" src="https://img.shields.io/badge/Magi-Product-logo.svg?logo=data:image/svg%2bxml;base64,PHN2ZyB3aWR0aD0iODAwIiBoZWlnaHQ9IjgwMCIgdmlld0JveD0iMCAwIDgwMCA4MDAiIGZpbGw9Im5vbmUiIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyI+CjxwYXRoIGZpbGwtcnVsZT0iZXZlbm9kZCIgY2xpcC1ydWxlPSJldmVub2RkIiBkPSJNNDY5LjAyNyA1MDcuOTUxVjE4MC4zNjRDNDY5LjAyNyAxNjguNDE2IDQ2OS4wMjcgMTYyLjQ0MiA0NjUuMjQ0IDE2MC41MTlDNDYxLjQ2MSAxNTguNTk2IDQ1Ni42NTkgMTYyLjEzIDQ0Ny4wNTYgMTY5LjE5OEwzNjEuMDQ4IDIzMi40OTZDMzQ2LjI5NiAyNDMuMzUzIDMzOC45MjEgMjQ4Ljc4MSAzMzQuOTQ3IDI1Ni42NUMzMzAuOTczIDI2NC41MTggMzMwLjk3MyAyNzMuNjk1IDMzMC45NzMgMjkyLjA0OVY2MTkuNjM2QzMzMC45NzMgNjMxLjU4NCAzMzAuOTczIDYzNy41NTggMzM0Ljc1NiA2MzkuNDgxQzMzOC41MzkgNjQxLjQwNCAzNDMuMzQxIDYzNy44NyAzNTIuOTQ0IDYzMC44MDJMNDM4Ljk1MiA1NjcuNTA0QzQ1My43MDQgNTU2LjY0OCA0NjEuMDggNTUxLjIxOSA0NjUuMDUzIDU0My4zNUM0NjkuMDI3IDUzNS40ODIgNDY5LjAyNyA1MjYuMzA1IDQ2OS4wMjcgNTA3Ljk1MVpNMjg3LjkwNyA0OTQuMTU1VjIyMS45M0MyODcuOTA3IDIxNC4wMDIgMjg3LjkwNyAyMTAuMDM5IDI4NS4zOTQgMjA4Ljc1NEMyODIuODgxIDIwNy40NyAyNzkuNjg0IDIwOS44MDEgMjczLjI5MiAyMTQuNDYyTDIwOS40MjEgMjYxLjAzMkMxOTguMjYyIDI2OS4xNjggMTkyLjY4MyAyNzMuMjM2IDE4OS42NzUgMjc5LjE2QzE4Ni42NjcgMjg1LjA4NCAxODYuNjY3IDI5Mi4wMDMgMTg2LjY2NyAzMDUuODQxVjU3OC4wNjdDMTg2LjY2NyA1ODUuOTk0IDE4Ni42NjcgNTg5Ljk1OCAxODkuMTggNTkxLjI0MkMxOTEuNjkzIDU5Mi41MjYgMTk0Ljg4OSA1OTAuMTk2IDIwMS4yODIgNTg1LjUzNUwyNjUuMTUyIDUzOC45NjVDMjc2LjMxMSA1MzAuODI5IDI4MS44OSA1MjYuNzYxIDI4NC44OTkgNTIwLjgzN0MyODcuOTA3IDUxNC45MTMgMjg3LjkwNyA1MDcuOTk0IDI4Ny45MDcgNDk0LjE1NVpNNjEzLjMzMyAyMjEuOTNWNDk0LjE1NUM2MTMuMzMzIDUwNy45OTQgNjEzLjMzMyA1MTQuOTEzIDYxMC4zMjUgNTIwLjgzN0M2MDcuMzE3IDUyNi43NjEgNjAxLjczOCA1MzAuODI5IDU5MC41NzkgNTM4Ljk2NUw1MjYuNzA4IDU4NS41MzVDNTIwLjMxNiA1OTAuMTk2IDUxNy4xMTkgNTkyLjUyNiA1MTQuNjA2IDU5MS4yNDJDNTEyLjA5MyA1ODkuOTU4IDUxMi4wOTMgNTg1Ljk5NCA1MTIuMDkzIDU3OC4wNjdWMzA1Ljg0MUM1MTIuMDkzIDI5Mi4wMDMgNTEyLjA5MyAyODUuMDg0IDUxNS4xMDIgMjc5LjE2QzUxOC4xMSAyNzMuMjM2IDUyMy42ODkgMjY5LjE2OCA1MzQuODQ4IDI2MS4wMzJMNTk4LjcxOSAyMTQuNDYyQzYwNS4xMTEgMjA5LjgwMSA2MDguMzA3IDIwNy40NyA2MTAuODIgMjA4Ljc1NEM2MTMuMzMzIDIxMC4wMzkgNjEzLjMzMyAyMTQuMDAyIDYxMy4zMzMgMjIxLjkzWiIgZmlsbD0iI0ZGRkZGRiIgc2hhcGUtcmVuZGVyaW5nPSJjcmlzcEVkZ2VzIi8+Cjwvc3ZnPgo=&color=DCBE7E" style="display: inline-block; vertical-align: middle;">
+ </a>
+ <a href="https://huggingface.co/sand-ai" target="_blank" style="margin: 2px;">
+ <img alt="Hugging Face" src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Sand AI-ffc107?color=ffc107&logoColor=white" style="display: inline-block; vertical-align: middle;">
+ </a>
+ <a href="https://x.com/SandAI_HQ" target="_blank" style="margin: 2px;">
+ <img alt="Twitter Follow" src="https://img.shields.io/badge/Twitter-Sand%20AI-white?logo=x&logoColor=white" style="display: inline-block; vertical-align: middle;">
+ </a>
+ <a href="https://discord.gg/hgaZ86D7Wv" target="_blank" style="margin: 2px;">
+ <img alt="Discord" src="https://img.shields.io/badge/Discord-Sand%20AI-7289da?logo=discord&logoColor=white&color=7289da" style="display: inline-block; vertical-align: middle;">
+ </a>
+ <a href="https://github.com/SandAI-org/Magi/LICENSE" target="_blank" style="margin: 2px;">
+ <img alt="license" src="https://img.shields.io/badge/License-Apache2.0-green?logo=Apache" style="display: inline-block; vertical-align: middle;">
+ </a>
+ </p>
+
+ # MAGI-1: Autoregressive Video Generation at Scale
+
+ This repository contains the pre-trained weights and inference code for the MAGI-1 model. You can find more information in our [technical report](https://static.magi.world/static/files/MAGI_1.pdf) or directly create magic with MAGI-1 [here](http://sand.ai). 🚀✨
+
+
+ ## 🔥🔥🔥 Latest News
+
+ - Apr 21, 2025: MAGI-1 is here 🎉. We've released the model weights and inference code — check it out!
+
+
+ ## 1. About
+
+ We present MAGI-1, a world model that generates videos by ***autoregressively*** predicting a sequence of video chunks, defined as fixed-length segments of consecutive frames. Trained to denoise per-chunk noise that increases monotonically over time, MAGI-1 enables causal temporal modeling and naturally supports streaming generation. It achieves strong performance on image-to-video (I2V) tasks conditioned on text instructions, providing high temporal consistency and scalability, which are made possible by several algorithmic innovations and a dedicated infrastructure stack. MAGI-1 further supports controllable generation via chunk-wise prompting, enabling smooth scene transitions, long-horizon synthesis, and fine-grained text-driven control. We believe MAGI-1 offers a promising direction for unifying high-fidelity video generation with flexible instruction control and real-time deployment.
+
+
+ ## 2. Model Summary
+
+ ### Transformer-based VAE
+
+ - Variational autoencoder (VAE) with a transformer-based architecture, providing 8x spatial and 4x temporal compression (see the latent-shape sketch after this list).
+ - Fastest average decoding time and highly competitive reconstruction quality.
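+
+ As a quick orientation, the sketch below works out the latent grid implied by the compression factors above. It is illustrative only: the `(T, C, H, W)` layout and the helper function are assumptions, while the 16 latent channels match `z_chans` in `ckpt/vae/config.json` from this commit.
+
+ ```python
+ # Latent-shape arithmetic implied by 8x spatial / 4x temporal compression.
+ def latent_shape(num_frames: int, height: int, width: int, z_channels: int = 16):
+     t = num_frames // 4   # 4x temporal compression
+     h = height // 8       # 8x spatial compression
+     w = width // 8
+     return (t, z_channels, h, w)
+
+ print(latent_shape(num_frames=96, height=720, width=1280))  # -> (24, 16, 90, 160)
+ ```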
+
+ ### Auto-Regressive Denoising Algorithm
+
+ MAGI-1 is an autoregressive denoising video generation model that generates videos chunk by chunk instead of as a whole. Each chunk (24 frames) is denoised holistically, and the generation of the next chunk begins as soon as the current one reaches a certain level of denoising. This pipeline design enables concurrent processing of up to four chunks for efficient video generation.
+
+ ![auto-regressive denoising algorithm](figures/algorithm.png)
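+
+ To make the pipelining concrete, here is a toy scheduler that mimics the behaviour described above: every in-flight chunk takes one denoising step per iteration, a new chunk is admitted once the newest one is partially denoised, and at most four chunks are processed concurrently. This is an illustrative sketch under assumed constants (`TOTAL_STEPS`, `ADMIT_AFTER`) and a placeholder `denoise_step` callback, not the repository's implementation.
+
+ ```python
+ MAX_ACTIVE = 4     # up to four chunks in flight (as stated above)
+ TOTAL_STEPS = 64   # denoising steps per chunk (placeholder)
+ ADMIT_AFTER = 16   # steps the newest chunk must finish before the next one starts
+
+ def generate(num_chunks: int, denoise_step) -> None:
+     progress = {0: 0}                    # chunk index -> denoising steps completed
+     finished = 0
+     while finished < num_chunks:
+         for chunk, steps in list(progress.items()):
+             denoise_step(chunk=chunk, step=steps + 1)   # one step for every in-flight chunk
+             progress[chunk] = steps + 1
+         newest = max(progress)
+         if (len(progress) < MAX_ACTIVE and newest + 1 < num_chunks
+                 and progress[newest] >= ADMIT_AFTER):
+             progress[newest + 1] = 0     # newest chunk is clean enough; start the next one
+         oldest = min(progress)
+         if progress[oldest] >= TOTAL_STEPS:
+             del progress[oldest]         # oldest chunk fully denoised; emit/decode it
+             finished += 1
+             if not progress and finished < num_chunks:
+                 progress[oldest + 1] = 0  # keep the pipeline from stalling
+ ```
+
+ With these placeholder constants, chunk starts are staggered by 16 steps, so four chunks are being denoised simultaneously at steady state.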
+
+ ### Diffusion Model Architecture
+
+ MAGI-1 is built upon the Diffusion Transformer, incorporating several key innovations to enhance training efficiency and stability at scale. These advancements include Block-Causal Attention, Parallel Attention Block, QK-Norm and GQA, Sandwich Normalization in FFN, SwiGLU, and Softcap Modulation. For more details, please refer to the [technical report](https://static.magi.world/static/files/MAGI_1.pdf).
+ <div align="center">
+ <img src="figures/dit_architecture.png" alt="diffusion model architecture" width="500" />
+ </div>
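+
+ As one concrete example of these components, the snippet below builds a block-causal attention mask in the sense suggested by the chunk-wise design above: tokens attend to everything in their own chunk and in earlier chunks, but not to later chunks. This is an assumed reading for illustration, not the repository's attention code; a boolean mask of this form can be passed to `torch.nn.functional.scaled_dot_product_attention` as `attn_mask` (True means the position may be attended to).
+
+ ```python
+ import torch
+
+ def block_causal_mask(num_tokens: int, tokens_per_chunk: int) -> torch.Tensor:
+     chunk_id = torch.arange(num_tokens) // tokens_per_chunk
+     # mask[i, j] is True where token i may attend to token j,
+     # i.e. j belongs to the same chunk as i or to an earlier one.
+     return chunk_id[None, :] <= chunk_id[:, None]
+
+ mask = block_causal_mask(num_tokens=8, tokens_per_chunk=4)
+ # The first 4 tokens see only chunk 0; the last 4 see chunks 0 and 1.
+ ```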
+
+ ### Distillation Algorithm
+
+ We adopt a shortcut distillation approach that trains a single velocity-based model to support variable inference budgets. By enforcing a self-consistency constraint—equating one large step with two smaller steps—the model learns to approximate flow-matching trajectories across multiple step sizes. During training, step sizes are cyclically sampled from {64, 32, 16, 8}, and classifier-free guidance distillation is incorporated to preserve conditional alignment. This enables efficient inference with minimal loss in fidelity.
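+
+ The self-consistency constraint can be written down compactly. The sketch below assumes a velocity model `v(x, t, d)` conditioned on step size `d`, and trains one Euler step of size `2*d` to match the average of two consecutive steps of size `d`. The function names, the squared-error form, and reading the sampled set {64, 32, 16, 8} as step counts (i.e. d = 1/64, ..., 1/8) are assumptions; classifier-free guidance distillation is omitted.
+
+ ```python
+ import torch
+
+ def shortcut_consistency_loss(v, x, t, d):
+     # Teacher target: two small steps of size d, evaluated without gradients.
+     with torch.no_grad():
+         v1 = v(x, t, d)
+         x_mid = x + d * v1                 # Euler step to t + d
+         v2 = v(x_mid, t + d, d)
+         target = (v1 + v2) / 2             # average velocity over the 2*d interval
+     # Student prediction: one "shortcut" step of size 2*d from the same state.
+     pred = v(x, t, 2 * d)
+     return torch.mean((pred - target) ** 2)
+ ```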
+
+
+ ## 3. Model Zoo
+
+ We provide the pre-trained weights for MAGI-1, including the 24B and 4.5B models, as well as the corresponding distill and distill+quant models. The weight links are listed in the table below, followed by a short download sketch.
+
+ | Model | Link | Recommended Machine |
+ | ----------------------------- | ------------------------------------------------------------ | ------------------------------- |
+ | T5 | [T5](https://huggingface.co/sand-ai/MAGI-1/tree/main/ckpt/t5) | - |
+ | MAGI-1-VAE | [MAGI-1-VAE](https://huggingface.co/sand-ai/MAGI-1/tree/main/ckpt/vae) | - |
+ | MAGI-1-24B | [MAGI-1-24B](https://huggingface.co/sand-ai/MAGI-1/tree/main/ckpt/magi/24B_base) | H100/H800 \* 8 |
+ | MAGI-1-24B-distill | [MAGI-1-24B-distill](https://huggingface.co/sand-ai/MAGI-1/tree/main/ckpt/magi/24B_distill) | H100/H800 \* 8 |
+ | MAGI-1-24B-distill+fp8_quant | [MAGI-1-24B-distill+quant](https://huggingface.co/sand-ai/MAGI-1/tree/main/ckpt/magi/24B_distill_quant) | H100/H800 \* 4 or RTX 4090 \* 8 |
+ | MAGI-1-4.5B | MAGI-1-4.5B | RTX 4090 \* 1 |
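+
+ If you only need part of the repository, the weights can also be fetched programmatically with `huggingface_hub` (illustrative usage; the `allow_patterns` below simply mirror the `ckpt/` folder layout of this commit, so adjust them to the variant you want):
+
+ ```python
+ from huggingface_hub import snapshot_download
+
+ # Download the T5 encoder, the VAE, and the distilled 24B DiT weights only.
+ snapshot_download(
+     repo_id="sand-ai/MAGI-1",
+     local_dir="MAGI-1",
+     allow_patterns=[
+         "ckpt/t5/*",
+         "ckpt/vae/*",
+         "ckpt/magi/24B_distill/*",
+     ],
+ )
+ ```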
+
+ ## 4. Evaluation
+
+ ### In-house Human Evaluation
+
+ MAGI-1 achieves state-of-the-art performance among open-source models (surpassing Wan-2.1 and significantly outperforming Hailuo and HunyuanVideo), particularly excelling in instruction following and motion quality. This positions it as a strong potential competitor to closed-source commercial models such as Kling.
+
+ ![inhouse human evaluation](figures/inhouse_human_evaluation.png)
+
+ ### Physical Evaluation
+
+ Thanks to the natural advantages of its autoregressive architecture, MAGI-1 achieves far superior precision in predicting physical behavior through video continuation, significantly outperforming all existing models.
+
+ | Model | Phys. IQ Score ↑ | Spatial IoU ↑ | Spatiotemporal IoU ↑ | Weighted Spatial IoU ↑ | MSE ↓ |
+ |----------------|------------------|---------------|-------------------|-------------------------|--------|
+ | **V2V Models** | | | | | |
+ | **Magi (V2V)** | **56.02** | **0.367** | **0.270** | **0.304** | **0.005** |
+ | VideoPoet (V2V)| 29.50 | 0.204 | 0.164 | 0.137 | 0.010 |
+ | **I2V Models** | | | | | |
+ | **Magi (I2V)** | **30.23** | **0.203** | **0.151** | **0.154** | **0.012** |
+ | Kling1.6 (I2V) | 23.64 | 0.197 | 0.086 | 0.144 | 0.025 |
+ | VideoPoet (I2V)| 20.30 | 0.141 | 0.126 | 0.087 | 0.012 |
+ | Gen 3 (I2V) | 22.80 | 0.201 | 0.115 | 0.116 | 0.015 |
+ | Wan2.1 (I2V) | 20.89 | 0.153 | 0.100 | 0.112 | 0.023 |
+ | Sora (I2V) | 10.00 | 0.138 | 0.047 | 0.063 | 0.030 |
+ | **GroundTruth**| **100.0** | **0.678** | **0.535** | **0.577** | **0.002** |
+
+
+ ## 5. How to Run
+
+ ### Environment Preparation
+
+ We provide two ways to run MAGI-1, with the Docker environment being the recommended option.
+
+ **Run with Docker Environment (Recommended)**
+
+ ```bash
+ docker pull sandai/magi:latest
+
+ docker run -it --gpus all --privileged --shm-size=32g --name magi --net=host --ipc=host --ulimit memlock=-1 --ulimit stack=6710886 sandai/magi:latest /bin/bash
+ ```
+
+ **Run with Source Code**
+
+ ```bash
+ # Create a new environment
+ conda create -n magi python==3.10.12
+
+ # Install pytorch
+ conda install pytorch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0 pytorch-cuda=12.4 -c pytorch -c nvidia
+
+ # Install other dependencies
+ pip install -r requirements.txt
+
+ # Install ffmpeg
+ conda install -c conda-forge ffmpeg=4.4
+
+ # Install MagiAttention; for more information, please refer to https://github.com/SandAI-org/MagiAttention
+ git clone git@github.com:SandAI-org/MagiAttention.git
+ cd MagiAttention
+ git submodule update --init --recursive
+ pip install --no-build-isolation .
+ ```
+
+
+ ### Inference Command
+
+ To run the `MagiPipeline`, you can control the input and output by modifying the parameters in the `example/24B/run.sh` or `example/4.5B/run.sh` script. Below is an explanation of the key parameters:
+
+ #### Parameter Descriptions
+
+ - `--config_file`: Specifies the path to the configuration file, which contains model configuration parameters, e.g., `example/24B/24B_config.json`.
+ - `--mode`: Specifies the mode of operation. Available options are:
+   - `t2v`: Text to Video
+   - `i2v`: Image to Video
+   - `v2v`: Video to Video
+ - `--prompt`: The text prompt used for video generation, e.g., `"Good Boy"`.
+ - `--image_path`: Path to the image file, used only in `i2v` mode.
+ - `--prefix_video_path`: Path to the prefix video file, used only in `v2v` mode.
+ - `--output_path`: Path where the generated video file will be saved.
+
+ #### Bash Script
+
+ ```bash
+ #!/bin/bash
+ # Run 24B MAGI-1 model
+ bash example/24B/run.sh
+
+ # Run 4.5B MAGI-1 model
+ bash example/4.5B/run.sh
+ ```
+
+ #### Customizing Parameters
+
+ You can modify the parameters in `run.sh` as needed. For example:
+
+ - To use the Image to Video mode (`i2v`), set `--mode` to `i2v` and provide `--image_path`:
+   ```bash
+   --mode i2v \
+   --image_path example/assets/image.jpeg \
+   ```
+
+ - To use the Video to Video mode (`v2v`), set `--mode` to `v2v` and provide `--prefix_video_path`:
+   ```bash
+   --mode v2v \
+   --prefix_video_path example/assets/prefix_video.mp4 \
+   ```
+
+ By adjusting these parameters, you can flexibly control the input and output to meet different requirements.
+
+ ### Some Useful Configs (for config.json)
+
+ | Config | Help |
+ | -------------- | ------------------------------------------------------------ |
+ | seed | Random seed used for video generation |
+ | video_size_h | Height of the generated video |
+ | video_size_w | Width of the generated video |
+ | num_frames | Controls the duration of the generated video |
+ | fps | Frames per second; 4 video frames correspond to 1 latent_frame |
+ | cfg_number | The base model uses cfg_number=2; the distill and quant models use cfg_number=1 |
+ | load | Directory containing a model checkpoint |
+ | t5_pretrained | Path to the pretrained T5 model |
+ | vae_pretrained | Path to the pretrained VAE model |
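+
+ For reference, the snippet below assembles these keys into a config fragment and writes it to disk. The values and the output filename are placeholders (the configuration actually shipped with the examples is `example/24B/24B_config.json`, which may contain additional fields); only the key names come from the table above.
+
+ ```python
+ import json
+
+ config = {
+     "seed": 1234,
+     "video_size_h": 720,
+     "video_size_w": 1280,
+     "num_frames": 96,   # 96 video frames -> 24 latent frames (4 video frames per latent_frame)
+     "fps": 24,
+     "cfg_number": 1,    # 1 for the distill / quant weights, 2 for the base model
+     "load": "ckpt/magi/24B_distill/inference_weight.distill",
+     "t5_pretrained": "ckpt/t5/t5-v1_1-xxl",
+     "vae_pretrained": "ckpt/vae",
+ }
+
+ with open("my_config.json", "w") as f:
+     json.dump(config, f, indent=2)
+ ```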
+
+
+ ## 6. License
+
+ This project is licensed under the Apache License 2.0 - see the [LICENSE](LICENSE) file for details.
+
+ ## 7. Citation
+
+ If you find our code or model useful in your research, please cite:
+
+ ```bibtex
+ @misc{magi1,
+   title={MAGI-1: Autoregressive Video Generation at Scale},
+   author={Sand-AI},
+   year={2025},
+   url={https://static.magi.world/static/files/MAGI_1.pdf},
+ }
+ ```
+
+ ## 8. Contact
+
+ If you have any questions, please feel free to raise an issue or contact us at [[email protected]]([email protected]).
ckpt/magi/24B_base/inference_weight/model-00001-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9e55c7996a8f517349a9c11b702f4217d27b8dc2bb4bbb99ab097dd66872623c
+ size 4988160184
ckpt/magi/24B_base/inference_weight/model-00002-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7abb141cd258434d91ed02c4a95076da7262ce3fe9f3a49004e457818d0de1a0
+ size 7247764000
ckpt/magi/24B_base/inference_weight/model-00003-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0cefe5c35cbf553d714c04f3e4b017ccf93a472c9dfa35763e09f3626ca7822b
+ size 19327358992
ckpt/magi/24B_base/inference_weight/model-00004-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c063bb74aae9a4982f69a7c72dd4bcf84d6ecece1fa74657adab47a7bef63081
+ size 9663682528
ckpt/magi/24B_base/inference_weight/model-00005-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:151c2f417ae3ce95e9f73604313c830f2035de69d131cb39ccb4b5a188415568
+ size 3623890200
ckpt/magi/24B_base/inference_weight/model-00006-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f83d1a2cf25923f6f38aad7f4a611dc33c972c8b0cb3b2151e0aeebed363d89a
+ size 3028420248
ckpt/magi/24B_base/inference_weight/model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
ckpt/magi/24B_distill/inference_weight.distill/model-00001-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:351f466d82f932b95d56f2c81e5a3310a45ce90f0d862b65c7a3ed74678133a6
+ size 4988160184
ckpt/magi/24B_distill/inference_weight.distill/model-00002-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3d68ac40a3236ad1145f05116734af056ce9fc1dc93f1f4e7d530a67af1bad9c
+ size 7247764000
ckpt/magi/24B_distill/inference_weight.distill/model-00003-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8f19da7622e331aeb937da8c388d55ac4aa518ab0d91ce4fc7091fb2b5787187
+ size 19327358992
ckpt/magi/24B_distill/inference_weight.distill/model-00004-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:49304615535a77c1ebeb8fb0468578f710886eae6591ca0e70e50516d8813233
+ size 9663682528
ckpt/magi/24B_distill/inference_weight.distill/model-00005-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a2b97e49e10350d0080861d2b2e394f555ba7ea9f00a208b8d8aa2104739bdae
+ size 4831856536
ckpt/magi/24B_distill/inference_weight.distill/model-00006-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2c6a7239aebb1dd5213c1d6183830fc4c46b0630411ededa21092fe32ac609e2
+ size 1820453696
ckpt/magi/24B_distill/inference_weight.distill/model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
ckpt/magi/24B_distill_quant/inference_weight.fp8.distill/model-00001-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0ffecb6e1191507fbe9d57d06cc274b3c50590c548f7a453bcdd69172275062c
+ size 9836542920
ckpt/magi/24B_distill_quant/inference_weight.fp8.distill/model-00002-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3f7038c048711a35071b110c46dbb1ed55a9937213c5712171423cff490b9286
+ size 9973482972
ckpt/magi/24B_distill_quant/inference_weight.fp8.distill/model-00003-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d16af830c3357dc84e22ee00e8b898049ecdfbb8c2e6d7eb9adcd5e3f3c6eae9
+ size 6664159332
ckpt/magi/24B_distill_quant/inference_weight.fp8.distill/model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
ckpt/t5/t5-v1_1-xxl/config.json ADDED
@@ -0,0 +1,31 @@
+ {
+   "_name_or_path": "google/t5-v1_1-xxl",
+   "architectures": [
+     "T5EncoderModel"
+   ],
+   "d_ff": 10240,
+   "d_kv": 64,
+   "d_model": 4096,
+   "decoder_start_token_id": 0,
+   "dense_act_fn": "gelu_new",
+   "dropout_rate": 0.1,
+   "eos_token_id": 1,
+   "feed_forward_proj": "gated-gelu",
+   "initializer_factor": 1.0,
+   "is_encoder_decoder": true,
+   "is_gated_act": true,
+   "layer_norm_epsilon": 1e-06,
+   "model_type": "t5",
+   "num_decoder_layers": 24,
+   "num_heads": 64,
+   "num_layers": 24,
+   "output_past": true,
+   "pad_token_id": 0,
+   "relative_attention_max_distance": 128,
+   "relative_attention_num_buckets": 32,
+   "tie_word_embeddings": false,
+   "torch_dtype": "float32",
+   "transformers_version": "4.21.1",
+   "use_cache": true,
+   "vocab_size": 32128
+ }
ckpt/t5/t5-v1_1-xxl/pytorch_model-00001-of-00002.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2f71ad0624095dae788b1023081dda1b4040bd24f7244a5b5b46eebc09825839
+ size 9452285635
ckpt/t5/t5-v1_1-xxl/pytorch_model-00002-of-00002.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4f68f80678299ac59f69b3550ebd47b966571920d8f9e71f42ab61fabaaed868
+ size 9597031749
ckpt/t5/t5-v1_1-xxl/pytorch_model.bin.index.json ADDED
@@ -0,0 +1,227 @@
1
+ {
2
+ "metadata": {
3
+ "total_size": 19575627776
4
+ },
5
+ "weight_map": {
6
+ "encoder.block.0.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
7
+ "encoder.block.0.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
8
+ "encoder.block.0.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
9
+ "encoder.block.0.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00001-of-00002.bin",
10
+ "encoder.block.0.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
11
+ "encoder.block.0.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
12
+ "encoder.block.0.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
13
+ "encoder.block.0.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
14
+ "encoder.block.0.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
15
+ "encoder.block.0.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
16
+ "encoder.block.1.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
17
+ "encoder.block.1.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
18
+ "encoder.block.1.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
19
+ "encoder.block.1.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
20
+ "encoder.block.1.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
21
+ "encoder.block.1.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
22
+ "encoder.block.1.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
23
+ "encoder.block.1.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
24
+ "encoder.block.1.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
25
+ "encoder.block.10.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
26
+ "encoder.block.10.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
27
+ "encoder.block.10.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
28
+ "encoder.block.10.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
29
+ "encoder.block.10.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
30
+ "encoder.block.10.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
31
+ "encoder.block.10.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
32
+ "encoder.block.10.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
33
+ "encoder.block.10.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
34
+ "encoder.block.11.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
35
+ "encoder.block.11.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
36
+ "encoder.block.11.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
37
+ "encoder.block.11.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
38
+ "encoder.block.11.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
39
+ "encoder.block.11.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
40
+ "encoder.block.11.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00002-of-00002.bin",
41
+ "encoder.block.11.layer.1.DenseReluDense.wo.weight": "pytorch_model-00002-of-00002.bin",
42
+ "encoder.block.11.layer.1.layer_norm.weight": "pytorch_model-00002-of-00002.bin",
43
+ "encoder.block.12.layer.0.SelfAttention.k.weight": "pytorch_model-00002-of-00002.bin",
44
+ "encoder.block.12.layer.0.SelfAttention.o.weight": "pytorch_model-00002-of-00002.bin",
45
+ "encoder.block.12.layer.0.SelfAttention.q.weight": "pytorch_model-00002-of-00002.bin",
46
+ "encoder.block.12.layer.0.SelfAttention.v.weight": "pytorch_model-00002-of-00002.bin",
47
+ "encoder.block.12.layer.0.layer_norm.weight": "pytorch_model-00002-of-00002.bin",
48
+ "encoder.block.12.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00002-of-00002.bin",
49
+ "encoder.block.12.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00002-of-00002.bin",
50
+ "encoder.block.12.layer.1.DenseReluDense.wo.weight": "pytorch_model-00002-of-00002.bin",
51
+ "encoder.block.12.layer.1.layer_norm.weight": "pytorch_model-00002-of-00002.bin",
52
+ "encoder.block.13.layer.0.SelfAttention.k.weight": "pytorch_model-00002-of-00002.bin",
53
+ "encoder.block.13.layer.0.SelfAttention.o.weight": "pytorch_model-00002-of-00002.bin",
54
+ "encoder.block.13.layer.0.SelfAttention.q.weight": "pytorch_model-00002-of-00002.bin",
55
+ "encoder.block.13.layer.0.SelfAttention.v.weight": "pytorch_model-00002-of-00002.bin",
56
+ "encoder.block.13.layer.0.layer_norm.weight": "pytorch_model-00002-of-00002.bin",
57
+ "encoder.block.13.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00002-of-00002.bin",
58
+ "encoder.block.13.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00002-of-00002.bin",
59
+ "encoder.block.13.layer.1.DenseReluDense.wo.weight": "pytorch_model-00002-of-00002.bin",
60
+ "encoder.block.13.layer.1.layer_norm.weight": "pytorch_model-00002-of-00002.bin",
61
+ "encoder.block.14.layer.0.SelfAttention.k.weight": "pytorch_model-00002-of-00002.bin",
62
+ "encoder.block.14.layer.0.SelfAttention.o.weight": "pytorch_model-00002-of-00002.bin",
63
+ "encoder.block.14.layer.0.SelfAttention.q.weight": "pytorch_model-00002-of-00002.bin",
64
+ "encoder.block.14.layer.0.SelfAttention.v.weight": "pytorch_model-00002-of-00002.bin",
65
+ "encoder.block.14.layer.0.layer_norm.weight": "pytorch_model-00002-of-00002.bin",
66
+ "encoder.block.14.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00002-of-00002.bin",
67
+ "encoder.block.14.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00002-of-00002.bin",
68
+ "encoder.block.14.layer.1.DenseReluDense.wo.weight": "pytorch_model-00002-of-00002.bin",
69
+ "encoder.block.14.layer.1.layer_norm.weight": "pytorch_model-00002-of-00002.bin",
70
+ "encoder.block.15.layer.0.SelfAttention.k.weight": "pytorch_model-00002-of-00002.bin",
71
+ "encoder.block.15.layer.0.SelfAttention.o.weight": "pytorch_model-00002-of-00002.bin",
72
+ "encoder.block.15.layer.0.SelfAttention.q.weight": "pytorch_model-00002-of-00002.bin",
73
+ "encoder.block.15.layer.0.SelfAttention.v.weight": "pytorch_model-00002-of-00002.bin",
74
+ "encoder.block.15.layer.0.layer_norm.weight": "pytorch_model-00002-of-00002.bin",
75
+ "encoder.block.15.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00002-of-00002.bin",
76
+ "encoder.block.15.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00002-of-00002.bin",
77
+ "encoder.block.15.layer.1.DenseReluDense.wo.weight": "pytorch_model-00002-of-00002.bin",
78
+ "encoder.block.15.layer.1.layer_norm.weight": "pytorch_model-00002-of-00002.bin",
79
+ "encoder.block.16.layer.0.SelfAttention.k.weight": "pytorch_model-00002-of-00002.bin",
80
+ "encoder.block.16.layer.0.SelfAttention.o.weight": "pytorch_model-00002-of-00002.bin",
81
+ "encoder.block.16.layer.0.SelfAttention.q.weight": "pytorch_model-00002-of-00002.bin",
82
+ "encoder.block.16.layer.0.SelfAttention.v.weight": "pytorch_model-00002-of-00002.bin",
83
+ "encoder.block.16.layer.0.layer_norm.weight": "pytorch_model-00002-of-00002.bin",
84
+ "encoder.block.16.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00002-of-00002.bin",
85
+ "encoder.block.16.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00002-of-00002.bin",
86
+ "encoder.block.16.layer.1.DenseReluDense.wo.weight": "pytorch_model-00002-of-00002.bin",
87
+ "encoder.block.16.layer.1.layer_norm.weight": "pytorch_model-00002-of-00002.bin",
88
+ "encoder.block.17.layer.0.SelfAttention.k.weight": "pytorch_model-00002-of-00002.bin",
89
+ "encoder.block.17.layer.0.SelfAttention.o.weight": "pytorch_model-00002-of-00002.bin",
90
+ "encoder.block.17.layer.0.SelfAttention.q.weight": "pytorch_model-00002-of-00002.bin",
91
+ "encoder.block.17.layer.0.SelfAttention.v.weight": "pytorch_model-00002-of-00002.bin",
92
+ "encoder.block.17.layer.0.layer_norm.weight": "pytorch_model-00002-of-00002.bin",
93
+ "encoder.block.17.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00002-of-00002.bin",
94
+ "encoder.block.17.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00002-of-00002.bin",
95
+ "encoder.block.17.layer.1.DenseReluDense.wo.weight": "pytorch_model-00002-of-00002.bin",
96
+ "encoder.block.17.layer.1.layer_norm.weight": "pytorch_model-00002-of-00002.bin",
97
+ "encoder.block.18.layer.0.SelfAttention.k.weight": "pytorch_model-00002-of-00002.bin",
98
+ "encoder.block.18.layer.0.SelfAttention.o.weight": "pytorch_model-00002-of-00002.bin",
99
+ "encoder.block.18.layer.0.SelfAttention.q.weight": "pytorch_model-00002-of-00002.bin",
100
+ "encoder.block.18.layer.0.SelfAttention.v.weight": "pytorch_model-00002-of-00002.bin",
101
+ "encoder.block.18.layer.0.layer_norm.weight": "pytorch_model-00002-of-00002.bin",
102
+ "encoder.block.18.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00002-of-00002.bin",
103
+ "encoder.block.18.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00002-of-00002.bin",
104
+ "encoder.block.18.layer.1.DenseReluDense.wo.weight": "pytorch_model-00002-of-00002.bin",
105
+ "encoder.block.18.layer.1.layer_norm.weight": "pytorch_model-00002-of-00002.bin",
106
+ "encoder.block.19.layer.0.SelfAttention.k.weight": "pytorch_model-00002-of-00002.bin",
107
+ "encoder.block.19.layer.0.SelfAttention.o.weight": "pytorch_model-00002-of-00002.bin",
108
+ "encoder.block.19.layer.0.SelfAttention.q.weight": "pytorch_model-00002-of-00002.bin",
109
+ "encoder.block.19.layer.0.SelfAttention.v.weight": "pytorch_model-00002-of-00002.bin",
110
+ "encoder.block.19.layer.0.layer_norm.weight": "pytorch_model-00002-of-00002.bin",
111
+ "encoder.block.19.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00002-of-00002.bin",
112
+ "encoder.block.19.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00002-of-00002.bin",
113
+ "encoder.block.19.layer.1.DenseReluDense.wo.weight": "pytorch_model-00002-of-00002.bin",
114
+ "encoder.block.19.layer.1.layer_norm.weight": "pytorch_model-00002-of-00002.bin",
115
+ "encoder.block.2.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
116
+ "encoder.block.2.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
117
+ "encoder.block.2.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
118
+ "encoder.block.2.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
119
+ "encoder.block.2.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
120
+ "encoder.block.2.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
121
+ "encoder.block.2.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
122
+ "encoder.block.2.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
123
+ "encoder.block.2.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
124
+ "encoder.block.20.layer.0.SelfAttention.k.weight": "pytorch_model-00002-of-00002.bin",
125
+ "encoder.block.20.layer.0.SelfAttention.o.weight": "pytorch_model-00002-of-00002.bin",
126
+ "encoder.block.20.layer.0.SelfAttention.q.weight": "pytorch_model-00002-of-00002.bin",
127
+ "encoder.block.20.layer.0.SelfAttention.v.weight": "pytorch_model-00002-of-00002.bin",
128
+ "encoder.block.20.layer.0.layer_norm.weight": "pytorch_model-00002-of-00002.bin",
129
+ "encoder.block.20.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00002-of-00002.bin",
130
+ "encoder.block.20.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00002-of-00002.bin",
131
+ "encoder.block.20.layer.1.DenseReluDense.wo.weight": "pytorch_model-00002-of-00002.bin",
132
+ "encoder.block.20.layer.1.layer_norm.weight": "pytorch_model-00002-of-00002.bin",
133
+ "encoder.block.21.layer.0.SelfAttention.k.weight": "pytorch_model-00002-of-00002.bin",
134
+ "encoder.block.21.layer.0.SelfAttention.o.weight": "pytorch_model-00002-of-00002.bin",
135
+ "encoder.block.21.layer.0.SelfAttention.q.weight": "pytorch_model-00002-of-00002.bin",
136
+ "encoder.block.21.layer.0.SelfAttention.v.weight": "pytorch_model-00002-of-00002.bin",
137
+ "encoder.block.21.layer.0.layer_norm.weight": "pytorch_model-00002-of-00002.bin",
138
+ "encoder.block.21.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00002-of-00002.bin",
139
+ "encoder.block.21.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00002-of-00002.bin",
140
+ "encoder.block.21.layer.1.DenseReluDense.wo.weight": "pytorch_model-00002-of-00002.bin",
141
+ "encoder.block.21.layer.1.layer_norm.weight": "pytorch_model-00002-of-00002.bin",
142
+ "encoder.block.22.layer.0.SelfAttention.k.weight": "pytorch_model-00002-of-00002.bin",
143
+ "encoder.block.22.layer.0.SelfAttention.o.weight": "pytorch_model-00002-of-00002.bin",
144
+ "encoder.block.22.layer.0.SelfAttention.q.weight": "pytorch_model-00002-of-00002.bin",
145
+ "encoder.block.22.layer.0.SelfAttention.v.weight": "pytorch_model-00002-of-00002.bin",
146
+ "encoder.block.22.layer.0.layer_norm.weight": "pytorch_model-00002-of-00002.bin",
147
+ "encoder.block.22.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00002-of-00002.bin",
148
+ "encoder.block.22.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00002-of-00002.bin",
149
+ "encoder.block.22.layer.1.DenseReluDense.wo.weight": "pytorch_model-00002-of-00002.bin",
150
+ "encoder.block.22.layer.1.layer_norm.weight": "pytorch_model-00002-of-00002.bin",
151
+ "encoder.block.23.layer.0.SelfAttention.k.weight": "pytorch_model-00002-of-00002.bin",
152
+ "encoder.block.23.layer.0.SelfAttention.o.weight": "pytorch_model-00002-of-00002.bin",
153
+ "encoder.block.23.layer.0.SelfAttention.q.weight": "pytorch_model-00002-of-00002.bin",
154
+ "encoder.block.23.layer.0.SelfAttention.v.weight": "pytorch_model-00002-of-00002.bin",
155
+ "encoder.block.23.layer.0.layer_norm.weight": "pytorch_model-00002-of-00002.bin",
156
+ "encoder.block.23.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00002-of-00002.bin",
157
+ "encoder.block.23.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00002-of-00002.bin",
158
+ "encoder.block.23.layer.1.DenseReluDense.wo.weight": "pytorch_model-00002-of-00002.bin",
159
+ "encoder.block.23.layer.1.layer_norm.weight": "pytorch_model-00002-of-00002.bin",
160
+ "encoder.block.3.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
161
+ "encoder.block.3.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
162
+ "encoder.block.3.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
163
+ "encoder.block.3.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
164
+ "encoder.block.3.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
165
+ "encoder.block.3.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
166
+ "encoder.block.3.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
167
+ "encoder.block.3.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
168
+ "encoder.block.3.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
169
+ "encoder.block.4.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
170
+ "encoder.block.4.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
171
+ "encoder.block.4.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
172
+ "encoder.block.4.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
173
+ "encoder.block.4.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
174
+ "encoder.block.4.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
175
+ "encoder.block.4.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
176
+ "encoder.block.4.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
177
+ "encoder.block.4.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
178
+ "encoder.block.5.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
179
+ "encoder.block.5.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
180
+ "encoder.block.5.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
181
+ "encoder.block.5.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
182
+ "encoder.block.5.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
183
+ "encoder.block.5.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
184
+ "encoder.block.5.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
185
+ "encoder.block.5.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
186
+ "encoder.block.5.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
187
+ "encoder.block.6.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
188
+ "encoder.block.6.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
189
+ "encoder.block.6.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
190
+ "encoder.block.6.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
191
+ "encoder.block.6.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
192
+ "encoder.block.6.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
193
+ "encoder.block.6.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
194
+ "encoder.block.6.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
195
+ "encoder.block.6.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
196
+ "encoder.block.7.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
197
+ "encoder.block.7.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
198
+ "encoder.block.7.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
199
+ "encoder.block.7.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
200
+ "encoder.block.7.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
201
+ "encoder.block.7.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
202
+ "encoder.block.7.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
203
+ "encoder.block.7.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
204
+ "encoder.block.7.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
205
+ "encoder.block.8.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
206
+ "encoder.block.8.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
207
+ "encoder.block.8.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
208
+ "encoder.block.8.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
209
+ "encoder.block.8.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
210
+ "encoder.block.8.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
211
+ "encoder.block.8.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
212
+ "encoder.block.8.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
213
+ "encoder.block.8.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
214
+ "encoder.block.9.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
215
+ "encoder.block.9.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
216
+ "encoder.block.9.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
217
+ "encoder.block.9.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
218
+ "encoder.block.9.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
219
+ "encoder.block.9.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
220
+ "encoder.block.9.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
221
+ "encoder.block.9.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
222
+ "encoder.block.9.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
223
+ "encoder.embed_tokens.weight": "pytorch_model-00001-of-00002.bin",
224
+ "encoder.final_layer_norm.weight": "pytorch_model-00002-of-00002.bin",
225
+ "shared.weight": "pytorch_model-00001-of-00002.bin"
226
+ }
227
+ }
ckpt/t5/t5-v1_1-xxl/special_tokens_map.json ADDED
@@ -0,0 +1 @@
+ {"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "additional_special_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_4>", "<extra_id_5>", "<extra_id_6>", "<extra_id_7>", "<extra_id_8>", "<extra_id_9>", "<extra_id_10>", "<extra_id_11>", "<extra_id_12>", "<extra_id_13>", "<extra_id_14>", "<extra_id_15>", "<extra_id_16>", "<extra_id_17>", "<extra_id_18>", "<extra_id_19>", "<extra_id_20>", "<extra_id_21>", "<extra_id_22>", "<extra_id_23>", "<extra_id_24>", "<extra_id_25>", "<extra_id_26>", "<extra_id_27>", "<extra_id_28>", "<extra_id_29>", "<extra_id_30>", "<extra_id_31>", "<extra_id_32>", "<extra_id_33>", "<extra_id_34>", "<extra_id_35>", "<extra_id_36>", "<extra_id_37>", "<extra_id_38>", "<extra_id_39>", "<extra_id_40>", "<extra_id_41>", "<extra_id_42>", "<extra_id_43>", "<extra_id_44>", "<extra_id_45>", "<extra_id_46>", "<extra_id_47>", "<extra_id_48>", "<extra_id_49>", "<extra_id_50>", "<extra_id_51>", "<extra_id_52>", "<extra_id_53>", "<extra_id_54>", "<extra_id_55>", "<extra_id_56>", "<extra_id_57>", "<extra_id_58>", "<extra_id_59>", "<extra_id_60>", "<extra_id_61>", "<extra_id_62>", "<extra_id_63>", "<extra_id_64>", "<extra_id_65>", "<extra_id_66>", "<extra_id_67>", "<extra_id_68>", "<extra_id_69>", "<extra_id_70>", "<extra_id_71>", "<extra_id_72>", "<extra_id_73>", "<extra_id_74>", "<extra_id_75>", "<extra_id_76>", "<extra_id_77>", "<extra_id_78>", "<extra_id_79>", "<extra_id_80>", "<extra_id_81>", "<extra_id_82>", "<extra_id_83>", "<extra_id_84>", "<extra_id_85>", "<extra_id_86>", "<extra_id_87>", "<extra_id_88>", "<extra_id_89>", "<extra_id_90>", "<extra_id_91>", "<extra_id_92>", "<extra_id_93>", "<extra_id_94>", "<extra_id_95>", "<extra_id_96>", "<extra_id_97>", "<extra_id_98>", "<extra_id_99>"]}
ckpt/t5/t5-v1_1-xxl/spiece.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
+ size 791656
ckpt/t5/t5-v1_1-xxl/t5-v1_1-xxl/pytorch_model-00001-of-00002.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fe6002bb05bfd4d2e2dc79468b6320aff4ae6798589bb88129f64294a883b558
+ size 5019090944
ckpt/t5/t5-v1_1-xxl/tokenizer_config.json ADDED
@@ -0,0 +1 @@
+ {"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "extra_ids": 100, "additional_special_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_4>", "<extra_id_5>", "<extra_id_6>", "<extra_id_7>", "<extra_id_8>", "<extra_id_9>", "<extra_id_10>", "<extra_id_11>", "<extra_id_12>", "<extra_id_13>", "<extra_id_14>", "<extra_id_15>", "<extra_id_16>", "<extra_id_17>", "<extra_id_18>", "<extra_id_19>", "<extra_id_20>", "<extra_id_21>", "<extra_id_22>", "<extra_id_23>", "<extra_id_24>", "<extra_id_25>", "<extra_id_26>", "<extra_id_27>", "<extra_id_28>", "<extra_id_29>", "<extra_id_30>", "<extra_id_31>", "<extra_id_32>", "<extra_id_33>", "<extra_id_34>", "<extra_id_35>", "<extra_id_36>", "<extra_id_37>", "<extra_id_38>", "<extra_id_39>", "<extra_id_40>", "<extra_id_41>", "<extra_id_42>", "<extra_id_43>", "<extra_id_44>", "<extra_id_45>", "<extra_id_46>", "<extra_id_47>", "<extra_id_48>", "<extra_id_49>", "<extra_id_50>", "<extra_id_51>", "<extra_id_52>", "<extra_id_53>", "<extra_id_54>", "<extra_id_55>", "<extra_id_56>", "<extra_id_57>", "<extra_id_58>", "<extra_id_59>", "<extra_id_60>", "<extra_id_61>", "<extra_id_62>", "<extra_id_63>", "<extra_id_64>", "<extra_id_65>", "<extra_id_66>", "<extra_id_67>", "<extra_id_68>", "<extra_id_69>", "<extra_id_70>", "<extra_id_71>", "<extra_id_72>", "<extra_id_73>", "<extra_id_74>", "<extra_id_75>", "<extra_id_76>", "<extra_id_77>", "<extra_id_78>", "<extra_id_79>", "<extra_id_80>", "<extra_id_81>", "<extra_id_82>", "<extra_id_83>", "<extra_id_84>", "<extra_id_85>", "<extra_id_86>", "<extra_id_87>", "<extra_id_88>", "<extra_id_89>", "<extra_id_90>", "<extra_id_91>", "<extra_id_92>", "<extra_id_93>", "<extra_id_94>", "<extra_id_95>", "<extra_id_96>", "<extra_id_97>", "<extra_id_98>", "<extra_id_99>"], "model_max_length": 512, "name_or_path": "t5-small"}
ckpt/vae/config.json ADDED
@@ -0,0 +1,22 @@
+ {
+   "_class_name": "ViTVAE",
+   "_diffusers_version": "0.28.2",
+   "ddconfig": {
+     "conv_last_layer": true,
+     "depth": 24,
+     "double_z": true,
+     "embed_dim": 1024,
+     "in_chans": 3,
+     "ln_in_attn": true,
+     "mlp_ratio": 4,
+     "norm_code": false,
+     "num_heads": 16,
+     "patch_length": 4,
+     "patch_size": 8,
+     "qkv_bias": true,
+     "video_length": 16,
+     "video_size": 256,
+     "z_chans": 16
+   },
+   "model_type": "vit"
+ }
ckpt/vae/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d5092a7bcd112b7a743235bddad17d30b497da48b70eae51c5340bae8294b761
+ size 2455072868
figures/algorithm.png ADDED

Git LFS Details

  • SHA256: 7a44e9b01116d3207d8e190119464e1e49cf62d4ad67acd30767bc6984724e95
  • Pointer size: 132 Bytes
  • Size of remote file: 3.56 MB
figures/dit_architecture.png ADDED

Git LFS Details

  • SHA256: 1acbcc40f77b3167246ed1a734c9a3aa8566d7035765ab1bede654044443bd61
  • Pointer size: 131 Bytes
  • Size of remote file: 248 kB
figures/inhouse_human_evaluation.png ADDED

Git LFS Details

  • SHA256: 657aa4a189f7db325a5acc967fad6b40ad22d55855ecbe038f27235abf9be3aa
  • Pointer size: 131 Bytes
  • Size of remote file: 304 kB
figures/logo_black.png ADDED