rahul7star committed
Commit 30f8a30 · verified · 1 Parent(s): 54408c0

Migrated from GitHub

This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. .gitattributes +9 -0
  2. Custom Resolutions Instructions.txt +16 -0
  3. LICENSE.txt +17 -0
  4. ORIGINAL_README.md +244 -0
  5. assets/comp_effic.png +3 -0
  6. assets/data_for_diff_stage.jpg +3 -0
  7. assets/i2v_res.png +3 -0
  8. assets/logo.png +0 -0
  9. assets/t2v_res.jpg +3 -0
  10. assets/vben_vs_sota.png +3 -0
  11. assets/video_dit_arch.jpg +3 -0
  12. assets/video_vae_res.jpg +3 -0
  13. configs/fantasy.json +15 -0
  14. configs/flf2v_720p.json +15 -0
  15. configs/i2v.json +14 -0
  16. configs/i2v_2_2.json +14 -0
  17. configs/multitalk.json +15 -0
  18. configs/phantom_1.3B.json +14 -0
  19. configs/phantom_14B.json +14 -0
  20. configs/sky_df_1.3.json +14 -0
  21. configs/sky_df_14B.json +14 -0
  22. configs/t2v.json +14 -0
  23. configs/t2v_1.3B.json +14 -0
  24. configs/vace_1.3B.json +16 -0
  25. configs/vace_14B.json +16 -0
  26. configs/vace_multitalk_14B.json +17 -0
  27. defaults/ReadMe.txt +13 -0
  28. defaults/fantasy.json +12 -0
  29. defaults/flf2v_720p.json +16 -0
  30. defaults/flux.json +16 -0
  31. defaults/flux_dev_kontext.json +19 -0
  32. defaults/flux_krea.json +16 -0
  33. defaults/flux_schnell.json +17 -0
  34. defaults/fun_inp.json +13 -0
  35. defaults/fun_inp_1.3B.json +11 -0
  36. defaults/hunyuan.json +12 -0
  37. defaults/hunyuan_avatar.json +12 -0
  38. defaults/hunyuan_custom.json +12 -0
  39. defaults/hunyuan_custom_audio.json +12 -0
  40. defaults/hunyuan_custom_edit.json +12 -0
  41. defaults/hunyuan_i2v.json +12 -0
  42. defaults/hunyuan_t2v_accvideo.json +30 -0
  43. defaults/hunyuan_t2v_fast.json +31 -0
  44. defaults/i2v.json +13 -0
  45. defaults/i2v_2_2.json +24 -0
  46. defaults/i2v_720p.json +14 -0
  47. defaults/i2v_fusionix.json +10 -0
  48. defaults/ltxv_13B.json +19 -0
  49. defaults/ltxv_distilled.json +15 -0
  50. defaults/moviigen.json +16 -0
.gitattributes CHANGED
@@ -33,3 +33,12 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ assets/comp_effic.png filter=lfs diff=lfs merge=lfs -text
+ assets/data_for_diff_stage.jpg filter=lfs diff=lfs merge=lfs -text
+ assets/i2v_res.png filter=lfs diff=lfs merge=lfs -text
+ assets/t2v_res.jpg filter=lfs diff=lfs merge=lfs -text
+ assets/vben_vs_sota.png filter=lfs diff=lfs merge=lfs -text
+ assets/video_dit_arch.jpg filter=lfs diff=lfs merge=lfs -text
+ assets/video_vae_res.jpg filter=lfs diff=lfs merge=lfs -text
+ preprocessing/matanyone/tutorial_multi_targets.mp4 filter=lfs diff=lfs merge=lfs -text
+ preprocessing/matanyone/tutorial_single_target.mp4 filter=lfs diff=lfs merge=lfs -text
Custom Resolutions Instructions.txt ADDED
@@ -0,0 +1,16 @@
+ You can override the choice of Resolutions offered by WanGP by creating a file "resolutions.json" in the main WanGP folder.
+ This file contains a list of two-element sublists. Each sublist should have the format ["Label", "WxH"], where W and H are respectively the Width and Height of the resolution, separated by the letter "x". Please make sure that W and H are multiples of 16.
+
+ Below is a sample "resolutions.json" file:
+
+ [
+ ["1280x720 (16:9, 720p)", "1280x720"],
+ ["720x1280 (9:16, 720p)", "720x1280"],
+ ["1024x1024 (1:1, 720p)", "1024x1024"],
+ ["1280x544 (21:9, 720p)", "1280x544"],
+ ["544x1280 (9:21, 720p)", "544x1280"],
+ ["1104x832 (4:3, 720p)", "1104x832"],
+ ["832x1104 (3:4, 720p)", "832x1104"],
+ ["960x960 (1:1, 720p)", "960x960"],
+ ["832x480 (16:9, 480p)", "832x480"]
+ ]
LICENSE.txt ADDED
@@ -0,0 +1,17 @@
+ FREE for Non-Commercial USE
+
+ You are free to:
+ - Share — copy and redistribute the material in any medium or format
+ - Adapt — remix, transform, and build upon the material
+ The licensor cannot revoke these freedoms as long as you follow the license terms.
+
+ Under the following terms:
+ - Attribution — You must give appropriate credit, provide a link to the license, and indicate if changes were made. You may do so in any reasonable manner, but not in any way that suggests the licensor endorses you or your use.
+ - NonCommercial — You may not use the material for commercial purposes.
+
+ - No additional restrictions — You may not apply legal terms or technological measures that legally restrict others from doing anything the license permits.
+
+ Notices:
+
+ - You do not have to comply with the license for elements of the material in the public domain or where your use is permitted by an applicable exception or limitation.
+
+ No warranties are given. The license may not give you all of the permissions necessary for your intended use. For example, other rights such as publicity, privacy, or moral rights may limit how you use the material.
ORIGINAL_README.md ADDED
@@ -0,0 +1,244 @@
+ # WanGP
+
+ -----
+ <p align="center">
+ <b>WanGP by DeepBeepMeep: The best Open Source Video Generative Models Accessible to the GPU Poor</b>
+ </p>
+
+ WanGP supports the Wan (and derived models), Hunyuan Video and LTX Video models with:
+ - Low VRAM requirements (as little as 6 GB of VRAM is sufficient for certain models)
+ - Support for old GPUs (RTX 10XX, 20XX, ...)
+ - Very Fast on the latest GPUs
+ - Easy to use, fully Web-based interface
+ - Auto download of the required model adapted to your specific architecture
+ - Integrated tools to facilitate Video Generation: Mask Editor, Prompt Enhancer, Temporal and Spatial Generation, MMAudio, Video Browser, Pose / Depth / Flow extractor
+ - Loras support to customize each model
+ - Queuing system: make your shopping list of videos to generate and come back later
+
+ **Discord Server to get Help from Other Users and show your Best Videos:** https://discord.gg/g7efUW9jGV
+
+ **Follow DeepBeepMeep on Twitter/X to get the Latest News**: https://x.com/deepbeepmeep
+
+ ## 🔥 Latest Updates
+ ### August 4 2025: WanGP v7.6 - Remuxed
+
+ With this new version you won't have any excuse if there is no sound in your video.
+
+ *Continue Video* now works with any video that already has some sound (hint: Multitalk).
+
+ Also, on top of MMAudio and the various sound-driven models, I have added the ability to use your own soundtrack.
+
+ As a result you can apply a different sound source to each new video segment when doing a *Continue Video*.
+
+ For instance:
+ - first video part: use Multitalk with two people speaking
+ - second video part: apply your own soundtrack, which will gently follow the Multitalk conversation
+ - third video part: use a Vace effect, and its corresponding control audio will be concatenated to the rest of the audio
+
+ To multiply the combinations, I have also implemented *Continue Video* with the various image2video models.
+
+ Also:
+ - End Frame support added for LTX Video models
+ - Loras can now be targeted specifically at the High noise or Low noise models with Wan 2.2; check the Loras and Finetunes guides
+ - Flux Krea Dev support
+
+ ### July 30 2025: WanGP v7.5: Just another release ... Wan 2.2 part 2
+ Here is now Wan 2.2 image2video, a very good model if you want to set Start and End frames. Two Wan 2.2 models delivered, only one to go ...
+
+ Please note that although it is an image2video model, it is structurally very close to Wan 2.2 text2video (same layers, with only a different initial projection). Given that Wan 2.1 image2video loras don't work too well (half of their tensors are not supported), I have decided that this model will look for its loras in the text2video loras folder instead of the image2video folder.
+
+ I have also optimized RAM management with Wan 2.2 so that loras and modules will be loaded only once in RAM and Reserved RAM; this saves up to 5 GB of RAM, which can make a difference...
+
+ And this time I really removed Vace Cocktail Light, which produced blurry videos.
+
+ ### July 29 2025: WanGP v7.4: Just another release ... Wan 2.2 Preview
+ Wan 2.2 is here. The good news is that WanGP won't require a single byte of extra VRAM to run it, and it will be as fast as Wan 2.1. The bad news is that you will need much more RAM if you want to leverage this new model entirely, since it has twice as many parameters.
+
+ So here is a preview version of Wan 2.2, without the 5B model and Wan 2.2 image2video for the moment.
+
+ However, as I felt bad delivering only half of the wares, I gave you instead ..... **Wan 2.2 Vace Experimental Cocktail**!
+
+ Very good surprise indeed: the loras and Vace partially work with Wan 2.2. We will need to wait for the official Vace 2.2 release, since some Vace features, like identity preservation, are broken.
+
+ Bonus zone: Flux multi image conditions have been added, or maybe not, if I broke everything while I was distracted by Wan...
+
+ 7.4 update: I forgot to update the version number. I also removed Vace Cocktail Light, which didn't work well.
+
+ ### July 27 2025: WanGP v7.3: Interlude
+ While waiting for Wan 2.2, you will appreciate the model selection hierarchy, which is very useful for collecting even more models. You will also appreciate that WanGP remembers which model you used last in each model family.
+
+ ### July 26 2025: WanGP v7.2: Ode to Vace
+ I am really convinced that Vace can do everything the other models can do, and in a better way, especially as Vace can be combined with Multitalk.
+
+ Here are some new Vace improvements:
+ - I have provided a default finetune named *Vace Cocktail*, which is a model created on the fly using the Wan text 2 video model and the Loras used to build FusioniX. The weight of the *Detail Enhancer* Lora has been reduced to improve identity preservation. Copy the model definition *defaults/vace_14B_cocktail.json* into the *finetunes/* folder to change the Cocktail composition. Cocktail already contains some Lora accelerators, so there is no need to add an Accvid, Causvid or FusioniX Lora on top. The whole point of Cocktail is to be able to build your own FusioniX (which originally is a combination of 4 loras) but without the inconveniences of FusioniX.
+ - Talking about identity preservation, it tends to go away when one generates a single Frame instead of a Video, which is a shame for our Vace photoshop. But there is a solution: I have added an Advanced Quality option that tells WanGP to generate a little more than one frame (it will still keep only the first frame). It will be a little slower, but you will be amazed how well Vace Cocktail combined with this option preserves identities (bye bye *Phantom*).
+ - As I have observed that in practice one switches frequently between *Vace text2video* and *Vace text2image*, I have put them in the same place: they are now just one tab away, with no need to reload the model. Likewise, *Wan text2video* and *Wan text2image* have been merged.
+ - Color fixing when using Sliding Windows. A new *Color Correction* postprocessing, applied automatically by default (you can disable it in the *Sliding Window* section of the Advanced tab), will try to match the colors of the new window with those of the previous window. It doesn't fix all the unwanted artifacts of the new window, but at least this makes the transition smoother. Thanks to the Multitalk team for the original code.
+
+ Also, you will enjoy our new real-time statistics (CPU / GPU usage, RAM / VRAM used, ...). Many thanks to **Redtash1** for providing the framework for this new feature! You need to go to the Config tab to enable real-time stats.
+
+
+ ### July 21 2025: WanGP v7.12
+ - Flux Family Reunion: *Flux Dev* and *Flux Schnell* have been invited aboard WanGP. To celebrate that, Loras support for the Flux *diffusers* format has also been added.
+
+ - LTX Video upgraded to version 0.9.8: you can now generate 1800 frames (1 min of video!) in one go without a sliding window. With the distilled model it will take only 5 minutes with an RTX 4090 (you will need 22 GB of VRAM though). I have added options to select a higher number of frames if you want to experiment (go to Configuration Tab / General / Increase the Max Number of Frames, change the value and restart the App).
+
+ - LTX Video ControlNet: a Control Net that allows you, for instance, to transfer a Human motion or Depth from a control video. It is not as powerful as Vace, but it can produce interesting things, especially as you can now quickly generate a 1 min video. Under the hood, IC-Loras (see below) for Pose, Depth and Canny are automatically loaded for you; no need to add them.
+
+ - LTX IC-Lora support: these are special Loras that consume a conditional image or video.
+ Besides the Pose, Depth and Canny IC-Loras loaded transparently, there is the *detailer* (https://huggingface.co/Lightricks/LTX-Video-ICLoRA-detailer-13b-0.9.8), which is basically an upsampler. Add the *detailer* as a Lora and use LTX Raw Format as the control net choice to use it.
+
+ - Matanyone is now also for the GPU Poor, as its VRAM requirements have been divided by 2! (7.12 shadow update)
+
+ - Easier way to select video resolution
+
+
+ ### July 15 2025: WanGP v7.0 is an AI Powered Photoshop
+ This release turns the Wan models into Image Generators. This goes way beyond just allowing you to generate a video made of a single frame:
+ - Multiple Images generated at the same time so that you can choose the one you like best. It is highly VRAM optimized, so that you can for instance generate 4 720p Images at the same time with less than 10 GB
+ - With *image2image* the original text2video WanGP becomes an image upsampler / restorer
+ - *Vace image2image* comes out of the box with image outpainting, person / object replacement, ...
+ - You can use in one click a newly generated Image as a Start Image or Reference Image for a Video generation
+
+ And to complete the full suite of AI Image Generators, Ladies and Gentlemen, please welcome for the first time in WanGP: **Flux Kontext**.\
+ As a reminder, Flux Kontext is an image editor: give it an image and a prompt and it will make the change for you.\
+ This highly optimized version of Flux Kontext will make you feel that you have been cheated all this time, as WanGP Flux Kontext requires only 8 GB of VRAM to generate 4 images at the same time with no need for quantization.
+
+ WanGP v7 comes with vanilla *Image2image* and *Vace FusioniX*. However, you can build your own finetune in which you combine a text2video or Vace model with any combination of Loras.
+
+ Also in the news:
+ - You can now enter the *Bbox* for each speaker in *Multitalk* to precisely locate who is speaking. And to save some headaches, the *Image Mask generator* will give you the *Bbox* coordinates of an area you have selected.
+ - *Film Grain* post processing to add a vintage look to your video
+ - The *First Last Frame to Video* model should work much better now, as I recently discovered that its implementation was not complete
+ - More power for the finetuners: you can now embed Loras directly in the finetune definition. You can also override the default models (titles, visibility, ...) with your own finetunes. Check the doc, which has been updated.
+
+
+ ### July 10 2025: WanGP v6.7, is NAG a game changer? You tell me
+ Maybe you knew that already, but most *Loras accelerators* we use today (Causvid, FusioniX) don't use *Guidance* at all (that is, *CFG* is set to 1). This helps to get much faster generations, but the downside is that *Negative Prompts* are completely ignored (including the default ones set by the models). **NAG** (https://github.com/ChenDarYen/Normalized-Attention-Guidance) aims to solve that by injecting the *Negative Prompt* during the *attention* processing phase.
+
+ So WanGP 6.7 gives you NAG, but not just any NAG: a *Low VRAM* implementation, as the default one ends up being VRAM greedy. You will find NAG in the *General* advanced tab for most Wan models.
+
+ Use NAG especially when Guidance is set to 1. To turn it on, set the **NAG scale** to something around 10. There are other NAG parameters, **NAG tau** and **NAG alpha**, which I recommend changing only if you don't get good results by just playing with the NAG scale. Don't hesitate to share the best combinations for these 3 parameters on the Discord server.
+
+ The authors of NAG claim that NAG can also be used with Guidance (CFG > 1) to improve prompt adherence.
+
+ ### July 8 2025: WanGP v6.6, WanGP offers you **Vace Multitalk Dual Voices FusioniX Infinite**:
+ **Vace**, our beloved super Control Net, has been combined with **Multitalk**, the new king in town that can animate up to two people speaking (**Dual Voices**). It is accelerated by the **FusioniX** model, and thanks to *Sliding Windows* support and *Adaptive Projected Guidance* (much slower, but should reduce the reddish effect with long videos) your two people will be able to talk for a very long time (which is an **Infinite** amount of time in the field of video generation).
+
+ Of course you will also get vanilla *Multitalk* and *Multitalk 720p* as a bonus.
+
+ And since I am Mister Nice Guy, I have enclosed as an exclusivity an *Audio Separator* that will save you time isolating each voice when using Multitalk with two people.
+
+ As I feel like resting a bit, I haven't yet produced a nice sample Video to illustrate all these new capabilities. But here is the thing: I am sure you will publish your *Masterpieces* in the *Share Your Best Video* channel. The best ones will be added to the *Announcements Channel* and will bring eternal fame to their authors.
+
+ But wait, there is more:
+ - Sliding Windows support has been added everywhere with Wan models, so with text2video recently upgraded in 6.5 into a video2video, you can now upsample very long videos regardless of your VRAM. The good old image2video model can now reuse the last image to produce new videos (as requested by many of you)
+ - I have also added the capability to transfer the audio of the original control video (Misc. advanced tab) and an option to preserve the fps in the generated video, so from now on you will be able to upsample / restore your old family videos and keep the audio at its original pace. Be aware that the duration will be limited to 1000 frames, as I still need to add streaming support for unlimited video sizes.
+
+ Also, of interest too:
+ - Extract video info from Videos that have not been generated by WanGP; even better, you can also apply post processing (Upsampling / MMAudio) to non-WanGP videos
+ - Force the generated video fps to your liking; works very well with Vace when using a Control Video
+ - Ability to chain URLs of Finetune models (for instance, put the URLs of a model in your main finetune and reference this finetune in other finetune models to save time); a minimal sketch follows below
+
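+ A minimal sketch of such URL chaining (hypothetical file and model name; the mechanism mirrors *defaults/i2v_fusionix.json* in this commit, whose "URLs" entry references the model id "i2v" instead of listing download links):
+ ```json
+ {
+ "model":
+ {
+ "name": "My i2v FusioniX variant",
+ "architecture" : "i2v",
+ "URLs": "i2v_fusionix"
+ }
+ }
+ ```
+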
+ ### July 2 2025: WanGP v6.5.1, WanGP takes care of you: lots of quality of life features:
+ - View directly inside WanGP the properties (seed, resolution, length, most settings...) of past generations
+ - In one click, use the newly generated video as a Control Video or a Source Video to be continued
+ - Manage multiple settings for the same model and switch between them using a dropdown box
+ - WanGP will keep the last generated videos in the Gallery and will remember the last model you used if you restart the app but keep the Web page open
+ - Custom resolutions: add a file in the WanGP folder with the list of resolutions you want to see in WanGP (look at the instructions readme in this folder)
+
+ Taking care of your life is not enough, you want new stuff to play with?
+ - MMAudio directly inside WanGP: add an audio soundtrack that matches the content of your video. By the way, it is a low VRAM MMAudio and 6 GB of VRAM should be sufficient. You will need to go to the *Extensions* tab of the WanGP *Configuration* to enable MMAudio
+ - Forgot to upsample your video during the generation? Want to try another MMAudio variation? Fear not: you can also apply upsampling or add an MMAudio track once the video generation is done. Even better, you can ask WanGP for multiple variations of MMAudio to pick the one you like best
+ - MagCache support: a new step skipping approach, supposed to be better than TeaCache. Makes a difference if you usually generate with a high number of steps
+ - SageAttention2++ support: not just compatibility but also a slightly reduced VRAM usage
+ - Video2Video in Wan Text2Video: this is the paradox, a text2video can become a video2video if you start the denoising process later on an existing video
+ - FusioniX upsampler: this is an illustration of Video2Video in Text2Video. Use the FusioniX text2video model with an output resolution of 1080p and a denoising strength of 0.25 and you will get one of the best upsamplers (in only 2/3 steps, though you will need lots of VRAM). Increase the denoising strength and you will get one of the best Video Restorers
+ - Choice of Wan Samplers / Schedulers
+ - More Lora formats supported
+
+ **If you upgraded to v6.5, please upgrade again to v6.5.1, as this fixes a bug that caused Loras beyond the first one to be ignored**
+
+ See full changelog: **[Changelog](docs/CHANGELOG.md)**
+
+ ## 📋 Table of Contents
+
+ - [🚀 Quick Start](#-quick-start)
+ - [📦 Installation](#-installation)
+ - [🎯 Usage](#-usage)
+ - [📚 Documentation](#-documentation)
+ - [🔗 Related Projects](#-related-projects)
+
+ ## 🚀 Quick Start
+
+ **One-click installation:** Get started instantly with the [Pinokio App](https://pinokio.computer/)
+
+ **Manual installation:**
+ ```bash
+ git clone https://github.com/deepbeepmeep/Wan2GP.git
+ cd Wan2GP
+ conda create -n wan2gp python=3.10.9
+ conda activate wan2gp
+ pip install torch==2.7.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/test/cu124
+ pip install -r requirements.txt
+ ```
+
+ **Run the application:**
+ ```bash
+ python wgp.py # Text-to-video (default)
+ python wgp.py --i2v # Image-to-video
+ ```
+
+ **Update the application:**
+ If using Pinokio, use Pinokio to update. Otherwise,
+ go to the directory where WanGP is installed and run:
+ ```bash
+ git pull
+ pip install -r requirements.txt
+ ```
+
+
+ ## 📦 Installation
+
+ For detailed installation instructions for different GPU generations:
+ - **[Installation Guide](docs/INSTALLATION.md)** - Complete setup instructions for RTX 10XX to RTX 50XX
+
+ ## 🎯 Usage
+
+ ### Basic Usage
+ - **[Getting Started Guide](docs/GETTING_STARTED.md)** - First steps and basic usage
+ - **[Models Overview](docs/MODELS.md)** - Available models and their capabilities
+
+ ### Advanced Features
+ - **[Loras Guide](docs/LORAS.md)** - Using and managing Loras for customization
+ - **[Finetunes](docs/FINETUNES.md)** - Manually add new models to WanGP
+ - **[VACE ControlNet](docs/VACE.md)** - Advanced video control and manipulation
+ - **[Command Line Reference](docs/CLI.md)** - All available command line options
+
+ ## 📚 Documentation
+
+ - **[Changelog](docs/CHANGELOG.md)** - Latest updates and version history
+ - **[Troubleshooting](docs/TROUBLESHOOTING.md)** - Common issues and solutions
+
+ ## 📚 Video Guides
+ - A nice video that explains how to use Vace:\
+ https://www.youtube.com/watch?v=FMo9oN2EAvE
+ - Another Vace guide:\
+ https://www.youtube.com/watch?v=T5jNiEhf9xk
+
+ ## 🔗 Related Projects
+
+ ### Other Models for the GPU Poor
+ - **[HunyuanVideoGP](https://github.com/deepbeepmeep/HunyuanVideoGP)** - One of the best open source Text to Video generators
+ - **[Hunyuan3D-2GP](https://github.com/deepbeepmeep/Hunyuan3D-2GP)** - Image to 3D and text to 3D tool
+ - **[FluxFillGP](https://github.com/deepbeepmeep/FluxFillGP)** - Inpainting/outpainting tools based on Flux
+ - **[Cosmos1GP](https://github.com/deepbeepmeep/Cosmos1GP)** - Text to world generator and image/video to world
+ - **[OminiControlGP](https://github.com/deepbeepmeep/OminiControlGP)** - Flux-derived application for object transfer
+ - **[YuE GP](https://github.com/deepbeepmeep/YuEGP)** - Song generator with instruments and singer's voice
+
+ ---
+
+ <p align="center">
+ Made with ❤️ by DeepBeepMeep
+ </p>
assets/comp_effic.png ADDED

Git LFS Details

  • SHA256: b0e225caffb4b31295ad150f95ee852e4c3dde4a00ac8f79a2ff500f2ce26b8d
  • Pointer size: 132 Bytes
  • Size of remote file: 1.79 MB
assets/data_for_diff_stage.jpg ADDED

Git LFS Details

  • SHA256: 59aec08409f2d46b0e640e4e120dc7cca52c08c3de56d026602dbcff1ebf241a
  • Pointer size: 131 Bytes
  • Size of remote file: 528 kB
assets/i2v_res.png ADDED

Git LFS Details

  • SHA256: 6823b3206d8d0cb18d3b5b949dec1217f1178109ba11f14e977b67e1f7b8a248
  • Pointer size: 131 Bytes
  • Size of remote file: 892 kB
assets/logo.png ADDED
assets/t2v_res.jpg ADDED

Git LFS Details

  • SHA256: 91db579092446be2a834bc67721a8e4346936f38c4edb912f459ca3e10f8f439
  • Pointer size: 131 Bytes
  • Size of remote file: 301 kB
assets/vben_vs_sota.png ADDED

Git LFS Details

  • SHA256: 9a0e86ca85046d2675f97984b88b6e74df07bba8a62a31ab8a1aef50d4eda44e
  • Pointer size: 132 Bytes
  • Size of remote file: 1.55 MB
assets/video_dit_arch.jpg ADDED

Git LFS Details

  • SHA256: 195dceec6570289d8b01cc51d2e28a7786216f19de55b23978a52610d1646a66
  • Pointer size: 131 Bytes
  • Size of remote file: 643 kB
assets/video_vae_res.jpg ADDED

Git LFS Details

  • SHA256: d8f9e7f7353848056a615c8ef35ab86ec22976bb46cb27405008b4089701945c
  • Pointer size: 131 Bytes
  • Size of remote file: 213 kB
configs/fantasy.json ADDED
@@ -0,0 +1,15 @@
+ {
+ "_class_name": "WanModel",
+ "_diffusers_version": "0.30.0",
+ "dim": 5120,
+ "eps": 1e-06,
+ "ffn_dim": 13824,
+ "freq_dim": 256,
+ "in_dim": 36,
+ "model_type": "i2v",
+ "num_heads": 40,
+ "num_layers": 40,
+ "out_dim": 16,
+ "text_len": 512,
+ "fantasytalking_dim": 2048
+ }
configs/flf2v_720p.json ADDED
@@ -0,0 +1,15 @@
+ {
+ "_class_name": "WanModel",
+ "_diffusers_version": "0.30.0",
+ "dim": 5120,
+ "eps": 1e-06,
+ "ffn_dim": 13824,
+ "freq_dim": 256,
+ "in_dim": 36,
+ "model_type": "i2v",
+ "num_heads": 40,
+ "num_layers": 40,
+ "out_dim": 16,
+ "text_len": 512,
+ "flf": true
+ }
configs/i2v.json ADDED
@@ -0,0 +1,14 @@
+ {
+ "_class_name": "WanModel",
+ "_diffusers_version": "0.30.0",
+ "dim": 5120,
+ "eps": 1e-06,
+ "ffn_dim": 13824,
+ "freq_dim": 256,
+ "in_dim": 36,
+ "model_type": "i2v",
+ "num_heads": 40,
+ "num_layers": 40,
+ "out_dim": 16,
+ "text_len": 512
+ }
configs/i2v_2_2.json ADDED
@@ -0,0 +1,14 @@
+ {
+ "_class_name": "WanModel",
+ "_diffusers_version": "0.33.0",
+ "dim": 5120,
+ "eps": 1e-06,
+ "ffn_dim": 13824,
+ "freq_dim": 256,
+ "in_dim": 36,
+ "model_type": "i2v2_2",
+ "num_heads": 40,
+ "num_layers": 40,
+ "out_dim": 16,
+ "text_len": 512
+ }
configs/multitalk.json ADDED
@@ -0,0 +1,15 @@
+ {
+ "_class_name": "WanModel",
+ "_diffusers_version": "0.30.0",
+ "dim": 5120,
+ "eps": 1e-06,
+ "ffn_dim": 13824,
+ "freq_dim": 256,
+ "in_dim": 36,
+ "model_type": "i2v",
+ "num_heads": 40,
+ "num_layers": 40,
+ "out_dim": 16,
+ "text_len": 512,
+ "multitalk_output_dim": 768
+ }
configs/phantom_1.3B.json ADDED
@@ -0,0 +1,14 @@
+ {
+ "_class_name": "WanModel",
+ "_diffusers_version": "0.30.0",
+ "dim": 1536,
+ "eps": 1e-06,
+ "ffn_dim": 8960,
+ "freq_dim": 256,
+ "in_dim": 16,
+ "model_type": "t2v",
+ "num_heads": 12,
+ "num_layers": 30,
+ "out_dim": 16,
+ "text_len": 512
+ }
configs/phantom_14B.json ADDED
@@ -0,0 +1,14 @@
+ {
+ "_class_name": "WanModel",
+ "_diffusers_version": "0.30.0",
+ "dim": 5120,
+ "eps": 1e-06,
+ "ffn_dim": 13824,
+ "freq_dim": 256,
+ "in_dim": 16,
+ "model_type": "t2v",
+ "num_heads": 40,
+ "num_layers": 40,
+ "out_dim": 16,
+ "text_len": 512
+ }
configs/sky_df_1.3.json ADDED
@@ -0,0 +1,14 @@
+ {
+ "_class_name": "WanModel",
+ "_diffusers_version": "0.30.0",
+ "dim": 1536,
+ "eps": 1e-06,
+ "ffn_dim": 8960,
+ "freq_dim": 256,
+ "in_dim": 16,
+ "model_type": "t2v",
+ "num_heads": 12,
+ "num_layers": 30,
+ "out_dim": 16,
+ "text_len": 512
+ }
configs/sky_df_14B.json ADDED
@@ -0,0 +1,14 @@
+ {
+ "_class_name": "WanModel",
+ "_diffusers_version": "0.30.0",
+ "dim": 5120,
+ "eps": 1e-06,
+ "ffn_dim": 13824,
+ "freq_dim": 256,
+ "in_dim": 16,
+ "model_type": "t2v",
+ "num_heads": 40,
+ "num_layers": 40,
+ "out_dim": 16,
+ "text_len": 512
+ }
configs/t2v.json ADDED
@@ -0,0 +1,14 @@
+ {
+ "_class_name": "WanModel",
+ "_diffusers_version": "0.30.0",
+ "dim": 5120,
+ "eps": 1e-06,
+ "ffn_dim": 13824,
+ "freq_dim": 256,
+ "in_dim": 16,
+ "model_type": "t2v",
+ "num_heads": 40,
+ "num_layers": 40,
+ "out_dim": 16,
+ "text_len": 512
+ }
configs/t2v_1.3B.json ADDED
@@ -0,0 +1,14 @@
+ {
+ "_class_name": "WanModel",
+ "_diffusers_version": "0.30.0",
+ "dim": 1536,
+ "eps": 1e-06,
+ "ffn_dim": 8960,
+ "freq_dim": 256,
+ "in_dim": 16,
+ "model_type": "t2v",
+ "num_heads": 12,
+ "num_layers": 30,
+ "out_dim": 16,
+ "text_len": 512
+ }
configs/vace_1.3B.json ADDED
@@ -0,0 +1,16 @@
+ {
+ "_class_name": "VaceWanModel",
+ "_diffusers_version": "0.30.0",
+ "dim": 1536,
+ "eps": 1e-06,
+ "ffn_dim": 8960,
+ "freq_dim": 256,
+ "in_dim": 16,
+ "model_type": "t2v",
+ "num_heads": 12,
+ "num_layers": 30,
+ "out_dim": 16,
+ "text_len": 512,
+ "vace_layers": [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28],
+ "vace_in_dim": 96
+ }
configs/vace_14B.json ADDED
@@ -0,0 +1,16 @@
+ {
+ "_class_name": "VaceWanModel",
+ "_diffusers_version": "0.30.0",
+ "dim": 5120,
+ "eps": 1e-06,
+ "ffn_dim": 13824,
+ "freq_dim": 256,
+ "in_dim": 16,
+ "model_type": "t2v",
+ "num_heads": 40,
+ "num_layers": 40,
+ "out_dim": 16,
+ "text_len": 512,
+ "vace_layers": [0, 5, 10, 15, 20, 25, 30, 35],
+ "vace_in_dim": 96
+ }
configs/vace_multitalk_14B.json ADDED
@@ -0,0 +1,17 @@
+ {
+ "_class_name": "VaceWanModel",
+ "_diffusers_version": "0.30.0",
+ "dim": 5120,
+ "eps": 1e-06,
+ "ffn_dim": 13824,
+ "freq_dim": 256,
+ "in_dim": 16,
+ "model_type": "t2v",
+ "num_heads": 40,
+ "num_layers": 40,
+ "out_dim": 16,
+ "text_len": 512,
+ "vace_layers": [0, 5, 10, 15, 20, 25, 30, 35],
+ "vace_in_dim": 96,
+ "multitalk_output_dim": 768
+ }
defaults/ReadMe.txt ADDED
@@ -0,0 +1,13 @@
+ Please do not modify any file in this Folder.
+
+ If you want to change a property of a default model, copy the corresponding model file into the ./finetunes folder and modify the properties you want to change in the new file.
+ If a property is not in the new file, it will be inherited automatically from the default file with the same name.
+
+ For instance, to hide a model:
+
+ {
+ "model":
+ {
+ "visible": false
+ }
+ }
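+
+ Another minimal sketch (hypothetical file finetunes/t2v.json, relying on the inheritance rule above; the README notes that titles can be overridden this way): rename the default t2v model while every other property (URLs, architecture, ...) is inherited from defaults/t2v.json:
+
+ {
+ "model":
+ {
+ "name": "My renamed Wan text2video"
+ }
+ }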
defaults/fantasy.json ADDED
@@ -0,0 +1,12 @@
+ {
+ "model":
+ {
+ "name": "Fantasy Talking 720p",
+ "architecture" : "fantasy",
+ "modules": ["fantasy"],
+ "description": "The Fantasy Talking model corresponds to the original Wan image 2 video model combined with the Fantasy Speaking module to process an audio Input.",
+ "URLs": "i2v_720p",
+ "teacache_coefficients" : [-114.36346466, 65.26524496, -18.82220707, 4.91518089, -0.23412683]
+ },
+ "resolution": "1280x720"
+ }
defaults/flf2v_720p.json ADDED
@@ -0,0 +1,16 @@
+ {
+ "model":
+ {
+ "name": "First Last Frame to Video 720p (FLF2V) 14B",
+ "architecture" : "flf2v_720p",
+ "visible" : true,
+ "description": "The First Last Frame 2 Video model is the official Image 2 Video model that supports Start and End frames.",
+ "URLs": [
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_FLF2V_720p_14B_mbf16.safetensors",
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_FLF2V_720p_14B_quanto_mbf16_int8.safetensors",
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_FLF2V_720p_14B_quanto_mfp16_int8.safetensors"
+ ],
+ "auto_quantize": true
+ },
+ "resolution": "1280x720"
+ }
defaults/flux.json ADDED
@@ -0,0 +1,16 @@
+ {
+ "model": {
+ "name": "Flux 1 Dev 12B",
+ "architecture": "flux",
+ "description": "FLUX.1 Dev is a 12 billion parameter rectified flow transformer capable of generating images from text descriptions.",
+ "URLs": [
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-dev_bf16.safetensors",
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-dev_quanto_bf16_int8.safetensors"
+ ],
+ "image_outputs": true,
+ "flux-model": "flux-dev"
+ },
+ "prompt": "draw a hat",
+ "resolution": "1280x720",
+ "batch_size": 1
+ }
defaults/flux_dev_kontext.json ADDED
@@ -0,0 +1,19 @@
+ {
+ "model": {
+ "name": "Flux 1 Dev Kontext 12B",
+ "architecture": "flux",
+ "description": "FLUX.1 Kontext is a 12 billion parameter rectified flow transformer capable of editing images based on instructions stored in the Prompt. Please be aware that Flux Kontext is picky about the resolution of the input image, and the output dimensions may not match the dimensions of the input image.",
+ "URLs": [
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1_kontext_dev_bf16.safetensors",
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1_kontext_dev_quanto_bf16_int8.safetensors"
+ ],
+ "image_outputs": true,
+ "reference_image": true,
+ "flux-model": "flux-dev-kontext"
+ },
+ "prompt": "add a hat",
+ "resolution": "1280x720",
+ "batch_size": 1
+ }
+
+
defaults/flux_krea.json ADDED
@@ -0,0 +1,16 @@
+ {
+ "model": {
+ "name": "Flux 1 Krea Dev 12B",
+ "architecture": "flux",
+ "description": "Cutting-edge output quality, with a focus on aesthetic photography.",
+ "URLs": [
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-krea-dev_bf16.safetensors",
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-krea-dev_quanto_bf16_int8.safetensors"
+ ],
+ "image_outputs": true,
+ "flux-model": "flux-dev"
+ },
+ "prompt": "draw a hat",
+ "resolution": "1280x720",
+ "batch_size": 1
+ }
defaults/flux_schnell.json ADDED
@@ -0,0 +1,17 @@
+ {
+ "model": {
+ "name": "Flux 1 Schnell 12B",
+ "architecture": "flux",
+ "description": "FLUX.1 Schnell is a 12 billion parameter rectified flow transformer capable of generating images from text descriptions. As a distilled model it requires fewer denoising steps.",
+ "URLs": [
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-schnell_bf16.safetensors",
+ "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-schnell_quanto_bf16_int8.safetensors"
+ ],
+ "image_outputs": true,
+ "flux-model": "flux-schnell"
+ },
+ "prompt": "draw a hat",
+ "resolution": "1280x720",
+ "num_inference_steps": 10,
+ "batch_size": 1
+ }
defaults/fun_inp.json ADDED
@@ -0,0 +1,13 @@
+ {
+ "model":
+ {
+ "name": "Fun InP image2video 14B",
+ "architecture" : "fun_inp",
+ "description": "The Fun model is an alternative image 2 video model that supports out-of-the-box End Image fixing (contrary to the original Wan image 2 video model).",
+ "URLs": [
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_Fun_InP_14B_bf16.safetensors",
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_Fun_InP_14B_quanto_int8.safetensors",
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_Fun_InP_14B_quanto_fp16_int8.safetensors"
+ ]
+ }
+ }
defaults/fun_inp_1.3B.json ADDED
@@ -0,0 +1,11 @@
+ {
+ "model":
+ {
+ "name": "Fun InP image2video 1.3B",
+ "architecture" : "fun_inp_1.3B",
+ "description": "The Fun model is an alternative image 2 video model that supports out-of-the-box End Image fixing (contrary to the original Wan image 2 video model). This version also adds image 2 video capability to the 1.3B model.",
+ "URLs": [
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_Fun_InP_1.3B_bf16.safetensors"
+ ]
+ }
+ }
defaults/hunyuan.json ADDED
@@ -0,0 +1,12 @@
+ {
+ "model":
+ {
+ "name": "Hunyuan Video Text2video 720p 13B",
+ "architecture" : "hunyuan",
+ "description": "Probably the best text 2 video model available.",
+ "URLs": [
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_720_bf16.safetensors.safetensors",
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_720_quanto_int8.safetensors"
+ ]
+ }
+ }
defaults/hunyuan_avatar.json ADDED
@@ -0,0 +1,12 @@
+ {
+ "model":
+ {
+ "name": "Hunyuan Video Avatar 720p 13B",
+ "architecture" : "hunyuan_avatar",
+ "description": "With the Hunyuan Video Avatar model you can animate a person based on the content of an audio input. Please note that the video generator works by processing 128-frame segments at a time (even if you ask for less). The good news is that it will concatenate multiple segments for long video generation (max 3 segments recommended, as the quality will get worse).",
+ "URLs": [
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_avatar_720_bf16.safetensors",
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_avatar_720_quanto_bf16_int8.safetensors"
+ ]
+ }
+ }
defaults/hunyuan_custom.json ADDED
@@ -0,0 +1,12 @@
+ {
+ "model":
+ {
+ "name": "Hunyuan Video Custom 720p 13B",
+ "architecture" : "hunyuan_custom",
+ "description": "The Hunyuan Video Custom model is probably the best model to transfer people (only people for the moment), as it is quite good at keeping their identity. However it is slow, as to get good results you need to generate 720p videos with 30 steps.",
+ "URLs": [
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_custom_720_bf16.safetensors",
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_custom_720_quanto_bf16_int8.safetensors"
+ ]
+ }
+ }
defaults/hunyuan_custom_audio.json ADDED
@@ -0,0 +1,12 @@
+ {
+ "model":
+ {
+ "name": "Hunyuan Video Custom Audio 720p 13B",
+ "architecture" : "hunyuan_custom_audio",
+ "description": "The Hunyuan Video Custom Audio model can be used to generate scenes of a person speaking given a Reference Image and a Recorded Voice or Song. The reference image is not a start image, and therefore one can represent the person in a different context. The video length can be anything up to 10s. It is also quite good at generating Videos without sound based on a person.",
+ "URLs": [
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_custom_audio_720_bf16.safetensors",
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_custom_audio_720_quanto_bf16_int8.safetensors"
+ ]
+ }
+ }
defaults/hunyuan_custom_edit.json ADDED
@@ -0,0 +1,12 @@
+ {
+ "model":
+ {
+ "name": "Hunyuan Video Custom Edit 720p 13B",
+ "architecture" : "hunyuan_custom_edit",
+ "description": "The Hunyuan Video Custom Edit model can be used to do Video inpainting on a person (add accessories or completely replace the person). You will in any case need to define a Video Mask, which indicates which area of the Video should be edited.",
+ "URLs": [
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_custom_edit_720_bf16.safetensors",
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_custom_edit_720_quanto_bf16_int8.safetensors"
+ ]
+ }
+ }
defaults/hunyuan_i2v.json ADDED
@@ -0,0 +1,12 @@
+ {
+ "model":
+ {
+ "name": "Hunyuan Video Image2video 720p 13B",
+ "architecture" : "hunyuan_i2v",
+ "description": "A good looking image 2 video model, but not so good in prompt adherence.",
+ "URLs": [
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_i2v_720_bf16v2.safetensors",
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_i2v_720_quanto_int8v2.safetensors"
+ ]
+ }
+ }
defaults/hunyuan_t2v_accvideo.json ADDED
@@ -0,0 +1,30 @@
+ {
+ "model": {
+ "name": "Hunyuan Video AccVideo 720p 13B",
+ "architecture": "hunyuan",
+ "description": "AccVideo is a novel efficient distillation method to accelerate video diffusion models with a synthetic dataset. Our method is 8.5x faster than HunyuanVideo.",
+ "URLs": [
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/accvideo_hunyuan_video_720_quanto_int8.safetensors"
+ ],
+ "preload_URLs": [
+ ],
+ "auto_quantize": true
+ },
+ "negative_prompt": "",
+ "resolution": "832x480",
+ "video_length": 81,
+ "seed": 42,
+ "num_inference_steps": 5,
+ "flow_shift": 7,
+ "embedded_guidance_scale": 6,
+ "repeat_generation": 1,
+ "loras_multipliers": "",
+ "temporal_upsampling": "",
+ "spatial_upsampling": "",
+ "RIFLEx_setting": 0,
+ "slg_start_perc": 10,
+ "slg_end_perc": 90,
+ "prompt_enhancer": "",
+ "activated_loras": [
+ ]
+ }
defaults/hunyuan_t2v_fast.json ADDED
@@ -0,0 +1,31 @@
+ {
+ "model": {
+ "name": "Hunyuan Video FastHunyuan 720p 13B",
+ "architecture": "hunyuan",
+ "description": "Fast Hunyuan is an accelerated HunyuanVideo model. It can sample high quality videos with 6 diffusion steps.",
+ "URLs": [
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/fast_hunyuan_video_720_quanto_int8.safetensors"
+ ],
+ "preload_URLs": [
+ "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/fast_hunyuan_video_720_quanto_int8_map.json"
+ ],
+ "auto_quantize": true
+ },
+ "negative_prompt": "",
+ "resolution": "832x480",
+ "video_length": 81,
+ "seed": 42,
+ "num_inference_steps": 6,
+ "flow_shift": 17,
+ "embedded_guidance_scale": 6,
+ "repeat_generation": 1,
+ "loras_multipliers": "",
+ "temporal_upsampling": "",
+ "spatial_upsampling": "",
+ "RIFLEx_setting": 0,
+ "slg_start_perc": 10,
+ "slg_end_perc": 90,
+ "prompt_enhancer": "",
+ "activated_loras": [
+ ]
+ }
defaults/i2v.json ADDED
@@ -0,0 +1,13 @@
+ {
+ "model":
+ {
+ "name": "Wan2.1 Image2video 480p 14B",
+ "architecture" : "i2v",
+ "description": "The standard Wan Image 2 Video model, specialized to generate 480p videos. It also offers Start and End Image support (End Image is not supported in the original model but seems to work well).",
+ "URLs": [
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_image2video_480p_14B_mbf16.safetensors",
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_image2video_480p_14B_quanto_mbf16_int8.safetensors",
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_image2video_480p_14B_quanto_mfp16_int8.safetensors"
+ ]
+ }
+ }
defaults/i2v_2_2.json ADDED
@@ -0,0 +1,24 @@
+ {
+ "model":
+ {
+ "name": "Wan2.2 Image2video 14B",
+ "architecture" : "i2v_2_2",
+ "description": "Wan 2.2 Image 2 Video model. Contrary to Wan 2.1 Image2video, this model is structurally close to the t2v model. Consequently, you will need to store Loras for this model in the t2v Lora folder.",
+ "URLs": [
+ "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_image2video_14B_high_mbf16.safetensors",
+ "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_image2video_14B_high_quanto_mbf16_int8.safetensors",
+ "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_image2video_14B_high_quanto_mfp16_int8.safetensors"
+ ],
+ "URLs2": [
+ "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_image2video_14B_low_mbf16.safetensors",
+ "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_image2video_14B_low_quanto_mbf16_int8.safetensors",
+ "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_image2video_14B_low_quanto_mfp16_int8.safetensors"
+ ],
+ "group": "wan2_2"
+ },
+ "switch_threshold" : 900,
+ "guidance_scale" : 3.5,
+ "guidance2_scale" : 3.5,
+ "flow_shift" : 5
+ }
defaults/i2v_720p.json ADDED
@@ -0,0 +1,14 @@
+ {
+ "model":
+ {
+ "name": "Wan2.1 Image2video 720p 14B",
+ "architecture" : "i2v",
+ "description": "The standard Wan Image 2 Video model, specialized to generate 720p videos. It also offers Start and End Image support (End Image is not supported in the original model but seems to work well).",
+ "URLs": [
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_image2video_720p_14B_mbf16.safetensors",
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_image2video_720p_14B_quanto_mbf16_int8.safetensors",
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_image2video_720p_14B_quanto_mfp16_int8.safetensors"
+ ]
+ },
+ "resolution": "1280x720"
+ }
defaults/i2v_fusionix.json ADDED
@@ -0,0 +1,10 @@
+ {
+ "model":
+ {
+ "name": "Wan2.1 Image2video 480p FusioniX 14B",
+ "architecture" : "i2v",
+ "description": "A powerful merged image-to-video model based on the original WAN 2.1 I2V model, enhanced using multiple open-source components and LoRAs to boost motion realism, temporal consistency, and expressive detail.",
+ "URLs": "i2v",
+ "loras": ["https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/loras_accelerators/Wan2.1_I2V_14B_FusionX_LoRA.safetensors"]
+ }
+ }
defaults/ltxv_13B.json ADDED
@@ -0,0 +1,19 @@
+ {
+ "model":
+ {
+ "name": "LTX Video 0.9.8 13B",
+ "architecture" : "ltxv_13B",
+ "description": "LTX Video is a fast model that can be used to generate very, very long videos (up to 1800 frames!). It is recommended to keep the number of steps at 30, or you will need to update the file 'ltx_video/configs/ltxv-13b-0.9.8-dev.yaml'. The LTX Video model expects very long prompts, so don't hesitate to use the Prompt Enhancer.",
+ "URLs": [
+ "https://huggingface.co/DeepBeepMeep/LTX_Video/resolve/main/ltxv_0.9.8_13B_dev_bf16.safetensors",
+ "https://huggingface.co/DeepBeepMeep/LTX_Video/resolve/main/ltxv_0.9.8_13B_dev_quanto_bf16_int8.safetensors"
+ ],
+ "preload_URLs" : [
+ "https://huggingface.co/DeepBeepMeep/LTX_Video/resolve/main/ltxv-097-ic-lora-pose-control-diffusers.safetensors",
+ "https://huggingface.co/DeepBeepMeep/LTX_Video/resolve/main/ltxv-097-ic-lora-depth-control-diffusers.safetensors",
+ "https://huggingface.co/DeepBeepMeep/LTX_Video/resolve/main/ltxv-097-ic-lora-canny-control-diffusers.safetensors"
+ ],
+ "LTXV_config": "ltx_video/configs/ltxv-13b-0.9.8-dev.yaml"
+ },
+ "num_inference_steps": 30
+ }
defaults/ltxv_distilled.json ADDED
@@ -0,0 +1,15 @@
+ {
+ "model":
+ {
+ "name": "LTX Video 0.9.8 Distilled 13B",
+ "architecture" : "ltxv_13B",
+ "description": "LTX Video is a fast model that can be used to generate very long videos (up to 1800 frames!). This distilled version is very fast and retains a high level of quality. The LTX Video model expects very long prompts, so don't hesitate to use the Prompt Enhancer.",
+ "URLs": [
+ "https://huggingface.co/DeepBeepMeep/LTX_Video/resolve/main/ltxv_0.9.8_13B_distilled_bf16.safetensors",
+ "https://huggingface.co/DeepBeepMeep/LTX_Video/resolve/main/ltxv_0.9.8_13B_distilled_quanto_bf16_int8.safetensors"
+ ],
+ "preload_URLs" : "ltxv_13B",
+ "LTXV_config": "ltx_video/configs/ltxv-13b-0.9.8-distilled.yaml"
+ },
+ "num_inference_steps": 6
+ }
defaults/moviigen.json ADDED
@@ -0,0 +1,16 @@
+ {
+ "model":
+ {
+ "name": "MoviiGen 1080p 14B",
+ "architecture" : "t2v",
+ "description": "MoviiGen 1.1 is a cutting-edge video generation model that excels in cinematic aesthetics and visual quality. Use it to generate videos in 720p or 1080p in the 21:9 ratio.",
+ "URLs": [
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_moviigen1.1_14B_mbf16.safetensors",
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_moviigen1.1_14B_quanto_mbf16_int8.safetensors",
+ "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_moviigen1.1_14B_quanto_mfp16_int8.safetensors"
+ ],
+ "auto_quantize": true
+ },
+ "resolution": "1280x720",
+ "video_length": 81
+ }