rahul7star commited on
Commit
4a08685
·
verified ·
1 Parent(s): 649174b

Update wan2_fast.py

Browse files
Files changed (1) hide show
  1. wan2_fast.py +53 -77
wan2_fast.py CHANGED
@@ -56,6 +56,10 @@ pipe.to("cuda")
56
  # pipe.load_lora_weights(causvid_path, adapter_name="causvid_lora")
57
  # pipe.set_adapters(["causvid_lora"], adapter_weights=[0.95])
58
  # pipe.fuse_lora()
 
 
 
 
59
 
60
  # MOD_VALUE = 32
61
  # DEFAULT_H_SLIDER_VALUE = 512
@@ -96,45 +100,56 @@ pipe.to("cuda")
96
 
97
  #New math to make it High Res
98
 
99
- MOD_VALUE = 32
100
 
101
- # Defaults for higher-res generation
102
- DEFAULT_H_SLIDER_VALUE = 768
103
- DEFAULT_W_SLIDER_VALUE = 1344 # near 16:9 (actually 7:4 with 768) and divisible by MOD_VALUE
104
-
105
- # Original Space = Hugging Face space with compute limits
106
- IS_ORIGINAL_SPACE = os.environ.get("IS_ORIGINAL_SPACE", "True") == "True"
107
-
108
- # Conservative limits for low-end environments
109
- LIMITED_MAX_RESOLUTION = 640
110
- LIMITED_MAX_DURATION = 2.0
111
- LIMITED_MAX_STEPS = 4
112
-
113
- # Generous limits for local or Pro spaces
114
- ORIGINAL_SLIDER_MIN_H, ORIGINAL_SLIDER_MAX_H = 128, 1536
115
- ORIGINAL_SLIDER_MIN_W, ORIGINAL_SLIDER_MAX_W = 128, 1536
116
- ORIGINAL_MAX_DURATION = round(81 / 24, 1) # 3.4 seconds
117
- ORIGINAL_MAX_STEPS = 8
118
-
119
- # Use limited or original (generous) settings
120
- if IS_ORIGINAL_SPACE:
121
- SLIDER_MIN_H, SLIDER_MAX_H = 128, LIMITED_MAX_RESOLUTION
122
- SLIDER_MIN_W, SLIDER_MAX_W = 128, LIMITED_MAX_RESOLUTION
123
- MAX_DURATION = LIMITED_MAX_DURATION
124
- MAX_STEPS = LIMITED_MAX_STEPS
125
- else:
126
- SLIDER_MIN_H, SLIDER_MAX_H = ORIGINAL_SLIDER_MIN_H, ORIGINAL_SLIDER_MAX_H
127
- SLIDER_MIN_W, SLIDER_MAX_W = ORIGINAL_SLIDER_MIN_W, ORIGINAL_SLIDER_MAX_W
128
- MAX_DURATION = ORIGINAL_MAX_DURATION
129
- MAX_STEPS = ORIGINAL_MAX_STEPS
130
 
131
- MAX_SEED = np.iinfo(np.int32).max
 
132
 
133
- FIXED_FPS = 24
134
- FIXED_OUTPUT_FPS = 18 # reduce final video FPS to save space
135
- MIN_FRAMES_MODEL = 8
136
- MAX_FRAMES_MODEL = 81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
 
 
 
 
 
 
 
 
 
 
 
 
138
 
139
  default_prompt_t2v = "cinematic footage, group of pedestrians dancing in the streets of NYC, high quality breakdance, 4K, tiktok video, intricate details, instagram feel, dynamic camera, smooth dance motion, dimly lit, stylish, beautiful faces, smiling, music video"
140
  default_negative_prompt = "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards, watermark, text, signature"
@@ -172,45 +187,7 @@ def generate_video(prompt, height, width,
172
  guidance_scale = 1, steps = 4,
173
  seed = 42, randomize_seed = False,
174
  progress=gr.Progress(track_tqdm=True)):
175
- """
176
- Generate a video from a text prompt using the Wan 2.1 T2V model with CausVid LoRA.
177
-
178
- This function takes a text prompt and generates a video based on the provided
179
- prompt and parameters. It uses the Wan 2.1 1.3B Text-to-Video model with CausVid LoRA
180
- for fast generation in 3-8 steps.
181
-
182
- Args:
183
- prompt (str): Text prompt describing the desired video content.
184
- height (int): Target height for the output video. Will be adjusted to multiple of MOD_VALUE (32).
185
- width (int): Target width for the output video. Will be adjusted to multiple of MOD_VALUE (32).
186
- negative_prompt (str, optional): Negative prompt to avoid unwanted elements.
187
- Defaults to default_negative_prompt (contains unwanted visual artifacts).
188
- duration_seconds (float, optional): Duration of the generated video in seconds.
189
- Defaults to 2. Clamped between MIN_FRAMES_MODEL/FIXED_FPS and MAX_FRAMES_MODEL/FIXED_FPS.
190
- guidance_scale (float, optional): Controls adherence to the prompt. Higher values = more adherence.
191
- Defaults to 1.0. Range: 0.0-20.0.
192
- steps (int, optional): Number of inference steps. More steps = higher quality but slower.
193
- Defaults to 4. Range: 1-30.
194
- seed (int, optional): Random seed for reproducible results. Defaults to 42.
195
- Range: 0 to MAX_SEED (2147483647).
196
- randomize_seed (bool, optional): Whether to use a random seed instead of the provided seed.
197
- Defaults to False.
198
- progress (gr.Progress, optional): Gradio progress tracker. Defaults to gr.Progress(track_tqdm=True).
199
-
200
- Returns:
201
- tuple: A tuple containing:
202
- - video_path (str): Path to the generated video file (.mp4)
203
- - current_seed (int): The seed used for generation (useful when randomize_seed=True)
204
-
205
- Raises:
206
- gr.Error: If prompt is empty or None.
207
-
208
- Note:
209
- - Frame count is calculated as duration_seconds * FIXED_FPS (24)
210
- - Output dimensions are adjusted to be multiples of MOD_VALUE (32)
211
- - The function uses GPU acceleration via the @spaces.GPU decorator
212
- - Generation time varies based on steps and duration (see get_duration function)
213
- """
214
  if not prompt or prompt.strip() == "":
215
  raise gr.Error("Please enter a text prompt. Try to use long and precise descriptions.")
216
 
@@ -246,9 +223,8 @@ def generate_video(prompt, height, width,
246
  with gr.Blocks(css="body { max-width: 100vw; overflow-x: hidden; }") as demo:
247
  gr.HTML('<meta name="viewport" content="width=device-width, initial-scale=1">')
248
  # ... your other components here ...
249
- gr.Markdown("# ⚡ InstaVideo")
250
- gr.Markdown("This Gradio space is a fork of [wan2-1-fast from multimodalart](https://huggingface.co/spaces/multimodalart/wan2-1-fast), and is powered by the Wan CausVid LoRA [from Kijai](https://huggingface.co/Kijai/WanVideo_comfy/blob/main/Wan21_CausVid_bidirect2_T2V_1_3B_lora_rank32.safetensors).")
251
-
252
  # Add notice for limited spaces
253
  if IS_ORIGINAL_SPACE:
254
  gr.Markdown("⚠️ **This free public demo limits the resolution to 640px, duration to 2s, and inference steps to 4. For full capabilities please duplicate this space.**")
 
56
  # pipe.load_lora_weights(causvid_path, adapter_name="causvid_lora")
57
  # pipe.set_adapters(["causvid_lora"], adapter_weights=[0.95])
58
  # pipe.fuse_lora()
59
+ #####################################################
60
+
61
+
62
+
63
 
64
  # MOD_VALUE = 32
65
  # DEFAULT_H_SLIDER_VALUE = 512
 
100
 
101
  #New math to make it High Res
102
 
103
+ # MOD_VALUE = 32
104
 
105
+ # # Defaults for higher-res generation
106
+ # DEFAULT_H_SLIDER_VALUE = 768
107
+ # DEFAULT_W_SLIDER_VALUE = 1344 # near 16:9 (actually 7:4 with 768) and divisible by MOD_VALUE
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
 
109
+ # # Original Space = Hugging Face space with compute limits
110
+ # IS_ORIGINAL_SPACE = os.environ.get("IS_ORIGINAL_SPACE", "True") == "True"
111
 
112
+ # # Conservative limits for low-end environments
113
+ # LIMITED_MAX_RESOLUTION = 640
114
+ # LIMITED_MAX_DURATION = 2.0
115
+ # LIMITED_MAX_STEPS = 4
116
+
117
+ # # Generous limits for local or Pro spaces
118
+ # ORIGINAL_SLIDER_MIN_H, ORIGINAL_SLIDER_MAX_H = 128, 1536
119
+ # ORIGINAL_SLIDER_MIN_W, ORIGINAL_SLIDER_MAX_W = 128, 1536
120
+ # ORIGINAL_MAX_DURATION = round(81 / 24, 1) # 3.4 seconds
121
+ # ORIGINAL_MAX_STEPS = 8
122
+
123
+ # # Use limited or original (generous) settings
124
+ # if IS_ORIGINAL_SPACE:
125
+ # SLIDER_MIN_H, SLIDER_MAX_H = 128, LIMITED_MAX_RESOLUTION
126
+ # SLIDER_MIN_W, SLIDER_MAX_W = 128, LIMITED_MAX_RESOLUTION
127
+ # MAX_DURATION = LIMITED_MAX_DURATION
128
+ # MAX_STEPS = LIMITED_MAX_STEPS
129
+ # else:
130
+ # SLIDER_MIN_H, SLIDER_MAX_H = ORIGINAL_SLIDER_MIN_H, ORIGINAL_SLIDER_MAX_H
131
+ # SLIDER_MIN_W, SLIDER_MAX_W = ORIGINAL_SLIDER_MIN_W, ORIGINAL_SLIDER_MAX_W
132
+ # MAX_DURATION = ORIGINAL_MAX_DURATION
133
+ # MAX_STEPS = ORIGINAL_MAX_STEPS
134
+
135
+ # MAX_SEED = np.iinfo(np.int32).max
136
+
137
+ # FIXED_FPS = 24
138
+ # FIXED_OUTPUT_FPS = 18 # reduce final video FPS to save space
139
+ # MIN_FRAMES_MODEL = 8
140
+ # MAX_FRAMES_MODEL = 81
141
 
142
+ # Constants
143
+ MOD_VALUE = 32
144
+ DEFAULT_H_SLIDER_VALUE = 896
145
+ DEFAULT_W_SLIDER_VALUE = 896
146
+ NEW_FORMULA_MAX_AREA = 720 * 1024
147
+ SLIDER_MIN_H, SLIDER_MAX_H = 256, 1024
148
+ SLIDER_MIN_W, SLIDER_MAX_W = 256, 1024
149
+ MAX_SEED = np.iinfo(np.int32).max
150
+ FIXED_FPS = 24
151
+ MIN_FRAMES_MODEL = 25
152
+ MAX_FRAMES_MODEL = 193
153
 
154
  default_prompt_t2v = "cinematic footage, group of pedestrians dancing in the streets of NYC, high quality breakdance, 4K, tiktok video, intricate details, instagram feel, dynamic camera, smooth dance motion, dimly lit, stylish, beautiful faces, smiling, music video"
155
  default_negative_prompt = "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards, watermark, text, signature"
 
187
  guidance_scale = 1, steps = 4,
188
  seed = 42, randomize_seed = False,
189
  progress=gr.Progress(track_tqdm=True)):
190
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
  if not prompt or prompt.strip() == "":
192
  raise gr.Error("Please enter a text prompt. Try to use long and precise descriptions.")
193
 
 
223
  with gr.Blocks(css="body { max-width: 100vw; overflow-x: hidden; }") as demo:
224
  gr.HTML('<meta name="viewport" content="width=device-width, initial-scale=1">')
225
  # ... your other components here ...
226
+ gr.Markdown("# ⚡ InstaVideo - FastWan2.2 Demo")
227
+
 
228
  # Add notice for limited spaces
229
  if IS_ORIGINAL_SPACE:
230
  gr.Markdown("⚠️ **This free public demo limits the resolution to 640px, duration to 2s, and inference steps to 4. For full capabilities please duplicate this space.**")