Spaces: Running on Zero

Update app.py

app.py CHANGED
@@ -21,12 +21,12 @@ pipe.load_lora_weights(
     weight_name="FusionX_LoRa/Phantom_Wan_14B_FusionX_LoRA.safetensors",
     adapter_name="phantom"
 )
-
-
-
-
-
-
+pipe.load_lora_weights(
+    "vrgamedevgirl84/Wan14BT2VFusioniX",
+    weight_name="OtherLoRa's/DetailEnhancerV1.safetensors", adapter_name="detailer"
+)
+pipe.set_adapters(["phantom","detailer"], adapter_weights=[1, .9])
+pipe.fuse_lora()

 MOD_VALUE = 32
 DEFAULT_H_SLIDER_VALUE = 512
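Reviewer note: the new lines 24-29 stack a second LoRA on the same pipeline and fuse both adapters into the base weights. A minimal, hedged sketch of that pattern follows; it is not part of the commit. The base checkpoint ID and the repo for the "phantom" LoRA are not visible in this hunk, so both are assumptions below; only load_lora_weights, set_adapters, and fuse_lora are taken from the diff.

import torch
from diffusers import DiffusionPipeline

# Assumption: base checkpoint; the actual repo is defined above this hunk in app.py.
pipe = DiffusionPipeline.from_pretrained(
    "Wan-AI/Wan2.1-VACE-14B-diffusers", torch_dtype=torch.bfloat16
)

# Assumption: the "phantom" LoRA lives in the same repo as the "detailer" one.
pipe.load_lora_weights(
    "vrgamedevgirl84/Wan14BT2VFusioniX",
    weight_name="FusionX_LoRa/Phantom_Wan_14B_FusionX_LoRA.safetensors",
    adapter_name="phantom",
)
pipe.load_lora_weights(
    "vrgamedevgirl84/Wan14BT2VFusioniX",
    weight_name="OtherLoRa's/DetailEnhancerV1.safetensors",
    adapter_name="detailer",
)

# Activate both adapters with per-adapter scales, then bake them into the
# base weights so inference pays no extra LoRA overhead.
pipe.set_adapters(["phantom", "detailer"], adapter_weights=[1.0, 0.9])
pipe.fuse_lora()

Fusing is optional; leaving the adapters unfused would keep the adapter_weights adjustable at runtime at a small inference cost.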
@@ -77,7 +77,7 @@ def handle_gallery_upload_for_dims_wan(gallery_images, current_h_val, current_w_
         return gr.update(value=DEFAULT_H_SLIDER_VALUE), gr.update(value=DEFAULT_W_SLIDER_VALUE)
     try:
         # Use the first image to calculate dimensions
-        first_image = gallery_images[0]
+        first_image = gallery_images[0]
         new_h, new_w = _calculate_new_dimensions_wan(
             first_image, MOD_VALUE, NEW_FORMULA_MAX_AREA,
             SLIDER_MIN_H, SLIDER_MAX_H, SLIDER_MIN_W, SLIDER_MAX_W,
@@ -176,8 +176,6 @@ def generate_video(gallery_images, mode, prompt, height, width,
     """
     if gallery_images is None or len(gallery_images) == 0:
        raise gr.Error("Please upload at least one image to the gallery.")
-    else:
-        gallery_images = [img[0] for img in gallery_images]

     if mode == "FLF2V" and len(gallery_images) >= 2:
         gallery_images = gallery_images[:2]
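Reviewer note on the deleted else branch: a gr.Gallery input typically arrives as a list of (image, caption) tuples, which is what the removed img[0] unwrapping handled; whether downstream code still needs it depends on the component's type= and Gradio version. A defensive unwrap like the hedged helper below (illustrative name, not in the app) keeps both shapes working.

from typing import Any, List

def unwrap_gallery(items: List[Any]) -> List[Any]:
    # Accept either plain images/paths or (image, caption) tuples, the two
    # shapes gr.Gallery commonly produces depending on version and type=.
    return [it[0] if isinstance(it, (tuple, list)) else it for it in items]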
@@ -203,12 +201,26 @@ def generate_video(gallery_images, mode, prompt, height, width,
         reference_images = None
     elif mode == "Ref2V":
         frames, mask = prepare_video_and_mask_Ref2V(height=target_h, width=target_w, num_frames=num_frames)
-
+        # Resize reference images to match target dimensions
+        reference_images = [img.resize((target_w, target_h)) for img in gallery_images]
     else: # mode == "Random2V"
+        # Calculate appropriate frame indices based on number of images and frames
+        num_images = len(gallery_images)
+        if num_images == 1:
+            frame_indices = [num_frames // 2]  # Place single image in the middle
+        elif num_images == 2:
+            frame_indices = [0, num_frames - 1]  # Place at start and end
+        else:
+            # Distribute images evenly across the video
+            # Ensure we don't exceed available frames
+            max_images = min(num_images, num_frames)
+            step = max(1, num_frames // max_images)
+            frame_indices = [min(i * step, num_frames - 1) for i in range(max_images)]
+            gallery_images = gallery_images[:max_images]  # Limit images to what we can use

         frames, mask = prepare_video_and_mask_Random2V(
             images=gallery_images,
-            frame_indices=
+            frame_indices=frame_indices,
             height=target_h,
             width=target_w,
             num_frames=num_frames
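The Random2V branch above derives frame_indices from the number of uploaded images. A standalone mirror of that logic (illustrative function name, reviewer note only, not part of the commit) makes the placement easy to sanity-check:

def random2v_frame_indices(num_images: int, num_frames: int) -> list[int]:
    # Mirrors the index logic added in this hunk.
    if num_images == 1:
        return [num_frames // 2]              # single image lands mid-clip
    if num_images == 2:
        return [0, num_frames - 1]            # first and last frame
    max_images = min(num_images, num_frames)  # never more images than frames
    step = max(1, num_frames // max_images)
    return [min(i * step, num_frames - 1) for i in range(max_images)]

print(random2v_frame_indices(1, 81))  # [40]
print(random2v_frame_indices(2, 81))  # [0, 80]
print(random2v_frame_indices(3, 81))  # [0, 27, 54]

Note that the even-distribution branch anchors the first image at frame 0 and steps forward, so the final frame is not necessarily covered when three or more images are supplied.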
@@ -235,20 +247,9 @@ def generate_video(gallery_images, mode, prompt, height, width,
     export_to_video(output_frames_list, video_path, fps=FIXED_FPS)
     return video_path, current_seed

-control_modes = """
-**3 control modes avilable:**
-
-**Ref2V (Reference-to-Video)** Generate a video incorporating elements from input reference images
-
-**FLF2V (First-Last Frame-to-Video)** Generate a video using first and last frame conditioning defined by input images
-
-**Random2V (Random-to-Video)** Generate a video with intermediate transitions between multiple input images
-"""
-
 with gr.Blocks() as demo:
-    gr.Markdown("#
-    gr.Markdown("Using [
-    gr.Markdown(f"{control_modes}")
+    gr.Markdown("# Wan 2.1 VACE (14B) with Phantom & Detail Enhancer LoRAs - Multi-Image Gallery")
+    gr.Markdown("Using [Wan2.1-VACE-14B](https://huggingface.co/Wan-AI/Wan2.1-VACE-14B-diffusers) with Phantom FusionX and Detail Enhancer LoRAs for advanced video generation with multiple conditioning modes.")

     with gr.Row():
         with gr.Column():
@@ -269,8 +270,8 @@ with gr.Blocks() as demo:
             mode_radio = gr.Radio(
                 choices=["Ref2V", "FLF2V", "Random2V"],
                 value="Ref2V",
-                label="
-                info="Ref2V: Reference to Video | FLF2V: First-Last Frame to Video | Random2V: Random to Video"
+                label="Processing Mode",
+                info="Ref2V: Reference to Video | FLF2V: First-Last Frame to Video | Random2V: Random frames to Video"
             )

             prompt_input = gr.Textbox(label="Prompt", value=MODE_PROMPTS["Ref2V"])
@@ -278,7 +279,7 @@ with gr.Blocks() as demo:
                 minimum=round(MIN_FRAMES_MODEL/FIXED_FPS,1),
                 maximum=round(MAX_FRAMES_MODEL/FIXED_FPS,1),
                 step=0.1,
-                value=2
+                value=2,
                 label="Duration (seconds)",
                 info=f"Clamped to model's {MIN_FRAMES_MODEL}-{MAX_FRAMES_MODEL} frames at {FIXED_FPS}fps."
             )
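The duration slider's info string implies a seconds-to-frames conversion clamped to the model's range. A hedged sketch of that mapping follows (reviewer note only); the app's actual helper is not shown in this diff, and FIXED_FPS, MIN_FRAMES_MODEL, and MAX_FRAMES_MODEL below are placeholder values, not the constants defined in app.py.

# Placeholder constants; the real values are defined earlier in app.py.
FIXED_FPS = 16
MIN_FRAMES_MODEL = 5
MAX_FRAMES_MODEL = 81

def duration_to_num_frames(duration_s: float) -> int:
    # Convert the slider value (seconds) to a frame count, clamped to the
    # model's supported range, as the slider's info text describes.
    return int(max(MIN_FRAMES_MODEL, min(MAX_FRAMES_MODEL, round(duration_s * FIXED_FPS))))

print(duration_to_num_frames(2))  # 32 with the placeholder constants above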
@@ -290,13 +291,22 @@ with gr.Blocks() as demo:
             with gr.Row():
                 height_input = gr.Slider(minimum=SLIDER_MIN_H, maximum=SLIDER_MAX_H, step=MOD_VALUE, value=DEFAULT_H_SLIDER_VALUE, label=f"Output Height (multiple of {MOD_VALUE})")
                 width_input = gr.Slider(minimum=SLIDER_MIN_W, maximum=SLIDER_MAX_W, step=MOD_VALUE, value=DEFAULT_W_SLIDER_VALUE, label=f"Output Width (multiple of {MOD_VALUE})")
-            steps_slider = gr.Slider(minimum=1, maximum=
+            steps_slider = gr.Slider(minimum=1, maximum=30, step=1, value=6, label="Inference Steps")
             guidance_scale_input = gr.Slider(minimum=0.0, maximum=5.0, step=0.5, value=1.0, label="Guidance Scale", visible=False)

             generate_button = gr.Button("Generate Video", variant="primary")

         with gr.Column():
             video_output = gr.Video(label="Generated Video", autoplay=True, interactive=False)
+            with gr.Accordion("Mode Information", open=True):
+                gr.Markdown("""
+                **Processing Modes:**
+                - **Ref2V**: Uses uploaded images as style references for video generation. All frames are generated based on the reference images.
+                - **FLF2V**: First-Last Frame mode - uses first and last images as keyframes and generates the frames in between (requires exactly 2 images)
+                - **Random2V**: Places uploaded images at specific frames in the video and generates the rest. Images are distributed evenly across the video duration.
+
+                **Note**: VACE pipeline supports advanced conditioning with masks and reference images for more control over generation.
+                """)

     # Update prompt when mode changes
     mode_radio.change(
@@ -319,13 +329,6 @@ with gr.Blocks() as demo:
     ]

     generate_button.click(fn=generate_video, inputs=ui_inputs, outputs=[video_output, seed_input])
-    gr.Examples(
-        examples=[
-            [["reachy.png", "sunglasses.jpg", "gpu_hat.png"], "Ref2V", "the cute robot is wearing the sunglasses and the hat that reads 'GPU poor', and moves around playfully", 480, 832],
-            [["flf2v_input_first_frame.png", "flf2v_input_last_frame.png"], "FLF2V", "CG animation style, a small blue bird takes off from the ground, flapping its wings. The bird's feathers are delicate, with a unique pattern on its chest. The background shows a blue sky with white clouds under bright sunshine. The camera follows the bird upward, capturing its flight and the vastness of the sky from a close-up, low-angle perspective.", 512, 512],
-        ],
-        inputs=[gallery_component, mode_radio, prompt_input, height_input, width_input], outputs=[video_output, seed_input], fn=generate_video, cache_examples="lazy"
-    )

 if __name__ == "__main__":
     demo.queue().launch(mcp_server=True)
|