linoyts (HF Staff) committed
Commit c1ef467 (verified)
Parent(s): aee98ed
Files changed (1): app.py (+66 -40)

app.py CHANGED
@@ -111,19 +111,20 @@ def load_control_lora(control_type, current_lora_state):
         print(f"Error loading {control_type} LoRA: {e}")
         raise

-def process_video_for_canny(video):
+def process_video_for_canny(video, width, height):
     """
     Process video for canny control.
     """
     print("Processing video for canny control...")
     canny_video = []
+    detect_resolution = video[0].size()
+
     for frame in video:
         # TODO: change resolution logic
-        canny_video.append(canny_processor(frame, low_threshold=50, high_threshold=200, detect_resolution=1024, image_resolution=1024))
+        canny_video.append(canny_processor(frame, low_threshold=50, high_threshold=200, detect_resolution=detect_resolution, image_resolution=(width, height)))

     return canny_video

-
 @spaces.GPU()
 def process_video_for_pose(video):
     """
@@ -169,31 +170,54 @@ def process_video_for_pose(video):

     return pose_video

-def process_video_for_control(reference_video, control_type):
+def process_input_video(reference_video, width, height):
+    """
+    Process the input video for canny edges and return both processed video and preview.
+    """
+    if reference_video is None:
+        return None
+
+    try:
+        # Load video into a list of PIL images
+        video = load_video(reference_video)
+
+        # Process video for canny edges
+        processed_video = process_video_for_canny(video, width, height)
+
+        # Create a preview video file for display
+        fps = 24
+        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_file:
+            preview_path = tmp_file.name
+            export_to_video(processed_video, preview_path, fps=fps)
+
+        return preview_path
+
+    except Exception as e:
+        print(f"Error processing input video: {e}")
+        return None
+
+def process_video_for_control(reference_video, control_type, width, height):
+    """Process video based on the selected control type - now only used for non-canny types"""
     video = load_video(reference_video)
-    """Process video based on the selected control type"""
+
     if control_type == "canny":
-        processed_video = process_video_for_canny(video)
+        # This should not be called for canny since it's pre-processed
+        processed_video = process_video_for_canny(video, width, height)
     elif control_type == "depth":
         processed_video = process_video_for_depth(video)
     elif control_type == "pose":
         processed_video = process_video_for_pose(video)
     else:
-        processed_video = reference_video
-    # fps = 24
-    # with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp2_file:
-    # output2_path = tmp2_file.name
-    # export_to_video(processed_video, output2_path, fps=fps)
-    # return output2_path
+        processed_video = video
+
     return processed_video

-
 @spaces.GPU(duration=160)
 def generate_video(
     reference_video,
+    control_video, # New parameter for pre-processed video
     prompt,
     control_type,
-    # current_lora_state,
     duration=3.0,
     negative_prompt="worst quality, inconsistent motion, blurry, jittery, distorted",
     height=768,
@@ -225,19 +249,18 @@ def generate_video(
     temporal_compression = pipeline.vae_temporal_compression_ratio
     num_frames = ((num_frames - 1) // temporal_compression) * temporal_compression + 1

-
-
-    # Load the appropriate control LoRA and update state
-    # updated_lora_state = load_control_lora(control_type, current_lora_state)
-
-    # # Loads video into a list of pil images
-    # video = load_video(reference_video)
-    # progress(0.1, desc="Processing video for control...")
+    progress(0.1, desc="Preparing processed video...")

-    # Process video based on control type
-    processed_video = process_video_for_control(reference_video, control_type)
+    # Use pre-processed video frames if available (for canny), otherwise process on-demand
+    if control_video is not None:
+        # Use the pre-processed canny frames
+        processed_video = load_video(control_video)
+    else:
+        # Fallback to processing on-demand for other control types
+        processed_video = process_video_for_control(reference_video, control_type, width, height)

-    processed_video = read_video(processed_video) # turns to tensor
+    # Convert to tensor
+    processed_video = read_video(processed_video)

     progress(0.2, desc="Preparing generation parameters...")

@@ -262,9 +285,7 @@ def generate_video(
         num_inference_steps=num_inference_steps,
         decode_timestep=0.05,
         decode_noise_scale=0.025,
-        # image_cond_noise_scale=image_cond_noise_scale,
         guidance_scale=guidance_scale,
-        # guidance_rescale=guidance_rescale,
         generator=torch.Generator().manual_seed(seed),
         output_type="latent",
     ).frames
@@ -294,7 +315,6 @@ def generate_video(
         guidance_scale=guidance_scale,
         decode_noise_scale = 0.025,
         image_cond_noise_scale=0.025,
-        #guidance_rescale=guidance_rescale,
         generator=torch.Generator(device="cuda").manual_seed(seed),
         output_type="pil",
     ).frames[0]
@@ -325,8 +345,8 @@ with gr.Blocks() as demo:
        """
    )

-    # State variable for tracking current LoRA
-    current_lora_state = gr.State(value=None)
+    # State variables
+    #current_lora_state = gr.State(value=None)

    with gr.Row():
        with gr.Column(scale=1):
@@ -402,8 +422,6 @@ with gr.Blocks() as demo:
                    value=1.0
                )

-
-
                with gr.Row():
                    randomize_seed = gr.Checkbox(
                        label="Randomize Seed",
@@ -425,22 +443,21 @@ with gr.Blocks() as demo:
                height=400
            )
            control_video = gr.Video(
-                label="Control Video",
+                label="Processed Control Video (Canny Edges)",
                height=400,
-                visible=False
+                visible=True
            )

            gr.Examples(
                examples=[
-                    ["video_assets/vid_1.mp4", "A sleek cybernetic wolf sprinting through a neon-lit futuristic cityscape, its metallic form gleaming with electric blue circuits. The wolf's powerful stride carries it down rain-slicked streets between towering skyscrapers, while holographic advertisements cast colorful reflections on its chrome surface. Sparks of digital energy trail behind the creature as it moves with fluid mechanical precision through the urban maze, creating streaks of light in the misty night air.", "canny", 3, "worst quality, inconsistent motion, blurry, jittery, distorted", 768, 1152, 7, 1, 0, True],
-                    ["video_assets/vid_2.mp4", "A translucent ghost floating in a moonlit cemetery, raising a glowing spectral lantern that casts eerie light through the darkness. The ethereal figure's wispy form shimmers as it lifts the phantom light above its head, illuminating weathered tombstones and gnarled trees. Pale mist swirls around the ghost as the lantern pulses with otherworldly energy, creating haunting shadows that dance across the graveyard in the dead of night.", "canny", 2.5, "worst quality, inconsistent motion, blurry, jittery, distorted", 768, 1152, 7, 1, 0, True],
-                    ["video_assets/vid_3.mp4", "A sleek android assassin poised in a combat stance atop a futuristic skyscraper, arms positioned for perfect balance. The chrome-plated figure gleams under neon city lights as holographic data streams flow around its metallic form. Rain droplets bead on its polished surface while the sprawling cyberpunk metropolis stretches endlessly below. Electric circuits pulse beneath the android's transparent panels as it maintains its precise, calculated pose against the backdrop of flying vehicles and towering digital billboards.", "canny", 3, "worst quality, inconsistent motion, blurry, jittery, distorted", 768, 1152, 7, 1, 0, True],
-                    ["video_assets/vid_4.mp4", "Luminescent video game characters with glowing outlines and neon-bright details wandering through a digital landscape. Their bodies emit soft, colorful light that pulses gently as they move, creating trails of radiance behind them. The characters have a futuristic, stylized appearance with smooth surfaces that reflect their inner glow. They navigate naturally through their environment, their movements fluid and purposeful, while their bioluminescent features cast dynamic shadows and illuminate the surrounding area. The scene has a cyberpunk aesthetic with the characters' radiant presence serving as the primary light source in an otherwise darkened digital world.", "canny", 2.5, "worst quality, inconsistent motion, blurry, jittery, distorted", 768, 1152, 7, 1, 0, True],
+                    ["video_assets/vid_1.mp4", None, "A sleek cybernetic wolf sprinting through a neon-lit futuristic cityscape, its metallic form gleaming with electric blue circuits. The wolf's powerful stride carries it down rain-slicked streets between towering skyscrapers, while holographic advertisements cast colorful reflections on its chrome surface. Sparks of digital energy trail behind the creature as it moves with fluid mechanical precision through the urban maze, creating streaks of light in the misty night air.", "canny", 3, "worst quality, inconsistent motion, blurry, jittery, distorted", 768, 1152, 7, 1, 0, True],
+                    ["video_assets/vid_2.mp4", None, "A translucent ghost floating in a moonlit cemetery, raising a glowing spectral lantern that casts eerie light through the darkness. The ethereal figure's wispy form shimmers as it lifts the phantom light above its head, illuminating weathered tombstones and gnarled trees. Pale mist swirls around the ghost as the lantern pulses with otherworldly energy, creating haunting shadows that dance across the graveyard in the dead of night.", "canny", 2.5, "worst quality, inconsistent motion, blurry, jittery, distorted", 768, 1152, 7, 1, 0, True],
+                    ["video_assets/vid_3.mp4", None,"A sleek android assassin poised in a combat stance atop a futuristic skyscraper, arms positioned for perfect balance. The chrome-plated figure gleams under neon city lights as holographic data streams flow around its metallic form. Rain droplets bead on its polished surface while the sprawling cyberpunk metropolis stretches endlessly below. Electric circuits pulse beneath the android's transparent panels as it maintains its precise, calculated pose against the backdrop of flying vehicles and towering digital billboards.", "canny", 3, "worst quality, inconsistent motion, blurry, jittery, distorted", 768, 1152, 7, 1, 0, True],
+                    ["video_assets/vid_4.mp4", None, "Luminescent video game characters with glowing outlines and neon-bright details wandering through a digital landscape. Their bodies emit soft, colorful light that pulses gently as they move, creating trails of radiance behind them. The characters have a futuristic, stylized appearance with smooth surfaces that reflect their inner glow. They navigate naturally through their environment, their movements fluid and purposeful, while their bioluminescent features cast dynamic shadows and illuminate the surrounding area. The scene has a cyberpunk aesthetic with the characters' radiant presence serving as the primary light source in an otherwise darkened digital world.", "canny", 2.5, "worst quality, inconsistent motion, blurry, jittery, distorted", 768, 1152, 7, 1, 0, True],
                ],
                inputs=[reference_video,
                    prompt,
                    control_type,
-                    # current_lora_state,
                    duration,
                    negative_prompt,
                    height,
@@ -454,13 +471,22 @@ with gr.Blocks() as demo:
            )

    # Event handlers
+
+    # Auto-process video when uploaded
+    reference_video.upload(
+        fn=process_input_video,
+        inputs=[reference_video, width, height],
+        outputs=[control_video],
+        show_progress=True
+    )
+
    generate_btn.click(
        fn=generate_video,
        inputs=[
            reference_video,
+            control_video, # Use pre-processed video
            prompt,
            control_type,
-            # current_lora_state,
            duration,
            negative_prompt,
            height,
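For context, the upload-time canny preprocessing path added in this commit can be exercised on its own. The sketch below is illustrative rather than lifted from app.py: it assumes canny_processor is a controlnet_aux CannyDetector (consistent with the keyword arguments used in process_video_for_canny), reuses the load_video and export_to_video helpers from diffusers.utils, and the helper name preview_canny plus the integer resolution handling are assumptions of the sketch.

# Minimal sketch of the upload-time canny preprocessing path.
# Assumes canny_processor is a controlnet_aux CannyDetector; preview_canny is a
# hypothetical helper name, not part of app.py.
import tempfile

from controlnet_aux import CannyDetector
from diffusers.utils import export_to_video, load_video

canny_processor = CannyDetector()

def preview_canny(video_path, width, height, fps=24):
    """Turn a reference clip into a canny-edge preview clip and return its path."""
    frames = load_video(video_path)  # list of PIL images
    edge_frames = [
        canny_processor(
            frame,
            low_threshold=50,
            high_threshold=200,
            detect_resolution=min(frame.size),    # frame.size is a (width, height) tuple
            image_resolution=min(width, height),  # a single int keeps the detector's resizing simple
        )
        for frame in frames
    ]
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_file:
        preview_path = tmp_file.name
    export_to_video(edge_frames, preview_path, fps=fps)
    return preview_path

A Gradio upload handler can return the preview path directly to a gr.Video output, which is how the new "Processed Control Video" pane gets populated in the diff above.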
 
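The frame-count rounding kept in generate_video snaps num_frames onto the "k * ratio + 1" grid that the video VAE's temporal compression expects. A small worked example, with an assumed compression ratio of 8 (the app reads the real value from pipeline.vae_temporal_compression_ratio):

# Worked example of the num_frames rounding; temporal_compression = 8 is assumed here.
temporal_compression = 8
num_frames = 72  # e.g. a 3 s clip at 24 fps
num_frames = ((num_frames - 1) // temporal_compression) * temporal_compression + 1
print(num_frames)  # 65, i.e. 8 compressed steps * 8 + 1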
 
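The event wiring added in the last hunk follows a standard Gradio pattern: the reference video's upload event feeds a preprocessing function whose output lands in the control-video component, and the generate button then receives both components as inputs. Below is a minimal, self-contained sketch of that pattern; the placeholder functions, slider ranges, and component values are illustrative and not taken from app.py.

import gradio as gr

def preprocess(video_path, width, height):
    # Placeholder: app.py returns the path of the canny-edge preview clip here.
    return video_path

def generate(reference_path, control_path, prompt):
    # Placeholder: app.py runs the diffusion pipeline on the control frames here.
    return control_path or reference_path

with gr.Blocks() as demo:
    with gr.Row():
        reference_video = gr.Video(label="Reference Video", height=400)
        control_video = gr.Video(label="Processed Control Video (Canny Edges)", height=400)
    prompt = gr.Textbox(label="Prompt")
    width = gr.Slider(256, 1536, value=1152, step=32, label="Width")
    height = gr.Slider(256, 1536, value=768, step=32, label="Height")
    generate_btn = gr.Button("Generate")
    output_video = gr.Video(label="Generated Video")

    # Auto-process the reference clip as soon as it is uploaded.
    reference_video.upload(
        fn=preprocess,
        inputs=[reference_video, width, height],
        outputs=[control_video],
    )

    # The click handler also receives the pre-processed control video.
    generate_btn.click(
        fn=generate,
        inputs=[reference_video, control_video, prompt],
        outputs=[output_video],
    )

if __name__ == "__main__":
    demo.launch()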