Fabrice-TIERCELIN committed on
Commit
1de0827
·
verified ·
1 Parent(s): 223e85a

Optimize memory

Browse files
Files changed (1) hide show
  1. app.py +8 -3
app.py CHANGED
@@ -468,6 +468,8 @@ def worker(input_image, end_image, image_position, end_stillness, prompts, n_pro
468
  return [start_latent, image_encoder_last_hidden_state]
469
 
470
  [start_latent, image_encoder_last_hidden_state] = get_start_latent(input_image, height, width, vae, gpu, image_encoder, high_vram)
 
 
471
 
472
  # Dtype
473
 
@@ -752,6 +754,8 @@ def worker_start_end(input_image, end_image, image_position, end_stillness, prom
752
  return [start_latent, end_latent, image_encoder_last_hidden_state]
753
 
754
  [start_latent, end_latent, image_encoder_last_hidden_state] = get_start_latent(input_image, has_end_image, end_image, height, width, vae, gpu, image_encoder, high_vram)
 
 
755
 
756
  # Dtype
757
  image_encoder_last_hidden_state = image_encoder_last_hidden_state.to(transformer.dtype)
@@ -766,7 +770,6 @@ def worker_start_end(input_image, end_image, image_position, end_stillness, prom
766
  start_latent = start_latent.to(history_latents)
767
  if has_end_image:
768
  end_latent = end_latent.to(history_latents)
769
- end_latent = end_latent.expand(-1, -1, 1 + end_stillness, -1, -1)
770
 
771
  history_pixels = None
772
  total_generated_latent_frames = 0
@@ -859,7 +862,7 @@ def worker_start_end(input_image, end_image, image_position, end_stillness, prom
859
 
860
  # Use end image latent for the first section if provided
861
  if has_end_image and is_first_section:
862
- clean_latents_post = end_latent
863
 
864
  clean_latents = torch.cat([start_latent, clean_latents_post], dim=2)
865
 
@@ -946,6 +949,7 @@ def worker_video(input_video, end_frame, end_stillness, prompts, n_prompt, seed,
946
 
947
  # 20250506 pftq: Encode video
948
  start_latent, input_image_np, video_latents, fps, height, width = video_encode(input_video, resolution, no_resize, vae, vae_batch_size=vae_batch, device=gpu)
 
949
  start_latent = start_latent.to(dtype=torch.float32, device=cpu)
950
  video_latents = video_latents.cpu()
951
 
@@ -994,6 +998,7 @@ def worker_video(input_video, end_frame, end_stillness, prompts, n_prompt, seed,
994
  end_frame, target_width=width, target_height=height, vae=vae,
995
  image_encoder=image_encoder, feature_extractor=feature_extractor, device=gpu
996
  )[:2]
 
997
  end_latent = end_latent.to(dtype=torch.float32, device=cpu)
998
  else:
999
  end_latent = end_clip_embedding = None
@@ -1775,7 +1780,7 @@ with block:
1775
  "./img_examples/Example5.png", # input_image
1776
  "./img_examples/Example6.png", # end_image
1777
  0, # image_position
1778
- 0, # end_stillness
1779
  "A woman jumps out of the train and arrives on the ground, viewed from the outside, photorealistic, realistic, amateur photography, midday, insanely detailed, 8k",
1780
  "start_end", # generation_mode
1781
  "Missing arm, long hand, unrealistic position, impossible contortion, visible bone, muscle contraction, poorly framed, blurred, blurry, over-smooth, jumpcut, crossfader, crossfading", # n_prompt
 
468
  return [start_latent, image_encoder_last_hidden_state]
469
 
470
  [start_latent, image_encoder_last_hidden_state] = get_start_latent(input_image, height, width, vae, gpu, image_encoder, high_vram)
471
+ input_image = None
472
+ end_image = None
473
 
474
  # Dtype
475
 
 
754
  return [start_latent, end_latent, image_encoder_last_hidden_state]
755
 
756
  [start_latent, end_latent, image_encoder_last_hidden_state] = get_start_latent(input_image, has_end_image, end_image, height, width, vae, gpu, image_encoder, high_vram)
757
+ input_image = None
758
+ end_image = None
759
 
760
  # Dtype
761
  image_encoder_last_hidden_state = image_encoder_last_hidden_state.to(transformer.dtype)
 
770
  start_latent = start_latent.to(history_latents)
771
  if has_end_image:
772
  end_latent = end_latent.to(history_latents)
 
773
 
774
  history_pixels = None
775
  total_generated_latent_frames = 0
 
862
 
863
  # Use end image latent for the first section if provided
864
  if has_end_image and is_first_section:
865
+ clean_latents_post = end_latent.expand(-1, -1, 1 + end_stillness, -1, -1)
866
 
867
  clean_latents = torch.cat([start_latent, clean_latents_post], dim=2)
868
 
 
949
 
950
  # 20250506 pftq: Encode video
951
  start_latent, input_image_np, video_latents, fps, height, width = video_encode(input_video, resolution, no_resize, vae, vae_batch_size=vae_batch, device=gpu)
952
+ input_video = None
953
  start_latent = start_latent.to(dtype=torch.float32, device=cpu)
954
  video_latents = video_latents.cpu()
955
 
 
998
  end_frame, target_width=width, target_height=height, vae=vae,
999
  image_encoder=image_encoder, feature_extractor=feature_extractor, device=gpu
1000
  )[:2]
1001
+ end_frame = None
1002
  end_latent = end_latent.to(dtype=torch.float32, device=cpu)
1003
  else:
1004
  end_latent = end_clip_embedding = None
 
1780
  "./img_examples/Example5.png", # input_image
1781
  "./img_examples/Example6.png", # end_image
1782
  0, # image_position
1783
+ 1, # end_stillness
1784
  "A woman jumps out of the train and arrives on the ground, viewed from the outside, photorealistic, realistic, amateur photography, midday, insanely detailed, 8k",
1785
  "start_end", # generation_mode
1786
  "Missing arm, long hand, unrealistic position, impossible contortion, visible bone, muscle contraction, poorly framed, blurred, blurry, over-smooth, jumpcut, crossfader, crossfading", # n_prompt