Optimize memory
app.py CHANGED
@@ -468,6 +468,8 @@ def worker(input_image, end_image, image_position, end_stillness, prompts, n_pro
         return [start_latent, image_encoder_last_hidden_state]

     [start_latent, image_encoder_last_hidden_state] = get_start_latent(input_image, height, width, vae, gpu, image_encoder, high_vram)
+    input_image = None
+    end_image = None

     # Dtype

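The added input_image = None and end_image = None lines (and the matching input_video = None / end_frame = None further down) all apply the same optimization: once an input has been encoded into latents, dropping the last reference to the original image lets Python free it instead of keeping both the image and its latent alive for the rest of the worker. A minimal sketch of the pattern follows, with illustrative names and sizes and an explicit gc.collect() that is not part of this diff:

import gc
import numpy as np

def encode(frame: np.ndarray) -> np.ndarray:
    # Stand-in for the real VAE/CLIP encoding step in app.py.
    return frame.mean(axis=(0, 1), keepdims=True)

def run_once() -> np.ndarray:
    frame = np.zeros((1080, 1920, 3), dtype=np.float32)  # ~24 MB input image
    latent = encode(frame)
    frame = None   # drop the only remaining reference, as the diff does
    gc.collect()   # optional: reclaim the buffer immediately rather than waiting
    return latent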
@@ -752,6 +754,8 @@ def worker_start_end(input_image, end_image, image_position, end_stillness, prom
         return [start_latent, end_latent, image_encoder_last_hidden_state]

     [start_latent, end_latent, image_encoder_last_hidden_state] = get_start_latent(input_image, has_end_image, end_image, height, width, vae, gpu, image_encoder, high_vram)
+    input_image = None
+    end_image = None

     # Dtype
     image_encoder_last_hidden_state = image_encoder_last_hidden_state.to(transformer.dtype)
@@ -766,7 +770,6 @@ def worker_start_end(input_image, end_image, image_position, end_stillness, prom
     start_latent = start_latent.to(history_latents)
     if has_end_image:
         end_latent = end_latent.to(history_latents)
-        end_latent = end_latent.expand(-1, -1, 1 + end_stillness, -1, -1)

     history_pixels = None
     total_generated_latent_frames = 0
@@ -859,7 +862,7 @@ def worker_start_end(input_image, end_image, image_position, end_stillness, prom

     # Use end image latent for the first section if provided
     if has_end_image and is_first_section:
-        clean_latents_post = end_latent
+        clean_latents_post = end_latent.expand(-1, -1, 1 + end_stillness, -1, -1)

     clean_latents = torch.cat([start_latent, clean_latents_post], dim=2)

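The previous two hunks move the expand() call from the setup block (removed at old line 769) to the one place where clean_latents_post is built. The sketch below, with illustrative tensor shapes rather than the real ones, shows why this costs nothing extra: torch.Tensor.expand returns a view over the same storage, so the stretched end latent only turns into real memory at the torch.cat that consumes it.

import torch

end_latent = torch.randn(1, 16, 1, 64, 64)    # illustrative latent shape, not app.py's
start_latent = torch.randn(1, 16, 1, 64, 64)
end_stillness = 3

# expand() creates a view, not a copy: the "stretched" latent shares storage.
clean_latents_post = end_latent.expand(-1, -1, 1 + end_stillness, -1, -1)
assert clean_latents_post.shape[2] == 1 + end_stillness
assert clean_latents_post.data_ptr() == end_latent.data_ptr()  # same memory

# Only this concatenation allocates new memory, and only at the point of use.
clean_latents = torch.cat([start_latent, clean_latents_post], dim=2)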
@@ -946,6 +949,7 @@ def worker_video(input_video, end_frame, end_stillness, prompts, n_prompt, seed,

     # 20250506 pftq: Encode video
     start_latent, input_image_np, video_latents, fps, height, width = video_encode(input_video, resolution, no_resize, vae, vae_batch_size=vae_batch, device=gpu)
+    input_video = None
     start_latent = start_latent.to(dtype=torch.float32, device=cpu)
     video_latents = video_latents.cpu()

@@ -994,6 +998,7 @@ def worker_video(input_video, end_frame, end_stillness, prompts, n_prompt, seed,
             end_frame, target_width=width, target_height=height, vae=vae,
             image_encoder=image_encoder, feature_extractor=feature_extractor, device=gpu
         )[:2]
+        end_frame = None
         end_latent = end_latent.to(dtype=torch.float32, device=cpu)
     else:
         end_latent = end_clip_embedding = None
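Next to the added end_frame = None, the surrounding context keeps the freshly encoded latent in float32 on the CPU and only moves it back to the GPU when it is needed. A minimal sketch of that offload pattern, assuming a CUDA device and illustrative shapes:

import torch

cpu = torch.device("cpu")
gpu = torch.device("cuda") if torch.cuda.is_available() else cpu

end_latent = torch.randn(1, 16, 1, 64, 64, device=gpu)       # produced on the GPU
end_latent = end_latent.to(dtype=torch.float32, device=cpu)   # park it in system RAM
# ... later, just before it is actually used in sampling:
end_latent_gpu = end_latent.to(gpu, non_blocking=True)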
@@ -1775,7 +1780,7 @@ with block:
         "./img_examples/Example5.png", # input_image
         "./img_examples/Example6.png", # end_image
         0, # image_position
-
+        1, # end_stillness
         "A woman jumps out of the train and arrives on the ground, viewed from the outside, photorealistic, realistic, amateur photography, midday, insanely detailed, 8k",
         "start_end", # generation_mode
         "Missing arm, long hand, unrealistic position, impossible contortion, visible bone, muscle contraction, poorly framed, blurred, blurry, over-smooth, jumpcut, crossfader, crossfading", # n_prompt
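The last hunk adds a 1 for end_stillness to one of the Gradio example rows. Each row passed to gr.Examples has to supply one value per component in its inputs list, in the same order, so introducing an end_stillness control means every example row needs a matching entry. A hedged sketch with made-up components, not the actual ones in app.py:

import gradio as gr

with gr.Blocks() as demo:
    # Illustrative inputs only; app.py's real component list is much longer.
    image_position = gr.Slider(0, 100, value=0, label="image_position")
    end_stillness = gr.Slider(0, 10, value=1, label="end_stillness")
    prompt = gr.Textbox(label="prompt")
    gr.Examples(
        examples=[[0, 1, "A woman jumps out of the train"]],
        inputs=[image_position, end_stillness, prompt],
    )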