Use the parameter End stillness
app.py
CHANGED
@@ -919,7 +919,7 @@ def worker_start_end(input_image, end_image, image_position, prompts, n_prompt,
 
 # 20250506 pftq: Modified worker to accept video input and clean frame count
 @torch.no_grad()
-def worker_video(input_video, end_frame, prompts, n_prompt, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch):
+def worker_video(input_video, end_frame, end_stillness, prompts, n_prompt, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch):
     def encode_prompt(prompt, n_prompt):
         llama_vec, clip_l_pooler = encode_prompt_conds(prompt, text_encoder, text_encoder_2, tokenizer, tokenizer_2)
 
@@ -994,7 +994,7 @@ def worker_video(input_video, end_frame, prompts, n_prompt, seed, batch, resolut
             image_encoder=image_encoder, feature_extractor=feature_extractor, device=gpu
         )[:2]
         end_latent = end_latent.to(dtype=torch.float32, device=cpu)
-        end_latent = end_latent.expand(-1, -1, 1, -1, -1)
+        end_latent = end_latent.expand(-1, -1, 1 + end_stillness, -1, -1)
     else:
         end_latent = end_clip_embedding = None
 
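The change at line 997 is what actually implements the new parameter: the end-frame latent arrives with a singleton time dimension, and expand() broadcasts that dimension so the final frame is held for 1 + end_stillness latent frames. A minimal sketch of the mechanics, using made-up tensor sizes (the app's real latents come from the VAE and are not shown here):

    import torch

    end_stillness = 3  # hypothetical UI value; 0 reproduces the old single-frame behavior
    # Assumed layout [batch, channels, time, height, width] with time == 1,
    # which is what expand(-1, -1, 1 + end_stillness, -1, -1) requires.
    end_latent = torch.randn(1, 16, 1, 8, 8)

    held = end_latent.expand(-1, -1, 1 + end_stillness, -1, -1)
    print(held.shape)  # torch.Size([1, 16, 4, 8, 8])
    # expand() returns broadcasted views, so every time step is the same frame:
    print(torch.equal(held[:, :, 0], held[:, :, -1]))  # True

Because expand() creates views rather than copies, holding the end frame longer costs no extra memory at this point.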
@@ -1029,7 +1029,7 @@ def worker_video(input_video, end_frame, prompts, n_prompt, seed, batch, resolut
     def callback(d):
         return
 
-    def compute_latent(history_latents, latent_window_size, latent_padding_size, num_clean_frames, start_latent, end_latent):
+    def compute_latent(history_latents, latent_window_size, latent_padding_size, num_clean_frames, start_latent, end_latent, end_stillness):
         # 20250506 pftq: Use user-specified number of context frames, matching original allocation for num_clean_frames=2
         available_frames = history_latents.shape[2]  # Number of latent frames
         max_pixel_frames = min(latent_window_size * 4 - 3, available_frames * 4)  # Cap at available pixel frames
@@ -1044,9 +1044,9 @@ def worker_video(input_video, end_frame, prompts, n_prompt, seed, batch, resolut
         total_context_frames = min(total_context_frames, available_frames)  # 20250507 pftq: Edge case for <=1 sec videos
 
         post_frames = 100  # Single frame for end_latent, otherwise padding causes still image
-        indices = torch.arange(0, 1 + num_4x_frames + num_2x_frames + effective_clean_frames + adjusted_latent_frames + ((latent_padding_size + 1) if end_latent is not None else 0)).unsqueeze(0)  # 20250507 pftq: latent_window_size to adjusted_latent_frames for edge case for <=1 sec videos
+        indices = torch.arange(0, 1 + num_4x_frames + num_2x_frames + effective_clean_frames + adjusted_latent_frames + ((latent_padding_size + 1 + end_stillness) if end_latent is not None else 0)).unsqueeze(0)  # 20250507 pftq: latent_window_size to adjusted_latent_frames for edge case for <=1 sec videos
         clean_latent_indices_start, clean_latent_4x_indices, clean_latent_2x_indices, clean_latent_1x_indices, latent_indices, blank_indices, clean_latent_indices_post = indices.split(
-            [1, num_4x_frames, num_2x_frames, effective_clean_frames, adjusted_latent_frames, latent_padding_size if end_latent is not None else 0, 1 if end_latent is not None else 0], dim=1  # 20250507 pftq: latent_window_size to adjusted_latent_frames for edge case for <=1 sec videos
+            [1, num_4x_frames, num_2x_frames, effective_clean_frames, adjusted_latent_frames, latent_padding_size if end_latent is not None else 0, (1 + end_stillness) if end_latent is not None else 0], dim=1  # 20250507 pftq: latent_window_size to adjusted_latent_frames for edge case for <=1 sec videos
         )
         clean_latent_indices = torch.cat([clean_latent_indices_start, clean_latent_1x_indices, clean_latent_indices_post], dim=1)
 
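Both changed lines in this hunk preserve one invariant: the split() sizes must sum to the length of the arange() ruler, so the total grows by the same end_stillness that the post segment gains. A toy reproduction of the indexing, with hypothetical frame counts standing in for the app's computed values:

    import torch

    # Hypothetical sizes; the app computes these from the video and settings.
    num_4x_frames, num_2x_frames, effective_clean_frames = 2, 2, 2
    adjusted_latent_frames, latent_padding_size, end_stillness = 9, 3, 2

    sizes = [1, num_4x_frames, num_2x_frames, effective_clean_frames,
             adjusted_latent_frames, latent_padding_size, 1 + end_stillness]
    indices = torch.arange(0, sum(sizes)).unsqueeze(0)  # shape [1, 22]

    # split() carves the ruler into contiguous named segments, as on line 1048;
    # the post segment now spans 1 + end_stillness positions instead of 1.
    start, i4x, i2x, i1x, latent, blank, post = indices.split(sizes, dim=1)
    print(post)  # tensor([[19, 20, 21]]): the held end-frame slots

    # Line 1051 then concatenates the clean segments, so the sampler treats the
    # extra end-frame positions as clean (already-known) latents:
    clean_latent_indices = torch.cat([start, i1x, post], dim=1)
    print(clean_latent_indices.shape)  # torch.Size([1, 6])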
@@ -1137,7 +1137,7 @@ def worker_video(input_video, end_frame, prompts, n_prompt, seed, batch, resolut
         else:
             transformer.initialize_teacache(enable_teacache=False)
 
-        [max_frames, clean_latents, clean_latents_2x, clean_latents_4x, latent_indices, clean_latents, clean_latent_indices, clean_latent_2x_indices, clean_latent_4x_indices] = compute_latent(history_latents, latent_window_size, latent_padding_size, num_clean_frames, start_latent, end_latent)
+        [max_frames, clean_latents, clean_latents_2x, clean_latents_4x, latent_indices, clean_latents, clean_latent_indices, clean_latent_2x_indices, clean_latent_4x_indices] = compute_latent(history_latents, latent_window_size, latent_padding_size, num_clean_frames, start_latent, end_latent, end_stillness)
 
         generated_latents = sample_hunyuan(
             transformer=transformer,
@@ -1348,7 +1348,7 @@ def process_video_on_gpu(input_video, end_frame, end_stillness, prompts, n_promp
     stream = AsyncStream()
 
     # 20250506 pftq: Pass num_clean_frames, vae_batch, etc
-    async_run(worker_video, input_video, end_frame, prompts, n_prompt, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch)
+    async_run(worker_video, input_video, end_frame, end_stillness, prompts, n_prompt, seed, batch, resolution, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, enable_preview, use_teacache, no_resize, mp4_crf, num_clean_frames, vae_batch)
 
     output_filename = None
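async_run forwards its arguments positionally, so the dispatch here and worker_video's signature must insert end_stillness in the same slot; a mismatch would shift every later argument by one. A sketch of the hazard, with a stand-in async_run (the app's real helper is assumed to behave like this, running the worker on a background thread):

    import threading

    def async_run(fn, *args):
        # Stand-in for the app's helper: forward *args to the worker on a thread.
        t = threading.Thread(target=fn, args=args)
        t.start()
        return t

    def worker_video(input_video, end_frame, end_stillness, *rest):
        print(f"end_stillness={end_stillness}, {len(rest)} trailing args")

    # end_stillness must be the third positional argument, matching the signature:
    async_run(worker_video, "clip.mp4", "last.png", 2, "prompt", "negative").join()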