openfree commited on
Commit
30c6c6a
·
verified ·
1 Parent(s): da27b1b

Upload k_diffusion_hunyuan.py

Browse files
diffusers_helper/pipelines/k_diffusion_hunyuan.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import math
3
+
4
+ from diffusers_helper.k_diffusion.uni_pc_fm import sample_unipc
5
+ from diffusers_helper.k_diffusion.wrapper import fm_wrapper
6
+ from diffusers_helper.utils import repeat_to_batch_size
7
+
8
+
9
+ def flux_time_shift(t, mu=1.15, sigma=1.0):
10
+ return math.exp(mu) / (math.exp(mu) + (1 / t - 1) ** sigma)
11
+
12
+
13
+ def calculate_flux_mu(context_length, x1=256, y1=0.5, x2=4096, y2=1.15, exp_max=7.0):
14
+ k = (y2 - y1) / (x2 - x1)
15
+ b = y1 - k * x1
16
+ mu = k * context_length + b
17
+ mu = min(mu, math.log(exp_max))
18
+ return mu
19
+
20
+
21
+ def get_flux_sigmas_from_mu(n, mu):
22
+ sigmas = torch.linspace(1, 0, steps=n + 1)
23
+ sigmas = flux_time_shift(sigmas, mu=mu)
24
+ return sigmas
25
+
26
+
27
+ @torch.inference_mode()
28
+ def sample_hunyuan(
29
+ transformer,
30
+ sampler='unipc',
31
+ initial_latent=None,
32
+ concat_latent=None,
33
+ strength=1.0,
34
+ width=512,
35
+ height=512,
36
+ frames=16,
37
+ real_guidance_scale=1.0,
38
+ distilled_guidance_scale=6.0,
39
+ guidance_rescale=0.0,
40
+ shift=None,
41
+ num_inference_steps=25,
42
+ batch_size=None,
43
+ generator=None,
44
+ prompt_embeds=None,
45
+ prompt_embeds_mask=None,
46
+ prompt_poolers=None,
47
+ negative_prompt_embeds=None,
48
+ negative_prompt_embeds_mask=None,
49
+ negative_prompt_poolers=None,
50
+ dtype=torch.bfloat16,
51
+ device=None,
52
+ negative_kwargs=None,
53
+ callback=None,
54
+ **kwargs,
55
+ ):
56
+ device = device or transformer.device
57
+
58
+ if batch_size is None:
59
+ batch_size = int(prompt_embeds.shape[0])
60
+
61
+ latents = torch.randn((batch_size, 16, (frames + 3) // 4, height // 8, width // 8), generator=generator, device=generator.device).to(device=device, dtype=torch.float32)
62
+
63
+ B, C, T, H, W = latents.shape
64
+ seq_length = T * H * W // 4
65
+
66
+ if shift is None:
67
+ mu = calculate_flux_mu(seq_length, exp_max=7.0)
68
+ else:
69
+ mu = math.log(shift)
70
+
71
+ sigmas = get_flux_sigmas_from_mu(num_inference_steps, mu).to(device)
72
+
73
+ k_model = fm_wrapper(transformer)
74
+
75
+ if initial_latent is not None:
76
+ sigmas = sigmas * strength
77
+ first_sigma = sigmas[0].to(device=device, dtype=torch.float32)
78
+ initial_latent = initial_latent.to(device=device, dtype=torch.float32)
79
+ latents = initial_latent.float() * (1.0 - first_sigma) + latents.float() * first_sigma
80
+
81
+ if concat_latent is not None:
82
+ concat_latent = concat_latent.to(latents)
83
+
84
+ distilled_guidance = torch.tensor([distilled_guidance_scale * 1000.0] * batch_size).to(device=device, dtype=dtype)
85
+
86
+ prompt_embeds = repeat_to_batch_size(prompt_embeds, batch_size)
87
+ prompt_embeds_mask = repeat_to_batch_size(prompt_embeds_mask, batch_size)
88
+ prompt_poolers = repeat_to_batch_size(prompt_poolers, batch_size)
89
+ negative_prompt_embeds = repeat_to_batch_size(negative_prompt_embeds, batch_size)
90
+ negative_prompt_embeds_mask = repeat_to_batch_size(negative_prompt_embeds_mask, batch_size)
91
+ negative_prompt_poolers = repeat_to_batch_size(negative_prompt_poolers, batch_size)
92
+ concat_latent = repeat_to_batch_size(concat_latent, batch_size)
93
+
94
+ sampler_kwargs = dict(
95
+ dtype=dtype,
96
+ cfg_scale=real_guidance_scale,
97
+ cfg_rescale=guidance_rescale,
98
+ concat_latent=concat_latent,
99
+ positive=dict(
100
+ pooled_projections=prompt_poolers,
101
+ encoder_hidden_states=prompt_embeds,
102
+ encoder_attention_mask=prompt_embeds_mask,
103
+ guidance=distilled_guidance,
104
+ **kwargs,
105
+ ),
106
+ negative=dict(
107
+ pooled_projections=negative_prompt_poolers,
108
+ encoder_hidden_states=negative_prompt_embeds,
109
+ encoder_attention_mask=negative_prompt_embeds_mask,
110
+ guidance=distilled_guidance,
111
+ **(kwargs if negative_kwargs is None else {**kwargs, **negative_kwargs}),
112
+ )
113
+ )
114
+
115
+ if sampler == 'unipc':
116
+ results = sample_unipc(k_model, latents, sigmas, extra_args=sampler_kwargs, disable=False, callback=callback)
117
+ else:
118
+ raise NotImplementedError(f'Sampler {sampler} is not supported.')
119
+
120
+ return results