	Upload k_diffusion_hunyuan.py
diffusers_helper/pipelines/k_diffusion_hunyuan.py ADDED
@@ -0,0 +1,120 @@
import torch
import math

from diffusers_helper.k_diffusion.uni_pc_fm import sample_unipc
from diffusers_helper.k_diffusion.wrapper import fm_wrapper
from diffusers_helper.utils import repeat_to_batch_size


def flux_time_shift(t, mu=1.15, sigma=1.0):
    # Warp uniform timesteps t in [0, 1] toward the high-noise end; mu controls
    # the strength of the shift while the endpoints stay at 0 and 1.
    return math.exp(mu) / (math.exp(mu) + (1 / t - 1) ** sigma)


def calculate_flux_mu(context_length, x1=256, y1=0.5, x2=4096, y2=1.15, exp_max=7.0):
    # Linearly interpolate mu from the token count, through the anchor points
    # (x1, y1) and (x2, y2), then cap it at log(exp_max).
    k = (y2 - y1) / (x2 - x1)
    b = y1 - k * x1
    mu = k * context_length + b
    mu = min(mu, math.log(exp_max))
    return mu


def get_flux_sigmas_from_mu(n, mu):
    # Build n + 1 sigmas decreasing from 1 to 0, time-shifted by mu.
    sigmas = torch.linspace(1, 0, steps=n + 1)
    sigmas = flux_time_shift(sigmas, mu=mu)
    return sigmas

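# A quick numeric sketch of the schedule helpers above (illustrative values,
# not part of this commit): a 1024-token latent sequence gives
#     mu = calculate_flux_mu(1024)              # ~0.63, well under the log(7.0) cap
#     sigmas = get_flux_sigmas_from_mu(25, mu)  # 26 sigmas decreasing from 1.0 to 0.0
# so longer sequences get a larger mu, i.e. a schedule that lingers longer at
# high noise levels.
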
@torch.inference_mode()
def sample_hunyuan(
        transformer,
        sampler='unipc',
        initial_latent=None,
        concat_latent=None,
        strength=1.0,
        width=512,
        height=512,
        frames=16,
        real_guidance_scale=1.0,
        distilled_guidance_scale=6.0,
        guidance_rescale=0.0,
        shift=None,
        num_inference_steps=25,
        batch_size=None,
        generator=None,
        prompt_embeds=None,
        prompt_embeds_mask=None,
        prompt_poolers=None,
        negative_prompt_embeds=None,
        negative_prompt_embeds_mask=None,
        negative_prompt_poolers=None,
        dtype=torch.bfloat16,
        device=None,
        negative_kwargs=None,
        callback=None,
        **kwargs,
):
    device = device or transformer.device

    if batch_size is None:
        batch_size = int(prompt_embeds.shape[0])

    # Initial noise in the packed latent space: 16 channels, 4x temporal and
    # 8x spatial compression. The noise is drawn on the generator's device,
    # so a generator must be passed in.
    latents = torch.randn((batch_size, 16, (frames + 3) // 4, height // 8, width // 8), generator=generator, device=generator.device).to(device=device, dtype=torch.float32)

    B, C, T, H, W = latents.shape
    seq_length = T * H * W // 4

    if shift is None:
        mu = calculate_flux_mu(seq_length, exp_max=7.0)
    else:
        mu = math.log(shift)

    sigmas = get_flux_sigmas_from_mu(num_inference_steps, mu).to(device)

    k_model = fm_wrapper(transformer)

    if initial_latent is not None:
        # Image-to-video style start: blend the provided latent with noise
        # according to the first (strength-scaled) sigma.
        sigmas = sigmas * strength
        first_sigma = sigmas[0].to(device=device, dtype=torch.float32)
        initial_latent = initial_latent.to(device=device, dtype=torch.float32)
        latents = initial_latent.float() * (1.0 - first_sigma) + latents.float() * first_sigma

    if concat_latent is not None:
        concat_latent = concat_latent.to(latents)

    distilled_guidance = torch.tensor([distilled_guidance_scale * 1000.0] * batch_size).to(device=device, dtype=dtype)

    # Broadcast all conditioning tensors to the batch size.
    prompt_embeds = repeat_to_batch_size(prompt_embeds, batch_size)
    prompt_embeds_mask = repeat_to_batch_size(prompt_embeds_mask, batch_size)
    prompt_poolers = repeat_to_batch_size(prompt_poolers, batch_size)
    negative_prompt_embeds = repeat_to_batch_size(negative_prompt_embeds, batch_size)
    negative_prompt_embeds_mask = repeat_to_batch_size(negative_prompt_embeds_mask, batch_size)
    negative_prompt_poolers = repeat_to_batch_size(negative_prompt_poolers, batch_size)
    concat_latent = repeat_to_batch_size(concat_latent, batch_size)

    sampler_kwargs = dict(
        dtype=dtype,
        cfg_scale=real_guidance_scale,
        cfg_rescale=guidance_rescale,
        concat_latent=concat_latent,
        positive=dict(
            pooled_projections=prompt_poolers,
            encoder_hidden_states=prompt_embeds,
            encoder_attention_mask=prompt_embeds_mask,
            guidance=distilled_guidance,
            **kwargs,
        ),
        negative=dict(
            pooled_projections=negative_prompt_poolers,
            encoder_hidden_states=negative_prompt_embeds,
            encoder_attention_mask=negative_prompt_embeds_mask,
            guidance=distilled_guidance,
            **(kwargs if negative_kwargs is None else {**kwargs, **negative_kwargs}),
        )
    )

    if sampler == 'unipc':
        results = sample_unipc(k_model, latents, sigmas, extra_args=sampler_kwargs, disable=False, callback=callback)
    else:
        raise NotImplementedError(f'Sampler {sampler} is not supported.')

    return results
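
A minimal calling sketch, for orientation only: `transformer` is assumed to be an already-loaded HunyuanVideo-style transformer accepted by fm_wrapper, and the prompt tensors are assumed to come from the project's text-encoding helpers; the names and shapes below are illustrative, not part of this commit.

    generator = torch.Generator('cpu').manual_seed(31337)
    latents = sample_hunyuan(
        transformer=transformer,                 # assumed: loaded HunyuanVideo-style transformer
        width=512, height=512, frames=33,        # 33 frames -> (33 + 3) // 4 = 9 packed latent frames
        distilled_guidance_scale=6.0,
        num_inference_steps=25,
        generator=generator,                     # required: noise is drawn on its device
        prompt_embeds=prompt_embeds,             # assumed (1, L, D) text-encoder hidden states
        prompt_embeds_mask=prompt_embeds_mask,   # assumed (1, L) attention mask
        prompt_poolers=prompt_poolers,           # assumed (1, D') pooled projections
        negative_prompt_embeds=negative_prompt_embeds,
        negative_prompt_embeds_mask=negative_prompt_embeds_mask,
        negative_prompt_poolers=negative_prompt_poolers,
        device='cuda',
    )

With real_guidance_scale left at its default of 1.0, classifier-free guidance is effectively off and conditioning strength comes from the distilled guidance embedding alone.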