Spaces:

alexnasa
/

SuperResolution

Running on Zero

App Files Community

alexnasa committed on Jul 12

Commit

f4b4884

verified ·

1 Parent(s): c8b1497

Update pipelines/pipeline_seesr.py

Browse files

Files changed (1) hide show

pipelines/pipeline_seesr.py +38 -57

pipelines/pipeline_seesr.py CHANGED Viewed

@@ -99,7 +99,9 @@ EXAMPLE_DOC_STRING = """
 def kde_grad(x0: torch.Tensor, patch_size = 16, bandwidth = 0.1):
     # x0: (N, C, H, W) in float32
     N, C, H, W = x0.shape
-    patches = unfold(x0, kernel_size=patch_size, stride=patch_size) # (N, C*ps*ps, M)
     P, M = patches.shape[1], patches.shape[2]
     p_i = patches.unsqueeze(1)  # (N,1,P,M)
     p_j = patches.unsqueeze(0)  # (1,N,P,M)
@@ -111,15 +113,13 @@ def kde_grad(x0: torch.Tensor, patch_size = 16, bandwidth = 0.1):
     num = (w.unsqueeze(2) * diff).sum(dim=1)          # (N,P,M)
     denom = w.sum(dim=1, keepdim=True) + 1e-8         # (N,1,M)
     mshift = num / denom                             # (N,P,M)
     # fold back
     grad = fold(
-         mshift / bandwidth**2,
-         output_size=(H, W),
-         kernel_size=patch_size,
-         stride=patch_size
-    )                                           # (N, C, H, W)
     return grad
 class StableDiffusionControlNetPipeline(DiffusionPipeline, TextualInversionLoaderMixin):
@@ -835,8 +835,8 @@ class StableDiffusionControlNetPipeline(DiffusionPipeline, TextualInversionLoade
         num_particles: Optional[int] = 4,
         gamma_0: Optional[float] = 0.1,  # base steering strength
         use_KDS = True,
-        bandwidth = 0.1,
         patch_size = 16,
         args=None,
     ):
         r"""
@@ -1050,9 +1050,9 @@ class StableDiffusionControlNetPipeline(DiffusionPipeline, TextualInversionLoade
             for i, t in enumerate(timesteps):
                 with torch.no_grad():
                     # pass, if the timestep is larger than start_steps
-                    # if t > start_steps:
-                    #     print(f'pass {t} steps.')
-                    #     continue
                     # expand the latents if we are doing classifier free guidance
                     latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
@@ -1189,7 +1189,7 @@ class StableDiffusionControlNetPipeline(DiffusionPipeline, TextualInversionLoade
                                     cond_list = []
                                     img_list = []
-                                    noise_preds.append(model_out)
                         # Stitch noise predictions for all tiles
                         noise_pred = torch.zeros(latent_model_input.shape, device=latent_model_input.device)
@@ -1226,69 +1226,50 @@ class StableDiffusionControlNetPipeline(DiffusionPipeline, TextualInversionLoade
                 if use_KDS:
-                    # 2) Compute x₀ prediction for all particles
                     beta_t   = 1 - self.scheduler.alphas_cumprod[t]
                     alpha_t  = self.scheduler.alphas_cumprod[t].sqrt()
                     sigma_t  = beta_t.sqrt()
-                    x0_pred  = (latents - sigma_t * noise_pred) / alpha_t  # shape [2N, C, H, W]
-                    # — split into unconditional vs. conditional
-                    x0_uncond, x0_cond = x0_pred.chunk(2, dim=0)           # each [N, C, H, W]
-                    # 3) Apply KDE steering *only* on the conditional batch
-                    m_shift_cond = kde_grad(x0_cond, patch_size=patch_size, bandwidth=bandwidth)  # [N, C, H, W]
-                    delta_t      = gamma_0 * (1 - i / (len(timesteps) - 1))
-                    x0_cond_steer = x0_cond + delta_t * m_shift_cond      # steered conditional
-                    # 4) Recombine the latents: leave uncond untouched, use steered cond
-                    x0_steer = torch.cat([x0_uncond, x0_cond_steer], dim=0)  # [2N, C, H, W]
-                    # 5) Recompute “noise” for DDIM step
                     noise_pred_kds = (latents - alpha_t * x0_steer) / sigma_t
-                    # 6) Determine prev alphas and form next latent per DDIM
                     if i < len(timesteps) - 1:
-                        next_t = timesteps[i + 1]
-                        alpha_prev = self.scheduler.alphas_cumprod[next_t].sqrt()
                     else:
-                        alpha_prev = self.scheduler.final_alpha_cumprod.sqrt()
                     sigma_prev = (1 - alpha_prev**2).sqrt()
                     latents = (
-                        alpha_prev * x0_steer
-                        + sigma_prev * noise_pred_kds
                     ).detach().requires_grad_(True)
                 else:
                     # compute the previous noisy sample x_t -> x_t-1
                     latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0]
-        with torch.no_grad():
-            if use_KDS:
-                # Final-latent selection (once!)
-                # latents shape: [2*N, C, H, W]
-                uncond_latents, cond_latents = latents.chunk(2, dim=0)  # each [N, C, H, W]
-                # 1) ensemble mean
-                mean_cond = cond_latents.mean(dim=0, keepdim=True)      # [1, C, H, W]
-                # 2) distances
-                dists = ((cond_latents - mean_cond)
-                          .view(cond_latents.size(0), -1)
-                          .pow(2)
-                          .sum(dim=1))                             # [N]
-                # 3) best index
-                best_idx = dists.argmin().item()
-                # 4) select that latent (and its uncond pair)
-                best_uncond = uncond_latents[best_idx:best_idx+1]
-                best_cond   = cond_latents  [best_idx:best_idx+1]
-                latents     = torch.cat([best_uncond, best_cond], dim=0)  # [2, C, H, W]
-            # call the callback, if provided
-            if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
-                progress_bar.update()
-                if callback is not None and i % callback_steps == 0:
-                    callback(i, t, latents)
             # If we do sequential model offloading, let's offload unet and controlnet
             # manually for max memory savings
             if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:

 def kde_grad(x0: torch.Tensor, patch_size = 16, bandwidth = 0.1):
     # x0: (N, C, H, W) in float32
     N, C, H, W = x0.shape
+    patches = unfold(
+        x0, kernel_size=patch_size, stride=patch_size//2
+    )                           # (N, C*ps*ps, M)
     P, M = patches.shape[1], patches.shape[2]
     p_i = patches.unsqueeze(1)  # (N,1,P,M)
     p_j = patches.unsqueeze(0)  # (1,N,P,M)
     num = (w.unsqueeze(2) * diff).sum(dim=1)          # (N,P,M)
     denom = w.sum(dim=1, keepdim=True) + 1e-8         # (N,1,M)
     mshift = num / denom                             # (N,P,M)
     # fold back
     grad = fold(
+        mshift / bandwidth**2,
+        output_size=(H, W),
+        kernel_size=patch_size,
+        stride=patch_size//2
+    )                                                # (N, C, H, W)
     return grad
 class StableDiffusionControlNetPipeline(DiffusionPipeline, TextualInversionLoaderMixin):
         num_particles: Optional[int] = 4,
         gamma_0: Optional[float] = 0.1,  # base steering strength
         use_KDS = True,
         patch_size = 16,
+        bandwidth = 0.1,
         args=None,
     ):
         r"""
             for i, t in enumerate(timesteps):
                 with torch.no_grad():
                     # pass, if the timestep is larger than start_steps
+                    if t > start_steps:
+                        print(f'pass {t} steps.')
+                        continue
                     # expand the latents if we are doing classifier free guidance
                     latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
                                     cond_list = []
                                     img_list = []
+                                noise_preds.append(model_out)
                         # Stitch noise predictions for all tiles
                         noise_pred = torch.zeros(latent_model_input.shape, device=latent_model_input.device)
                 if use_KDS:
+                    # 2) Compute x₀ prediction
                     beta_t   = 1 - self.scheduler.alphas_cumprod[t]
                     alpha_t  = self.scheduler.alphas_cumprod[t].sqrt()
                     sigma_t  = beta_t.sqrt()
+                    x0_pred  = (latents - sigma_t * noise_pred) / alpha_t
+                    # 3) Apply KDE steering
+                    m_shift   = kde_grad(x0_pred, patch_size=patch_size, bandwidth=bandwidth)
+                    delta_t   = gamma_0 * (1 - i / (len(timesteps) - 1))
+                    x0_steer  = x0_pred + delta_t * m_shift
+                    # frac    = i / (len(timesteps) - 1)
+                    # delta_t = 0.0 if frac < 0.3 else 0.3
+                    # x0_steer = x0_pred + delta_t * gamma_0 * m_shift
+                    # 4) Recompute “noise” for DDIM step
                     noise_pred_kds = (latents - alpha_t * x0_steer) / sigma_t
+                    # 5) Determine prev alphas
                     if i < len(timesteps) - 1:
+                      next_t = timesteps[i + 1]
+                      alpha_prev = self.scheduler.alphas_cumprod[next_t].sqrt()
                     else:
+                      alpha_prev = self.scheduler.final_alpha_cumprod.sqrt()
                     sigma_prev = (1 - alpha_prev**2).sqrt()
+                    # 6) Form next latent per DDIM
                     latents = (
+                      alpha_prev * x0_steer
+                      + sigma_prev * noise_pred_kds
                     ).detach().requires_grad_(True)
                 else:
                     # compute the previous noisy sample x_t -> x_t-1
                     latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0]
+                # call the callback, if provided
+                if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
+                    progress_bar.update()
+                    if callback is not None and i % callback_steps == 0:
+                        callback(i, t, latents)
+        with torch.no_grad():
             # If we do sequential model offloading, let's offload unet and controlnet
             # manually for max memory savings
             if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None: