Spaces:

Vedansh-7
/

Diffusion-unet-xray

Running

App Files Files Community

Vedansh-7 committed 23 days ago

Commit

ff9cac6

1 Parent(s): dd9af11

Update app.py

Browse files

Files changed (1) hide show

app.py +70 -79

app.py CHANGED Viewed

@@ -125,99 +125,91 @@ class DiffusionModel(nn.Module):
         self.model = model
         self.timesteps = timesteps
-        # More conservative noise schedule
-        scale = 1000 / timesteps
         beta_start = 0.0001
         beta_end = 0.02
-        self.betas = torch.linspace(beta_start, beta_end, timesteps, dtype=torch.float32)**1.5
         self.alphas = 1. - self.betas
         self.alphas_cumprod = torch.cumprod(self.alphas, dim=0)
         self.register_buffer('sqrt_alphas_cumprod', torch.sqrt(self.alphas_cumprod))
         self.register_buffer('sqrt_one_minus_alphas_cumprod', torch.sqrt(1. - self.alphas_cumprod))
-@torch.no_grad()
-def sample(self, num_images, timesteps, img_size, num_classes, labels, device, progress_callback=None):
-    # Initialize with reduced noise scale
-    x_t = torch.randn((num_images, 3, img_size, img_size), device=device) * 0.7
-    # Convert labels if needed
-    if labels.ndim == 1:
-        labels_one_hot = torch.zeros(num_images, num_classes, device=device)
-        labels_one_hot[torch.arange(num_images), labels] = 1
-        labels = labels_one_hot
-    for t in reversed(range(timesteps)):
-        if cancel_event.is_set():
-            return None
-        t_tensor = torch.full((num_images,), t, device=device, dtype=torch.long)
-        # Predict noise with model
-        pred_noise = self.model(x_t, labels, t_tensor.float())
-        # Get current alpha values
-        alpha_t = self.alphas[t]
-        alpha_bar_t = self.alphas_cumprod[t]
-        alpha_bar_t_prev = self.alphas_cumprod[t-1] if t > 0 else torch.tensor(1.0)
-        # Calculate predicted x0 with more stable equations
-        pred_x0 = (x_t - torch.sqrt(1 - alpha_bar_t) * pred_noise) / torch.sqrt(alpha_bar_t)
-        # Direction pointing to x_t with reduced noise impact
-        pred_dir = torch.sqrt(1 - alpha_bar_t_prev) * pred_noise
-        # Dynamic noise scaling based on timestep
-        if t > 0:
-            noise_scale = 0.3 * (t / timesteps)  # Reduce noise as we get closer to final image
-            noise = torch.randn_like(x_t) * noise_scale
-        else:
-            noise = torch.zeros_like(x_t)
-        # Update x_t with more stable combination
-        x_t = torch.sqrt(alpha_bar_t_prev) * pred_x0 + pred_dir + noise
-        # Progress callback
-        if progress_callback:
-            progress_callback((timesteps - t) / timesteps)
-    # Enhanced normalization with contrast adjustment
-    x_t = torch.clamp(x_t, -1, 1)
-    x_t = (x_t + 1) / 2  # Scale to [0,1]
-    # Post-processing directly in the tensor
-    x_t = self._post_process(x_t)
-    return x_t
-def _post_process(self, image_tensor):
-    """Apply simple post-processing to reduce noise"""
-    # Contrast adjustment
-    mean_val = image_tensor.mean()
-    image_tensor = (image_tensor - mean_val) * 1.2 + mean_val
-    # Mild Gaussian blur (implemented as depthwise convolution)
-    if hasattr(self, '_blur_kernel'):
-        blur_kernel = self._blur_kernel.to(image_tensor.device)
-    else:
-        blur_kernel = torch.tensor([
-            [0.05, 0.1, 0.05],
-            [0.1, 0.4, 0.1],
-            [0.05, 0.1, 0.05]
-        ], dtype=torch.float32).view(1, 1, 3, 3).repeat(3, 1, 1, 1)
-        self._blur_kernel = blur_kernel
-    # Apply blur to each channel
-    padding = (1, 1, 1, 1)
-    image_tensor = torch.nn.functional.conv2d(
-        image_tensor.permute(0, 3, 1, 2),  # NHWC to NCHW
-        blur_kernel,
-        padding=1,
-        groups=3
-    ).permute(0, 2, 3, 1)  # Back to NHWC
-    return torch.clamp(image_tensor, 0, 1)
 def load_model(model_path, device):
     unet = UNet(num_classes=NUM_CLASSES).to(device)
@@ -295,7 +287,6 @@ def generate_images(label_str, num_images, progress=gr.Progress()):
         with torch.no_grad():
             images = loaded_model.sample(
             num_images=num_images,
-            timesteps=int(TIMESTEPS * 1.5),  # More timesteps for cleaner images
             img_size=IMG_SIZE,
             num_classes=NUM_CLASSES,
             labels=labels,

         self.model = model
         self.timesteps = timesteps
+        # Improved noise schedule
         beta_start = 0.0001
         beta_end = 0.02
+        self.betas = torch.linspace(beta_start, beta_end, timesteps, dtype=torch.float32)
         self.alphas = 1. - self.betas
         self.alphas_cumprod = torch.cumprod(self.alphas, dim=0)
+        # Pre-calculate values for sampling
         self.register_buffer('sqrt_alphas_cumprod', torch.sqrt(self.alphas_cumprod))
         self.register_buffer('sqrt_one_minus_alphas_cumprod', torch.sqrt(1. - self.alphas_cumprod))
+        self.register_buffer('sqrt_recip_alphas', torch.sqrt(1. / self.alphas))
+        # Calculations for posterior q(x_{t-1} | x_t, x_0)
+        posterior_variance = self.betas * (1. - self.alphas_cumprod[:-1]) / (1. - self.alphas_cumprod[1:])
+        self.register_buffer('posterior_variance', posterior_variance)
+        # Blur kernel for post-processing
+        self.register_buffer('blur_kernel', torch.tensor([
+            [0.05, 0.1, 0.05],
+            [0.1, 0.4, 0.1],
+            [0.05, 0.1, 0.05]
+        ], dtype=torch.float32).view(1, 1, 3, 3).repeat(3, 1, 1, 1))
+    @torch.no_grad()
+    def sample(self, num_images, img_size, num_classes, labels, device, progress_callback=None):
+        """Generate a batch of images by stepping the wrapped model backwards over all timesteps.
+
+        Starts from Gaussian noise scaled by 0.7 and, for every t from
+        ``self.timesteps - 1`` down to 0, asks ``self.model`` for a noise
+        estimate, reconstructs a predicted clean image from it and combines
+        that prediction with the noise estimate (plus a small amount of fresh
+        noise) to form the sample for the previous step.
+
+        Args:
+            num_images: Batch size of the generated tensor.
+            img_size: Height and width of the square, 3-channel images.
+            num_classes: Width of the one-hot label encoding.
+            labels: Conditioning labels. A 1-D tensor is treated as class
+                indices and expanded to one-hot rows; any other rank is
+                passed to the model unchanged.
+            device: Device on which the initial noise, the one-hot labels and
+                the per-step timestep tensors are created.
+            progress_callback: Optional callable invoked after each step with
+                the completed fraction ``(self.timesteps - t) / self.timesteps``.
+
+        Returns:
+            The result of ``self._post_process`` applied to the final sample,
+            or ``None`` if ``cancel_event`` is set when a step begins.
+
+        NOTE(review): ``cancel_event`` is not defined inside this method;
+        presumably a module-level event object - confirm.
+        """
+        shape = (num_images, 3, img_size, img_size)
+        x_t = torch.randn(shape, device=device) * 0.7  # Gaussian start, scaled by 0.7 (slightly reduced initial noise)
+        if labels.ndim == 1:
+            labels = torch.zeros(num_images, num_classes, device=device).scatter_(1, labels.unsqueeze(1), 1)  # class indices -> one-hot rows
+        for t in reversed(range(self.timesteps)):
+            if cancel_event.is_set():  # abort before running this step; the caller receives None
+                return None
+            t_batch = torch.full((num_images,), t, device=device, dtype=torch.long)  # same timestep index for every image
+            pred_noise = self.model(x_t, labels, t_batch.float())  # the model is given the timestep as float
+            alpha_bar_t = self.alphas_cumprod[t]
+            alpha_bar_t_prev = self.alphas_cumprod[t-1] if t > 0 else torch.tensor(1.0)  # at t == 0 the previous cumulative alpha is taken as 1.0
+            # Predicted clean image x0: subtract the estimated noise from x_t and rescale
+            pred_x0 = (x_t - torch.sqrt(1 - alpha_bar_t) * pred_noise) / torch.sqrt(alpha_bar_t)
+            # Estimated-noise term scaled to the previous step's noise level (direction pointing to x_t)
+            pred_dir = torch.sqrt(1 - alpha_bar_t_prev) * pred_noise
+            # Fresh noise whose scale shrinks linearly with t (always below 0.3); none is added at t == 0
+            if t > 0:
+                noise_scale = 0.3 * (t / self.timesteps)
+                noise = torch.randn_like(x_t) * noise_scale
+            else:
+                noise = torch.zeros_like(x_t)
+            # Combine the three terms into the sample for step t-1
+            x_t = torch.sqrt(alpha_bar_t_prev) * pred_x0 + pred_dir + noise
+            if progress_callback:
+                progress_callback((self.timesteps - t) / self.timesteps)
+        # Post-processing: normalise, blur and contrast-adjust the final sample
+        x_t = self._post_process(x_t)
+        return x_t
+    def _post_process(self, images):
+        """Apply post-processing to reduce noise and enhance contrast.
+
+        Clamps the input to [-1, 1], rescales it to [0, 1], filters each of
+        the three channels with ``self.blur_kernel``, stretches contrast
+        around each image's own mean by a factor of 1.2 and clamps the result
+        to [0, 1].
+
+        Args:
+            images: 4-D image batch. A batch whose last dimension is not 3 is
+                treated as channels-first (NCHW); otherwise it is treated as
+                channels-last (NHWC).
+
+        Returns:
+            The processed batch in channels-last (NHWC) layout with values in
+            [0, 1].
+
+        NOTE(review): the layout test would misclassify a channels-first
+        batch whose width is exactly 3 - confirm this cannot occur.
+        """
+        # Normalize to [0,1] (values outside [-1, 1] are clipped first)
+        images = torch.clamp(images, -1, 1)
+        images = (images + 1) / 2
+        # Apply mild blur (convert NHWC to NCHW for conv2d)
+        if images.dim() == 4 and images.shape[-1] != 3:  # last dim is not the channel count, so assume NCHW
+            images = images.permute(0, 2, 3, 1)  # NCHW to NHWC, so the conversion below handles both layouts
+        x = images.permute(0, 3, 1, 2)  # NHWC to NCHW
+        x = torch.nn.functional.conv2d(x, self.blur_kernel, padding=1, groups=3)  # groups=3 filters each channel separately; padding=1 keeps H and W
+        images = x.permute(0, 2, 3, 1)  # NCHW to NHWC
+        # Contrast adjustment around each image's own mean (factor 1.2)
+        mean_val = images.mean(dim=(1,2,3), keepdim=True)
+        images = (images - mean_val) * 1.2 + mean_val
+        return torch.clamp(images, 0, 1)
 def load_model(model_path, device):
     unet = UNet(num_classes=NUM_CLASSES).to(device)
         with torch.no_grad():
             images = loaded_model.sample(
             num_images=num_images,
             img_size=IMG_SIZE,
             num_classes=NUM_CLASSES,
             labels=labels,