Spaces:

Vedansh-7
/

Diffusion-unet-xray

Running

App Files Files Community

Vedansh-7 committed 24 days ago

Commit

dd9af11

1 Parent(s): 9378bae

Update app.py

Browse files

Files changed (1) hide show

app.py +97 -67

app.py CHANGED Viewed

@@ -127,74 +127,98 @@ class DiffusionModel(nn.Module):
         # More conservative noise schedule
         scale = 1000 / timesteps
-        beta_start = scale * 0.0001
-        beta_end = scale * 0.02
-        self.betas = torch.linspace(beta_start, beta_end, timesteps, dtype=torch.float32)
         self.alphas = 1. - self.betas
         self.alphas_cumprod = torch.cumprod(self.alphas, dim=0)
         self.register_buffer('sqrt_alphas_cumprod', torch.sqrt(self.alphas_cumprod))
         self.register_buffer('sqrt_one_minus_alphas_cumprod', torch.sqrt(1. - self.alphas_cumprod))
-    @torch.no_grad()
-    def sample(self, num_images, timesteps, img_size, num_classes, labels, device, progress_callback=None):
-        # Initialize with standard normal distribution (scale=1.0)
-        x_t = torch.randn((num_images, 3, img_size, img_size), device=device)
-        if labels.ndim == 1:
-            labels_one_hot = torch.zeros(num_images, num_classes, device=device)
-            labels_one_hot[torch.arange(num_images), labels] = 1
-            labels = labels_one_hot
         else:
-            labels = labels.float().to(device)
-        for t in reversed(range(timesteps)):
-            if cancel_event.is_set():
-                return None
-            t_tensor = torch.full((num_images,), t, device=device, dtype=torch.long)
-            # Predict noise with model
-            pred_noise = self.model(x_t, labels, t_tensor.float())
-            # Get current alpha values
-            alpha_t = self.alphas[t]
-            alpha_bar_t = self.alphas_cumprod[t]
-            alpha_bar_t_prev = self.alphas_cumprod[t-1] if t > 0 else torch.tensor(1.0)
-            # Calculate coefficients
-            beta_t = self.betas[t]
-            sqrt_recip_alpha_t = torch.sqrt(1.0 / alpha_t)
-            sqrt_one_minus_alpha_bar_t = torch.sqrt(1.0 - alpha_bar_t)
-            # Calculate predicted x0
-            pred_x0 = (x_t - sqrt_one_minus_alpha_bar_t * pred_noise) * sqrt_recip_alpha_t
-            # Calculate direction pointing to x_t
-            pred_dir = torch.sqrt(1.0 - alpha_bar_t_prev) * pred_noise
-            # Noise for next step
-            if t > 0:
-                noise = torch.randn_like(x_t) * 0.5
-            else:
-                noise = torch.zeros_like(x_t)
-            # Update x_t with stability checks
-            x_t = torch.sqrt(alpha_bar_t_prev) * pred_x0 + pred_dir + noise * torch.sqrt(beta_t)
-            # Numerical stability check
-            if torch.isnan(x_t).any() or torch.isinf(x_t).any():
-                x_t = torch.randn_like(x_t) * 0.1
-            if progress_callback:
-                progress_callback((timesteps - t) / timesteps)
-        # Gentle normalization
-        x_t = (x_t - x_t.min()) / (x_t.max() - x_t.min() + 1e-8)  # [0, 1]
-        x_t = torch.clamp(x_t, 0, 1)  # Final safety clamp
-        return x_t
 def load_model(model_path, device):
     unet = UNet(num_classes=NUM_CLASSES).to(device)
     diffusion_model = DiffusionModel(unet).to(device)
@@ -270,24 +294,30 @@ def generate_images(label_str, num_images, progress=gr.Progress()):
         with torch.no_grad():
             images = loaded_model.sample(
-                num_images=num_images,
-                timesteps=TIMESTEPS,
-                img_size=IMG_SIZE,
-                num_classes=NUM_CLASSES,
-                labels=labels,
-                device=device,
-                progress_callback=progress_callback
             )
         if images is None:
             return None, None
         processed_images = []
         for img in images:
-            img_np = img.cpu().numpy().transpose(1, 2, 0)
             img_np = (img_np * 255).clip(0, 255).astype(np.uint8)
             pil_img = Image.fromarray(img_np)
             processed_images.append(pil_img)
         if num_images == 1:
             return processed_images[0], processed_images

         # More conservative noise schedule
         scale = 1000 / timesteps
+        beta_start = 0.0001
+        beta_end = 0.02
+        self.betas = torch.linspace(beta_start, beta_end, timesteps, dtype=torch.float32)**1.5
         self.alphas = 1. - self.betas
         self.alphas_cumprod = torch.cumprod(self.alphas, dim=0)
         self.register_buffer('sqrt_alphas_cumprod', torch.sqrt(self.alphas_cumprod))
         self.register_buffer('sqrt_one_minus_alphas_cumprod', torch.sqrt(1. - self.alphas_cumprod))
@torch.no_grad()
def sample(self, num_images, timesteps, img_size, num_classes, labels, device, progress_callback=None):
    """Generate a batch of images by running the reverse diffusion loop.

    Args:
        num_images: Number of images in the generated batch.
        timesteps: Number of reverse steps requested. Capped at the length of
            ``self.alphas_cumprod`` so schedule lookups never go out of range.
        img_size: Height and width of the square, 3-channel noise tensor.
        num_classes: Width of the one-hot label encoding.
        labels: Tensor of class indices (1-D) or an already-encoded label
            tensor (any other rank), which is cast to float.
        device: Device on which the sampling tensors are created.
        progress_callback: Optional callable invoked after every step with
            the completed fraction ``(steps_done / total_steps)``.

    Returns:
        The result of ``self._post_process`` applied to the final batch after
        it has been clamped to [-1, 1] and rescaled to [0, 1], or ``None`` if
        the module-level ``cancel_event`` is set during sampling.
    """
    # NOTE(review): this def is at module level yet takes ``self``; confirm it
    # is actually bound to the diffusion model class, otherwise
    # ``model.sample(...)`` raises AttributeError.

    # The cumulative schedule is a plain attribute here, so move it explicitly
    # instead of relying on implicit CPU-scalar broadcasting.
    alphas_cumprod = self.alphas_cumprod.to(device)

    # A caller may request more steps than the schedule holds (e.g. a scaled
    # TIMESTEPS); indexing past the end would raise IndexError on step one.
    num_steps = min(int(timesteps), alphas_cumprod.shape[0])

    # Initialize with reduced noise scale
    x_t = torch.randn((num_images, 3, img_size, img_size), device=device) * 0.7

    # Convert labels if needed, and keep them on the sampling device
    labels = labels.to(device)
    if labels.ndim == 1:
        labels_one_hot = torch.zeros(num_images, num_classes, device=device)
        labels_one_hot[torch.arange(num_images, device=device), labels.long()] = 1
        labels = labels_one_hot
    else:
        labels = labels.float()

    for t in reversed(range(num_steps)):
        if cancel_event.is_set():
            return None

        t_tensor = torch.full((num_images,), t, device=device, dtype=torch.long)

        # Predict noise with model
        pred_noise = self.model(x_t, labels, t_tensor.float())

        # Cumulative alpha for this step and the one before it (1.0 at t == 0)
        alpha_bar_t = alphas_cumprod[t]
        alpha_bar_t_prev = alphas_cumprod[t - 1] if t > 0 else torch.ones_like(alpha_bar_t)

        # Predicted clean image from the current noisy sample
        pred_x0 = (x_t - torch.sqrt(1 - alpha_bar_t) * pred_noise) / torch.sqrt(alpha_bar_t)

        # Direction pointing back towards x_t
        pred_dir = torch.sqrt(1 - alpha_bar_t_prev) * pred_noise

        # Injected noise shrinks linearly with t and is disabled on the last step
        if t > 0:
            noise = torch.randn_like(x_t) * (0.3 * (t / num_steps))
        else:
            noise = torch.zeros_like(x_t)

        x_t = torch.sqrt(alpha_bar_t_prev) * pred_x0 + pred_dir + noise

        if progress_callback:
            progress_callback((num_steps - t) / num_steps)

    # Map the model's [-1, 1] range to [0, 1] before post-processing
    x_t = torch.clamp(x_t, -1, 1)
    x_t = (x_t + 1) / 2
    return self._post_process(x_t)
def _post_process(self, image_tensor):
    """Boost contrast and apply a mild 3x3 blur to a batch of 3-channel images.

    Args:
        image_tensor: 4-D batch, either channels-last ``(N, H, W, 3)`` or
            channels-first ``(N, 3, H, W)``. When the last dimension is 3 the
            input is treated as channels-last.

    Returns:
        The processed batch in channels-last ``(N, H, W, 3)`` layout, clamped
        to [0, 1].

    Raises:
        ValueError: If the input is not 4-D or has no 3-sized channel axis.
    """
    if image_tensor.ndim != 4:
        raise ValueError(
            f"expected a 4-D image batch, got {image_tensor.ndim} dimension(s)"
        )

    # Contrast adjustment: stretch values around the batch mean by 20%
    mean_val = image_tensor.mean()
    image_tensor = (image_tensor - mean_val) * 1.2 + mean_val

    # conv2d needs (N, C, H, W); only permute when the data is channels-last.
    # Permuting an already channels-first batch would hand conv2d the wrong
    # channel count for groups=3.
    if image_tensor.shape[-1] == 3:
        channels_first = image_tensor.permute(0, 3, 1, 2)
    elif image_tensor.shape[1] == 3:
        channels_first = image_tensor
    else:
        raise ValueError(
            f"expected 3 channels in dim 1 or dim -1, got shape {tuple(image_tensor.shape)}"
        )

    # Build the depthwise kernel once and cache it on the instance
    blur_kernel = getattr(self, '_blur_kernel', None)
    if blur_kernel is None:
        blur_kernel = torch.tensor([
            [0.05, 0.1, 0.05],
            [0.1, 0.4, 0.1],
            [0.05, 0.1, 0.05],
        ], dtype=torch.float32).view(1, 1, 3, 3).repeat(3, 1, 1, 1)
        self._blur_kernel = blur_kernel
    # Always match the image's device and dtype, including on the first call
    blur_kernel = blur_kernel.to(device=image_tensor.device, dtype=image_tensor.dtype)

    # groups=3 applies the same 3x3 kernel to each channel independently
    blurred = torch.nn.functional.conv2d(
        channels_first,
        blur_kernel,
        padding=1,
        groups=3,
    )
    return torch.clamp(blurred.permute(0, 2, 3, 1), 0, 1)
 def load_model(model_path, device):
     unet = UNet(num_classes=NUM_CLASSES).to(device)
     diffusion_model = DiffusionModel(unet).to(device)
         with torch.no_grad():
             images = loaded_model.sample(
+            num_images=num_images,
+            timesteps=int(TIMESTEPS * 1.5),  # More timesteps for cleaner images
+            img_size=IMG_SIZE,
+            num_classes=NUM_CLASSES,
+            labels=labels,
+            device=device,
+            progress_callback=progress_callback
             )
         if images is None:
             return None, None
         processed_images = []
         for img in images:
+            img_np = img.cpu().numpy()
+        # Convert to PIL with enhanced contrast
             img_np = (img_np * 255).clip(0, 255).astype(np.uint8)
             pil_img = Image.fromarray(img_np)
+        # Apply additional PIL-based enhancements
+            pil_img = pil_img.filter(ImageFilter.SMOOTH_MORE)
             processed_images.append(pil_img)
         if num_images == 1:
             return processed_images[0], processed_images