Spaces:

Vedansh-7
/

Diffusion-unet-xray

Running

App Files Files Community

Vedansh-7 committed 24 days ago

Commit

bb3aba9

1 Parent(s): ff9cac6

Update app.py

Browse files

Files changed (1) hide show

app.py +42 -52

app.py CHANGED Viewed

@@ -125,70 +125,64 @@ class DiffusionModel(nn.Module):
         self.model = model
         self.timesteps = timesteps
-        # Improved noise schedule
         beta_start = 0.0001
         beta_end = 0.02
         self.betas = torch.linspace(beta_start, beta_end, timesteps, dtype=torch.float32)
         self.alphas = 1. - self.betas
-        self.alphas_cumprod = torch.cumprod(self.alphas, dim=0)
-        # Pre-calculate values for sampling
-        self.register_buffer('sqrt_alphas_cumprod', torch.sqrt(self.alphas_cumprod))
-        self.register_buffer('sqrt_one_minus_alphas_cumprod', torch.sqrt(1. - self.alphas_cumprod))
         self.register_buffer('sqrt_recip_alphas', torch.sqrt(1. / self.alphas))
-        # Calculations for posterior q(x_{t-1} | x_t, x_0)
-        posterior_variance = self.betas * (1. - self.alphas_cumprod[:-1]) / (1. - self.alphas_cumprod[1:])
-        self.register_buffer('posterior_variance', posterior_variance)
-        # Blur kernel for post-processing
-        self.register_buffer('blur_kernel', torch.tensor([
-            [0.05, 0.1, 0.05],
-            [0.1, 0.4, 0.1],
-            [0.05, 0.1, 0.05]
-        ], dtype=torch.float32).view(1, 1, 3, 3).repeat(3, 1, 1, 1))
     @torch.no_grad()
     def sample(self, num_images, img_size, num_classes, labels, device, progress_callback=None):
-        """Generate samples from the model"""
-        shape = (num_images, 3, img_size, img_size)
-        x_t = torch.randn(shape, device=device) * 0.7  # Slightly reduced initial noise
         if labels.ndim == 1:
             labels = torch.zeros(num_images, num_classes, device=device).scatter_(1, labels.unsqueeze(1), 1)
         for t in reversed(range(self.timesteps)):
             if cancel_event.is_set():
                 return None
-            t_batch = torch.full((num_images,), t, device=device, dtype=torch.long)
-            pred_noise = self.model(x_t, labels, t_batch.float())
-            alpha_bar_t = self.alphas_cumprod[t]
-            alpha_bar_t_prev = self.alphas_cumprod[t-1] if t > 0 else torch.tensor(1.0)
-            # Calculate predicted x0
-            pred_x0 = (x_t - torch.sqrt(1 - alpha_bar_t) * pred_noise) / torch.sqrt(alpha_bar_t)
-            # Calculate direction pointing to x_t
-            pred_dir = torch.sqrt(1 - alpha_bar_t_prev) * pred_noise
-            # Dynamic noise scaling
             if t > 0:
-                noise_scale = 0.3 * (t / self.timesteps)
-                noise = torch.randn_like(x_t) * noise_scale
             else:
                 noise = torch.zeros_like(x_t)
-            # Update x_t
-            x_t = torch.sqrt(alpha_bar_t_prev) * pred_x0 + pred_dir + noise
             if progress_callback:
                 progress_callback((self.timesteps - t) / self.timesteps)
-        # Post-processing
-        x_t = self._post_process(x_t)
         return x_t
     def _post_process(self, images):
@@ -275,7 +269,8 @@ def generate_images(label_str, num_images, progress=gr.Progress()):
     if label_str not in label_map:
         raise gr.Error("Invalid condition selected")
-    labels = torch.zeros(num_images, NUM_CLASSES, device=device)
     labels[:, label_map[label_str]] = 1
     try:
@@ -286,29 +281,24 @@ def generate_images(label_str, num_images, progress=gr.Progress()):
         with torch.no_grad():
             images = loaded_model.sample(
-            num_images=num_images,
-            img_size=IMG_SIZE,
-            num_classes=NUM_CLASSES,
-            labels=labels,
-            device=device,
-            progress_callback=progress_callback
             )
         if images is None:
             return None, None
         processed_images = []
         for img in images:
-            img_np = img.cpu().numpy()
-        # Convert to PIL with enhanced contrast
             img_np = (img_np * 255).clip(0, 255).astype(np.uint8)
             pil_img = Image.fromarray(img_np)
-        # Apply additional PIL-based enhancements
-            pil_img = pil_img.filter(ImageFilter.SMOOTH_MORE)
             processed_images.append(pil_img)
         if num_images == 1:
             return processed_images[0], processed_images

         self.model = model
         self.timesteps = timesteps
+        # Noise schedule from working code
         beta_start = 0.0001
         beta_end = 0.02
         self.betas = torch.linspace(beta_start, beta_end, timesteps, dtype=torch.float32)
         self.alphas = 1. - self.betas
+        self.register_buffer('alpha_bars', torch.cumprod(self.alphas, dim=0))
+        self.register_buffer('sqrt_one_minus_alpha_bars', torch.sqrt(1. - self.alpha_bars))
         self.register_buffer('sqrt_recip_alphas', torch.sqrt(1. / self.alphas))
     @torch.no_grad()
     def sample(self, num_images, img_size, num_classes, labels, device, progress_callback=None):
+        """Improved sampling method based on working code"""
+        x_t = torch.randn((num_images, 3, img_size, img_size), device=device)
+        # Handle labels (class indices or one-hot)
         if labels.ndim == 1:
             labels = torch.zeros(num_images, num_classes, device=device).scatter_(1, labels.unsqueeze(1), 1)
+        else:
+            labels = labels.float().to(device)
         for t in reversed(range(self.timesteps)):
             if cancel_event.is_set():
                 return None
+            t_tensor = torch.full((num_images,), t, device=device, dtype=torch.float)
+            # Predict noise with model
+            pred_noise = self.model(x_t, labels, t_tensor)
+            # Calculate coefficients from working code
+            beta_t = self.betas[t].to(device)
+            alpha_t = self.alphas[t].to(device)
+            alpha_bar_t = self.alpha_bars[t].to(device)
+            # Improved reverse diffusion step
+            mean = (1 / torch.sqrt(alpha_t)) * (x_t - (beta_t / torch.sqrt(1 - alpha_bar_t)) * pred_noise)
+            variance = beta_t
             if t > 0:
+                noise = torch.randn_like(x_t)
             else:
                 noise = torch.zeros_like(x_t)
+            x_t = mean + torch.sqrt(variance) * noise
             if progress_callback:
                 progress_callback((self.timesteps - t) / self.timesteps)
+        # Improved normalization from working code
+        x_t = torch.clamp(x_t, -1., 1.)
+        # Denormalize using ImageNet stats (from working code)
+        mean = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1).to(device)
+        std = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1).to(device)
+        x_t = std * x_t + mean
+        x_t = torch.clamp(x_t, 0., 1.)
         return x_t
     def _post_process(self, images):
     if label_str not in label_map:
         raise gr.Error("Invalid condition selected")
+    # Create one-hot encoded labels
+    labels = torch.zeros(num_images, NUM_CLASSES)
     labels[:, label_map[label_str]] = 1
     try:
         with torch.no_grad():
             images = loaded_model.sample(
+                num_images=num_images,
+                img_size=IMG_SIZE,
+                num_classes=NUM_CLASSES,
+                labels=labels,
+                device=device,
+                progress_callback=progress_callback
             )
         if images is None:
             return None, None
         processed_images = []
         for img in images:
+            # Convert to numpy and permute dimensions (C,H,W) -> (H,W,C)
+            img_np = img.cpu().permute(1, 2, 0).numpy()
             img_np = (img_np * 255).clip(0, 255).astype(np.uint8)
             pil_img = Image.fromarray(img_np)
             processed_images.append(pil_img)
         if num_images == 1:
             return processed_images[0], processed_images