Update app.py
app.py
CHANGED
@@ -7,6 +7,7 @@ import math
 import os
 from threading import Event
 import traceback
+import cv2

 # Constants
 IMG_SIZE = 128
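The new cv2 dependency is only used for the bilateral filtering added to sample() further down. A minimal sketch of that tensor-to-OpenCV round trip, assuming opencv-python and numpy are installed in the Space (array names here are illustrative, not from app.py):

import cv2
import numpy as np
import torch

x = torch.rand(1, 3, 128, 128)                                    # dummy batch in [0, 1]
img = (x[0].permute(1, 2, 0).numpy() * 255).astype(np.uint8)      # CHW tensor -> HWC uint8 image
smoothed = cv2.bilateralFilter(img, d=5, sigmaColor=15, sigmaSpace=15)
back = torch.from_numpy(smoothed.astype(np.float32) / 255.0).permute(2, 0, 1)  # back to CHW float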
@@ -153,75 +154,74 @@ class DiffusionModel(nn.Module):

     @torch.no_grad()
     def sample(self, num_images, img_size, num_classes, labels, device, progress_callback=None):
-        #
-        NOISE_MIN_FACTOR = 0.6
-        SHARPEN_STRENGTH = 1.4
-        EDGE_BOOST = 0.15
-        EPS = 1e-8
-        # Initialize with scaled noise
-        x_t = torch.randn(num_images, 3, img_size, img_size, device=device) * NOISE_SCALE
+        # Start with random noise
+        x_t = torch.randn(num_images, 3, img_size, img_size).to(device)

-        # Label
+        # Label handling (one-hot if needed)
         if labels.ndim == 1:
+            labels_one_hot = torch.zeros(num_images, num_classes).to(device)
+            labels_one_hot[torch.arange(num_images), labels] = 1
+            labels = labels_one_hot
         else:
             labels = labels.to(device)

-        #
+        # ---- REVERTED SAMPLING LOOP WITH NOISE REDUCTION ----
         for t in reversed(range(self.timesteps)):
             if cancel_event.is_set():
                 return None

-            t_tensor = torch.full((num_images,), t, device=device, dtype=torch.
+            t_tensor = torch.full((num_images,), t, device=device, dtype=torch.float)
             predicted_noise = self.model(x_t, labels, t_tensor)

+            # Calculate coefficients
+            beta_t = self.betas[t].to(device)
+            alpha_t = self.alphas[t].to(device)
+            alpha_bar_t = self.alpha_bars[t].to(device)

-            # Dynamic noise scaling
+            mean = (1 / torch.sqrt(alpha_t)) * (x_t - (beta_t / torch.sqrt(1 - alpha_bar_t)) * predicted_noise)
+            variance = beta_t
+
+            # Reduced noise injection with lower multiplier
             if t > 0:
-                noise = torch.randn_like(x_t) * noise_factor
+                noise = torch.randn_like(x_t) * 0.8  # Reduced noise by 20%
             else:
                 noise = torch.zeros_like(x_t)

-            x_t = mean + torch.sqrt(
+            x_t = mean + torch.sqrt(variance) * noise

-            if progress_callback
+            if progress_callback:
                 progress_callback((self.timesteps - t) / self.timesteps)

-        #
-        x_0 =
-        low_pass = torch.nn.functional.avg_pool2d(x_0, kernel_size=3, stride=1, padding=1)
-        x_0 = torch.clamp((1 + self.SHARPEN_STRENGTH) * x_0 - self.SHARPEN_STRENGTH * low_pass, 0, 1)
-        # Edge boost
-        edges = x_0 - torch.nn.functional.avg_pool2d(x_0, kernel_size=5, stride=1, padding=2)
-        return torch.clamp(x_0 + edges * self.EDGE_BOOST, 0, 1)
+        # Clamp and denormalize
+        x_0 = torch.clamp(x_t, -1., 1.)
+        mean = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1).to(device)
+        std = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1).to(device)
+        x_0 = std * x_0 + mean
+        x_0 = torch.clamp(x_0, 0., 1.)
+
+        # ---- ENHANCED SHARPENING ----
+        # First apply mild bilateral filtering to reduce noise while preserving edges
+        x_np = x_0.cpu().permute(0, 2, 3, 1).numpy()
+        filtered = []
+        for img in x_np:
+            img = (img * 255).astype(np.uint8)
+            filtered_img = cv2.bilateralFilter(img, d=5, sigmaColor=15, sigmaSpace=15)
+            filtered.append(filtered_img / 255.0)
+        x_0 = torch.tensor(np.array(filtered), device=device).permute(0, 3, 1, 2)
+
+        # Then apply stronger unsharp masking
+        kernel = torch.ones(3, 1, 5, 5, device=device) / 75
+        kernel = kernel.to(x_0.dtype)
+        blurred = torch.nn.functional.conv2d(
+            x_0,
+            kernel,
+            padding=2,
+            groups=3
+        )
+        x_0 = torch.clamp(1.5 * x_0 - 0.5 * blurred, 0., 1.)  # Increased sharpening factor
+
+        return x_0
+
 def load_model(model_path, device):
     unet_model = UNet(num_classes=NUM_CLASSES).to(device)
     diffusion_model = DiffusionModel(unet_model, timesteps=TIMESTEPS).to(device)
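For reference, a self-contained sketch of the depthwise-convolution unsharp mask used at the end of the new sample(). Note that this sketch normalizes the 5x5 box kernel by 25 (a true mean blur), whereas the diff divides by 75, which scales the blur term by an extra 1/3; the helper name and the amount parameter are illustrative, not part of app.py:

import torch
import torch.nn.functional as F

def unsharp_mask(x, amount=0.5):
    # Depthwise 5x5 box blur: one normalized kernel per RGB channel (groups=3).
    kernel = torch.ones(3, 1, 5, 5, device=x.device, dtype=x.dtype) / 25.0
    blurred = F.conv2d(x, kernel, padding=2, groups=3)
    # Unsharp mask: amplify the difference between the image and its blur, then clamp to [0, 1].
    return torch.clamp((1 + amount) * x - amount * blurred, 0.0, 1.0)

# Example on a dummy batch in [0, 1]:
x = torch.rand(2, 3, 128, 128)
sharpened = unsharp_mask(x)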
@@ -315,7 +315,7 @@ def generate_images(label_str, num_images, progress=gr.Progress()):
         raise gr.Error(f"Generation failed: {str(e)}")
     finally:
         torch.cuda.empty_cache()
-
+
 # Load model
 MODEL_NAME = "model_weights.pth"
 model_path = MODEL_NAME
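A rough usage sketch of the updated sampler, to be read in the context of app.py's globals (IMG_SIZE, NUM_CLASSES, model_path). It assumes load_model returns the DiffusionModel instance and that integer class ids are valid for labels, per the one-hot branch above; the label values themselves are placeholders:

import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
model = load_model(model_path, device)               # model_path = "model_weights.pth" above
labels = torch.tensor([0, 1], device=device)         # two integer class ids (placeholder values)
images = model.sample(
    num_images=2,
    img_size=IMG_SIZE,
    num_classes=NUM_CLASSES,
    labels=labels,
    device=device,
    progress_callback=lambda p: print(f"{p:.0%} done"),
)
if images is not None:                               # sample() returns None when cancelled
    print(images.shape)                              # expected: (2, 3, IMG_SIZE, IMG_SIZE)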