Update app.py

app.py CHANGED
@@ -125,7 +125,6 @@ class DiffusionModel(nn.Module):
         self.model = model
         self.timesteps = timesteps
 
-        # Use the exact same noise schedule as Colab
         beta_start = 0.0001
         beta_end = 0.02
         self.betas = torch.linspace(beta_start, beta_end, timesteps, dtype=torch.float32)
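Note: sample() below also uses self.alphas and self.alpha_bars, which are defined outside this hunk. A minimal sketch of the conventional DDPM derivation from the linear beta schedule above (an assumption, since the defining lines are not shown):

    # Assumed standard DDPM quantities derived from self.betas (not part of this diff):
    self.alphas = 1.0 - self.betas                       # per-step signal retention
    self.alpha_bars = torch.cumprod(self.alphas, dim=0)  # cumulative product over steps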
@@ -134,29 +133,28 @@ class DiffusionModel(nn.Module):
 
     @torch.no_grad()
     def sample(self, num_images, img_size, num_classes, labels, device, progress_callback=None):
-        """
-
-
-
-        # Identical label handling
+        """Your exact sampling function from Colab"""
+        x_t = torch.randn(num_images, 3, img_size, img_size).to(device)
+
         if labels.ndim == 1:
-
-
+            labels_one_hot = torch.zeros(num_images, num_classes).to(device)
+            labels_one_hot[torch.arange(num_images), labels] = 1
+            labels = labels_one_hot
+        else:
+            labels = labels.to(device)
 
-        # Same sampling loop
         for t in reversed(range(self.timesteps)):
             if cancel_event.is_set():
                 return None
 
-            t_tensor = torch.full((num_images,), t, device=device, dtype=torch.float)
+            t_tensor = torch.full((num_images,), t, device=device, dtype=torch.float) # Pass time as float
+
             predicted_noise = self.model(x_t, labels, t_tensor)
 
-            # Identical coefficients calculation
             beta_t = self.betas[t].to(device)
             alpha_t = self.alphas[t].to(device)
             alpha_bar_t = self.alpha_bars[t].to(device)
 
-            # Same mean/variance calculation
             mean = (1 / torch.sqrt(alpha_t)) * (x_t - (beta_t / torch.sqrt(1 - alpha_bar_t)) * predicted_noise)
             variance = beta_t
 
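The mean/variance above are the standard DDPM posterior terms. The update that consumes them sits in unchanged context between this hunk and the next; a minimal sketch of the usual ancestral-sampling step (an assumption, since those lines are elided from the diff):

    # Assumed standard DDPM ancestral update in the unchanged lines:
    if t > 0:
        noise = torch.randn_like(x_t)
        x_t = mean + torch.sqrt(variance) * noise
    else:
        x_t = mean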
@@ -170,35 +168,15 @@ class DiffusionModel(nn.Module):
             if progress_callback:
                 progress_callback((self.timesteps - t) / self.timesteps)
 
-
-
+        x_0 = torch.clamp(x_t, -1., 1.)
+
         mean = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1).to(device)
         std = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1).to(device)
-
-
-
-        return
+        x_0 = std * x_0 + mean
+        x_0 = torch.clamp(x_0, 0., 1.)
+
+        return x_0
 
-    def _post_process(self, images):
-        """Apply post-processing to reduce noise and enhance contrast"""
-        # Normalize to [0,1]
-        images = torch.clamp(images, -1, 1)
-        images = (images + 1) / 2
-
-        # Apply mild blur (convert NHWC to NCHW for conv2d)
-        if images.dim() == 4 and images.shape[-1] != 3: # NCHW format
-            images = images.permute(0, 2, 3, 1)
-
-        x = images.permute(0, 3, 1, 2) # NHWC to NCHW
-        x = torch.nn.functional.conv2d(x, self.blur_kernel, padding=1, groups=3)
-        images = x.permute(0, 2, 3, 1) # NCHW to NHWC
-
-        # Contrast adjustment
-        mean_val = images.mean(dim=(1,2,3), keepdim=True)
-        images = (images - mean_val) * 1.2 + mean_val
-
-        return torch.clamp(images, 0, 1)
-
 def load_model(model_path, device):
     unet = UNet(num_classes=NUM_CLASSES).to(device)
     diffusion_model = DiffusionModel(unet).to(device)
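The added denormalization inverts a torchvision-style Normalize with ImageNet statistics; a sketch of the forward transform it presumably undoes at training time (an assumption, as the training pipeline is not part of this diff):

    # Assumed training-time transform that x_0 = std * x_0 + mean reverses:
    from torchvision import transforms
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])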
@@ -207,20 +185,17 @@ def load_model(model_path, device):
     try:
         checkpoint = torch.load(model_path, map_location=device)
 
-        # Handle both full model and state_dict loading
         if 'model_state_dict' in checkpoint:
             state_dict = checkpoint['model_state_dict']
         else:
             state_dict = checkpoint
 
-        # Handle both prefixed and non-prefixed state dicts
         if all(k.startswith('model.') for k in state_dict.keys()):
             state_dict = {k[6:]: v for k, v in state_dict.items()}
 
         unet.load_state_dict(state_dict, strict=False)
         print("Model loaded successfully")
 
-        # Verify model loading
         test_input = torch.randn(1, 3, IMG_SIZE, IMG_SIZE).to(device)
         test_labels = torch.zeros(1, NUM_CLASSES).to(device)
         test_time = torch.tensor([1]).to(device)
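The k[6:] slice drops exactly len('model.') == 6 characters, remapping keys saved from the DiffusionModel wrapper onto the bare UNet. A self-contained sketch with hypothetical keys:

    # Hypothetical checkpoint keys, for illustration only:
    state_dict = {'model.conv.weight': 1, 'model.conv.bias': 2}
    if all(k.startswith('model.') for k in state_dict):
        state_dict = {k[len('model.'):]: v for k, v in state_dict.items()}
    print(state_dict)  # {'conv.weight': 1, 'conv.bias': 2}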
@@ -244,7 +219,6 @@ try:
     print("Model loaded successfully!")
 except Exception as e:
     print(f"Failed to load model: {e}")
-    # Create a dummy model if loading fails
     print("Creating dummy model for demonstration")
     loaded_model = DiffusionModel(UNet(num_classes=NUM_CLASSES)).to(device)
 
@@ -263,7 +237,6 @@ def generate_images(label_str, num_images, progress=gr.Progress()):
     if label_str not in label_map:
         raise gr.Error("Invalid condition selected")
 
-    # Create one-hot encoded labels
    labels = torch.zeros(num_images, NUM_CLASSES)
     labels[:, label_map[label_str]] = 1
 
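An equivalent way to build the same one-hot batch with torch.nn.functional.one_hot, shown only as a cross-check (num_images, NUM_CLASSES, and label_map as in the surrounding code):

    import torch.nn.functional as F
    idx = torch.full((num_images,), label_map[label_str], dtype=torch.long)
    labels = F.one_hot(idx, num_classes=NUM_CLASSES).float()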
@@ -288,7 +261,6 @@ def generate_images(label_str, num_images, progress=gr.Progress()):
 
     processed_images = []
     for img in images:
-        # Convert to numpy and permute dimensions (C,H,W) -> (H,W,C)
         img_np = img.cpu().permute(1, 2, 0).numpy()
         img_np = (img_np * 255).clip(0, 255).astype(np.uint8)
         pil_img = Image.fromarray(img_np)
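Image.fromarray expects an HWC uint8 array, which is why the permute and * 255 cast come first; the [0, 1] range is guaranteed by the final clamp added in sample(). A tiny self-contained check (hypothetical tensor):

    import numpy as np
    import torch
    from PIL import Image
    img = torch.rand(3, 64, 64)  # hypothetical CHW float in [0, 1]
    arr = (img.permute(1, 2, 0).numpy() * 255).clip(0, 255).astype(np.uint8)
    print(Image.fromarray(arr).size)  # (64, 64), mode RGB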