Update app.py

app.py CHANGED
@@ -19,25 +19,20 @@ cancel_event = Event()
 # Device Configuration
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
-# --- Model Definitions
+# --- Model Definitions ---
 class SinusoidalPositionEmbeddings(nn.Module):
     def __init__(self, dim):
         super().__init__()
         self.dim = dim
-        self.register_buffer('embeddings', self._precompute_embeddings(dim))
-
-    def _precompute_embeddings(self, dim):
         half_dim = dim // 2
         emb = math.log(10000) / (half_dim - 1)
-        emb = torch.exp(torch.arange(half_dim) * -emb)
-        return emb
+        emb = torch.exp(torch.arange(half_dim, dtype=torch.float32) * -emb)
+        self.register_buffer('embeddings', emb)
 
     def forward(self, time):
-        device = time.device
-        embeddings = self.embeddings.to(device)
-        embeddings = time[:, None] * embeddings[None, :]
-        output = torch.cat([embeddings.sin(), embeddings.cos()], dim=-1)
-        return output
+        embeddings = self.embeddings.to(time.device)
+        embeddings = time.float()[:, None] * embeddings[None, :]
+        return torch.cat([embeddings.sin(), embeddings.cos()], dim=-1)
 
 class UNet(nn.Module):
     def __init__(self, in_channels=3, out_channels=3, num_classes=2, time_dim=256):
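Note: both versions precompute the frequency table once in __init__; the new code inlines it, pins float32, and has forward scale the cached frequencies by the timestep batch. A minimal standalone sketch of what forward computes (dim=256, the default time_dim here, is assumed):

import math
import torch

dim = 256                       # assumed: matches time_dim=256 above
half_dim = dim // 2
emb = math.log(10000) / (half_dim - 1)
freqs = torch.exp(torch.arange(half_dim, dtype=torch.float32) * -emb)  # (128,) geometric frequencies

t = torch.tensor([0.0, 10.0, 999.0])       # a batch of three timesteps
angles = t[:, None] * freqs[None, :]       # broadcast to (3, 128)
out = torch.cat([angles.sin(), angles.cos()], dim=-1)
print(out.shape)                           # torch.Size([3, 256])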
@@ -125,95 +120,97 @@ class UNet(nn.Module):
         return output
 
 class DiffusionModel(nn.Module):
-    def __init__(self, model, timesteps=1000, time_dim=256):
+    def __init__(self, model, timesteps=TIMESTEPS, time_dim=256):
         super().__init__()
         self.model = model
         self.timesteps = timesteps
         self.time_dim = time_dim
 
-        self.betas = self.linear_schedule(timesteps)
-        self.alphas = 1. - self.betas
-        self.register_buffer('alpha_bars', torch.cumprod(self.alphas, dim=0).float())
-
-    def linear_schedule(self, timesteps):
+        # Fix 1: Ensure consistent float32 types
         scale = 1000 / timesteps
         beta_start = scale * 0.0001
         beta_end = scale * 0.02
-        return torch.linspace(beta_start, beta_end, timesteps)
-
-    def forward_diffusion(self, x_0, t, noise):
-        x_0 = x_0.float()
-        noise = noise.float()
-        alpha_bar_t = self.alpha_bars[t].view(-1, 1, 1, 1)
-        x_t = torch.sqrt(alpha_bar_t) * x_0 + torch.sqrt(1. - alpha_bar_t) * noise
-        return x_t
-
-    def forward(self, x_0, labels):
-        t = torch.randint(0, self.timesteps, (x_0.shape[0],), device=x_0.device).long()
-        noise = torch.randn_like(x_0)
-        x_t = self.forward_diffusion(x_0, t, noise)
-        predicted_noise = self.model(x_t, labels, t.float())
-        return predicted_noise, noise, t
-
-@torch.no_grad()
-def sample(model, num_images, timesteps, img_size, num_classes, labels, device, progress_callback=None):
-    x_t = torch.randn(num_images, 3, img_size, img_size).to(device)
-
-    if labels.ndim == 1:
-        labels_one_hot = torch.zeros(num_images, num_classes).to(device)
-        labels_one_hot[torch.arange(num_images), labels] = 1
-        labels = labels_one_hot
-    else:
-        labels = labels.to(device)
-
-    for t in reversed(range(timesteps)):
-        if cancel_event.is_set():
-            return None
-
-        t_tensor = torch.full((num_images,), t, device=device, dtype=torch.float)
-
-        predicted_noise = model.model(x_t, labels, t_tensor)
-
-        beta_t = model.betas[t].to(device)
-        alpha_t = model.alphas[t].to(device)
-        alpha_bar_t = model.alpha_bars[t].to(device)
-
-        mean = (1 / torch.sqrt(alpha_t)) * (x_t - (beta_t / torch.sqrt(1 - alpha_bar_t)) * predicted_noise)
-        variance = beta_t
-
-        if t > 0:
-            noise = torch.randn_like(x_t)
-        else:
-            noise = torch.zeros_like(x_t)
-
-        x_t = mean + torch.sqrt(variance) * noise
-
-        if progress_callback:
-            progress_callback((timesteps - t) / timesteps)
-
-    return x_t
+        self.betas = torch.linspace(beta_start, beta_end, timesteps, dtype=torch.float32)
+        self.alphas = 1. - self.betas
+        self.register_buffer('alpha_bars', torch.cumprod(self.alphas, dim=0))
+
+    @torch.no_grad()
+    def sample(self, num_images, img_size, num_classes, labels, device, progress_callback=None):
+        # Initialize with noise
+        x_t = torch.randn((num_images, 3, img_size, img_size), device=device, dtype=torch.float32)
+
+        # Convert labels to proper format
+        if labels.ndim == 1:
+            labels_one_hot = torch.zeros(num_images, num_classes, device=device)
+            labels_one_hot[torch.arange(num_images), labels] = 1
+            labels = labels_one_hot
+        else:
+            labels = labels.to(device)
+
+        for i in reversed(range(0, self.timesteps)):
+            if cancel_event.is_set():
+                return None
+
+            t = torch.full((num_images,), i, device=device, dtype=torch.long)
+
+            # Model prediction with type stability
+            pred_noise = self.model(x_t, labels, t.float())
+
+            # Calculate diffusion parameters
+            beta_t = self.betas[t].view(-1, 1, 1, 1).to(device)
+            alpha_t = self.alphas[t].view(-1, 1, 1, 1).to(device)
+            alpha_bar_t = self.alpha_bars[t].view(-1, 1, 1, 1).to(device)
+
+            # Improved denoising step (Fix 2)
+            if i > 0:
+                noise = torch.randn_like(x_t)
+            else:
+                noise = torch.zeros_like(x_t)
+
+            x_t = (x_t - (1 - alpha_t)/torch.sqrt(1 - alpha_bar_t) * pred_noise) / torch.sqrt(alpha_t)
+            x_t += noise * torch.sqrt(beta_t)
+
+            if progress_callback:
+                progress_callback((self.timesteps - i) / self.timesteps)
+
+        # Fix 3: Simplified scaling
+        x_t = torch.clamp(x_t, -1., 1.)
+        return (x_t + 1) / 2  # Scale to [0,1]
 
 def load_model(model_path, device):
-    diffusion_model = DiffusionModel(
+    unet = UNet(num_classes=NUM_CLASSES).to(device)
+    diffusion_model = DiffusionModel(unet).to(device)
+
+    if os.path.exists(model_path):
+        try:
+            checkpoint = torch.load(model_path, map_location=device)
+
+            # Handle both full model and state_dict loading
+            if 'model_state_dict' in checkpoint:
+                state_dict = checkpoint['model_state_dict']
+            else:
+                state_dict = checkpoint
+
+            # Handle both prefixed and non-prefixed state dicts
+            if all(k.startswith('model.') for k in state_dict.keys()):
+                state_dict = {k[6:]: v for k, v in state_dict.items()}
+
+            unet.load_state_dict(state_dict, strict=False)
+            print("Model loaded successfully")
+
+            # Verify model loading
+            test_input = torch.randn(1, 3, IMG_SIZE, IMG_SIZE).to(device)
+            test_labels = torch.zeros(1, NUM_CLASSES).to(device)
+            test_labels[0, 0] = 1
+            test_time = torch.tensor([1]).to(device)
+            output = unet(test_input, test_labels, test_time)
+            print(f"Model test output shape: {output.shape}")
+
+        except Exception as e:
+            traceback.print_exc()
+            raise ValueError(f"Error loading model: {str(e)}")
+    else:
+        raise FileNotFoundError(f"Model weights not found at {model_path}")
 
     diffusion_model.eval()
     return diffusion_model
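Note: both the removed module-level sample() and the new DiffusionModel.sample() method implement the standard DDPM ancestral update (Ho et al., 2020), with a conditional noise predictor:

$$ x_{t-1} = \frac{1}{\sqrt{\alpha_t}}\Big(x_t - \frac{1-\alpha_t}{\sqrt{1-\bar{\alpha}_t}}\,\epsilon_\theta(x_t, c, t)\Big) + \sqrt{\beta_t}\,z, \qquad z \sim \mathcal{N}(0, I) \text{ for } t > 0,\ z = 0 \text{ at } t = 0. $$

The old mean used the coefficient beta_t / sqrt(1 - alpha_bar_t); since beta_t = 1 - alpha_t, that is algebraically identical to the new (1 - alpha_t) / sqrt(1 - alpha_bar_t). The substantive changes are the long-dtype timestep index, the per-sample .view(-1, 1, 1, 1) broadcasting, and the final clamp to [-1, 1] followed by rescaling to [0, 1]. The scale = 1000 / timesteps factor keeps the betas equivalent to the standard linear schedule (1e-4 to 0.02 when timesteps is 1000) at other step counts.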
@@ -222,32 +219,6 @@ def cancel_generation():
     cancel_event.set()
     return "Generation cancelled"
 
-def generate_single_image(label_str):
-    label_map = {'Pneumonia': 0, 'Pneumothorax': 1}
-    try:
-        label_index = label_map[label_str]
-    except KeyError:
-        raise gr.Error(f"Invalid label '{label_str}'. Please select either 'Pneumonia' or 'Pneumothorax'.")
-
-    labels = torch.zeros(1, NUM_CLASSES, device=device)
-    labels[0, label_index] = 1
-
-    with torch.no_grad():
-        generated_image = sample(
-            model=loaded_model,
-            num_images=1,
-            timesteps=TIMESTEPS,
-            img_size=IMG_SIZE,
-            num_classes=NUM_CLASSES,
-            labels=labels,
-            device=device
-        )
-
-    img_np = generated_image.squeeze(0).cpu().permute(1, 2, 0).numpy()
-    img_np = np.clip(img_np, 0, 1)
-    img_pil = Image.fromarray((img_np * 255).astype(np.uint8))
-
-    return img_pil
 def generate_images(label_str, num_images, progress=gr.Progress()):
     global loaded_model
     cancel_event.clear()
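Note: the deleted generate_single_image path is subsumed by generate_images, which (per the return hunk further down) hands back the first PIL image directly when num_images == 1. A hypothetical equivalent call:

# hypothetical replacement for the deleted helper
img, gallery = generate_images('Pneumonia', 1)   # img is a PIL.Image, gallery a one-element list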
@@ -260,7 +231,7 @@ def generate_images(label_str, num_images, progress=gr.Progress()):
     if label_str not in label_map:
         raise gr.Error("Invalid condition selected")
 
-    labels = torch.zeros(num_images, NUM_CLASSES, device=device)
+    labels = torch.zeros(num_images, NUM_CLASSES, device=device, dtype=torch.float32)
     labels[:, label_map[label_str]] = 1
 
     try:
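Note: the only change here pins the one-hot conditioning tensor to float32. A small sketch of what it holds for a batch of three (class indices assumed from the label_map above: Pneumonia=0, Pneumothorax=1):

import torch

NUM_CLASSES = 2
labels = torch.zeros(3, NUM_CLASSES, dtype=torch.float32)
labels[:, 1] = 1                  # three 'Pneumothorax' requests
print(labels)
# tensor([[0., 1.],
#         [0., 1.],
#         [0., 1.]])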
@@ -270,10 +241,11 @@ def generate_images(label_str, num_images, progress=gr.Progress()):
         raise gr.Error("Generation was cancelled by user")
 
     with torch.no_grad():
-        images = sample(
-            model=loaded_model,
+        print(f"Generating {num_images} images for {label_str}")
+        print(f"Labels shape: {labels.shape}, device: {labels.device}")
+
+        images = loaded_model.sample(
             num_images=num_images,
-            timesteps=TIMESTEPS,
             img_size=IMG_SIZE,
             num_classes=NUM_CLASSES,
             labels=labels,
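Note: because sample is now a bound method, the model= and timesteps= arguments disappear; self supplies both. A sketch of the full call in the app's context (loaded_model and device are the app's globals; the constant values are assumptions):

import torch

IMG_SIZE, NUM_CLASSES = 64, 2     # assumed values of the app's constants
labels = torch.zeros(4, NUM_CLASSES, device=device)
labels[:, 0] = 1                  # four 'Pneumonia' conditions

images = loaded_model.sample(
    num_images=4,
    img_size=IMG_SIZE,
    num_classes=NUM_CLASSES,
    labels=labels,
    device=device,
    progress_callback=lambda frac: print(f"{frac:.0%} denoised"),
)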
@@ -284,15 +256,21 @@ def generate_images(label_str, num_images, progress=gr.Progress()):
     if images is None:
         return None, None
 
-    #
+    # Diagnostic print
+    print(f"Generated images range: {images.min().item():.3f}, {images.max().item():.3f}")
+
     processed_images = []
     for img in images:
-        img_np = img.cpu().permute(1, 2, 0).numpy()
-        img_np = np.clip(img_np, 0, 1)
-        pil_img = Image.fromarray((img_np * 255).astype(np.uint8))
+        # Fix 3: Improved image conversion
+        img_np = (img.cpu().numpy().transpose(1, 2, 0) * 255).clip(0, 255).astype(np.uint8)
+        print(f"Image range after conversion: {img_np.min()}, {img_np.max()}")
+
+        if img_np.shape[2] == 1:  # Handle grayscale if needed
+            img_np = img_np.squeeze(-1)
+        pil_img = Image.fromarray(img_np)
         processed_images.append(pil_img)
 
-    # Return
+    # Return appropriate outputs based on count
     if num_images == 1:
         return processed_images[0], processed_images
     else:
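Note: the new conversion relies on sample() already returning floats in [0, 1] (the clamp-and-rescale at the end of the sampling loop above). A self-contained sketch of the round trip, with a random stand-in tensor:

import numpy as np
import torch
from PIL import Image

img = torch.rand(3, 64, 64)       # stand-in for one sampled image in [0, 1]
arr = (img.cpu().numpy().transpose(1, 2, 0) * 255).clip(0, 255).astype(np.uint8)
pil = Image.fromarray(arr)        # CHW float -> HWC uint8 -> PIL RGB
pil.save('sample.png')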
@@ -317,7 +295,7 @@ print("Loading model...")
 loaded_model = load_model(model_path, device)
 print("Model loaded successfully!")
 
-#
+# Gradio UI
 with gr.Blocks(theme=gr.themes.Soft(
     primary_hue="violet",
     neutral_hue="slate",
@@ -356,7 +334,6 @@ with gr.Blocks(theme=gr.themes.Soft(
     """)
 
     with gr.Column(scale=2):
-        # Unified output display that adapts to single/batch
         with gr.Tabs():
             with gr.TabItem("Output", id="output_tab"):
                 single_image = gr.Image(
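Note on the 'model.' prefix handling in load_model: checkpoints saved from the wrapper (DiffusionModel holds the UNet as self.model) store UNet weights under keys like 'model.conv1.weight', while the bare UNet expects 'conv1.weight'; k[6:] drops the six-character 'model.' prefix. A minimal sketch with hypothetical key names:

state_dict = {'model.conv1.weight': 0, 'model.conv1.bias': 1}   # hypothetical checkpoint keys
if all(k.startswith('model.') for k in state_dict):
    state_dict = {k[6:]: v for k, v in state_dict.items()}
print(state_dict)   # {'conv1.weight': 0, 'conv1.bias': 1}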