Spaces:

Vedansh-7
/

Diffusion-unet-xray

Sleeping

App Files Files Community

Vedansh-7 committed 19 days ago

Commit

190a6d4

verified ·

1 Parent(s): 49fdbe4

Update app.py

Browse files

Files changed (1) hide show

app.py +96 -208

app.py CHANGED Viewed

@@ -5,41 +5,37 @@ from PIL import Image
 import numpy as np
 import math
 import os
-from threading import Event
-import traceback
 # Constants
 IMG_SIZE = 128
-TIMESTEPS = 500
 NUM_CLASSES = 2
-# Global Cancellation Flag
-cancel_event = Event()
-# Device Configuration
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-# --- Model Definitions ---
 class SinusoidalPositionEmbeddings(nn.Module):
     def __init__(self, dim):
         super().__init__()
         self.dim = dim
         half_dim = dim // 2
         emb = math.log(10000) / (half_dim - 1)
-        emb = torch.exp(torch.arange(half_dim, dtype=torch.float32) * -emb)
         self.register_buffer('embeddings', emb)
     def forward(self, time):
-        embeddings = self.embeddings.to(time.device)
-        embeddings = time.float()[:, None] * embeddings[None, :]
         return torch.cat([embeddings.sin(), embeddings.cos()], dim=-1)
 class UNet(nn.Module):
     def __init__(self, in_channels=3, out_channels=3, num_classes=2, time_dim=256):
         super().__init__()
         self.num_classes = num_classes
         self.label_embedding = nn.Embedding(num_classes, time_dim)
         self.time_mlp = nn.Sequential(
             SinusoidalPositionEmbeddings(time_dim),
             nn.Linear(time_dim, time_dim),
@@ -47,13 +43,16 @@ class UNet(nn.Module):
             nn.Linear(time_dim, time_dim)
         )
         self.inc = self.double_conv(in_channels, 64)
         self.down1 = self.down(64 + time_dim * 2, 128)
         self.down2 = self.down(128 + time_dim * 2, 256)
         self.down3 = self.down(256 + time_dim * 2, 512)
         self.bottleneck = self.double_conv(512 + time_dim * 2, 1024)
         self.up1 = nn.ConvTranspose2d(1024, 256, kernel_size=2, stride=2)
         self.upconv1 = self.double_conv(256 + 256 + time_dim * 2, 256)
@@ -80,6 +79,7 @@ class UNet(nn.Module):
         )
     def forward(self, x, labels, time):
         label_indices = torch.argmax(labels, dim=1)
         label_emb = self.label_embedding(label_indices)
         t_emb = self.time_mlp(time)
@@ -116,24 +116,41 @@ class UNet(nn.Module):
         x = torch.cat([x, combined_emb.repeat(1, 1, x.shape[-2], x.shape[-1])], dim=1)
         x = self.upconv3(x)
-        output = self.outc(x)
-        return output
 class DiffusionModel(nn.Module):
-    def __init__(self, model, timesteps=TIMESTEPS):
         super().__init__()
         self.model = model
         self.timesteps = timesteps
-        beta_start = 0.0001
-        beta_end = 0.02
-        self.betas = torch.linspace(beta_start, beta_end, timesteps, dtype=torch.float32)
         self.alphas = 1. - self.betas
-        self.register_buffer('alpha_bars', torch.cumprod(self.alphas, dim=0))
     @torch.no_grad()
-    def sample(self, num_images, img_size, num_classes, labels, device, progress_callback=None):
-        """Your exact sampling function from Colab"""
         x_t = torch.randn(num_images, 3, img_size, img_size).to(device)
         if labels.ndim == 1:
@@ -144,11 +161,7 @@ class DiffusionModel(nn.Module):
             labels = labels.to(device)
         for t in reversed(range(self.timesteps)):
-            if cancel_event.is_set():
-                return None
-            t_tensor = torch.full((num_images,), t, device=device, dtype=torch.float)  # Pass time as float
             predicted_noise = self.model(x_t, labels, t_tensor)
             beta_t = self.betas[t].to(device)
@@ -164,12 +177,10 @@ class DiffusionModel(nn.Module):
                 noise = torch.zeros_like(x_t)
             x_t = mean + torch.sqrt(variance) * noise
-            if progress_callback:
-                progress_callback((self.timesteps - t) / self.timesteps)
         x_0 = torch.clamp(x_t, -1., 1.)
         mean = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1).to(device)
         std = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1).to(device)
         x_0 = std * x_0 + mean
@@ -177,197 +188,74 @@ class DiffusionModel(nn.Module):
         return x_0
 def load_model(model_path, device):
-    unet = UNet(num_classes=NUM_CLASSES).to(device)
-    diffusion_model = DiffusionModel(unet).to(device)
     if os.path.exists(model_path):
-        try:
-            checkpoint = torch.load(model_path, map_location=device)
-            if 'model_state_dict' in checkpoint:
-                state_dict = checkpoint['model_state_dict']
-            else:
-                state_dict = checkpoint
-            if all(k.startswith('model.') for k in state_dict.keys()):
-                state_dict = {k[6:]: v for k, v in state_dict.items()}
-            unet.load_state_dict(state_dict, strict=False)
-            print("Model loaded successfully")
-            test_input = torch.randn(1, 3, IMG_SIZE, IMG_SIZE).to(device)
-            test_labels = torch.zeros(1, NUM_CLASSES).to(device)
-            test_time = torch.tensor([1]).to(device)
-            output = unet(test_input, test_labels, test_time)
-            print(f"Model test output shape: {output.shape}")
-        except Exception as e:
-            traceback.print_exc()
-            raise ValueError(f"Error loading model: {str(e)}")
     else:
-        raise FileNotFoundError(f"Model weights not found at {model_path}")
     diffusion_model.eval()
     return diffusion_model
-MODEL_NAME = "model_weights.pth"
-model_path = MODEL_NAME
-print("Loading model...")
-try:
-    loaded_model = load_model(model_path, device)
-    print("Model loaded successfully!")
-except Exception as e:
-    print(f"Failed to load model: {e}")
-    print("Creating dummy model for demonstration")
-    loaded_model = DiffusionModel(UNet(num_classes=NUM_CLASSES)).to(device)
-def cancel_generation():
-    cancel_event.set()
-    return "Generation cancelled"
-def generate_images(label_str, num_images, progress=gr.Progress()):
-    global loaded_model
-    cancel_event.clear()
-    if num_images < 1 or num_images > 10:
-        raise gr.Error("Number of images must be between 1 and 10")
     label_map = {'Pneumonia': 0, 'Pneumothorax': 1}
     if label_str not in label_map:
-        raise gr.Error("Invalid condition selected")
-    labels = torch.zeros(num_images, NUM_CLASSES)
-    labels[:, label_map[label_str]] = 1
-    try:
-        def progress_callback(progress_val):
-            progress(progress_val, desc="Generating...")
-            if cancel_event.is_set():
-                raise gr.Error("Generation was cancelled by user")
-        with torch.no_grad():
-            images = loaded_model.sample(
-                num_images=num_images,
-                img_size=IMG_SIZE,
-                num_classes=NUM_CLASSES,
-                labels=labels,
-                device=device,
-                progress_callback=progress_callback
-            )
-        if images is None:
-            return None, None
-        processed_images = []
-        for img in images:
-            img_np = img.cpu().permute(1, 2, 0).numpy()
-            img_np = (img_np * 255).clip(0, 255).astype(np.uint8)
-            pil_img = Image.fromarray(img_np)
-            processed_images.append(pil_img)
-        if num_images == 1:
-            return processed_images[0], processed_images
-        else:
-            return None, processed_images
-    except Exception as e:
-        traceback.print_exc()
-        raise gr.Error(f"Generation failed: {str(e)}")
-    finally:
-        torch.cuda.empty_cache()
-# Gradio UI
-with gr.Blocks(theme=gr.themes.Soft(
-    primary_hue="violet",
-    neutral_hue="slate",
-    font=[gr.themes.GoogleFont("Poppins")],
-    text_size="md"
-)) as demo:
-    gr.Markdown("""
-    <center>
-    <h1>Synthetic X-ray Generator</h1>
-    <p><em>Generate synthetic chest X-rays conditioned on pathology</em></p>
-    </center>
-    """)
-    with gr.Row():
-        with gr.Column(scale=1):
-            condition = gr.Dropdown(
-                ["Pneumonia", "Pneumothorax"],
-                label="Select Condition",
-                value="Pneumonia",
-                interactive=True
-            )
-            num_images = gr.Slider(
-                1, 10, value=1, step=1,
-                label="Number of Images",
-                interactive=True
-            )
-            with gr.Row():
-                submit_btn = gr.Button("Generate", variant="primary")
-                cancel_btn = gr.Button("Cancel", variant="stop")
-            gr.Markdown("""
-            <div style="text-align: center; margin-top: 10px;">
-                <small>Note: Generation may take several seconds per image</small>
-            </div>
-            """)
-        with gr.Column(scale=2):
-            with gr.Tabs():
-                with gr.TabItem("Output", id="output_tab"):
-                    single_image = gr.Image(
-                        label="Generated X-ray",
-                        height=400,
-                        visible=True
-                    )
-                    gallery = gr.Gallery(
-                        label="Generated X-rays",
-                        columns=3,
-                        height="auto",
-                        object_fit="contain",
-                        visible=False
-                    )
-    def update_ui_based_on_count(num_images):
-        if num_images == 1:
-            return {
-                single_image: gr.update(visible=True),
-                gallery: gr.update(visible=False)
-            }
-        else:
-            return {
-                single_image: gr.update(visible=False),
-                gallery: gr.update(visible=True)
-            }
-    num_images.change(
-        fn=update_ui_based_on_count,
-        inputs=num_images,
-        outputs=[single_image, gallery]
-    )
-    submit_btn.click(
-        fn=generate_images,
-        inputs=[condition, num_images],
-        outputs=[single_image, gallery]
-    )
-    cancel_btn.click(
-        fn=cancel_generation,
-        outputs=None
     )
-    demo.css = """
-    .gradio-container {
-        background: linear-gradient(135deg, #f5f7fa 0%, #e4e8f0 100%);
-    }
-    .gallery-container {
-        background-color: white !important;
-    }
-    """
 if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", server_port=7860)

 import numpy as np
 import math
 import os
 # Constants
 IMG_SIZE = 128
+TIMESTEPS = 300
 NUM_CLASSES = 2
+# Device configuration
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# 1. Sinusoidal Embeddings
class SinusoidalPositionEmbeddings(nn.Module):
    """Turn a 1-D batch of timesteps into sinusoidal feature vectors.

    Each timestep is multiplied by ``dim // 2`` geometrically spaced
    frequencies; the sines and cosines of those products are concatenated,
    so the output has ``2 * (dim // 2)`` features per timestep.

    Args:
        dim: Requested embedding width.
    """

    def __init__(self, dim):
        super().__init__()
        self.dim = dim
        half_dim = dim // 2
        # Guard the denominator: dim of 2 or 3 gives half_dim == 1, which
        # would otherwise divide by zero.
        log_step = math.log(10000) / max(half_dim - 1, 1)
        frequencies = torch.exp(
            torch.arange(half_dim, dtype=torch.float32) * -log_step
        )
        # The buffer name is part of the state_dict layout, so it must stay
        # 'embeddings' for existing checkpoints to keep loading.
        self.register_buffer('embeddings', frequencies)

    def forward(self, time):
        """Return the embedding for ``time``.

        Args:
            time: 1-D tensor of timesteps (indexed as ``time[:, None]``).

        Returns:
            Float32 tensor with one row per timestep: sines first, then
            cosines.
        """
        frequencies = self.embeddings.to(time.device)
        # Cast to float32 so float64 timesteps do not produce float64
        # features for the float32 layers that consume them.
        angles = time.float()[:, None] * frequencies[None, :]
        return torch.cat([angles.sin(), angles.cos()], dim=-1)
+# 2. UNet Model (matches your original architecture exactly)
 class UNet(nn.Module):
     def __init__(self, in_channels=3, out_channels=3, num_classes=2, time_dim=256):
         super().__init__()
         self.num_classes = num_classes
         self.label_embedding = nn.Embedding(num_classes, time_dim)
         self.time_mlp = nn.Sequential(
             SinusoidalPositionEmbeddings(time_dim),
             nn.Linear(time_dim, time_dim),
             nn.Linear(time_dim, time_dim)
         )
+        # Encoder (matches your original channel sizes)
         self.inc = self.double_conv(in_channels, 64)
         self.down1 = self.down(64 + time_dim * 2, 128)
         self.down2 = self.down(128 + time_dim * 2, 256)
         self.down3 = self.down(256 + time_dim * 2, 512)
+        # Bottleneck (matches your original)
         self.bottleneck = self.double_conv(512 + time_dim * 2, 1024)
+        # Decoder (matches your original upsampling structure)
         self.up1 = nn.ConvTranspose2d(1024, 256, kernel_size=2, stride=2)
         self.upconv1 = self.double_conv(256 + 256 + time_dim * 2, 256)
         )
     def forward(self, x, labels, time):
+        # Matches your original forward pass exactly
         label_indices = torch.argmax(labels, dim=1)
         label_emb = self.label_embedding(label_indices)
         t_emb = self.time_mlp(time)
         x = torch.cat([x, combined_emb.repeat(1, 1, x.shape[-2], x.shape[-1])], dim=1)
         x = self.upconv3(x)
+        return self.outc(x)
+# 3. Diffusion Model (matches your original implementation)
 class DiffusionModel(nn.Module):
+    def __init__(self, model, timesteps=500, time_dim=256):
         super().__init__()
         self.model = model
         self.timesteps = timesteps
+        self.time_dim = time_dim
+        # Linear beta schedule (matches your original)
+        scale = 1000 / timesteps
+        beta_start = scale * 0.0001
+        beta_end = scale * 0.02
+        self.betas = torch.linspace(beta_start, beta_end, timesteps, dtype=torch.float64)
         self.alphas = 1. - self.betas
+        self.register_buffer('alpha_bars', torch.cumprod(self.alphas, dim=0).float())
+    def forward_diffusion(self, x_0, t, noise):
+        x_0 = x_0.float()
+        noise = noise.float()
+        alpha_bar_t = self.alpha_bars[t].view(-1, 1, 1, 1)
+        x_t = torch.sqrt(alpha_bar_t) * x_0 + torch.sqrt(1. - alpha_bar_t) * noise
+        return x_t
+    def forward(self, x_0, labels):
+        t = torch.randint(0, self.timesteps, (x_0.shape[0],), device=x_0.device).long()
+        noise = torch.randn_like(x_0)
+        x_t = self.forward_diffusion(x_0, t, noise)
+        predicted_noise = self.model(x_t, labels, t.float())
+        return predicted_noise, noise, t
     @torch.no_grad()
+    def sample(self, num_images, img_size, num_classes, labels, device):
+        # Matches your original sampling exactly
         x_t = torch.randn(num_images, 3, img_size, img_size).to(device)
         if labels.ndim == 1:
             labels = labels.to(device)
         for t in reversed(range(self.timesteps)):
+            t_tensor = torch.full((num_images,), t, device=device, dtype=torch.float)
             predicted_noise = self.model(x_t, labels, t_tensor)
             beta_t = self.betas[t].to(device)
                 noise = torch.zeros_like(x_t)
             x_t = mean + torch.sqrt(variance) * noise
         x_0 = torch.clamp(x_t, -1., 1.)
+        # Normalization matching your original code
         mean = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1).to(device)
         std = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1).to(device)
         x_0 = std * x_0 + mean
         return x_0
# 4. Model Loading (with improved error handling)
def load_model(model_path, device):
    """Build the diffusion model and load UNet weights when a file exists.

    Three checkpoint layouts are accepted:

    * a training checkpoint holding the weights under ``'model_state_dict'``;
    * a ``DiffusionModel`` state dict (UNet keys prefixed with ``model.``);
    * a bare ``UNet`` state dict (no prefix).

    Any ``alpha_bars`` entry is dropped, because ``DiffusionModel`` rebuilds
    that buffer from ``TIMESTEPS`` in its constructor.

    Args:
        model_path: Path of the weights file.
        device: Device the model is created on and tensors are mapped to.

    Returns:
        The ``DiffusionModel`` in eval mode. If ``model_path`` does not
        exist, the weights are left randomly initialised.
    """
    unet_model = UNet(num_classes=NUM_CLASSES).to(device)
    diffusion_model = DiffusionModel(unet_model, timesteps=TIMESTEPS).to(device)

    if os.path.exists(model_path):
        # NOTE(security): torch.load unpickles the file. Only load weights
        # from a trusted source, or switch to weights_only=True once the
        # checkpoint format is confirmed to be compatible with it.
        checkpoint = torch.load(model_path, map_location=device)
        if 'model_state_dict' in checkpoint:
            raw_state = checkpoint['model_state_dict']
        else:
            raw_state = checkpoint

        prefix = 'model.'
        if any(key.startswith(prefix) for key in raw_state):
            # Saved from the DiffusionModel wrapper: keep only the UNet
            # entries and strip the wrapper prefix.
            state_dict = {
                key[len(prefix):]: value
                for key, value in raw_state.items()
                if key.startswith(prefix)
                and not key.startswith('model.alpha_bars')
            }
        else:
            # Saved from the bare UNet: keys already match.
            state_dict = {
                key: value
                for key, value in raw_state.items()
                if not key.startswith('alpha_bars')
            }

        # strict=False so a partially matching checkpoint is reported
        # instead of aborting. The wrapper shares this UNet instance, so it
        # sees the loaded weights without being rebuilt.
        missing, unexpected = unet_model.load_state_dict(state_dict, strict=False)
        print(f"Loaded UNet weights. Missing keys: {missing}. Unexpected keys: {unexpected}")
        print(f"Model successfully loaded from {model_path}")
    else:
        print(f"Weights file not found at {model_path}")
        print("Using randomly initialized weights")

    diffusion_model.eval()
    return diffusion_model
# 5. Gradio Interface (matches your original)
def generate_image(label_str):
    """Sample one image for the selected condition and return it as a PIL image.

    Args:
        label_str: Condition name; must be 'Pneumonia' or 'Pneumothorax'.

    Returns:
        An RGB ``PIL.Image`` built from the sampled tensor.

    Raises:
        gr.Error: If ``label_str`` is not one of the known conditions.
    """
    label_map = {'Pneumonia': 0, 'Pneumothorax': 1}
    if label_str not in label_map:
        raise gr.Error("Invalid label selected.")

    # One-hot label row; width follows NUM_CLASSES rather than a literal 2 so
    # it stays in step with the value passed to sample() below.
    labels_to_generate = torch.zeros(1, NUM_CLASSES, device=device)
    labels_to_generate[:, label_map[label_str]] = 1

    generated_images_tensor = loaded_model.sample(
        1, IMG_SIZE, NUM_CLASSES, labels_to_generate, device
    )

    # (1, C, H, W) -> (H, W, C) for PIL.
    img_np = generated_images_tensor.squeeze(0).permute(1, 2, 0).cpu().numpy()
    # Clip before the uint8 cast so an out-of-range value saturates instead
    # of wrapping around.
    img_uint8 = np.clip(img_np * 255, 0, 255).astype(np.uint8)
    return Image.fromarray(img_uint8, 'RGB')
# Main Execution
if __name__ == "__main__":
    # generate_image reads the module-level `loaded_model`, so it must be
    # bound before the interface starts serving requests.
    model_path = "model_weights.pth"  # Match your filename
    loaded_model = load_model(model_path, device)

    # Build the two UI components first, then wire them into the interface.
    condition_input = gr.Dropdown(
        ["Pneumonia", "Pneumothorax"],
        label="Select Condition",
    )
    image_output = gr.Image(type="pil", label="Generated X-ray Image")

    iface = gr.Interface(
        fn=generate_image,
        inputs=condition_input,
        outputs=image_output,
        title="CheXpert X-ray Image Generator",
        description="Generate synthetic chest X-ray images conditioned on selected conditions (Pneumonia or Pneumothorax) using a diffusion model.",
    )
    iface.launch()