Spaces:

CristianLazoQuispe
/

mnist-diffusion-flow

Running

App Files Files Community

CristianLazoQuispe committed on May 24

Commit

3dc5131

1 Parent(s): 5ccd64d

flow support localized noise

Browse files

Files changed (3) hide show

app.py +37 -97
src/demo.py +78 -0
src/utils.py +20 -1

app.py CHANGED Viewed

@@ -1,37 +1,18 @@
-import os
-import cv2
-import sys
 import torch
-import numpy as np
-import gradio as gr
-import matplotlib.pyplot as plt
-from src.model import ConditionalUNet
-from huggingface_hub import hf_hub_download
 import time
-device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
-#device = 'cpu'
-img_shape = (1, 28, 28)
-def resize(image,size=(200,200)):
-    stretch_near = cv2.resize(image, size, interpolation = cv2.INTER_LINEAR)
-    return stretch_near
-model_diff = ConditionalUNet().to(device)
-model_path = hf_hub_download(repo_id="CristianLazoQuispe/MNIST_Diff_Flow_matching", filename="outputs/diffusion/diffusion_model.pth",
-                        cache_dir="models")
-print("Diff Downloaded!")
-model_diff.load_state_dict(torch.load(model_path, map_location=device))
-model_diff.eval()
-model_flow = ConditionalUNet().to(device)
-model_path = hf_hub_download(repo_id="CristianLazoQuispe/MNIST_Diff_Flow_matching", filename="outputs/flow_matching/flow_model.pth",
-                        cache_dir="models")
-print("Flow Downloaded!")
-model_flow.load_state_dict(torch.load(model_path, map_location=device))
-model_flow.eval()
 @torch.no_grad()
 def generate_diffusion_intermediates_streaming(label):
@@ -39,6 +20,7 @@ def generate_diffusion_intermediates_streaming(label):
     betas = torch.linspace(1e-4, 0.02, timesteps)
     alphas = 1.0 - betas
     alphas_cumprod = torch.cumprod(alphas, dim=0).to(device)
     x = torch.randn(1, *img_shape).to(device)
     y = torch.tensor([label], dtype=torch.long, device=device)
@@ -63,63 +45,33 @@ def generate_diffusion_intermediates_streaming(label):
             x += v.sqrt() * noise
         x = x.clamp(-1, 1)
-        if t in [499, 399, 299, 199, 99, 0]:
-            step_idx = {499: 6, 399: 7, 299: 8, 199: 9, 99: 10, 0: 11}[t]
-            v_mag = noise_pred[0, 0].abs().clamp(0, 3).cpu().numpy()
-            v_mag = (v_mag - v_mag.min()) / (v_mag.max() - v_mag.min() + 1e-5)
-            vel_colored = plt.get_cmap("coolwarm")(v_mag)[:, :, :3]
-            vel_colored = (vel_colored * 255).astype(np.uint8)
-            outputs[step_idx] = resize(vel_colored)
-            yield tuple(outputs)
-        outputs[12] = resize(((x + 1) / 2.0)[0, 0].cpu().numpy(),(300,300))
-        if t in [400, 300, 200, 100, 1, 0]:
-            step_idx = {400: 1, 300: 2, 200: 3, 100: 4, 1: 5, 0 :12}[t]
-            if t==0:
-                outputs[step_idx] = resize(((x + 1) / 2.0)[0, 0].cpu().numpy(),(300,300))
-            else:
-                outputs[step_idx] = resize(((x + 1) / 2.0)[0, 0].cpu().numpy())
-            yield tuple(outputs)
         if t % 10 == 0:
             yield tuple(outputs)
             time.sleep(0.06)
-        #time.sleep(0.1)
-    yield tuple(outputs)
-def generate_localized_noise(shape, radius=5):
-    """Generate an image whose noise is confined to a circle at the centre.
-
-    Args:
-        shape: ``(B, C, H, W)`` of the tensor to create; ``C`` must be 1.
-        radius: Radius of the noisy disc, in pixels.
-
-    Returns:
-        A ``(B, C, H, W)`` tensor holding standard-normal noise inside the
-        disc and the constant -1 everywhere else.
-    """
-    B, C, H, W = shape
-    assert C == 1, "Solo imágenes en escala de grises."
-    # Build the circular mask
-    yy, xx = torch.meshgrid(torch.arange(H), torch.arange(W), indexing='ij')
-    center_y, center_x = H // 2, W // 2
-    mask = ((yy - center_y)**2 + (xx - center_x)**2) <= radius**2
-    mask = mask.float().unsqueeze(0).unsqueeze(0)  # (1, 1, H, W)
-    # Apply the mask to the noise
-    noise = torch.randn(B, C, H, W)
-    localized_noise = noise * mask + -1*(1-mask)  # noise only inside the circle, -1 outside
-    #mask = ((yy - center_y)**2 + (xx - center_x)**2) >= (radius//2)**2
-    #mask = mask.float().unsqueeze(0).unsqueeze(0)  # (1, 1, H, W)
-    #localized_noise = localized_noise * mask + -1*(1-mask)  # noise only inside the circle
-    return localized_noise
 @torch.no_grad()
-def generate_flow_intermediates_streaming(label):
-    x = torch.randn(1, *img_shape).to(device)
-    #x = generate_localized_noise((1, 1, 28, 28), radius=12).to(device)
     y = torch.full((1,), label, dtype=torch.long, device=device)
     steps = 50
     dt = 1.0 / steps
-    images = [(x + 1) / 2.0]  # initial noise
-    vel_magnitudes = []
     # Inicial
     img_np = ((x + 1) / 2.0)[0, 0].clamp(0, 1).cpu().numpy()
@@ -131,36 +83,16 @@ def generate_flow_intermediates_streaming(label):
     time.sleep(0.2)
-    for i in range(steps):
         t = torch.full((1,), i * dt, device=device)
         v = model_flow(x, t, y)
         x = x + v * dt
-        outputs[12] =  resize(((x + 1) / 2.0)[0, 0].clamp(0, 1).cpu().numpy(),(300,300))
-        if i in [10,20,30,40,48,49]: #
-            step_idx = {10: 1, 20: 2, 30: 3, 40: 4, 48: 5,49:12}[i] #,
-            if i==49:
-                outputs[step_idx] = resize(((x + 1) / 2.0)[0, 0].clamp(0, 1).cpu().numpy(),(300,300))
-            else:
-                outputs[step_idx] = resize(((x + 1) / 2.0)[0, 0].clamp(0, 1).cpu().numpy())
-            yield tuple(outputs)
-            # Compute velocity magnitude and convert to numpy for visualization
-        if i in [0,11,21,31,41,49]:
-            v_mag = dt*v[0, 0].abs().clamp(0, 3).cpu().numpy()  # Clamp to max value for better contrast
-            v_mag = (v_mag - v_mag.min()) / (v_mag.max() - v_mag.min() + 1e-5)
-            vel_colored = plt.get_cmap("coolwarm")(v_mag)[:, :, :3]  # (H,W,3)
-            vel_colored = (vel_colored * 255).astype(np.uint8)
-            step_idx = {0: 6, 11: 7, 21: 8, 31: 9, 41: 10, 49:11}[i]
-            outputs[step_idx] = resize(vel_colored)
-            yield tuple(outputs)
         if t % 10 == 0:
             yield tuple(outputs)
             time.sleep(0.06)
-        #time.sleep(0.1)
     yield tuple(outputs)
@@ -196,7 +128,13 @@ with gr.Blocks() as demo:
         btn_d.click(fn=generate_diffusion_intermediates_streaming, inputs=label_d, outputs=outs_d+diff_noise_imgs+diff_result_imgs)
     with gr.Tab("Flow Matching"):
-        label_f = gr.Slider(0, 9, step=1, label="Digit Label")
         btn_f = gr.Button("Generate")
         with gr.Row():
             outs_f = [
@@ -221,8 +159,10 @@ with gr.Blocks() as demo:
             flow_result_imgs = [
                 gr.Image(label="Flow step=49",streaming=True),
             ]
-        btn_f.click(fn=generate_flow_intermediates_streaming, inputs=label_f, outputs=outs_f+flow_vel_imgs+flow_result_imgs)
-demo.launch()
-#demo.launch(share=False, server_port=9071)

 import torch
 import time
+import gradio as gr
+from src.utils import generate_centered_gaussian_noise
+from src.demo import resize,plot_flow,load_models,plot_diff
+# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
+# (channels, height, width) of one image; a batch axis is prepended when sampling.
+img_shape = (1, 28, 28)
+# "DEPLOY" makes load_models fetch checkpoints from the Hugging Face Hub and
+# launches the demo with default settings; any other value loads checkpoints
+# from local paths and launches with share=True on port 9071.
+ENV = "DEPLOY"
+# Extra per-step delay in seconds; the generators only apply it when ENV == "LOCAL".
+TIME_SLEEP = 0.05
+# Load all three networks once at import time so every request reuses them.
+model_diff_standard,model_flow_standard,model_flow_localized = load_models(ENV,device=device)
 @torch.no_grad()
 def generate_diffusion_intermediates_streaming(label):
     betas = torch.linspace(1e-4, 0.02, timesteps)
     alphas = 1.0 - betas
     alphas_cumprod = torch.cumprod(alphas, dim=0).to(device)
+    model_diff = model_diff_standard
     x = torch.randn(1, *img_shape).to(device)
     y = torch.tensor([label], dtype=torch.long, device=device)
             x += v.sqrt() * noise
         x = x.clamp(-1, 1)
+        outputs = plot_diff(outputs,x,t,noise_pred)
         if t % 10 == 0:
             yield tuple(outputs)
             time.sleep(0.06)
+        if ENV=="LOCAL":
+            time.sleep(TIME_SLEEP)
+    yield tuple(outputs)
 @torch.no_grad()
+def generate_flow_intermediates_streaming(label,noise_type):
+    if noise_type=="Localized":
+        x = generate_centered_gaussian_noise((1, *img_shape)).to(device)
+        model_flow = model_flow_localized
+    else:
+        x = torch.randn(1, *img_shape).to(device)
+        model_flow = model_flow_standard
     y = torch.full((1,), label, dtype=torch.long, device=device)
     steps = 50
     dt = 1.0 / steps
     # Inicial
     img_np = ((x + 1) / 2.0)[0, 0].clamp(0, 1).cpu().numpy()
     time.sleep(0.2)
+    for i in range(steps):
         t = torch.full((1,), i * dt, device=device)
         v = model_flow(x, t, y)
         x = x + v * dt
+        outputs = plot_flow(outputs,i,x,dt,v)
         if t % 10 == 0:
             yield tuple(outputs)
             time.sleep(0.06)
+        if ENV=="LOCAL":
+            time.sleep(TIME_SLEEP)
     yield tuple(outputs)
         btn_d.click(fn=generate_diffusion_intermediates_streaming, inputs=label_d, outputs=outs_d+diff_noise_imgs+diff_result_imgs)
     with gr.Tab("Flow Matching"):
+        with gr.Row():
+            noise_selector_f = gr.Radio(
+                ["Standard", "Localized"],
+                label="Noise Type:",
+                value="Standard"  # o "Standard", según quieras el valor por defecto
+            )
+            label_f = gr.Slider(0, 9, step=1, label="Digit Label")
         btn_f = gr.Button("Generate")
         with gr.Row():
             outs_f = [
             flow_result_imgs = [
                 gr.Image(label="Flow step=49",streaming=True),
             ]
+        btn_f.click(fn=generate_flow_intermediates_streaming, inputs=[label_f,noise_selector_f], outputs=outs_f+flow_vel_imgs+flow_result_imgs)
+# Deployed runs use Gradio's default launch settings; any other ENV value
+# requests a share link and pins the server to port 9071.
+if ENV=="DEPLOY":
+    demo.launch()
+else:
+    demo.launch(share=True, server_port=9071)

src/demo.py ADDED Viewed

	@@ -0,0 +1,78 @@

+import cv2
+import torch
+import numpy as np
+import matplotlib.pyplot as plt
+from .model import ConditionalUNet
+from huggingface_hub import hf_hub_download
+def load_models(ENV,device):
+    """Load the diffusion model and both flow-matching models in eval mode.
+
+    Args:
+        ENV: When equal to "DEPLOY", checkpoints are fetched with
+            hf_hub_download from the CristianLazoQuispe/MNIST_Diff_Flow_matching
+            repo (cache_dir="models"); for any other value they are read from
+            relative paths under outputs/.
+        device: Device each network is moved to and that weights are mapped to.
+
+    Returns:
+        Tuple (model_diff_standard, model_flow_standard, model_flow_localized),
+        each a ConditionalUNet with its state dict loaded and eval() applied.
+    """
+    # Resolve the diffusion checkpoint path (Hub download or local file).
+    if ENV=="DEPLOY":
+        model_path = hf_hub_download(repo_id="CristianLazoQuispe/MNIST_Diff_Flow_matching", filename="outputs/diffusion/diffusion_model.pth",cache_dir="models")
+    else:
+        model_path  = "outputs/diffusion/diffusion_model.pth"
+    # NOTE: printed in both modes, i.e. also when nothing was downloaded.
+    print("Diff Downloaded!")
+    model_diff_standard  = ConditionalUNet().to(device)
+    model_diff_standard.load_state_dict(torch.load(model_path, map_location=device))
+    model_diff_standard.eval()
+    # Resolve the two flow-matching checkpoints: standard noise and localized noise.
+    if ENV=="DEPLOY":
+        model_path_standard  = hf_hub_download(repo_id="CristianLazoQuispe/MNIST_Diff_Flow_matching", filename="outputs/flow_matching/flow_model.pth",cache_dir="models")
+        model_path_localized = hf_hub_download(repo_id="CristianLazoQuispe/MNIST_Diff_Flow_matching", filename="outputs/flow_matching/flow_model_localized_noise.pth",cache_dir="models")
+    else:
+        model_path_standard  = "outputs/flow_matching/flow_model.pth"
+        model_path_localized = "outputs/flow_matching/flow_model_localized_noise.pth"
+    # NOTE: printed in both modes, i.e. also when nothing was downloaded.
+    print("Flow Downloaded!")
+    model_flow_standard  = ConditionalUNet().to(device)
+    model_flow_standard.load_state_dict(torch.load(model_path_standard, map_location=device))
+    model_flow_standard.eval()
+    model_flow_localized = ConditionalUNet().to(device)
+    model_flow_localized.load_state_dict(torch.load(model_path_localized, map_location=device))
+    model_flow_localized.eval()
+    return model_diff_standard,model_flow_standard,model_flow_localized
+def resize(image,size=(200,200)):
+    """Return `image` resized to `size` using bilinear interpolation.
+
+    Args:
+        image: Array accepted by cv2.resize.
+        size: Target size handed to cv2.resize as its dsize argument
+            (OpenCV orders it as (width, height)); defaults to 200x200.
+    """
+    # Despite the variable name, the interpolation is INTER_LINEAR, not nearest.
+    stretch_near = cv2.resize(image, size, interpolation = cv2.INTER_LINEAR)
+    return stretch_near
+def plot_diff(outputs,x,t,noise_pred):
+    """Write the diffusion visualisations for timestep `t` into `outputs`.
+
+    `outputs` is modified in place and also returned. Slots written here:
+      * 1-5:  200x200 snapshots of the sample at t = 400, 300, 200, 100, 1.
+      * 6-11: colour-mapped |noise_pred| at t = 499, 399, 299, 199, 99, 0.
+      * 12:   300x300 view of the current sample, refreshed on every call.
+
+    Args:
+        outputs: Mutable sequence with at least 13 entries.
+        x: Current sample; only element [0, 0] is drawn, rescaled by (x + 1) / 2.
+            Presumably already clamped to [-1, 1] by the caller - TODO confirm.
+        t: Current timestep. Assumes a plain int, since it is used as a
+            dict key - TODO confirm against the caller.
+        noise_pred: Model prediction; only element [0, 0] is drawn.
+
+    Returns:
+        The same `outputs` object.
+    """
+    if t in [499, 399, 299, 199, 99, 0]:
+        # Timestep -> output slot for the noise-prediction heat maps.
+        step_idx = {499: 6, 399: 7, 299: 8, 199: 9, 99: 10, 0: 11}[t]
+        v_mag = noise_pred[0, 0].abs().clamp(0, 3).cpu().numpy()
+        # Min-max normalise to [0, 1]; the epsilon avoids division by zero on flat maps.
+        v_mag = (v_mag - v_mag.min()) / (v_mag.max() - v_mag.min() + 1e-5)
+        # Drop the alpha channel of the RGBA colormap output and convert to 8-bit.
+        vel_colored = plt.get_cmap("coolwarm")(v_mag)[:, :, :3]
+        vel_colored = (vel_colored * 255).astype(np.uint8)
+        outputs[step_idx] = resize(vel_colored)
+    # Live preview of the current sample, updated on every call.
+    outputs[12] = resize(((x + 1) / 2.0)[0, 0].cpu().numpy(),(300,300))
+    if t in [400, 300, 200, 100, 1, 0]:
+        # Timestep -> output slot for the sample snapshots.
+        step_idx = {400: 1, 300: 2, 200: 3, 100: 4, 1: 5, 0 :12}[t]
+        if t==0:
+            # Slot 12 again: rewrites the preview assigned just above with the same value.
+            outputs[step_idx] = resize(((x + 1) / 2.0)[0, 0].cpu().numpy(),(300,300))
+        else:
+            outputs[step_idx] = resize(((x + 1) / 2.0)[0, 0].cpu().numpy())
+    return outputs
+def plot_flow(outputs,i,x,dt,v):
+    """Write the flow-matching visualisations for integration step `i` into `outputs`.
+
+    `outputs` is modified in place and also returned. Slots written here:
+      * 1-5:  200x200 snapshots of the sample at i = 10, 20, 30, 40, 48.
+      * 6-11: colour-mapped |v| at i = 0, 11, 21, 31, 41, 49.
+      * 12:   300x300 view of the current sample, refreshed on every call.
+
+    Args:
+        outputs: Mutable sequence with at least 13 entries.
+        i: Index of the current integration step (used as a dict key).
+        x: Current sample; only element [0, 0] is drawn, rescaled by
+            (x + 1) / 2 and clamped to [0, 1].
+        dt: Integration step size, used to scale the velocity magnitude.
+        v: Predicted velocity; only element [0, 0] is drawn.
+
+    Returns:
+        The same `outputs` object.
+    """
+    # Live preview of the current sample, updated on every call.
+    outputs[12] =  resize(((x + 1) / 2.0)[0, 0].clamp(0, 1).cpu().numpy(),(300,300))
+    if i in [10,20,30,40,48,49]: # steps at which a sample snapshot is stored
+        step_idx = {10: 1, 20: 2, 30: 3, 40: 4, 48: 5,49:12}[i] # step -> output slot
+        if i==49:
+            # Slot 12 again: rewrites the preview assigned just above with the same value.
+            outputs[step_idx] = resize(((x + 1) / 2.0)[0, 0].clamp(0, 1).cpu().numpy(),(300,300))
+        else:
+            outputs[step_idx] = resize(((x + 1) / 2.0)[0, 0].clamp(0, 1).cpu().numpy())
+    if i in [0,11,21,31,41,49]:
+        # Compute velocity magnitude and convert to numpy for visualization
+        v_mag = dt*v[0, 0].abs().clamp(0, 3).cpu().numpy()  # Clamp to max value for better contrast
+        # Min-max normalise to [0, 1]; this cancels the dt factor apart from the epsilon term.
+        v_mag = (v_mag - v_mag.min()) / (v_mag.max() - v_mag.min() + 1e-5)
+        vel_colored = plt.get_cmap("coolwarm")(v_mag)[:, :, :3]  # (H,W,3)
+        vel_colored = (vel_colored * 255).astype(np.uint8)
+        # Step -> output slot for the velocity heat maps.
+        step_idx = {0: 6, 11: 7, 21: 8, 31: 9, 41: 10, 49:11}[i]
+        outputs[step_idx] = resize(vel_colored)
+    return outputs

src/utils.py CHANGED Viewed

@@ -11,4 +11,23 @@ def set_seed(seed):
     torch.manual_seed(seed)
     torch.cuda.manual_seed(seed)
     torch.cuda.manual_seed_all(seed)
-    torch.backends.cudnn.deterministic = True

     torch.manual_seed(seed)
     torch.cuda.manual_seed(seed)
     torch.cuda.manual_seed_all(seed)
+    torch.backends.cudnn.deterministic = True
+def generate_centered_gaussian_noise(shape=(1, 1, 28, 28), sigma=5.0, mu=0):
+    """Sample Gaussian noise whose amplitude fades away from the image centre.
+
+    The result equals ``mu + envelope * eps`` with ``eps ~ N(0, 1)``, where
+    ``envelope`` is a 2-D Gaussian bump with peak value 1.
+
+    Args:
+        shape: ``(B, C, H, W)`` of the tensor to create; ``C`` must be 1.
+        sigma: Standard deviation of the spatial envelope, in pixels.
+        mu: Mean of the noise, which is also the value approached far from
+            the centre.
+
+    Returns:
+        A float tensor of shape ``(B, C, H, W)``.
+    """
+    B, C, H, W = shape
+    assert C == 1, "only image gray"
+    # Per-pixel row (yy) and column (xx) coordinates.
+    yy, xx = torch.meshgrid(torch.arange(H), torch.arange(W), indexing='ij')
+    yy = yy.to(torch.float32)
+    xx = xx.to(torch.float32)
+    # NOTE(review): pixel indices run 0..H-1, so H / 2 sits half a pixel past the
+    # geometric centre (H - 1) / 2; confirm this is intended before changing it.
+    center_y, center_x = H / 2, W / 2
+    gauss = torch.exp(-((yy - center_y)**2 + (xx - center_x)**2) / (2 * sigma**2))
+    gauss = gauss / gauss.max()  # Rescale so the envelope peaks at exactly 1
+    # Broadcast the (H, W) envelope to the full batch shape without copying.
+    gauss = gauss.unsqueeze(0).unsqueeze(0).expand(B, C, H, W)
+    noise = mu + torch.randn(B, C, H, W)  # Noise with mean mu
+    # Blend noise (weight gauss) with the constant mu (weight 1 - gauss).
+    localized_noise = noise * gauss + mu * (1 - gauss)
+    return localized_noise