Spaces:

CristianLazoQuispe
/

mnist-diffusion-flow

Running

App Files Community

CristianLazoQuispe committed on May 13

Commit

d36e58a

1 Parent(s): 29ff960

streaming outputs

Browse files

Files changed (1) hide show

app.py +82 -42

app.py CHANGED Viewed

@@ -7,8 +7,10 @@ import gradio as gr
 import matplotlib.pyplot as plt
 from src.model import ConditionalUNet
 from huggingface_hub import hf_hub_download
 device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
 img_shape = (1, 28, 28)
@@ -33,17 +35,24 @@ model_flow.load_state_dict(torch.load(model_path, map_location=device))
 model_flow.eval()
 @torch.no_grad()
-def generate_diffusion_intermediates(label):
     timesteps = 500
-    img_shape = (1, 28, 28)
     betas = torch.linspace(1e-4, 0.02, timesteps)
     alphas = 1.0 - betas
     alphas_cumprod = torch.cumprod(alphas, dim=0).to(device)
     x = torch.randn(1, *img_shape).to(device)
     y = torch.tensor([label], dtype=torch.long, device=device)
-    noise_magnitudes = []
-    intermediates = [resize(((x + 1) / 2.0)[0][0].clamp(0, 1).cpu().numpy())]
     for t in reversed(range(timesteps)):
         t_tensor = torch.full((x.size(0),), t, device=device, dtype=torch.float)
@@ -53,22 +62,26 @@ def generate_diffusion_intermediates(label):
             noise = torch.randn(1, *img_shape).to(device)
             v = (1 - alphas_cumprod[t - 1]) / (1 - alphas_cumprod[t]) * betas[t]
             x += v.sqrt() * noise
         x = x.clamp(-1, 1)
-        if t in [400, 300, 200, 100,0]:
-            #print("t:",t)
-            img_np = ((x + 1) / 2)[0, 0].cpu().numpy()
-            intermediates.append(resize(img_np))
-        if t in [499, 399, 299, 199,99,0]:
-            # Compute velocity magnitude and convert to numpy for visualization
-            v_mag = noise_pred[0, 0].abs().clamp(0, 3).cpu().numpy()  # Clamp to max value for better contrast
             v_mag = (v_mag - v_mag.min()) / (v_mag.max() - v_mag.min() + 1e-5)
-            vel_colored = plt.get_cmap("coolwarm")(v_mag)[:, :, :3]  # (H,W,3)
             vel_colored = (vel_colored * 255).astype(np.uint8)
-            noise_magnitudes.append(resize(vel_colored, (100, 100)))
-    return intermediates+noise_magnitudes
 def generate_localized_noise(shape, radius=5):
@@ -89,7 +102,7 @@ def generate_localized_noise(shape, radius=5):
 @torch.no_grad()
-def generate_flow_intermediates(label):
     x = torch.randn(1, *img_shape).to(device)
     #x = generate_localized_noise((1, 1, 28, 28), radius=12).to(device)
     y = torch.full((1,), label, dtype=torch.long, device=device)
@@ -98,23 +111,43 @@ def generate_flow_intermediates(label):
     images = [(x + 1) / 2.0]  # initial noise
     vel_magnitudes = []
     for i in range(steps):
         t = torch.full((1,), i * dt, device=device)
         v = model_flow(x, t, y)
         x = x + v * dt
-        if i in [10,20,30,40,49]:
-            images.append((x + 1) / 2.0)
             # Compute velocity magnitude and convert to numpy for visualization
-        if i in [0,10,20,30,40,49]:
             v_mag = dt*v[0, 0].abs().clamp(0, 3).cpu().numpy()  # Clamp to max value for better contrast
             v_mag = (v_mag - v_mag.min()) / (v_mag.max() - v_mag.min() + 1e-5)
             vel_colored = plt.get_cmap("coolwarm")(v_mag)[:, :, :3]  # (H,W,3)
             vel_colored = (vel_colored * 255).astype(np.uint8)
-            vel_magnitudes.append(resize(vel_colored, (100, 100)))
-    return [resize(images[0][0][0].clamp(0, 1).cpu().numpy())]+[resize(img[0][0].clamp(0, 1).cpu().numpy()) for img in images[-5:]]+vel_magnitudes
 with gr.Blocks() as demo:
     gr.Markdown("# Conditional MNIST Generation: Diffusion vs Flow Matching")
@@ -124,24 +157,28 @@ with gr.Blocks() as demo:
         btn_d = gr.Button("Generate")
         with gr.Row():
             outs_d = [
-                gr.Image(label="Noise"),
-                gr.Image(label="Diffusion t=400"),
-                gr.Image(label="Diffusion t=300"),
-                gr.Image(label="Diffusion t=200"),
-                gr.Image(label="Diffusion t=100"),
-                gr.Image(label="Diffusion t=0"),
             ]
         with gr.Row():
             #400, 300, 200, 100,0
-            flow_noise_imgs = [
-                gr.Image(label="Noise pred t=500"),
-                gr.Image(label="Noise pred t=400"),
-                gr.Image(label="Noise pred t=300"),
-                gr.Image(label="Noise pred t=200"),
-                gr.Image(label="Noise pred t=100"),
-                gr.Image(label="Noise pred t=0")
             ]
-        btn_d.click(fn=generate_diffusion_intermediates, inputs=label_d, outputs=outs_d+flow_noise_imgs)
     with gr.Tab("Flow Matching"):
         label_f = gr.Slider(0, 9, step=1, label="Digit Label")
@@ -153,7 +190,7 @@ with gr.Blocks() as demo:
                 gr.Image(label="Flow step=20"),
                 gr.Image(label="Flow step=30"),
                 gr.Image(label="Flow step=40"),
-                gr.Image(label="Flow step=49"),
             ]
         with gr.Row():
             #100,200,300,400,499
@@ -163,10 +200,13 @@ with gr.Blocks() as demo:
                 gr.Image(label="Velocity step=20"),
                 gr.Image(label="Velocity step=30"),
                 gr.Image(label="Velocity step=40"),
-                gr.Image(label="Velocity step=49")
             ]
-        btn_f.click(fn=generate_flow_intermediates, inputs=label_f, outputs=outs_f+flow_vel_imgs)
-demo.launch()
-#demo.launch(share=False, server_port=9070)

 import matplotlib.pyplot as plt
 from src.model import ConditionalUNet
 from huggingface_hub import hf_hub_download
+import time
 device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
+device = 'cpu'
 img_shape = (1, 28, 28)
 model_flow.eval()
 @torch.no_grad()
+def generate_diffusion_intermediates_streaming(label):
     timesteps = 500
     betas = torch.linspace(1e-4, 0.02, timesteps)
     alphas = 1.0 - betas
     alphas_cumprod = torch.cumprod(alphas, dim=0).to(device)
     x = torch.randn(1, *img_shape).to(device)
     y = torch.tensor([label], dtype=torch.long, device=device)
+    # Initial image
+    img_np = ((x + 1) / 2.0)[0, 0].clamp(0, 1).cpu().numpy()
+    # To keep each image in its own output position
+    outputs = [None] * 13
+    yield tuple(outputs)
+    outputs[0] = resize(img_np)
+    yield tuple(outputs)
+    #time.sleep(0.5)
     for t in reversed(range(timesteps)):
         t_tensor = torch.full((x.size(0),), t, device=device, dtype=torch.float)
             noise = torch.randn(1, *img_shape).to(device)
             v = (1 - alphas_cumprod[t - 1]) / (1 - alphas_cumprod[t]) * betas[t]
             x += v.sqrt() * noise
         x = x.clamp(-1, 1)
+        if t in [499, 399, 299, 199, 99, 0]:
+            step_idx = {499: 6, 399: 7, 299: 8, 199: 9, 99: 10, 0: 11}[t]
+            v_mag = noise_pred[0, 0].abs().clamp(0, 3).cpu().numpy()
             v_mag = (v_mag - v_mag.min()) / (v_mag.max() - v_mag.min() + 1e-5)
+            vel_colored = plt.get_cmap("coolwarm")(v_mag)[:, :, :3]
             vel_colored = (vel_colored * 255).astype(np.uint8)
+            outputs[step_idx] = resize(vel_colored)
+            yield tuple(outputs)
+            time.sleep(0.5)
+        if t in [400, 300, 200, 100, 1, 0]:
+            step_idx = {400: 1, 300: 2, 200: 3, 100: 4, 1: 5, 0 :12}[t]
+            if t==0:
+                outputs[step_idx] = resize(((x + 1) / 2.0)[0, 0].cpu().numpy(),(400,400))
+            else:
+                outputs[step_idx] = resize(((x + 1) / 2.0)[0, 0].cpu().numpy())
+            yield tuple(outputs)
 def generate_localized_noise(shape, radius=5):
 @torch.no_grad()
+def generate_flow_intermediates_streaming(label):
     x = torch.randn(1, *img_shape).to(device)
     #x = generate_localized_noise((1, 1, 28, 28), radius=12).to(device)
     y = torch.full((1,), label, dtype=torch.long, device=device)
     images = [(x + 1) / 2.0]  # initial noise
     vel_magnitudes = []
+    # Initial image
+    img_np = ((x + 1) / 2.0)[0, 0].clamp(0, 1).cpu().numpy()
+    # To keep each image in its own output position
+    outputs = [None] * 13
+    yield tuple(outputs)
+    outputs[0] = resize(img_np)
+    yield tuple(outputs)
+    #time.sleep(0.5)
     for i in range(steps):
         t = torch.full((1,), i * dt, device=device)
         v = model_flow(x, t, y)
         x = x + v * dt
+        if i in [10,20,30,40,48,49]:
+            #images.append((x + 1) / 2.0)
+            step_idx = {10: 1, 20: 2, 30: 3, 40: 4, 48: 5, 49:12}[i]
+            outputs[step_idx] = resize(((x + 1) / 2.0)[0, 0].clamp(0, 1).cpu().numpy())
+            yield tuple(outputs)
             # Compute velocity magnitude and convert to numpy for visualization
+        if i in [0,11,21,31,41,49]:
             v_mag = dt*v[0, 0].abs().clamp(0, 3).cpu().numpy()  # Clamp to max value for better contrast
             v_mag = (v_mag - v_mag.min()) / (v_mag.max() - v_mag.min() + 1e-5)
             vel_colored = plt.get_cmap("coolwarm")(v_mag)[:, :, :3]  # (H,W,3)
             vel_colored = (vel_colored * 255).astype(np.uint8)
+            step_idx = {0: 6, 11: 7, 21: 8, 31: 9, 41: 10, 49:11}[i]
+            if i==49:
+                outputs[step_idx] = resize(vel_colored, (400, 400))
+            else:
+                outputs[step_idx] = resize(vel_colored)
+            yield tuple(outputs)
 with gr.Blocks() as demo:
     gr.Markdown("# Conditional MNIST Generation: Diffusion vs Flow Matching")
         btn_d = gr.Button("Generate")
         with gr.Row():
             outs_d = [
+                gr.Image(label="Noise",streaming=True),
+                gr.Image(label="Diffusion t=400",streaming=True),
+                gr.Image(label="Diffusion t=300",streaming=True),
+                gr.Image(label="Diffusion t=200",streaming=True),
+                gr.Image(label="Diffusion t=100",streaming=True),
+                gr.Image(label="Diffusion t=1",streaming=True),
             ]
         with gr.Row():
             #400, 300, 200, 100,0
+            diff_noise_imgs = [
+                gr.Image(label="Noise pred t=500",streaming=True),
+                gr.Image(label="Noise pred t=400",streaming=True),
+                gr.Image(label="Noise pred t=300",streaming=True),
+                gr.Image(label="Noise pred t=200",streaming=True),
+                gr.Image(label="Noise pred t=100",streaming=True),
+                gr.Image(label="Noise pred t=1",streaming=True),
             ]
+        with gr.Row():
+            diff_result_imgs = [
+                gr.Image(label="Diffusion t=0",streaming=True),
+            ]
+        btn_d.click(fn=generate_diffusion_intermediates_streaming, inputs=label_d, outputs=outs_d+diff_noise_imgs+diff_result_imgs)
     with gr.Tab("Flow Matching"):
         label_f = gr.Slider(0, 9, step=1, label="Digit Label")
                 gr.Image(label="Flow step=20"),
                 gr.Image(label="Flow step=30"),
                 gr.Image(label="Flow step=40"),
+                gr.Image(label="Flow step=48"),
             ]
         with gr.Row():
             #100,200,300,400,499
                 gr.Image(label="Velocity step=20"),
                 gr.Image(label="Velocity step=30"),
                 gr.Image(label="Velocity step=40"),
+                gr.Image(label="Velocity step=48")
             ]
+        with gr.Row():
+            flow_result_imgs = [
+                gr.Image(label="Flow step=49",streaming=True),
+            ]
+        btn_f.click(fn=generate_flow_intermediates_streaming, inputs=label_f, outputs=outs_f+flow_vel_imgs+flow_result_imgs)
+#demo.launch()
+demo.launch(share=False, server_port=9071)