Spaces:

ondrejbiza
/

isa

Build error

App Files Community

ondrejbiza committed on Apr 28, 2023

Commit

1e7763d

1 Parent(s): 8530ae8

Fix global state bug.

Browse files

Files changed (1) hide show

app.py +53 -47

app.py CHANGED Viewed

@@ -1,12 +1,14 @@
 import os
 from clu import checkpoint
 import gradio as gr
 import jax
 import jax.numpy as jnp
 import numpy as np
 from PIL import Image
-from huggingface_hub import snapshot_download
 from invariant_slot_attention.configs.clevr_with_masks.equiv_transl_scale import get_config
 from invariant_slot_attention.lib import utils
@@ -14,7 +16,6 @@ from invariant_slot_attention.lib import utils
 def load_model(config, checkpoint_dir):
   rng = jax.random.PRNGKey(42)
-  rng, data_rng = jax.random.split(rng)
   # Initialize model
   model = utils.build_model_from_config(config.model)
@@ -55,10 +56,9 @@ def load_image(name):
   img = Image.open(f"images/{name}.png")
   img = img.crop((64, 29, 64 + 192, 29 + 192))
   img = img.resize((128, 128))
-  img_ = np.array(img)
   img = np.array(img)[:, :, :3] / 255.
   img = jnp.array(img, dtype=jnp.float32)
-  return img, img_
 download_path = snapshot_download(repo_id="ondrejbiza/isa")
@@ -68,8 +68,7 @@ model, state, rng = load_model(get_config(), checkpoint_dir)
 rng, init_rng = jax.random.split(rng, num=2)
-from flax import linen as nn
-from typing import Callable
 class DecoderWrapper(nn.Module):
     decoder: Callable[[], nn.Module]
     @nn.compact
@@ -77,17 +76,12 @@ class DecoderWrapper(nn.Module):
         return self.decoder()(slots, train)
 decoder_model = DecoderWrapper(decoder=model.decoder)
-slots = np.zeros((11, 64), dtype=np.float32)
-pos = np.zeros((11, 2), dtype=np.float32)
-scale = np.zeros((11, 2), dtype=np.float32)
-probs = np.zeros((11, 128, 128), dtype=np.float32)
 with gr.Blocks() as demo:
-    # work in progress
-    # with gr.Row():
-    #     gr_gallery = gr.Gallery(value=[f"images/img{i}.png" for i in range(1, 9)])
-    #     gr_gallery = gr_gallery.style(columns=[3], rows=[3], object_fit="contain", height="auto")
     with gr.Row():
@@ -116,89 +110,101 @@ with gr.Blocks() as demo:
     def update_image_and_segmentation(name, idx):
       idx = idx - 1
-      img_input, _ = load_image(name)
       out = model.apply(
         {"params": state.params, **state.variables},
         video=img_input[None, None],
         rngs={"state_init": init_rng},
         train=False)
-      probs[:] = nn.softmax(out["outputs"]["segmentation_logits"][0, 0, :, :, :, 0], axis=0)
       img = np.array(out["outputs"]["video"][0, 0])
       img = np.clip(img, 0, 1)
-      slots_ = out["states"]
-      slots[:] = slots_[0, 0, :, :-4]
-      pos[:] = slots_[0, 0, :, -4: -2]
-      scale[:] = slots_[0, 0, :, -2:]
       return (img * 255).astype(np.uint8), (probs[idx] * 255).astype(np.uint8), float(pos[idx, 0]), \
-             float(pos[idx, 1]), float(scale[idx, 0]), float(scale[idx, 1])
     gr_choose_image.change(
        fn=update_image_and_segmentation,
        inputs=[gr_choose_image, gr_slot_slider],
-       outputs=[gr_image_1, gr_image_2, gr_x_slider, gr_y_slider, gr_sx_slider, gr_sy_slider]
     )
-    def update_sliders(idx):
       idx = idx - 1  # 1-indexing to 0-indexing
-      return (probs[idx] * 255).astype(np.uint8), float(pos[idx, 0]), \
-             float(pos[idx, 1]), float(scale[idx, 0]), float(scale[idx, 1])
     gr_slot_slider.change(
       fn=update_sliders,
-      inputs=gr_slot_slider,
       outputs=[gr_image_2, gr_x_slider, gr_y_slider, gr_sx_slider, gr_sy_slider]
     )
-    def update_pos_x(idx, val):
-       pos[idx - 1, 0] = val
-    def update_pos_y(idx, val):
-       pos[idx - 1, 1] = val
-    def update_scale_x(idx, val):
-       scale[idx - 1, 0] = val
-    def update_scale_y(idx, val):
-       scale[idx - 1, 1] = val
     gr_x_slider.change(
        fn=update_pos_x,
-       inputs=[gr_slot_slider, gr_x_slider]
     )
     gr_y_slider.change(
        fn=update_pos_y,
-       inputs=[gr_slot_slider, gr_y_slider]
     )
     gr_sx_slider.change(
        fn=update_scale_x,
-       inputs=[gr_slot_slider, gr_sx_slider]
     )
     gr_sy_slider.change(
        fn=update_scale_y,
-       inputs=[gr_slot_slider, gr_sy_slider]
     )
-    def render(idx):
       idx = idx - 1
-      slots_ = np.concatenate([slots, pos, scale], axis=-1)
-      slots_ = jnp.array(slots_)
       out = decoder_model.apply(
         {"params": state.params, **state.variables},
-        slots=slots_[None, None],
         train=False
       )
-      probs[:] = nn.softmax(out["segmentation_logits"][0, 0, :, :, :, 0], axis=0)
       image = np.array(out["video"][0, 0])
       image = np.clip(image, 0, 1)
-      return (image * 255).astype(np.uint8), (probs[idx] * 255).astype(np.uint8)
     gr_button.click(
         fn=render,
-        inputs=gr_slot_slider,
-        outputs=[gr_image_1, gr_image_2]
     )
 demo.launch()

 import os
+from typing import Callable
 from clu import checkpoint
+from flax import linen as nn
 import gradio as gr
+from huggingface_hub import snapshot_download
 import jax
 import jax.numpy as jnp
 import numpy as np
 from PIL import Image
 from invariant_slot_attention.configs.clevr_with_masks.equiv_transl_scale import get_config
 from invariant_slot_attention.lib import utils
 def load_model(config, checkpoint_dir):
   rng = jax.random.PRNGKey(42)
   # Initialize model
   model = utils.build_model_from_config(config.model)
   img = Image.open(f"images/{name}.png")
   img = img.crop((64, 29, 64 + 192, 29 + 192))
   img = img.resize((128, 128))
   img = np.array(img)[:, :, :3] / 255.
   img = jnp.array(img, dtype=jnp.float32)
+  return img
 download_path = snapshot_download(repo_id="ondrejbiza/isa")
 rng, init_rng = jax.random.split(rng, num=2)
 class DecoderWrapper(nn.Module):
     decoder: Callable[[], nn.Module]
     @nn.compact
         return self.decoder()(slots, train)
 decoder_model = DecoderWrapper(decoder=model.decoder)
 with gr.Blocks() as demo:
+    local_slots = gr.State(np.zeros((11, 64), dtype=np.float32))
+    local_pos = gr.State(np.zeros((11, 2), dtype=np.float32))
+    local_scale = gr.State(np.zeros((11, 2), dtype=np.float32))
+    local_probs = gr.State(np.zeros((11, 128, 128), dtype=np.float32))
     with gr.Row():
     def update_image_and_segmentation(name, idx):
       idx = idx - 1
+      img_input = load_image(name)
       out = model.apply(
         {"params": state.params, **state.variables},
         video=img_input[None, None],
         rngs={"state_init": init_rng},
         train=False)
+      probs = np.array(nn.softmax(out["outputs"]["segmentation_logits"][0, 0, :, :, :, 0], axis=0))
       img = np.array(out["outputs"]["video"][0, 0])
       img = np.clip(img, 0, 1)
+      slots_ = np.array(out["states"])
+      slots = slots_[0, 0, :, :-4]
+      pos = slots_[0, 0, :, -4: -2]
+      scale = slots_[0, 0, :, -2:]
       return (img * 255).astype(np.uint8), (probs[idx] * 255).astype(np.uint8), float(pos[idx, 0]), \
+             float(pos[idx, 1]), float(scale[idx, 0]), float(scale[idx, 1]), probs, slots, pos, scale
     gr_choose_image.change(
        fn=update_image_and_segmentation,
        inputs=[gr_choose_image, gr_slot_slider],
+       outputs=[gr_image_1, gr_image_2, gr_x_slider, gr_y_slider, gr_sx_slider, gr_sy_slider,
+                local_probs, local_slots, local_pos, local_scale]
     )
+    def update_sliders(idx, local_probs, local_pos, local_scale):
       idx = idx - 1  # 1-indexing to 0-indexing
+      return (local_probs[idx] * 255).astype(np.uint8), float(local_pos[idx, 0]), \
+             float(local_pos[idx, 1]), float(local_scale[idx, 0]), float(local_scale[idx, 1])
     gr_slot_slider.change(
       fn=update_sliders,
+      inputs=[gr_slot_slider, local_probs, local_pos, local_scale],
       outputs=[gr_image_2, gr_x_slider, gr_y_slider, gr_sx_slider, gr_sy_slider]
     )
+    def update_pos_x(idx, val, local_pos):
+       local_pos[idx - 1, 0] = val
+       return local_pos
+    def update_pos_y(idx, val, local_pos):
+       local_pos[idx - 1, 1] = val
+       return local_pos
+    def update_scale_x(idx, val, local_scale):
+       local_scale[idx - 1, 0] = val
+       return local_scale
+    def update_scale_y(idx, val, local_scale):
+       local_scale[idx - 1, 1] = val
+       return local_scale
     gr_x_slider.change(
        fn=update_pos_x,
+       inputs=[gr_slot_slider, gr_x_slider, local_pos],
+       outputs=local_pos
     )
     gr_y_slider.change(
        fn=update_pos_y,
+       inputs=[gr_slot_slider, gr_y_slider, local_pos],
+       outputs=local_pos
     )
     gr_sx_slider.change(
        fn=update_scale_x,
+       inputs=[gr_slot_slider, gr_sx_slider, local_scale],
+       outputs=local_scale
     )
     gr_sy_slider.change(
        fn=update_scale_y,
+       inputs=[gr_slot_slider, gr_sy_slider, local_scale],
+       outputs=local_scale
     )
+    def render(idx, local_slots, local_pos, local_scale):
       idx = idx - 1
+      slots = np.concatenate([local_slots, local_pos, local_scale], axis=-1)
+      slots = jnp.array(slots)
       out = decoder_model.apply(
         {"params": state.params, **state.variables},
+        slots=slots[None, None],
         train=False
       )
+      probs = np.array(nn.softmax(out["segmentation_logits"][0, 0, :, :, :, 0], axis=0))
       image = np.array(out["video"][0, 0])
       image = np.clip(image, 0, 1)
+      return (image * 255).astype(np.uint8), (probs[idx] * 255).astype(np.uint8), probs
     gr_button.click(
         fn=render,
+        inputs=[gr_slot_slider, local_slots, local_pos, local_scale],
+        outputs=[gr_image_1, gr_image_2, local_probs]
     )
 demo.launch()