Spaces: Running on Zero

add paper

- app.py  +151 -17
- packages.txt  +2 -0

app.py CHANGED
@@ -183,6 +183,29 @@ downscaled_outputs = default_outputs
 example_items = downscaled_images[:3] + downscaled_outputs[:3]
 
 
+def run_alignedthreemodelattnnodes(images, model, batch_size=1):
+
+    use_cuda = torch.cuda.is_available()
+    device = torch.device("cuda" if use_cuda else "cpu")
+
+    if use_cuda:
+        model = model.to(device)
+
+    chunked_idxs = torch.split(torch.arange(images.shape[0]), batch_size)
+
+    outputs = []
+    for idxs in chunked_idxs:
+        inp = images[idxs]
+        if use_cuda:
+            inp = inp.to(device)
+        out = model(inp)
+        # normalize before save
+        out = F.normalize(out, dim=-1)
+        outputs.append(out.cpu().float())
+    outputs = torch.cat(outputs, dim=0)
+
+    return outputs
+
 
 def ncut_run(
     model,
@@ -212,7 +235,11 @@ def ncut_run(
     video_output=False,
 ):
     logging_str = ""
-    resolution = RES_DICT[model_name]
+    if "AlignedThreeModelAttnNodes" == model_name:
+        # dirty patch for the alignedcut paper
+        resolution = (672, 672)
+    else:
+        resolution = RES_DICT[model_name]
     logging_str += f"Resolution: {resolution}\n"
     if perplexity >= num_sample_tsne or n_neighbors >= num_sample_tsne:
         # raise gr.Error("Perplexity must be less than the number of samples for t-SNE.")
@@ -227,9 +254,13 @@ def ncut_run(
     node_type = node_type.split(":")[0].strip()
 
     start = time.time()
-    features = extract_features(
-        images, model, node_type=node_type, layer=layer-1, batch_size=BATCH_SIZE
-    )
+    if "AlignedThreeModelAttnNodes" == model_name:
+        # dirty patch for the alignedcut paper
+        features = run_alignedthreemodelattnnodes(images, model, batch_size=BATCH_SIZE)
+    else:
+        features = extract_features(
+            images, model, node_type=node_type, layer=layer-1, batch_size=BATCH_SIZE
+        )
     # print(f"Feature extraction time (gpu): {time.time() - start:.2f}s")
     logging_str += f"Backbone time: {time.time() - start:.2f}s\n"
 
@@ -301,8 +332,25 @@ def ncut_run(
     )
     logging_str += _logging_str
 
+    if "AlignedThreeModelAttnNodes" == model_name:
+        # dirty patch for the alignedcut paper
+        galleries = []
+        for i_node in range(rgb.shape[1]):
+            _rgb = rgb[:, i_node]
+            galleries.append(to_pil_images(_rgb))
+        return *galleries, logging_str
+
     rgb = dont_use_too_much_green(rgb)
 
+    if "AlignedThreeModelAttnNodes" == model_name:
+        # dirty patch for the alignedcut paper
+        print("AlignedThreeModelAttnNodes")
+        galleries = []
+        for i_node in range(rgb.shape[1]):
+            _rgb = rgb[:, i_node]
+            print(_rgb.shape)
+            galleries.append(to_pil_images(_rgb))
+        return *galleries, logging_str
 
     if video_output:
         video_path = get_random_path()
@@ -313,16 +361,19 @@ def ncut_run(
     return to_pil_images(rgb), logging_str
 
 def _ncut_run(*args, **kwargs):
-    try:
-        ret = ncut_run(*args, **kwargs)
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-        return ret
-    except Exception as e:
-        gr.Error(str(e))
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-        return [], "Error: " + str(e)
+    # try:
+    #     ret = ncut_run(*args, **kwargs)
+    #     if torch.cuda.is_available():
+    #         torch.cuda.empty_cache()
+    #     return ret
+    # except Exception as e:
+    #     gr.Error(str(e))
+    #     if torch.cuda.is_available():
+    #         torch.cuda.empty_cache()
+    #     return [], "Error: " + str(e)
+
+    ret = ncut_run(*args, **kwargs)
+    return ret
 
 if USE_HUGGINGFACE_ZEROGPU:
     @spaces.GPU(duration=20)
@@ -376,6 +427,28 @@ def transform_image(image, resolution=(1024, 1024)):
     image = (image - 0.5) / 0.5
     return image
 
+def load_alignedthreemodel():
+
+    os.system("git clone https://huggingface.co/huzey/alignedthreeattn >> /dev/null 2>&1")
+    # pull
+    os.system("git -C alignedthreeattn pull >> /dev/null 2>&1")
+    # add to path
+    import sys
+    sys.path.append("alignedthreeattn")
+
+
+    from alignedthreeattn.alignedthreeattn_model import ThreeAttnNodes
+
+    align_weights = torch.load("alignedthreeattn/align_weights.pth")
+    model = ThreeAttnNodes(align_weights)
+
+    # url = 'https://huggingface.co/huzey/aligned_model_test/resolve/main/3attn_nodes.pth'
+    # save_path = "alignedthreemodel.pth"
+    # if not os.path.exists(save_path):
+    #     os.system(f"wget {url} -O {save_path} -q")
+    # model = torch.load(save_path)
+    return model
+
 def run_fn(
     images,
     model_name="SAM(sam_vit_b)",
@@ -416,12 +489,21 @@ def run_fn(
     sampling_method = "farthest"
 
     # resize the images before acquiring GPU
-    resolution = RES_DICT[model_name]
+    if "AlignedThreeModelAttnNodes" == model_name:
+        # dirty patch for the alignedcut paper
+        resolution = (672, 672)
+    else:
+        resolution = RES_DICT[model_name]
     images = [tup[0] for tup in images]
    images = [transform_image(image, resolution=resolution) for image in images]
     images = torch.stack(images)
 
-    model = load_model(model_name)
+    if "AlignedThreeModelAttnNodes" == model_name:
+        # dirty patch for the alignedcut paper
+        model = load_alignedthreemodel()
+    else:
+        model = load_model(model_name)
+
     if "stable" in model_name.lower() and "diffusion" in model_name.lower():
         model.timestep = layer
         layer = 1
@@ -932,7 +1014,59 @@ with demo:
     # Last button only reveals the last row and hides itself
     buttons[-1].click(fn=lambda x: gr.update(visible=True), outputs=rows[-1])
     buttons[-1].click(fn=lambda x: gr.update(visible=False), outputs=buttons[-1])
-
+
+    with gr.Tab('Compare (Aligned)'):
+        gr.Markdown('This page reproduces the results from the paper [AlignedCut](https://arxiv.org/abs/2406.18344)')
+        gr.Markdown('---')
+        gr.Markdown('**Features are aligned across models and layers.** A linear alignment transform is trained for each model/layer; the learning signal comes from 1) fMRI brain activation and 2) segmentation-preserving eigen-constraints.')
+        gr.Markdown('NCUT is computed on the concatenated graph of all models, layers, and images. Color is **aligned** across all models and layers.')
+        gr.Markdown('---')
+        with gr.Row():
+            with gr.Column(scale=5, min_width=200):
+                input_gallery, submit_button, clear_images_button = make_input_images_section()
+
+                dataset_dropdown, num_images_slider, random_seed_slider, load_images_button = make_dataset_images_section(advanced=True)
+                num_images_slider.value = 100
+
+            with gr.Column(scale=5, min_width=200):
+                gr.Markdown('Model: CLIP(ViT-B-16/openai), DiNOv2reg(dinov2_vitb14_reg), MAE(vit_base)')
+                gr.Markdown('Layer type: attention output (attn), without sum of residual')
+                [
+                    model_dropdown, layer_slider, node_type_dropdown, num_eig_slider,
+                    affinity_focal_gamma_slider, num_sample_ncut_slider, knn_ncut_slider,
+                    embedding_method_dropdown, num_sample_tsne_slider, knn_tsne_slider,
+                    perplexity_slider, n_neighbors_slider, min_dist_slider,
+                    sampling_method_dropdown
+                ] = make_parameters_section()
+                model_dropdown.value = "AlignedThreeModelAttnNodes"
+                model_dropdown.visible = False
+                layer_slider.visible = False
+                node_type_dropdown.visible = False
+                # logging text box
+                logging_text = gr.Textbox("Logging information", label="Logging", elem_id="logging", type="text", placeholder="Logging information")
+
+        galleries = []
+        for i_model, model_name in enumerate(["CLIP", "DINO", "MAE"]):
+            with gr.Row():
+                for i_layer in range(1, 13):
+                    with gr.Column(scale=5, min_width=200):
+                        gr.Markdown(f'### {model_name} Layer {i_layer}')
+                        output_gallery = gr.Gallery(value=[], label="NCUT Embedding", show_label=False, elem_id="ncut", columns=[3], rows=[1], object_fit="contain", height="auto")
+                        galleries.append(output_gallery)
+
+
+        clear_images_button.click(lambda x: [] * (len(galleries) + 1), outputs=[input_gallery] + galleries)
+        submit_button.click(
+            run_fn,
+            inputs=[
+                input_gallery, model_dropdown, layer_slider, num_eig_slider, node_type_dropdown,
+                affinity_focal_gamma_slider, num_sample_ncut_slider, knn_ncut_slider,
+                embedding_method_dropdown, num_sample_tsne_slider, knn_tsne_slider,
+                perplexity_slider, n_neighbors_slider, min_dist_slider, sampling_method_dropdown
+            ],
+            outputs=galleries + [logging_text],
+        )
+
     with gr.Row():
         with gr.Column():
             gr.Markdown("##### POWERED BY [ncut-pytorch](https://ncut-pytorch.readthedocs.io/) ")
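For orientation: in the `AlignedThreeModelAttnNodes` branch above, `ncut_run` returns one gallery per attention node via `rgb[:, i_node]`, and the new "Compare (Aligned)" tab builds 3 x 12 = 36 `gr.Gallery` components (CLIP, DINO, MAE across layers 1-12) plus a logging textbox, so the node dimension of `rgb` is presumably expected to match that count. A minimal sketch of the fan-out, using a dummy tensor in place of the real NCUT output (the shapes 5, 36, 56 are illustrative assumptions, not values from the commit):

import torch

# Dummy stand-in for the NCUT RGB output of the aligned model:
# (num_images, num_nodes, H, W, 3), num_nodes = 3 models x 12 layers.
num_images, num_nodes, h, w = 5, 36, 56, 56
rgb = torch.rand(num_images, num_nodes, h, w, 3)

# Fan out one gallery per node, mirroring rgb[:, i_node] in ncut_run;
# app.py converts each _rgb with to_pil_images() before returning it.
galleries = [rgb[:, i_node] for i_node in range(rgb.shape[1])]

# Matches outputs=galleries + [logging_text] wired up in the new tab.
assert len(galleries) == num_nodes

Note also that, as committed, the first `AlignedThreeModelAttnNodes` block in `ncut_run` returns before `rgb = dont_use_too_much_green(rgb)`, so the second, nearly identical block added after that line is unreachable.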
packages.txt ADDED

@@ -0,0 +1,2 @@
+git-all
+git-lfs
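The system packages are added because `load_alignedthreemodel()` shells out to plain `git` to clone and pull the `huzey/alignedthreeattn` model repo, whose `.pth` weights are stored with Git LFS. A hypothetical alternative (not what this commit does) would be to fetch the repo with `huggingface_hub` instead of system git; the repo id, module name, and weight filename below are taken from the commit, while the hub-based loader itself is only a sketch:

# Hypothetical alternative to the git clone/pull in load_alignedthreemodel().
import sys
import torch
from huggingface_hub import snapshot_download

def load_alignedthreemodel_hub():
    # Download the whole repo (code + LFS weights) into the HF cache.
    local_dir = snapshot_download(repo_id="huzey/alignedthreeattn")
    sys.path.append(local_dir)
    # alignedthreeattn_model.py ships at the repo root.
    from alignedthreeattn_model import ThreeAttnNodes
    align_weights = torch.load(f"{local_dir}/align_weights.pth")
    return ThreeAttnNodes(align_weights)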