Commit d6ff06e committed by sergiopaniego (HF Staff)
1 Parent(s): 6afc369

Started Space
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.png filter=lfs diff=lfs merge=lfs -text
+ *.jpg filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,177 @@
+ import gradio as gr
+ import torch
+ import spaces
+ import json
+ import base64
+ from io import BytesIO
+ from transformers import SamHQModel, SamHQProcessor, SamModel, SamProcessor
+ import os
+ import pandas as pd
+ from utils import *
+ from PIL import Image
+
+ # Load models
+ sam_hq_model = SamHQModel.from_pretrained("syscv-community/sam-hq-vit-huge")
+ sam_hq_processor = SamHQProcessor.from_pretrained("syscv-community/sam-hq-vit-huge")
+
+ sam_model = SamModel.from_pretrained("facebook/sam-vit-huge")
+ sam_processor = SamProcessor.from_pretrained("facebook/sam-vit-huge")
+
+ @spaces.GPU
+ def predict_masks_and_scores(model, processor, raw_image, input_points=None, input_boxes=None):
+     if input_boxes is not None:
+         input_boxes = [input_boxes]
+     inputs = processor(raw_image, input_boxes=input_boxes, input_points=input_points, return_tensors="pt")
+     with torch.no_grad():
+         outputs = model(**inputs)
+
+     masks = processor.image_processor.post_process_masks(
+         outputs.pred_masks.cpu(), inputs["original_sizes"].cpu(), inputs["reshaped_input_sizes"].cpu()
+     )
+     scores = outputs.iou_scores
+     return masks, scores
+
+ def encode_pil_to_base64(pil_image):
+     buffer = BytesIO()
+     pil_image.save(buffer, format="PNG")
+     return base64.b64encode(buffer.getvalue()).decode("utf-8")
+
+ def compare_images_points_and_masks(user_image, input_boxes, input_points):
+     # Match the displayed example back to its original image by size
+     for example_path, example_data in example_data_map.items():
+         if example_data["size"] == list(user_image.size):
+             user_image = Image.open(example_data['original_image_path'])
+     input_boxes = input_boxes.values.tolist()
+     input_points = input_points.values.tolist()
+
+     input_boxes = [[[int(coord) for coord in box] for box in input_boxes if any(box)]]
+     input_points = [[[int(coord) for coord in point] for point in input_points if any(point)]]
+
+     input_boxes = input_boxes if input_boxes[0] else None
+     input_points = input_points if input_points[0] else None
+
+     sam_masks, sam_scores = predict_masks_and_scores(sam_model, sam_processor, user_image, input_boxes=input_boxes, input_points=input_points)
+     sam_hq_masks, sam_hq_scores = predict_masks_and_scores(sam_hq_model, sam_hq_processor, user_image, input_boxes=input_boxes, input_points=input_points)
+
+     if input_boxes and input_points:
+         img1_b64 = show_all_annotations_on_image_base64(user_image, sam_masks[0][0], sam_scores[:, 0, :], input_boxes[0], input_points[0], model_name='SAM')
+         img2_b64 = show_all_annotations_on_image_base64(user_image, sam_hq_masks[0][0], sam_hq_scores[:, 0, :], input_boxes[0], input_points[0], model_name='SAM_HQ')
+     elif input_boxes:
+         img1_b64 = show_all_annotations_on_image_base64(user_image, sam_masks[0][0], sam_scores[:, 0, :], input_boxes[0], None, model_name='SAM')
+         img2_b64 = show_all_annotations_on_image_base64(user_image, sam_hq_masks[0][0], sam_hq_scores[:, 0, :], input_boxes[0], None, model_name='SAM_HQ')
+     elif input_points:
+         img1_b64 = show_all_annotations_on_image_base64(user_image, sam_masks[0][0], sam_scores[:, 0, :], None, input_points[0], model_name='SAM')
+         img2_b64 = show_all_annotations_on_image_base64(user_image, sam_hq_masks[0][0], sam_hq_scores[:, 0, :], None, input_points[0], model_name='SAM_HQ')
+
+     print('user_image', user_image)
+     print("img1_b64", img1_b64)
+     print("img2_b64", img2_b64)
+
+     # The two overlays are compared with a simple HTML slider widget
+     html_code = f"""
+     <div style="position: relative; width: 100%; max-width: 600px; margin: 0 auto;" id="imageCompareContainer">
+         <div style="position: relative; width: 100%;">
+             <img src="data:image/png;base64,{img1_b64}" style="width:100%; display:block;">
+             <div id="topWrapper" style="position:absolute; top:0; left:0; width:100%; overflow:hidden;">
+                 <img id="topImage" src="data:image/png;base64,{img2_b64}" style="width:100%;">
+             </div>
+             <div id="sliderLine" style="position:absolute; top:0; left:0; width:2px; height:100%; background-color:red; pointer-events:none;"></div>
+         </div>
+         <input type="range" min="0" max="100" value="0"
+             style="width:100%; margin-top: 10px;"
+             oninput="
+                 const val = this.value;
+                 const container = document.getElementById('imageCompareContainer');
+                 const width = container.offsetWidth;
+                 const clipValue = 100 - val;
+                 document.getElementById('topImage').style.clipPath = 'inset(0 ' + clipValue + '% 0 0)';
+                 document.getElementById('sliderLine').style.left = (width * val / 100) + 'px';
+             ">
+     </div>
+     """
+     return html_code
+
+ def load_examples(json_file="examples.json"):
+     with open(json_file, "r") as f:
+         examples = json.load(f)
+     return examples
+
+ examples = load_examples()
+ example_paths = [example["image_path"] for example in examples]
+ example_data_map = {
+     example["image_path"]: {
+         "original_image_path": example["original_image_path"],
+         "points": example["points"],
+         "boxes": example["boxes"],
+         "size": example["size"]
+     }
+     for example in examples
+ }
+
+ theme = gr.themes.Soft(primary_hue="indigo", secondary_hue="emerald")
+ with gr.Blocks(theme=theme, title="🔍 Compare SAM vs SAM-HQ") as demo:
+     image_path_box = gr.Textbox(visible=False)
+     gr.Markdown("## 🔍 Compare SAM vs SAM-HQ")
+     gr.Markdown("Compare the performance of SAM and SAM-HQ on various images. Click on an example to load it")
+     gr.Markdown("[SAM-HQ](https://huggingface.co/syscv-community/sam-hq-vit-huge) - [SAM](https://huggingface.co/facebook/sam-vit-huge)")
+
+     with gr.Row():
+         image_input = gr.Image(
+             type="pil",
+             label="Example image (click below to load)",
+             interactive=False,
+             height=500,
+             show_label=True
+         )
+
+     gr.Examples(
+         examples=example_paths,
+         inputs=[image_input],
+         label="Click an example to try 👇",
+     )
+
+     result_html = gr.HTML(elem_id="result-html")
+
+     with gr.Row():
+         points_input = gr.Dataframe(
+             headers=["x", "y"],
+             label="Points",
+             datatype=["number", "number"],
+             col_count=(2, "fixed")
+         )
+         boxes_input = gr.Dataframe(
+             headers=["x0", "y0", "x1", "y1"],
+             label="Boxes",
+             datatype=["number", "number", "number", "number"],
+             col_count=(4, "fixed")
+         )
+
+     def on_image_change(image):
+         # Fill the point/box tables for the example whose size matches the loaded image
+         for example_path, example_data in example_data_map.items():
+             print(image.size)
+             if example_data["size"] == list(image.size):
+                 return example_data["points"], example_data["boxes"]
+         return [], []
+
+     image_input.change(
+         fn=on_image_change,
+         inputs=[image_input],
+         outputs=[points_input, boxes_input]
+     )
+
+     compare_button = gr.Button("Compare points and masks")
+     compare_button.click(fn=compare_images_points_and_masks, inputs=[image_input, boxes_input, points_input], outputs=result_html)
+
+     gr.HTML("""
+     <style>
+     #result-html {
+         min-height: 500px;
+         border: 1px solid #ccc;
+         padding: 10px;
+         box-sizing: border-box;
+         background-color: #fff;
+         border-radius: 8px;
+         box-shadow: 0 2px 6px rgba(0, 0, 0, 0.1);
+     }
+     </style>
+     """)
+
+ demo.launch()
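
As a reference point, here is a minimal standalone sketch of the same segmentation call that predict_masks_and_scores wraps, assuming the facebook/sam-vit-huge checkpoint above and the first box prompt from examples.json; the box is passed in the (batch, boxes, 4) nesting expected by SamProcessor.

    # Standalone sketch (not part of the commit): run SAM on one example box prompt.
    # Assumes the repo's images/ folder is available in the working directory.
    import torch
    from PIL import Image
    from transformers import SamModel, SamProcessor

    model = SamModel.from_pretrained("facebook/sam-vit-huge")
    processor = SamProcessor.from_pretrained("facebook/sam-vit-huge")

    image = Image.open("./images/original_image_0.png").convert("RGB")
    inputs = processor(image, input_boxes=[[[4, 13, 1007, 1023]]], return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)

    masks = processor.image_processor.post_process_masks(
        outputs.pred_masks.cpu(), inputs["original_sizes"].cpu(), inputs["reshaped_input_sizes"].cpu()
    )
    print(masks[0].shape, outputs.iou_scores)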
examples.json ADDED
@@ -0,0 +1,51 @@
+ [
+     {
+         "image_path": "./images/image_0.png",
+         "original_image_path": "./images/original_image_0.png",
+         "points": null,
+         "boxes": [[4, 13, 1007, 1023]],
+         "size": [1024, 1024]
+     },
+     {
+         "image_path": "./images/image_1.png",
+         "original_image_path": "./images/original_image_1.png",
+         "points": null,
+         "boxes": [[230, 99, 694, 670]],
+         "size": [768, 768]
+     },
+     {
+         "image_path": "./images/image_2.png",
+         "original_image_path": "./images/original_image_2.png",
+         "points": [[495, 518], [217, 140]],
+         "boxes": null,
+         "size": [894, 1000]
+     },
+     {
+         "image_path": "./images/image_3.png",
+         "original_image_path": "./images/original_image_3.png",
+         "points": [[111, 241], [249, 317], [375, 190]],
+         "boxes": null,
+         "size": [512, 512]
+     },
+     {
+         "image_path": "./images/image_4.png",
+         "original_image_path": "./images/original_image_4.png",
+         "points": null,
+         "boxes": [[128, 152, 1880, 1838]],
+         "size": [2048, 2048]
+     },
+     {
+         "image_path": "./images/image_5.png",
+         "original_image_path": "./images/original_image_5.png",
+         "points": [[373, 363], [452, 575]],
+         "boxes": null,
+         "size": [1024, 683]
+     },
+     {
+         "image_path": "./images/image_6.png",
+         "original_image_path": "./images/original_image_6.png",
+         "points": null,
+         "boxes": [[181, 196, 757, 495]],
+         "size": [800, 533]
+     }
+ ]
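
Each entry pairs a display image with its original image plus either point or box prompts; a small sketch of how these fields are consumed, mirroring load_examples in app.py:

    # Sketch (not part of the commit): inspect the example prompts defined above.
    import json

    with open("examples.json") as f:
        examples = json.load(f)

    for ex in examples:
        # "size" is what app.py uses to match a clicked thumbnail back to its entry.
        print(ex["image_path"], ex["size"], "points:", ex["points"], "boxes:", ex["boxes"])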
images/image_0.png ADDED

Git LFS Details

  • SHA256: 8acfddb52061db75859d2452ba8e168ee96245ac31618729edaa31548854f0ea
  • Pointer size: 132 Bytes
  • Size of remote file: 1.97 MB
images/image_1.png ADDED

Git LFS Details

  • SHA256: 6082fb3d5849b14ad082d234c1c1c7ff8de195af6a062ad152386d75d39fb9bf
  • Pointer size: 131 Bytes
  • Size of remote file: 628 kB
images/image_2.png ADDED

Git LFS Details

  • SHA256: fb89b1ab049b0bbf11943b11c644e2a7970eccf1ac2bcde73c68ed6bb53096c9
  • Pointer size: 132 Bytes
  • Size of remote file: 1.61 MB
images/image_3.png ADDED

Git LFS Details

  • SHA256: 9e53cb4a9b443b74a1b7991d9c198e7435181c061e7802b10184ef055da3e384
  • Pointer size: 131 Bytes
  • Size of remote file: 433 kB
images/image_4.png ADDED

Git LFS Details

  • SHA256: b4aa85548f056d5717742e20f81f7a55674e3ce05db3b0b6ac70c0dfeffdaa56
  • Pointer size: 132 Bytes
  • Size of remote file: 6.14 MB
images/image_5.png ADDED

Git LFS Details

  • SHA256: 9944d95815a122c048b9deec0ae28443f7d64f915455ad7036c852a00a8ad4bd
  • Pointer size: 131 Bytes
  • Size of remote file: 951 kB
images/image_6.png ADDED

Git LFS Details

  • SHA256: f440f6ba3b9bb265740edddcdb803836b54add9609282464d7e82ba5f452237d
  • Pointer size: 131 Bytes
  • Size of remote file: 307 kB
images/original_image_0.png ADDED

Git LFS Details

  • SHA256: 75b113e521d89addb6c48344ef27fefd0f494eafc703e9d0657978929fce4601
  • Pointer size: 132 Bytes
  • Size of remote file: 2.32 MB
images/original_image_1.png ADDED

Git LFS Details

  • SHA256: 7e5ccc2cbc51e4849bba6d8984b5705835f332506a187dda680b207cc7a1fab2
  • Pointer size: 131 Bytes
  • Size of remote file: 613 kB
images/original_image_2.png ADDED

Git LFS Details

  • SHA256: d42a70173297297b654cd067e7ed3de717c3d2b37fd6d13b0396e5fc58449850
  • Pointer size: 132 Bytes
  • Size of remote file: 1.54 MB
images/original_image_3.png ADDED

Git LFS Details

  • SHA256: 23fe057297248971db5dc01f17b6c631636cc462711ee52c8d221b131c8a456d
  • Pointer size: 131 Bytes
  • Size of remote file: 470 kB
images/original_image_4.png ADDED

Git LFS Details

  • SHA256: e697f853c0cdc07e3bf4982e96e38b77707f91256aef087147c8897784fe90bc
  • Pointer size: 132 Bytes
  • Size of remote file: 6.05 MB
images/original_image_5.png ADDED

Git LFS Details

  • SHA256: 453a3e1627effb4d8ed6049e8d457ebe7f869537acf8e2846b36cc62ee23d1a6
  • Pointer size: 132 Bytes
  • Size of remote file: 1.22 MB
images/original_image_6.png ADDED

Git LFS Details

  • SHA256: 631bc19a9b5a3bd291de6375abf63c33234aaee5194ca95245d418581ff294d1
  • Pointer size: 131 Bytes
  • Size of remote file: 384 kB
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ gradio
+ huggingface_hub
+ requests
+ pillow
+ torch
+ git+https://github.com/huggingface/transformers.git
+ matplotlib
+ numpy
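
A quick hedged sanity check that the dependencies above import cleanly (note that `spaces`, imported in app.py, is not listed here and appears to be supplied by the Spaces runtime):

    # Sketch (not part of the commit): verify the declared stack is importable.
    import gradio, torch, transformers, matplotlib, numpy, PIL, huggingface_hub, requests
    print(gradio.__version__, torch.__version__, transformers.__version__)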
utils.py ADDED
@@ -0,0 +1,148 @@
+ from io import BytesIO
+ import base64
+ import numpy as np
+ import matplotlib.pyplot as plt
+ import torch
+
+
+ def fig_to_base64(fig):
+     # Serialize a matplotlib figure to a base64-encoded PNG string
+     buf = BytesIO()
+     fig.savefig(buf, format='png', bbox_inches='tight')
+     plt.close(fig)
+     buf.seek(0)
+     return base64.b64encode(buf.getvalue()).decode()
+
+ def show_mask(mask, ax, random_color=False):
+     # Overlay a single segmentation mask on the given axes
+     if random_color:
+         color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
+     else:
+         color = np.array([30/255, 144/255, 255/255, 0.6])
+     h, w = mask.shape[-2:]
+     mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1)
+     ax.imshow(mask_image)
+
+ def show_box(box, ax):
+     x0, y0 = box[0], box[1]
+     w, h = box[2] - box[0], box[3] - box[1]
+     ax.add_patch(plt.Rectangle((x0, y0), w, h, edgecolor='green', facecolor=(0,0,0,0), lw=2))
+
+ def show_points(coords, labels, ax, marker_size=375):
+     pos_points = coords[labels==1]
+     neg_points = coords[labels==0]
+     ax.scatter(pos_points[:, 0], pos_points[:, 1], color='green', marker='*', s=marker_size, edgecolor='white', linewidth=1.25)
+     ax.scatter(neg_points[:, 0], neg_points[:, 1], color='red', marker='*', s=marker_size, edgecolor='white', linewidth=1.25)
+
+ def show_boxes_on_image_base64(raw_image, boxes):
+     fig, ax = plt.subplots(figsize=(10,10))
+     ax.imshow(raw_image)
+     for box in boxes:
+         show_box(box, ax)
+     ax.axis('off')
+     return fig_to_base64(fig)
+
+ def show_points_on_image_base64(raw_image, input_points, input_labels=None):
+     fig, ax = plt.subplots(figsize=(10,10))
+     ax.imshow(raw_image)
+     input_points = np.array(input_points)
+     labels = np.ones_like(input_points[:, 0]) if input_labels is None else np.array(input_labels)
+     show_points(input_points, labels, ax)
+     ax.axis('off')
+     return fig_to_base64(fig)
+
+ def show_points_and_boxes_on_image_base64(raw_image, boxes, input_points, input_labels=None):
+     fig, ax = plt.subplots(figsize=(10,10))
+     ax.imshow(raw_image)
+     input_points = np.array(input_points)
+     labels = np.ones_like(input_points[:, 0]) if input_labels is None else np.array(input_labels)
+     show_points(input_points, labels, ax)
+     for box in boxes:
+         show_box(box, ax)
+     ax.axis('off')
+     return fig_to_base64(fig)
+
+ def show_masks_on_image_base64(raw_image, masks, scores):
+     if len(masks.shape) == 4:
+         masks = masks.squeeze()
+     if scores.shape[0] == 1:
+         scores = scores.squeeze()
+
+     nb_predictions = scores.shape[-1]
+     print(f"Number of predictions: {nb_predictions}")
+     fig, axes = plt.subplots(1, nb_predictions, figsize=(5 * nb_predictions, 5))
+
+     if nb_predictions == 1:
+         axes = [axes]
+
+     for i, (mask, score) in enumerate(zip(masks, scores)):
+         print(i)
+         mask = mask.cpu().detach().numpy()
+         axes[i].imshow(np.array(raw_image))
+         show_mask(mask, axes[i])
+         axes[i].title.set_text(f"Mask {i+1}, Score: {score.item():.3f}")
+         axes[i].axis("off")
+
+     return fig_to_base64(fig)
+
+ def show_first_mask_on_image_base64(raw_image, masks, scores):
+     if masks.ndim == 4:
+         mask = masks[0, 0]
+     elif masks.ndim == 3:
+         mask = masks[0]
+     else:
+         mask = masks
+
+     if isinstance(mask, torch.Tensor):
+         mask = mask.cpu().detach().numpy()
+
+     score_text = ""
+     if scores is not None:
+         if isinstance(scores, torch.Tensor):
+             scores = scores.flatten()
+             score = scores[0].item()
+         else:
+             score = float(np.array(scores).flatten()[0])
+         score_text = f"Score: {score:.3f}"
+
+     fig, ax = plt.subplots(figsize=(5, 5))
+     ax.imshow(np.array(raw_image))
+     show_mask(mask, ax)
+     ax.set_title(score_text)
+     ax.axis("off")
+
+     return fig_to_base64(fig)
+
+ def show_all_annotations_on_image_base64(raw_image, masks=None, scores=None, boxes=None, input_points=None, input_labels=None, model_name=None):
+     # Draw mask, score title, points and boxes on one image and return it as base64
+     fig, ax = plt.subplots(figsize=(10, 10))
+     ax.imshow(np.array(raw_image))
+
+     if masks is not None:
+         if masks.ndim == 4:
+             mask = masks[0, 0]
+         elif masks.ndim == 3:
+             mask = masks[0]
+         else:
+             mask = masks
+         if isinstance(mask, torch.Tensor):
+             mask = mask.cpu().detach().numpy()
+         show_mask(mask, ax)
+
+     if scores is not None:
+         if isinstance(scores, torch.Tensor):
+             scores = scores.flatten()
+             score = scores[0].item()
+         else:
+             score = float(np.array(scores).flatten()[0])
+         ax.set_title(f"{model_name} - Score: {score:.3f}")
+
+     if input_points is not None:
+         input_points = np.array(input_points)
+         labels = np.ones_like(input_points[:, 0]) if input_labels is None else np.array(input_labels)
+         show_points(input_points, labels, ax)
+
+     if boxes is not None:
+         for box in boxes:
+             show_box(box, ax)
+
+     ax.axis("off")
+     return fig_to_base64(fig)
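
A short hedged usage example for the annotation helper above, producing the base64 string that app.py embeds in its comparison HTML; the image path and box are taken from examples.json.

    # Sketch (not part of the commit): draw a box overlay and build an <img> tag.
    from PIL import Image
    from utils import show_all_annotations_on_image_base64

    image = Image.open("./images/original_image_1.png").convert("RGB")
    b64 = show_all_annotations_on_image_base64(image, boxes=[[230, 99, 694, 670]])
    html = f'<img src="data:image/png;base64,{b64}">'
    print(html[:80])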