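# SAM2 image predictor demo for Hugging Face Spaces (ZeroGPU).
# Upload an image, click include/exclude points, and get back a segmentation mask.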
import spaces
import gradio as gr
import os
# Enable cuDNN-backed scaled dot-product attention (must be set before torch is imported)
os.environ["TORCH_CUDNN_SDPA_ENABLED"] = "1"
import torch
import numpy as np
import cv2
import matplotlib.pyplot as plt
from PIL import Image, ImageFilter
from sam2.sam2_image_predictor import SAM2ImagePredictor
def preprocess_image(image):
    # Mirror the uploaded image into both image slots and reset the stored clicks.
    # Return plain values for the gr.State outputs; constructing new gr.State
    # objects inside a callback does not update session state.
    return image, image, [], []
def get_point(point_type, tracking_points, trackings_input_label, first_frame_path, evt: gr.SelectData):
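    """Record a clicked point and its include/exclude label, then redraw the point overlay.

    Gradio passes the *values* of the gr.State inputs, so tracking_points and
    trackings_input_label arrive here as plain lists and are returned updated.
    """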
print(f"You selected {evt.value} at {evt.index} from {evt.target}")
    tracking_points.append(evt.index)
    print(f"TRACKING POINTS: {tracking_points}")
    if point_type == "include":
        trackings_input_label.append(1)
    elif point_type == "exclude":
        trackings_input_label.append(0)
    print(f"TRACKING INPUT LABELS: {trackings_input_label}")
# Open the image and get its dimensions
transparent_background = Image.open(first_frame_path).convert('RGBA')
w, h = transparent_background.size
# Define the circle radius as a fraction of the smaller dimension
fraction = 0.02 # You can adjust this value as needed
radius = int(fraction * min(w, h))
# Create a transparent layer to draw on
transparent_layer = np.zeros((h, w, 4), dtype=np.uint8)
    for index, track in enumerate(tracking_points):
        if trackings_input_label[index] == 1:
            cv2.circle(transparent_layer, tuple(track), radius, (0, 255, 0, 255), -1)  # include: green
        else:
            cv2.circle(transparent_layer, tuple(track), radius, (255, 0, 0, 255), -1)  # exclude: red
# Convert the transparent layer back to an image
transparent_layer = Image.fromarray(transparent_layer, 'RGBA')
selected_point_map = Image.alpha_composite(transparent_background, transparent_layer)
return tracking_points, trackings_input_label, selected_point_map
def show_mask(mask, ax, random_color=False, borders=True):
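    """Overlay a semi-transparent mask (optionally with smoothed contour borders) on a matplotlib axis."""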
if random_color:
color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
else:
color = np.array([30/255, 144/255, 255/255, 0.6])
h, w = mask.shape[-2:]
mask = mask.astype(np.uint8)
mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1)
if borders:
contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
# Try to smooth contours
contours = [cv2.approxPolyDP(contour, epsilon=0.01, closed=True) for contour in contours]
mask_image = cv2.drawContours(mask_image, contours, -1, (1, 1, 1, 0.5), thickness=2)
ax.imshow(mask_image)
def show_points(coords, labels, ax, marker_size=375):
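    """Scatter include points (label 1) in green and exclude points (label 0) in red."""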
pos_points = coords[labels == 1]
neg_points = coords[labels == 0]
ax.scatter(pos_points[:, 0], pos_points[:, 1], color='green', marker='*', s=marker_size, edgecolor='white', linewidth=1.25)
ax.scatter(neg_points[:, 0], neg_points[:, 1], color='red', marker='*', s=marker_size, edgecolor='white', linewidth=1.25)
def show_box(box, ax):
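    """Draw a box given as [x0, y0, x1, y1] as a green rectangle outline."""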
x0, y0 = box[0], box[1]
w, h = box[2] - box[0], box[3] - box[1]
ax.add_patch(plt.Rectangle((x0, y0), w, h, edgecolor='green', facecolor=(0, 0, 0, 0), lw=2))
def show_masks(image, masks, scores, point_coords=None, box_coords=None, input_labels=None, borders=True):
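    """For each mask, save an overlay JPG and a white-on-black mask PNG; return both lists of filenames."""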
combined_images = [] # List to store filenames of images with masks overlaid
mask_images = [] # List to store filenames of separate mask images
for i, (mask, score) in enumerate(zip(masks, scores)):
# ---- Original Image with Mask Overlaid ----
plt.figure(figsize=(10, 10))
plt.imshow(image)
show_mask(mask, plt.gca(), borders=borders) # Draw the mask with borders
if box_coords is not None:
show_box(box_coords, plt.gca())
if len(scores) > 1:
plt.title(f"Mask {i+1}, Score: {score:.3f}", fontsize=18)
plt.axis('off')
# Save the figure as a JPG file
combined_filename = f"combined_image_{i+1}.jpg"
plt.savefig(combined_filename, format='jpg', bbox_inches='tight')
combined_images.append(combined_filename)
plt.close() # Close the figure to free up memory
# ---- Separate Mask Image (White Mask on Black Background) ----
# Create a black image
mask_image = np.zeros_like(image, dtype=np.uint8)
# The mask is a binary array where the masked area is 1, else 0.
# Convert the mask to a white color in the mask_image
mask_layer = (mask > 0).astype(np.uint8) * 255
for c in range(3): # Assuming RGB, repeat mask for all channels
mask_image[:, :, c] = mask_layer
# Save the mask image
mask_filename = f"mask_image_{i+1}.png"
Image.fromarray(mask_image).save(mask_filename)
mask_images.append(mask_filename)
return combined_images, mask_images
@spaces.GPU()
def sam_process(original_image, points, labels):
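    """Run SAM2 point-prompted segmentation and return the best mask as a PIL image."""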
print(f"Points: {points}")
print(f"Labels: {labels}")
    input_point = np.array(points)
    input_label = np.array(labels)
    # Bail out before loading the model if there is nothing to predict from
    if not input_point.size or not input_label.size:
        print("No points or labels provided, returning None")
        return None
    # Convert the image to a numpy array for SAM2 processing
    image = np.array(original_image)
    predictor = SAM2ImagePredictor.from_pretrained("facebook/sam2.1-hiera-large")
    predictor.set_image(image)
    masks, scores, _ = predictor.predict(input_point, input_label, multimask_output=False)
    # Keep the highest-scoring mask (a single mask when multimask_output=False)
    sorted_indices = np.argsort(scores)[::-1]
    masks = masks[sorted_indices]
    # Convert the binary mask to an 8-bit grayscale image
    mask = masks[0] * 255
    mask_image = Image.fromarray(mask.astype(np.uint8))
    return mask_image
def create_sam2_tab():
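    """Build the Gradio UI for point-prompted SAM2 segmentation and return its key components."""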
first_frame = gr.State() # Tracks original image
tracking_points = gr.State([])
trackings_input_label = gr.State([])
with gr.Column():
gr.Markdown("# SAM2 Image Predictor")
gr.Markdown("1. Upload your image\n2. Click points to mask\n3. Submit")
points_map = gr.Image(label="Points Map", type="pil", interactive=True)
input_image = gr.Image(type="pil", visible=False) # Original image
with gr.Row():
point_type = gr.Radio(["include", "exclude"], value="include", label="Point Type")
clear_button = gr.Button("Clear Points")
submit_button = gr.Button("Submit")
        output_image = gr.Image(label="Segmented Output")
# Event handlers
        points_map.upload(
            preprocess_image,
            inputs=points_map,
            outputs=[input_image, first_frame, tracking_points, trackings_input_label]
        )
clear_button.click(
lambda img: ([], [], img),
inputs=first_frame,
outputs=[tracking_points, trackings_input_label, points_map]
)
points_map.select(
get_point,
inputs=[point_type, tracking_points, trackings_input_label, first_frame],
outputs=[tracking_points, trackings_input_label, points_map]
)
submit_button.click(
sam_process,
inputs=[input_image, tracking_points, trackings_input_label],
outputs=output_image
)
return input_image, points_map, output_image
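# Example entry point (a sketch: the original file only defines create_sam2_tab,
# so mounting it in a gr.Blocks app and launching it locally is an assumption):
if __name__ == "__main__":
    with gr.Blocks() as demo:
        create_sam2_tab()
    demo.launch()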