import gradio as gr
import numpy as np
from PIL import Image
import torch
from transformers import AutoProcessor, CLIPSegForImageSegmentation

# Load the CLIPSeg model and processor once at startup.
processor = AutoProcessor.from_pretrained("CIDAS/clipseg-rd64-refined")
model = CLIPSegForImageSegmentation.from_pretrained("CIDAS/clipseg-rd64-refined")
model.eval()


def run_clipseg(image):
    """Run CLIPSeg on a numpy image with the generic prompt "object" and
    return a uint8 mask resized back to the image's height and width."""
    inputs = processor(text=["object"], images=[image],
                       padding="max_length", return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    preds = outputs.logits.squeeze().sigmoid()
    # CLIPSeg predicts at a fixed low resolution (352x352 for rd64-refined),
    # so the mask must be resized to match the input before it can be used.
    mask = Image.fromarray((preds.numpy() * 255).astype(np.uint8))
    return np.array(mask.resize((image.shape[1], image.shape[0])))


def update_image(image, segmentation):
    """Blend the segmentation mask over the original image for display."""
    if segmentation is None:
        return image
    # gr.Image already delivers uint8 pixels in [0, 255]; rescaling by 255
    # here would overflow, so the array is used as-is.
    image_pil = Image.fromarray(image).convert("RGBA")
    seg_pil = Image.fromarray(segmentation).convert("RGBA")
    blended = Image.blend(image_pil, seg_pil, 0.5)
    return np.array(blended)


def segment_everything(image):
    """Segment the whole image and return it blended with the mask."""
    segmentation = run_clipseg(image)
    return update_image(image, segmentation)


def segment_box(image, x1, y1, x2, y2):
    """Segment only the user-specified box and return the blended result."""
    h, w = image.shape[:2]
    x1, x2 = max(0, int(x1)), min(w, int(x2))
    y1, y2 = max(0, int(y1)), min(h, int(y2))
    if x2 <= x1 or y2 <= y1:
        return image
    cropped = image[y1:y2, x1:x2]
    segmentation = np.zeros((h, w), dtype=np.uint8)
    segmentation[y1:y2, x1:x2] = run_clipseg(cropped)
    return update_image(image, segmentation)


with gr.Blocks() as demo:
    gr.Markdown("# Segment Anything-like Demo")
    with gr.Row():
        with gr.Column(scale=1):
            # The old `tool="select"` argument has been removed from gr.Image
            # and never yielded a bounding box, so the box coordinates are
            # entered manually here.
            input_image = gr.Image(label="Input Image")
            with gr.Row():
                x1 = gr.Number(label="x1", value=0, precision=0)
                y1 = gr.Number(label="y1", value=0, precision=0)
                x2 = gr.Number(label="x2", value=0, precision=0)
                y2 = gr.Number(label="y2", value=0, precision=0)
            with gr.Row():
                everything_btn = gr.Button("Everything")
                box_btn = gr.Button("Box")
        with gr.Column(scale=1):
            output_image = gr.Image(label="Segmentation Result")

    everything_btn.click(
        fn=segment_everything,
        inputs=[input_image],
        outputs=[output_image],
    )
    box_btn.click(
        fn=segment_box,
        inputs=[input_image, x1, y1, x2, y2],
        outputs=[output_image],
    )
    # Blending is done inside the click handlers. Wiring update_image to
    # output_image.change with output_image as its own output would retrigger
    # the change event on every write and loop indefinitely.

demo.launch()
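# ---------------------------------------------------------------------------
# Optional alternative for the box input (a sketch, not wired into the app
# above): gr.Image fires a .select event whose gr.SelectData payload carries
# the [x, y] pixel coordinates of a click, so two clicks can define the box
# corners instead of typing them. `record_corner` and `corners` are
# illustrative names introduced for this sketch only; the listener would have
# to be registered inside the `with gr.Blocks()` context above.
#
#     corners = gr.State([])
#
#     def record_corner(stored, evt: gr.SelectData):
#         # Keep the two most recent clicks as opposite box corners.
#         return (stored + [evt.index])[-2:]
#
#     input_image.select(record_corner, inputs=[corners], outputs=[corners])
# ---------------------------------------------------------------------------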