# dis-background-removal
#
# Sleeping
#
# File size: 5,666 Bytes

# Page-extraction residue: a per-line commit gutter was captured here.
# Commit ids that appeared in it: 2e2f07b, efae294, 66a61d0, 07d7c0a,
# afd2efd, 10bcd3a.

import cv2
import gradio as gr
import os
from PIL import Image
import numpy as np
import torch
from torch.autograd import Variable
from torchvision import transforms
import torch.nn.functional as F
import matplotlib.pyplot as plt
import warnings
import time
warnings.filterwarnings("ignore")

# Fetch the DIS repository and hoist the IS-Net sources into the working
# directory so the project imports that follow can resolve.
# The clone is skipped when a DIS checkout already exists, so restarting in
# the same directory does not re-run git against a non-empty target.
if not os.path.isdir("DIS"):
    os.system("git clone https://github.com/xuebinqin/DIS")

# Only attempt the move while there is still something to move; after the
# first successful run DIS/IS-Net is empty and `mv` would just report an error.
if os.path.isdir("DIS/IS-Net") and os.listdir("DIS/IS-Net"):
    os.system("mv DIS/IS-Net/* .")

# project imports
from data_loader_cache import normalize, im_reader, im_preprocess 
from models import *

# Prefer the GPU when torch reports one; otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Stage the official weights: nothing is downloaded here, an `isnet.pth`
# sitting in the working directory is moved into `saved_models/`.
# The directory is created when missing, and the move is attempted whenever
# the checkpoint is not yet in place (not only when the directory is new).
os.makedirs("saved_models", exist_ok=True)
if os.path.isfile("isnet.pth") and not os.path.exists("saved_models/isnet.pth"):
    os.replace("isnet.pth", "saved_models/isnet.pth")

class GOSNormalize:
    """Callable transform that normalizes an image with a fixed mean and std.

    Wraps the ``normalize`` helper imported from ``data_loader_cache`` so it
    can be placed inside a ``transforms.Compose`` pipeline.

    Args:
        mean: Per-channel means forwarded to ``normalize``. When omitted,
            ``[0.485, 0.456, 0.406]`` is used.
        std: Per-channel standard deviations forwarded to ``normalize``.
            When omitted, ``[0.229, 0.224, 0.225]`` is used.
    """

    def __init__(self, mean=None, std=None):
        # Build the default lists per instance instead of using list literals
        # as default arguments, which would be shared (and mutable) across
        # every instance created without explicit values.
        self.mean = [0.485, 0.456, 0.406] if mean is None else mean
        self.std = [0.229, 0.224, 0.225] if std is None else std

    def __call__(self, image):
        """Return ``image`` normalized with this instance's mean and std."""
        return normalize(image, self.mean, self.std)

# Preprocessing pipeline used by load_image(): a single GOSNormalize step
# configured with mean 0.5 and std 1.0 for each of the three channels.
transform = transforms.Compose(
    [
        GOSNormalize(mean=[0.5, 0.5, 0.5], std=[1.0, 1.0, 1.0]),
    ]
)

def load_image(im_path, hypar):
    """Read an image from disk and turn it into a batch for the network.

    Args:
        im_path: Path handed to ``im_reader``.
        hypar: Settings dict; only ``hypar["cache_size"]`` is read here and
            it is passed straight to ``im_preprocess``.

    Returns:
        A pair ``(batch, shape)``: the transformed image and the shape
        reported by ``im_preprocess``, each with a new leading dimension.
    """
    raw = im_reader(im_path)
    prepared, reported_shape = im_preprocess(raw, hypar["cache_size"])

    # Scale the values by 1/255 before the normalization transform runs.
    scaled = torch.div(prepared, 255.0)
    shape_tensor = torch.from_numpy(np.array(reported_shape))

    batch = transform(scaled).unsqueeze(0)
    return batch, shape_tensor.unsqueeze(0)

def build_model(hypar, device):
    """Prepare the network stored in ``hypar["model"]`` for inference.

    Args:
        hypar: Settings dict. Reads ``"model"`` (the network instance),
            ``"model_digit"`` (``"half"`` triggers the half-precision cast),
            ``"restore_model"`` (checkpoint file name, ``""`` to skip loading)
            and ``"model_path"`` (directory containing the checkpoint).
        device: Target device passed to ``.to()`` and used as
            ``map_location`` when loading the checkpoint.

    Returns:
        The same network object, moved to ``device`` and set to eval mode.
    """
    model = hypar["model"]

    if hypar["model_digit"] == "half":
        model.half()
        # BatchNorm layers are switched back to float after the half cast.
        for module in model.modules():
            if isinstance(module, nn.BatchNorm2d):
                module.float()

    model.to(device)

    checkpoint_name = hypar["restore_model"]
    if checkpoint_name != "":
        checkpoint_path = os.path.join(hypar["model_path"], checkpoint_name)
        state = torch.load(checkpoint_path, map_location=device)
        model.load_state_dict(state)
        # Move again once the checkpoint has been loaded.
        model.to(device)

    model.eval()
    return model

def predict(net, inputs_val, shapes_val, hypar, device):
    """Run the network on one preprocessed image and return a uint8 mask.

    Args:
        net: The network. ``net(x)[0]`` must be a sequence whose first
            element is a 4-D tensor; the first item along its leading
            dimension is used as the prediction.
        inputs_val: Input batch tensor; cast to float32 when
            ``hypar["model_digit"] == "full"`` and to float16 otherwise.
        shapes_val: Indexable as ``shapes_val[0][0]`` (height) and
            ``shapes_val[0][1]`` (width); the prediction is resized to it.
        hypar: Settings dict; only ``"model_digit"`` is read here.
        device: Device the inputs are moved to before the forward pass.

    Returns:
        ``numpy.ndarray`` of dtype ``uint8`` holding the prediction
        min-max scaled to the 0..255 range.
    """
    net.eval()

    if hypar["model_digit"] == "full":
        inputs_val = inputs_val.type(torch.FloatTensor)
    else:
        inputs_val = inputs_val.type(torch.HalfTensor)

    # Plain Python ints keep ``size`` valid however shapes_val stores them.
    target_size = (int(shapes_val[0][0]), int(shapes_val[0][1]))

    # Inference only: skip autograd bookkeeping so no graph is kept alive.
    with torch.no_grad():
        ds_val = net(inputs_val.to(device))[0]
        pred_val = ds_val[0][0, :, :, :]
        pred_val = F.interpolate(pred_val.unsqueeze(0),
                                 size=target_size,
                                 mode='bilinear')
        # Drop only the leading size-1 dimensions; a bare squeeze() would
        # also collapse a height or width of 1 and change the mask's rank.
        pred_val = pred_val.squeeze(0).squeeze(0)

        ma = torch.max(pred_val)
        mi = torch.min(pred_val)
        # normalize to [0, 1], add a small epsilon to avoid division by zero
        pred_val = (pred_val - mi) / (ma - mi + 1e-8)

    # Accept 'cuda', 'cuda:0' and torch.device('cuda') alike.
    if str(device).startswith('cuda'):
        torch.cuda.empty_cache()
    return (pred_val.detach().cpu().numpy() * 255).astype(np.uint8)

# Runtime settings shared by load_image(), build_model() and predict().
hypar = dict(
    model_path="./saved_models",  # directory joined with restore_model in build_model()
    restore_model="isnet.pth",  # checkpoint file name; "" makes build_model() skip loading
    interm_sup=False,  # not read by the code in this file
    model_digit="full",  # "full" -> float32 inputs in predict(); "half" -> half-precision path
    seed=0,  # not read by the code in this file
    cache_size=[1024, 1024],  # handed to im_preprocess() by load_image()
    input_size=[1024, 1024],  # not read by the code in this file
    crop_size=[1024, 1024],  # not read by the code in this file
    model=ISNetDIS(),  # network instance that build_model() configures
)

# Create the network once at module import; inference() reuses it on every call.
net = build_model(hypar, device)

def inference(img1, img2, img3, logs):
    """Remove the background from up to three uploaded images.

    The images are processed one after another (not in parallel); slots
    left empty arrive as ``None`` and are skipped.

    Args:
        img1: File path of the first image, or ``None``.
        img2: File path of the second image, or ``None``.
        img3: File path of the third image, or ``None``.
        logs: Log text accumulated by earlier calls; ``None`` or ``""`` on
            the first call.

    Returns:
        A tuple ``(gallery, state, log_text)``. ``gallery`` is a flat list
        ``[rgba_1, mask_1, rgba_2, mask_2, ...]`` of PIL images (empty when
        no image was supplied); ``state`` and ``log_text`` are the same
        updated log string.
    """
    start_time = time.time()
    logs = logs or ""  # initialize logs if None

    image_paths = [p for p in (img1, img2, img3) if p is not None]
    if not image_paths:
        # No images were uploaded
        logs += "No images to process.\n"
        return [], logs, logs

    gallery = []
    for path in image_paths:
        image_tensor, orig_size = load_image(path, hypar)
        mask = predict(net, image_tensor, orig_size, hypar, device)
        pil_mask = Image.fromarray(mask).convert('L')

        # Close the file handle as soon as the pixels have been converted;
        # convert() returns a new image that stays valid after the close.
        with Image.open(path) as source:
            cutout = source.convert("RGB")
        # putalpha() turns the RGB image into RGBA using the mask as alpha.
        cutout.putalpha(pil_mask)

        gallery.extend([cutout, pil_mask])

    elapsed = round(time.time() - start_time, 2)
    logs += f"Processed {len(image_paths)} image(s) in {elapsed} second(s).\n"

    # Return the flattened gallery, state, and logs text
    return gallery, logs, logs

# Heading shown at the top of the demo page.
title = "Highly Accurate Dichotomous Image Segmentation"

# Intro text under the heading: usage hint, project links and a follow badge.
description = "".join(
    (
        "This is an unofficial demo for DIS, a model that can remove the background from up to 3 images. ",
        "Simply upload 1 to 3 images, or use the example images. ",
        "GitHub: https://github.com/xuebinqin/DIS<br>",
        "Telegram bot: https://t.me/restoration_photo_bot<br>",
        "[![](https://img.shields.io/twitter/follow/DoEvent?label=@DoEvent&style=social)](https://twitter.com/DoEvent)",
    )
)

# Footer markup: a centered visitor-counter badge image.
article = "".join(
    (
        "<div><center>",
        "<img src='https://visitor-badge.glitch.me/badge?page_id=max_skobeev_dis_cmp_public' alt='visitor badge'>",
        "</center></div>",
    )
)

# Build the UI, enable request queuing and start the server in one chain.
# Note that `interface` ends up bound to the value returned by launch(),
# not to the gr.Interface object itself.
interface = (
    gr.Interface(
        fn=inference,
        # Three optional image slots (delivered to inference() as file
        # paths) followed by the State that carries the accumulated log text.
        inputs=[
            *(
                gr.Image(type='filepath', label=caption)
                for caption in ('Image 1', 'Image 2', 'Image 3')
            ),
            gr.State(),
        ],
        # One component per value returned by inference():
        # gallery images, updated state, log text.
        outputs=[
            gr.Gallery(label="Output (rgba + mask)"),
            gr.State(),
            gr.Textbox(label="Logs", lines=6),
        ],
        # Each example row lists values for the three image slots only.
        examples=[
            ["robot.png", None, None],
            ["robot.png", "ship.png", None],
        ],
        title=title,
        description=description,
        article=article,
        flagging_mode="never",
        cache_mode="lazy",
    )
    .queue()
    .launch(show_api=True, show_error=True)
)