File size: 5,666 Bytes
2e2f07b
 
 
 
 
 
 
 
 
 
 
efae294
66a61d0
2e2f07b
07d7c0a
2e2f07b
 
 
 
 
 
 
 
 
 
 
 
afd2efd
66a61d0
2e2f07b
 
 
 
 
 
 
 
 
07d7c0a
2e2f07b
 
07d7c0a
2e2f07b
 
 
 
66a61d0
2e2f07b
66a61d0
2e2f07b
efae294
 
2e2f07b
efae294
2e2f07b
 
 
 
 
 
 
07d7c0a
 
2e2f07b
07d7c0a
2e2f07b
 
66a61d0
2e2f07b
 
07d7c0a
2e2f07b
 
 
 
07d7c0a
efae294
07d7c0a
66a61d0
 
efae294
2e2f07b
 
 
07d7c0a
 
2e2f07b
07d7c0a
efae294
66a61d0
2e2f07b
66a61d0
07d7c0a
 
 
 
 
 
 
 
 
 
 
 
 
2e2f07b
 
07d7c0a
 
 
 
efae294
07d7c0a
66a61d0
07d7c0a
 
 
 
 
66a61d0
 
 
07d7c0a
 
66a61d0
 
 
07d7c0a
66a61d0
 
 
07d7c0a
efae294
 
66a61d0
07d7c0a
66a61d0
 
 
 
07d7c0a
2e2f07b
07d7c0a
66a61d0
2e2f07b
 
efae294
07d7c0a
 
efae294
 
 
 
 
 
 
 
2e2f07b
 
 
07d7c0a
 
 
 
 
 
efae294
66a61d0
efae294
 
 
07d7c0a
 
 
 
2e2f07b
 
 
10bcd3a
07d7c0a
efae294
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
import cv2
import gradio as gr
import os
from PIL import Image
import numpy as np
import torch
from torch.autograd import Variable
from torchvision import transforms
import torch.nn.functional as F
import matplotlib.pyplot as plt
import warnings
import time
warnings.filterwarnings("ignore")

# Fetch the DIS sources and hoist the contents of DIS/IS-Net into the working
# directory so the project imports that follow (`data_loader_cache`, `models`)
# can be resolved. Each step is guarded so that restarting the app in the same
# working directory neither re-clones over an existing checkout nor runs `mv`
# on a directory that has already been emptied.
if not os.path.isdir("DIS"):
    os.system("git clone https://github.com/xuebinqin/DIS")
if os.path.isdir("DIS/IS-Net") and os.listdir("DIS/IS-Net"):
    os.system("mv DIS/IS-Net/* .")

# project imports
from data_loader_cache import normalize, im_reader, im_preprocess 
from models import *

# Run on the GPU when torch reports one, otherwise on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Stage the weights under saved_models/. Nothing is downloaded here: isnet.pth
# must already be present in the working directory (or already staged).
# The move is keyed on the file itself rather than on the directory, so an
# existing but empty saved_models/ still gets populated.
_weights_dir = "saved_models"
_weights_dst = os.path.join(_weights_dir, "isnet.pth")
os.makedirs(_weights_dir, exist_ok=True)
if os.path.exists("isnet.pth") and not os.path.exists(_weights_dst):
    os.replace("isnet.pth", _weights_dst)

class GOSNormalize(object):
    '''
    Callable that normalizes an image with per-channel mean and std.

    mean, std: one value per channel. They are stored on the instance and
    passed to ``normalize`` on every call.
    '''
    def __init__(self, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
        # Store private list copies. The defaults are immutable tuples and the
        # caller's sequences are not aliased, so mutating one instance's
        # mean/std can never leak into another instance.
        self.mean = list(mean)
        self.std = list(std)

    def __call__(self, image):
        '''Return ``normalize(image, self.mean, self.std)``.'''
        return normalize(image, self.mean, self.std)

# Preprocessing pipeline applied by load_image: a single normalization step
# with mean 0.5 and std 1.0 on each of the three channels.
_normalizer = GOSNormalize(mean=[0.5, 0.5, 0.5], std=[1.0, 1.0, 1.0])
transform = transforms.Compose([_normalizer])

def load_image(im_path, hypar):
    """
    Read one image and prepare it as a single-item batch for the network.

    im_path: path handed to ``im_reader``.
    hypar:   settings dict; only ``hypar["cache_size"]`` is read here and it
             is forwarded to ``im_preprocess``.

    Returns a pair ``(image_batch, shape_batch)``: the image after division
    by 255.0 and the module-level ``transform``, and the shape value reported
    by ``im_preprocess`` converted to a tensor. Both get a leading axis of
    size 1 via ``unsqueeze(0)``.
    """
    raw = im_reader(im_path)
    prepped, reported_shape = im_preprocess(raw, hypar["cache_size"])

    # Divide by 255.0 before normalization (presumably 0-255 pixel values;
    # confirm against im_preprocess).
    scaled = torch.divide(prepped, 255.0)

    shape_batch = torch.from_numpy(np.array(reported_shape)).unsqueeze(0)
    image_batch = transform(scaled).unsqueeze(0)
    return image_batch, shape_batch

def build_model(hypar, device):
    """
    Prepare the network stored in ``hypar["model"]`` for inference.

    hypar:  settings dict. Keys read here:
            - "model":         the network instance (modified in place),
            - "model_digit":   "half" switches the network to half precision,
            - "restore_model": checkpoint file name, "" to skip loading,
            - "model_path":    directory containing that checkpoint.
    device: target device, passed to ``net.to`` and used as ``map_location``
            when loading the checkpoint.

    Returns the same network object, moved to ``device`` and in eval mode.
    """
    net = hypar["model"]

    # convert to half precision if needed
    if hypar["model_digit"] == "half":
        net.half()
        # Batch-norm layers are switched back to float32 after the cast.
        # torch.nn is referenced through the `torch` import so this branch
        # does not depend on a star-import happening to expose `nn`.
        for layer in net.modules():
            if isinstance(layer, torch.nn.BatchNorm2d):
                layer.float()

    net.to(device)
    if hypar["restore_model"] != "":
        checkpoint = os.path.join(hypar["model_path"], hypar["restore_model"])
        # load_state_dict copies values into the parameters that already live
        # on `device`, so no second .to(device) is needed afterwards.
        net.load_state_dict(torch.load(checkpoint, map_location=device))
    net.eval()
    return net

def predict(net, inputs_val, shapes_val, hypar, device):
    """
    Run the network on one batch and return an 8-bit mask.

    net:        the network; ``net(x)[0][0]`` is used as the prediction.
    inputs_val: input tensor; cast to float32 when ``hypar["model_digit"]``
                is "full", otherwise to float16.
    shapes_val: indexable so that ``shapes_val[0][0]`` and
                ``shapes_val[0][1]`` give the output height and width.
    hypar:      settings dict; only "model_digit" is read here.
    device:     device the input is moved to before the forward pass.

    Returns a ``numpy.uint8`` array: the first item's prediction, resized
    bilinearly to the requested size, min-max scaled and multiplied by 255.
    """
    net.eval()

    if hypar["model_digit"] == "full":
        inputs_val = inputs_val.type(torch.FloatTensor)
    else:
        inputs_val = inputs_val.type(torch.HalfTensor)

    # Plain Python ints keep the size argument independent of tensor types.
    out_h = int(shapes_val[0][0])
    out_w = int(shapes_val[0][1])

    # Inference only: skip autograd bookkeeping for the whole forward pass.
    with torch.no_grad():
        ds_val = net(inputs_val.to(device))[0]
        pred_val = ds_val[0][0, :, :, :]
        # F.upsample is deprecated; interpolate with align_corners=False is
        # what it resolved to for mode='bilinear'.
        pred_val = torch.squeeze(F.interpolate(torch.unsqueeze(pred_val, 0),
                                               size=(out_h, out_w),
                                               mode='bilinear',
                                               align_corners=False))

        ma = torch.max(pred_val)
        mi = torch.min(pred_val)
        # normalize to [0, 1], add a small epsilon to avoid division by zero
        pred_val = (pred_val - mi) / (ma - mi + 1e-8)

    # Accept 'cuda', 'cuda:0' and torch.device objects alike.
    if str(device).startswith('cuda'):
        torch.cuda.empty_cache()
    return (pred_val.detach().cpu().numpy() * 255).astype(np.uint8)

# Run-time settings shared by load_image, build_model and predict.
hypar = dict(
    # build_model joins these two into the checkpoint path.
    model_path="./saved_models",
    restore_model="isnet.pth",
    # Not read by the code visible in this file.
    interm_sup=False,
    # "full" keeps float32; any other value makes predict cast inputs to half.
    model_digit="full",
    # Not read by the code visible in this file.
    seed=0,
    # Forwarded to im_preprocess by load_image.
    cache_size=[1024, 1024],
    # Not read by the code visible in this file.
    input_size=[1024, 1024],
    crop_size=[1024, 1024],
    # The network instance that build_model configures and returns.
    model=ISNetDIS(),
)

# Instantiate once at import time; every request reuses this network.
net = build_model(hypar, device)

def inference(img1, img2, img3, logs):
    """
    Remove the background from up to three images, one after another.

    Args:
        img1, img2, img3: image file paths; any of them may be None and is
            then skipped.
        logs: log text accumulated by earlier calls, or None on first use.

    Returns:
        A 3-tuple ``(gallery, logs, logs)``. ``gallery`` is a flat list with
        two PIL images per processed input: the input with the predicted
        mask as its alpha channel, followed by the mask itself. The updated
        log text is returned twice because the interface wires it to two
        outputs.
    """
    start_time = time.time()
    logs = logs or ""  # initialize logs if None

    image_paths = [p for p in (img1, img2, img3) if p is not None]
    if not image_paths:
        # No images were uploaded
        logs += "No images to process.\n"
        return [], logs, logs

    final_images = []
    for path in image_paths:
        image_tensor, orig_size = load_image(path, hypar)
        mask = predict(net, image_tensor, orig_size, hypar, device)
        pil_mask = Image.fromarray(mask).convert('L')

        # convert() returns a new, fully loaded image, so the source file
        # handle can be closed as soon as the conversion is done.
        with Image.open(path) as source:
            im_rgba = source.convert("RGB")
        im_rgba.putalpha(pil_mask)

        final_images.extend((im_rgba, pil_mask))

    elapsed = round(time.time() - start_time, 2)
    logs += f"Processed {len(image_paths)} image(s) in {elapsed} second(s).\n"

    # Return the flattened gallery, state, and logs text
    return final_images, logs, logs

# Static text handed to gr.Interface as `title`, `description` and `article`.
title = "Highly Accurate Dichotomous Image Segmentation"

_description_parts = [
    "This is an unofficial demo for DIS, a model that can remove the background from up to 3 images. ",
    "Simply upload 1 to 3 images, or use the example images. ",
    "GitHub: https://github.com/xuebinqin/DIS<br>",
    "Telegram bot: https://t.me/restoration_photo_bot<br>",
    "[![](https://img.shields.io/twitter/follow/DoEvent?label=@DoEvent&style=social)](https://twitter.com/DoEvent)",
]
description = "".join(_description_parts)

_badge_url = "https://visitor-badge.glitch.me/badge?page_id=max_skobeev_dis_cmp_public"
article = "<div><center><img src='" + _badge_url + "' alt='visitor badge'></center></div>"

# Inputs: three image slots delivered to `inference` as file paths, plus a
# State component that carries the accumulated log text between calls.
_ui_inputs = [
    gr.Image(type='filepath', label='Image 1'),
    gr.Image(type='filepath', label='Image 2'),
    gr.Image(type='filepath', label='Image 3'),
    gr.State(),
]

# Outputs mirror the 3-tuple returned by `inference`:
# gallery images, updated state, visible log text.
_ui_outputs = [
    gr.Gallery(label="Output (rgba + mask)"),
    gr.State(),
    gr.Textbox(label="Logs", lines=6),
]

# Example rows supply values for the three image slots only.
_ui_examples = [
    ["robot.png", None, None],
    ["robot.png", "ship.png", None],
]

# Build the UI, enable the queue and start serving. Because the calls are
# chained, `interface` ends up bound to whatever launch() returns.
interface = gr.Interface(
    fn=inference,
    inputs=_ui_inputs,
    outputs=_ui_outputs,
    examples=_ui_examples,
    title=title,
    description=description,
    article=article,
    flagging_mode="never",
    cache_mode="lazy"
).queue().launch(show_api=True, show_error=True)