Duplicate from afmck/stable-diffusion-inpainting-segmentation
Co-authored-by: Alex McKinney <[email protected]>
- .gitattributes +34 -0
- README.md +18 -0
- app.css +114 -0
- app.py +239 -0
- app_header.html +58 -0
- app_license.html +27 -0
- example.png +0 -0
- requirements.txt +12 -0
.gitattributes
ADDED
@@ -0,0 +1,34 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
README.md
ADDED
@@ -0,0 +1,18 @@
+---
+title: Stable Diffusion Inpainting Segmentation
+emoji: 😷
+colorFrom: purple
+colorTo: black
+sdk: gradio
+sdk_version: 3.9
+app_file: app.py
+pinned: true
+license: creativeml-openrail-m
+duplicated_from: afmck/stable-diffusion-inpainting-segmentation
+---
+
+### ToDos:
+- [ ] setting a random seed
+- [ ] click support for segmentation
+- [ ] draw on mask
+- [ ] batching support
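The first ToDo ("setting a random seed") maps onto an existing `diffusers` hook: pipelines accept a `torch.Generator` through the `generator` keyword of their `__call__`. A minimal sketch, not part of this commit, of how `fn_diffusion` in `app.py` could wire it up, assuming the existing `device` and `pipe` globals (the `seed` input itself is hypothetical):

```python
import torch

# Hypothetical helper: a seeded generator makes a pipeline call reproducible.
def make_generator(seed: int) -> torch.Generator:
    return torch.Generator(device=device).manual_seed(seed)

# fn_diffusion would take an extra `seed` input (e.g. from a gr.Number) and pass:
# inpainted_image = pipe(..., generator=make_generator(seed)).images[0]
```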
app.css
ADDED
@@ -0,0 +1,114 @@
+.gradio-container {
+    font-family: 'IBM Plex Sans', sans-serif;
+}
+.gr-button {
+    color: white;
+    border-color: black;
+    background: black;
+}
+input[type='range'] {
+    accent-color: black;
+}
+.dark input[type='range'] {
+    accent-color: #dfdfdf;
+}
+.container {
+    max-width: 730px;
+    margin: auto;
+    padding-top: 1.5rem;
+}
+#gallery {
+    min-height: 22rem;
+    margin-bottom: 15px;
+    margin-left: auto;
+    margin-right: auto;
+    border-bottom-right-radius: .5rem !important;
+    border-bottom-left-radius: .5rem !important;
+}
+#gallery>div>.h-full {
+    min-height: 20rem;
+}
+.details:hover {
+    text-decoration: underline;
+}
+.gr-button {
+    white-space: nowrap;
+}
+.gr-button:focus {
+    border-color: rgb(147 197 253 / var(--tw-border-opacity));
+    outline: none;
+    box-shadow: var(--tw-ring-offset-shadow), var(--tw-ring-shadow), var(--tw-shadow, 0 0 #0000);
+    --tw-border-opacity: 1;
+    --tw-ring-offset-shadow: var(--tw-ring-inset) 0 0 0 var(--tw-ring-offset-width) var(--tw-ring-offset-color);
+    --tw-ring-shadow: var(--tw-ring-inset) 0 0 0 calc(3px + var(--tw-ring-offset-width)) var(--tw-ring-color);
+    --tw-ring-color: rgb(191 219 254 / var(--tw-ring-opacity));
+    --tw-ring-opacity: .5;
+}
+#advanced-btn {
+    font-size: .7rem !important;
+    line-height: 19px;
+    margin-top: 12px;
+    margin-bottom: 12px;
+    padding: 2px 8px;
+    border-radius: 14px !important;
+}
+#advanced-options {
+    display: none;
+    margin-bottom: 20px;
+}
+.footer {
+    margin-bottom: 45px;
+    margin-top: 35px;
+    text-align: center;
+    border-bottom: 1px solid #e5e5e5;
+}
+.footer>p {
+    font-size: .8rem;
+    display: inline-block;
+    padding: 0 10px;
+    transform: translateY(10px);
+    background: white;
+}
+.dark .footer {
+    border-color: #303030;
+}
+.dark .footer>p {
+    background: #0b0f19;
+}
+.acknowledgments h4 {
+    margin: 1.25em 0 .25em 0;
+    font-weight: bold;
+    font-size: 115%;
+}
+#container-advanced-btns {
+    display: flex;
+    flex-wrap: wrap;
+    justify-content: space-between;
+    align-items: center;
+}
+.animate-spin {
+    animation: spin 1s linear infinite;
+}
+@keyframes spin {
+    from {
+        transform: rotate(0deg);
+    }
+    to {
+        transform: rotate(360deg);
+    }
+}
+#share-btn-container {
+    display: flex; padding-left: 0.5rem !important; padding-right: 0.5rem !important; background-color: #000000; justify-content: center; align-items: center; border-radius: 9999px !important; width: 13rem;
+}
+#share-btn {
+    all: initial; color: #ffffff; font-weight: 600; cursor: pointer; font-family: 'IBM Plex Sans', sans-serif; margin-left: 0.5rem !important; padding-top: 0.25rem !important; padding-bottom: 0.25rem !important;
+}
+#share-btn * {
+    all: unset;
+}
+.gr-form {
+    flex: 1 1 50%; border-top-right-radius: 0; border-bottom-right-radius: 0;
+}
+#prompt-container {
+    gap: 0;
+}
app.py
ADDED
@@ -0,0 +1,239 @@
+import io
+import os
+import numpy as np
+import torch
+from PIL import Image
+from typing import List, Optional
+from functools import reduce
+from argparse import ArgumentParser
+
+import gradio as gr
+
+from transformers import DetrFeatureExtractor, DetrForSegmentation, DetrConfig
+from transformers.models.detr.feature_extraction_detr import rgb_to_id
+
+from diffusers import StableDiffusionInpaintPipeline, DPMSolverMultistepScheduler
+
+parser = ArgumentParser()
+parser.add_argument('--disable-cuda', action='store_true')
+parser.add_argument('--attention-slicing', action='store_true')
+args = parser.parse_args()
+
+auth_token = os.environ.get("READ_TOKEN")
+try_cuda = not args.disable_cuda
+
+# Inference only: no gradients are ever needed
+torch.set_grad_enabled(False)
+
+# Device helper
+def get_device(try_cuda=True):
+    return torch.device('cuda' if try_cuda and torch.cuda.is_available() else 'cpu')
+
+device = get_device(try_cuda=try_cuda)
+
+# Load the DETR panoptic segmentation model, its feature extractor and config
+def load_segmentation_models(model_name: str = 'facebook/detr-resnet-50-panoptic'):
+    feature_extractor = DetrFeatureExtractor.from_pretrained(model_name)
+    model = DetrForSegmentation.from_pretrained(model_name)
+    cfg = DetrConfig.from_pretrained(model_name)
+
+    return feature_extractor, model, cfg
+
+# Load the Stable Diffusion inpainting pipeline
+def load_diffusion_pipeline(model_name: str = 'stabilityai/stable-diffusion-2-inpainting'):
+    return StableDiffusionInpaintPipeline.from_pretrained(
+        model_name,
+        revision='fp16',
+        torch_dtype=torch.float16 if try_cuda and torch.cuda.is_available() else torch.float32,
+        use_auth_token=auth_token
+    )
+
+# Morphological erosion: min-pooling expressed as max-pooling of the negated input
+def min_pool(x: torch.Tensor, kernel_size: int):
+    pad_size = (kernel_size - 1) // 2
+    return -torch.nn.functional.max_pool2d(-x, kernel_size, (1, 1), padding=pad_size)
+
+# Morphological dilation via max-pooling
+def max_pool(x: torch.Tensor, kernel_size: int):
+    pad_size = (kernel_size - 1) // 2
+    return torch.nn.functional.max_pool2d(x, kernel_size, (1, 1), padding=pad_size)
+
+# Clean up a binary mask: erode to drop small speckles, then dilate so the
+# final mask slightly overflows the segment boundary
+def clean_mask(mask, max_kernel: int = 23, min_kernel: int = 5):
+    mask = torch.Tensor(mask[None, None]).float().to(device)
+    mask = min_pool(mask, min_kernel)
+    mask = max_pool(mask, max_kernel)
+    mask = mask.bool().squeeze().cpu().numpy()
+    return mask
+
+
+feature_extractor, segmentation_model, segmentation_cfg = load_segmentation_models()
+pipe = load_diffusion_pipeline()
+pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
+
+segmentation_model = segmentation_model.to(device)
+pipe = pipe.to(device)
+if args.attention_slicing:
+    pipe.enable_attention_slicing()
+
+# Callback that runs panoptic segmentation and populates the CheckboxGroup
+def fn_segmentation(image, max_kernel, min_kernel):
+    inputs = feature_extractor(images=image, return_tensors="pt").to(device)
+    with torch.inference_mode():
+        outputs = segmentation_model(**inputs)
+
+    processed_sizes = torch.as_tensor(inputs["pixel_values"].shape[-2:]).unsqueeze(0)
+    result = feature_extractor.post_process_panoptic(outputs, processed_sizes)[0]
+
+    # The panoptic map comes back as a PNG; decode it and map RGB values to segment ids
+    panoptic_seg = Image.open(io.BytesIO(result["png_string"])).resize((image.width, image.height))
+    panoptic_seg = np.array(panoptic_seg, dtype=np.uint8)
+    panoptic_seg_id = rgb_to_id(panoptic_seg)
+
+    # One binary mask per detected segment
+    raw_masks = []
+    for s in result['segments_info']:
+        m = panoptic_seg_id == s['id']
+        raw_masks.append(m.astype(np.uint8) * 255)
+
+    checkbox_choices = [f"{s['id']}:{segmentation_cfg.id2label[s['category_id']]}" for s in result['segments_info']]
+    checkbox_group = gr.CheckboxGroup.update(choices=checkbox_choices)
+
+    return raw_masks, checkbox_group, gr.Image.update(value=np.zeros((image.height, image.width))), gr.Image.update(value=image)
+
+# Callback that rebuilds the displayed mask from the selected checkboxes
+def fn_update_mask(
+    image: Image.Image,
+    masks: List[np.ndarray],
+    masks_enabled: List[str],
+    max_kernel: int,
+    min_kernel: int,
+    invert_mask: bool
+):
+    # Checkbox labels have the form "<segment id>:<class name>"
+    mask_indices = [int(m.split(':')[0]) for m in masks_enabled]
+    combined_mask = reduce(lambda x, y: x | y, [masks[i].astype(bool) for i in mask_indices], np.zeros_like(masks[0], dtype=bool))
+
+    if invert_mask:
+        combined_mask = ~combined_mask
+
+    combined_mask = clean_mask(combined_mask, max_kernel, min_kernel)
+
+    # Black out the masked region in the preview image
+    masked_image = np.array(image).copy()
+    masked_image[combined_mask] = 0
+
+    return combined_mask.astype(np.uint8) * 255, Image.fromarray(masked_image)
+
+# Callback that runs diffusion given the current image, mask and prompt
+def fn_diffusion(
+    prompt: str,
+    masked_image: Image.Image,
+    mask: np.ndarray,
+    num_diffusion_steps: int,
+    guidance_scale: float,
+    negative_prompt: Optional[str] = None,
+):
+    # Gradio passes an empty string when the textbox is left blank
+    if not negative_prompt:
+        negative_prompt = None
+
+    # Resize so the short edge matches what Stable Diffusion expects.
+    # TODO: remove magic number
+    STABLE_DIFFUSION_SMALL_EDGE = 512
+
+    w, h = masked_image.size
+    is_width_larger = w > h
+    resize_ratio = STABLE_DIFFUSION_SMALL_EDGE / (h if is_width_larger else w)
+
+    new_width = int(w * resize_ratio) if is_width_larger else STABLE_DIFFUSION_SMALL_EDGE
+    new_height = STABLE_DIFFUSION_SMALL_EDGE if is_width_larger else int(h * resize_ratio)
+
+    # Round the long edge up to a multiple of 8, as the UNet requires;
+    # (-x) % 8 is already 0 when x is a multiple of 8.
+    new_width += (-new_width) % 8 if is_width_larger else 0
+    new_height += 0 if is_width_larger else (-new_height) % 8
+
+    mask = Image.fromarray(mask).convert("RGB").resize((new_width, new_height))
+    masked_image = masked_image.convert("RGB").resize((new_width, new_height))
+
+    # Run diffusion
+    inpainted_image = pipe(
+        height=new_height,
+        width=new_width,
+        prompt=prompt,
+        image=masked_image,
+        mask_image=mask,
+        num_inference_steps=num_diffusion_steps,
+        guidance_scale=guidance_scale,
+        negative_prompt=negative_prompt
+    ).images[0]
+
+    # Resize back to the original size
+    inpainted_image = inpainted_image.resize((w, h))
+
+    return inpainted_image
+
+demo = gr.Blocks(css=open('app.css').read())
+
+with demo:
+    gr.HTML(open('app_header.html').read())
+
+    if not try_cuda or not torch.cuda.is_available():
+        gr.HTML('<div class="alert alert-warning" role="alert" style="color:red"><b>Warning: GPU not available! Diffusion will be slow.</b></div>')
+
+    # Input image control
+    input_image = gr.Image(value="example.png", type='pil', label="Input Image")
+    # Combined mask controls
+    bt_masks = gr.Button("Compute Masks")
+    with gr.Row():
+        mask_image = gr.Image(type='numpy', label="Diffusion Mask")
+        masked_image = gr.Image(type='pil', label="Masked Image")
+    mask_storage = gr.State()
+
+    # Mask editing controls
+    with gr.Row():
+        max_slider = gr.Slider(minimum=1, maximum=99, value=23, step=2, label="Mask Overflow")
+        min_slider = gr.Slider(minimum=1, maximum=99, value=5, step=2, label="Mask Denoising")
+
+    with gr.Row():
+        invert_mask = gr.Checkbox(label="Invert Mask")
+        mask_checkboxes = gr.CheckboxGroup(interactive=True, label="Mask Selection")
+
+    # Diffusion controls and output
+    with gr.Row():
+        with gr.Column():
+            prompt = gr.Textbox("An angry dog floating in outer deep space. Twinkling stars in the background. High definition.", label="Prompt")
+            negative_prompt = gr.Textbox(label="Negative Prompt")
+        with gr.Column():
+            steps_slider = gr.Slider(minimum=1, maximum=100, value=50, label="Inference Steps")
+            guidance_slider = gr.Slider(minimum=0.0, maximum=50.0, value=7.5, step=0.1, label="Guidance Scale")
+            bt_diffusion = gr.Button("Run Diffusion")
+
+    inpainted_image = gr.Image(type='pil', label="Inpainted Image")
+
+    # TODO: saw a better way of handling many inputs online; forgot where though
+    update_mask_inputs = [input_image, mask_storage, mask_checkboxes, max_slider, min_slider, invert_mask]
+    update_mask_outputs = [mask_image, masked_image]
+
+    # Reset the checkbox group and invert flag whenever the input image changes
+    input_image.change(lambda: gr.CheckboxGroup.update(choices=[], value=[]), outputs=mask_checkboxes)
+    input_image.change(lambda: gr.Checkbox.update(value=False), outputs=invert_mask)
+
+    # Segmentation button callback
+    bt_masks.click(fn_segmentation, inputs=[input_image, max_slider, min_slider], outputs=[mask_storage, mask_checkboxes, mask_image, masked_image])
+
+    # Update mask callbacks
+    max_slider.change(fn_update_mask, inputs=update_mask_inputs, outputs=update_mask_outputs, show_progress=False)
+    min_slider.change(fn_update_mask, inputs=update_mask_inputs, outputs=update_mask_outputs, show_progress=False)
+    mask_checkboxes.change(fn_update_mask, inputs=update_mask_inputs, outputs=update_mask_outputs, show_progress=False)
+    invert_mask.change(fn_update_mask, inputs=update_mask_inputs, outputs=update_mask_outputs, show_progress=False)
+
+    # Diffusion button callback
+    bt_diffusion.click(fn_diffusion, inputs=[
+        prompt,
+        masked_image,
+        mask_image,
+        steps_slider,
+        guidance_slider,
+        negative_prompt
+    ], outputs=inpainted_image)
+    gr.HTML(open('app_license.html').read())
+
+demo.launch()
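The erode-then-dilate trick in `clean_mask` is worth seeing in isolation: a small erosion (`min_pool`, the "Mask Denoising" slider) deletes speckles narrower than its kernel, and a larger dilation (`max_pool`, "Mask Overflow") grows what survives past the original segment boundary. A self-contained sketch using the same pooling functions as `app.py`:

```python
import torch
import torch.nn.functional as F

def min_pool(x: torch.Tensor, kernel_size: int) -> torch.Tensor:
    # Erosion: min-pooling expressed as max-pooling of the negated input
    pad = (kernel_size - 1) // 2
    return -F.max_pool2d(-x, kernel_size, (1, 1), padding=pad)

def max_pool(x: torch.Tensor, kernel_size: int) -> torch.Tensor:
    # Dilation
    pad = (kernel_size - 1) // 2
    return F.max_pool2d(x, kernel_size, (1, 1), padding=pad)

# 7x7 mask: a 3x3 blob plus a single-pixel speckle in the corner.
m = torch.zeros(1, 1, 7, 7)
m[0, 0, 2:5, 2:5] = 1.0  # blob: a 3x3 erosion reduces it to its centre pixel
m[0, 0, 0, 6] = 1.0      # speckle: erased entirely by the erosion
cleaned = max_pool(min_pool(m, 3), 5)  # erode with 3, dilate back out with 5
print(cleaned[0, 0].int())  # a 5x5 block of ones; the speckle is gone
```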
app_header.html
ADDED
@@ -0,0 +1,58 @@
+<div style="text-align: center; max-width: 650px; margin: 0 auto;">
+  <div
+    style="
+      display: inline-flex;
+      align-items: center;
+      gap: 0.8rem;
+      font-size: 1.75rem;
+    "
+  >
+    <svg
+      width="0.65em"
+      height="0.65em"
+      viewBox="0 0 115 115"
+      fill="none"
+      xmlns="http://www.w3.org/2000/svg"
+    >
+      <rect width="23" height="23" fill="white"></rect>
+      <rect y="69" width="23" height="23" fill="white"></rect>
+      <rect x="23" width="23" height="23" fill="#AEAEAE"></rect>
+      <rect x="23" y="69" width="23" height="23" fill="#AEAEAE"></rect>
+      <rect x="46" width="23" height="23" fill="white"></rect>
+      <rect x="46" y="69" width="23" height="23" fill="white"></rect>
+      <rect x="69" width="23" height="23" fill="black"></rect>
+      <rect x="69" y="69" width="23" height="23" fill="black"></rect>
+      <rect x="92" width="23" height="23" fill="#D9D9D9"></rect>
+      <rect x="92" y="69" width="23" height="23" fill="#AEAEAE"></rect>
+      <rect x="115" y="46" width="23" height="23" fill="white"></rect>
+      <rect x="115" y="115" width="23" height="23" fill="white"></rect>
+      <rect x="115" y="69" width="23" height="23" fill="#D9D9D9"></rect>
+      <rect x="92" y="46" width="23" height="23" fill="#AEAEAE"></rect>
+      <rect x="92" y="115" width="23" height="23" fill="#AEAEAE"></rect>
+      <rect x="92" y="69" width="23" height="23" fill="white"></rect>
+      <rect x="69" y="46" width="23" height="23" fill="white"></rect>
+      <rect x="69" y="115" width="23" height="23" fill="white"></rect>
+      <rect x="69" y="69" width="23" height="23" fill="#D9D9D9"></rect>
+      <rect x="46" y="46" width="23" height="23" fill="black"></rect>
+      <rect x="46" y="115" width="23" height="23" fill="black"></rect>
+      <rect x="46" y="69" width="23" height="23" fill="black"></rect>
+      <rect x="23" y="46" width="23" height="23" fill="#D9D9D9"></rect>
+      <rect x="23" y="115" width="23" height="23" fill="#AEAEAE"></rect>
+      <rect x="23" y="69" width="23" height="23" fill="black"></rect>
+    </svg>
+    <h1 style="font-weight: 900; margin-bottom: 7px; margin-top: 7px">
+      Stable Diffusion x Segmentation Masking 😷
+    </h1>
+  </div>
+  <p style="margin-bottom: 10px; font-size: 94%">
+    Stable Diffusion is a state-of-the-art model that generates images from
+    text. Finetuned for inpainting, it can regenerate a masked region of a
+    starting image to match a text prompt.
+  </p>
+  <p style="margin-bottom: 10px; font-size: 94%">
+    However, depending on how complex the region you want to replace is,
+    drawing the mask by hand can be tedious. This demo uses a segmentation
+    model to generate per-segment masks for you, which can be combined into
+    the final diffusion mask.
+  </p>
+</div>
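The combination step the header describes is a boolean OR over the selected per-segment masks, mirroring `fn_update_mask` in `app.py`. A toy sketch with two 4x4 masks (values 0/255, as produced by `fn_segmentation`):

```python
from functools import reduce
import numpy as np

mask_a = np.array([[255, 255, 0, 0]] * 4, dtype=np.uint8)  # left strip
mask_b = np.array([[0, 0, 0, 255]] * 4, dtype=np.uint8)    # right strip

# OR the selected masks together, starting from an all-false mask.
selected = [mask_a, mask_b]
combined = reduce(lambda x, y: x | y.astype(bool),
                  selected, np.zeros((4, 4), dtype=bool))
print(combined.astype(np.uint8) * 255)  # both strips set, middle columns zero
```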
app_license.html
ADDED
@@ -0,0 +1,27 @@
+<div class="acknowledgments">
+  <h4>LICENSE</h4>
+  <p>The model is licensed under the <a
+  href="https://huggingface.co/spaces/CompVis/stable-diffusion-license"
+  style="text-decoration: underline;" target="_blank">CreativeML Open RAIL-M</a>
+  license. The authors claim no rights over the outputs you generate; you are
+  free to use them, but you are accountable for their use, which must not go
+  against the provisions set in the license. Among other things, the license
+  forbids sharing content that violates laws, harms a person, disseminates
+  personal information with the intent to harm, spreads misinformation, or
+  targets vulnerable groups. For the full list of restrictions please <a
+  href="https://huggingface.co/spaces/CompVis/stable-diffusion-license"
+  style="text-decoration: underline;" target="_blank">read the
+  license</a>.</p>
+  <h4>Biases and content acknowledgment</h4>
+  <p>However impressive text-to-image generation is, be aware that this model
+  may output content that reinforces or exacerbates societal biases, as well
+  as realistic faces, pornography and violence. The model was trained on the
+  <a href="https://laion.ai/blog/laion-5b/" style="text-decoration:
+  underline;" target="_blank">LAION-5B dataset</a>, which scraped non-curated
+  image-text pairs from the internet (the exception being the removal of
+  illegal content) and is meant for research purposes. You can read more in
+  the <a href="https://huggingface.co/CompVis/stable-diffusion-v1-4"
+  style="text-decoration: underline;" target="_blank">model card</a>. Additionally,
+  you can read more about the inpainting finetuning process in this
+  <a href="https://huggingface.co/runwayml/stable-diffusion-inpainting" style="text-decoration: underline;" target="_blank">model card</a>.</p>
+</div>
example.png
ADDED
requirements.txt
ADDED
@@ -0,0 +1,12 @@
+--extra-index-url https://download.pytorch.org/whl/cu113
+torch
+
+build==0.6.0
+diffusers==0.9.0
+ftfy==6.1.1
+gradio==3.9.1
+timm==0.6.11
+transformers==4.22.1
+accelerate
+
+https://github.com/apolinario/xformers/releases/download/0.0.3/xformers-0.0.14.dev0-cp38-cp38-linux_x86_64.whl