Harisreedhar
committed on
Commit
·
7f475d2
1
Parent(s):
27c3130
Add soft erosion and fix face parsing video
Browse files- app.py +40 -24
- face_parsing/__init__.py +1 -1
- face_parsing/swap.py +60 -17
- swapper.py +3 -3
app.py
CHANGED
|
@@ -17,7 +17,7 @@ from moviepy.editor import VideoFileClip, ImageSequenceClip
|
|
| 17 |
|
| 18 |
from face_analyser import detect_conditions, analyse_face
|
| 19 |
from utils import trim_video, StreamerThread, ProcessBar, open_directory
|
| 20 |
-
from face_parsing import init_parser, swap_regions, mask_regions, mask_regions_to_list
|
| 21 |
from swapper import (
|
| 22 |
swap_face,
|
| 23 |
swap_face_with_condition,
|
|
@@ -59,8 +59,9 @@ MASK_INCLUDE = [
|
|
| 59 |
"L-Lip",
|
| 60 |
"U-Lip"
|
| 61 |
]
|
| 62 |
-
|
| 63 |
-
|
|
|
|
| 64 |
|
| 65 |
FACE_SWAPPER = None
|
| 66 |
FACE_ANALYSER = None
|
|
@@ -84,6 +85,8 @@ else:
|
|
| 84 |
USE_CUDA = False
|
| 85 |
print("\n********** Running on CPU **********\n")
|
| 86 |
|
|
|
|
|
|
|
| 87 |
|
| 88 |
## ------------------------------ LOAD MODELS ------------------------------
|
| 89 |
|
|
@@ -114,7 +117,7 @@ def load_face_parser_model(name="./assets/pretrained_models/79999_iter.pth"):
|
|
| 114 |
global FACE_PARSER
|
| 115 |
path = os.path.join(os.path.abspath(os.path.dirname(__file__)), name)
|
| 116 |
if FACE_PARSER is None:
|
| 117 |
-
FACE_PARSER = init_parser(name,
|
| 118 |
|
| 119 |
|
| 120 |
load_face_analyser_model()
|
|
@@ -137,9 +140,10 @@ def process(
|
|
| 137 |
distance,
|
| 138 |
face_enhance,
|
| 139 |
enable_face_parser,
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
|
|
|
| 143 |
*specifics,
|
| 144 |
):
|
| 145 |
global WORKSPACE
|
|
@@ -196,14 +200,18 @@ def process(
|
|
| 196 |
|
| 197 |
yield "### \n ⌛ Analysing Face...", *ui_before()
|
| 198 |
|
| 199 |
-
|
| 200 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 201 |
models = {
|
| 202 |
"swap": FACE_SWAPPER,
|
| 203 |
"enhance": FACE_ENHANCER,
|
| 204 |
"enhance_sett": face_enhance,
|
| 205 |
"face_parser": FACE_PARSER,
|
| 206 |
-
"face_parser_sett": (enable_face_parser,
|
| 207 |
}
|
| 208 |
|
| 209 |
## ------------------------------ ANALYSE SOURCE & SPECIFIC ------------------------------
|
|
@@ -301,9 +309,9 @@ def process(
|
|
| 301 |
|
| 302 |
if condition == "Specific Face":
|
| 303 |
swapped = swap_specific(
|
| 304 |
-
frame,
|
| 305 |
-
analysed_target,
|
| 306 |
analysed_source_specific,
|
|
|
|
|
|
|
| 307 |
models,
|
| 308 |
threshold=distance,
|
| 309 |
)
|
|
@@ -381,9 +389,9 @@ def process(
|
|
| 381 |
|
| 382 |
if condition == "Specific Face":
|
| 383 |
swapped = swap_specific(
|
| 384 |
-
target,
|
| 385 |
-
analysed_target,
|
| 386 |
analysed_source_specific,
|
|
|
|
|
|
|
| 387 |
models,
|
| 388 |
threshold=distance,
|
| 389 |
)
|
|
@@ -636,16 +644,23 @@ with gr.Blocks(css=css) as interface:
|
|
| 636 |
label="Include",
|
| 637 |
interactive=True,
|
| 638 |
)
|
| 639 |
-
|
| 640 |
-
|
| 641 |
-
value=
|
| 642 |
-
|
| 643 |
-
label="Exclude",
|
| 644 |
interactive=True,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 645 |
)
|
| 646 |
-
|
| 647 |
-
label="Blur
|
| 648 |
-
value=
|
| 649 |
minimum=0,
|
| 650 |
interactive=True,
|
| 651 |
)
|
|
@@ -827,8 +842,9 @@ with gr.Blocks(css=css) as interface:
|
|
| 827 |
enable_face_enhance,
|
| 828 |
enable_face_parser_mask,
|
| 829 |
mask_include,
|
| 830 |
-
|
| 831 |
-
|
|
|
|
| 832 |
*src_specific_inputs,
|
| 833 |
]
|
| 834 |
|
|
|
|
| 17 |
|
| 18 |
from face_analyser import detect_conditions, analyse_face
|
| 19 |
from utils import trim_video, StreamerThread, ProcessBar, open_directory
|
| 20 |
+
from face_parsing import init_parser, swap_regions, mask_regions, mask_regions_to_list, SoftErosion
|
| 21 |
from swapper import (
|
| 22 |
swap_face,
|
| 23 |
swap_face_with_condition,
|
|
|
|
| 59 |
"L-Lip",
|
| 60 |
"U-Lip"
|
| 61 |
]
|
| 62 |
+
MASK_SOFT_KERNEL = 17
|
| 63 |
+
MASK_SOFT_ITERATIONS = 7
|
| 64 |
+
MASK_BLUR_AMOUNT = 20
|
| 65 |
|
| 66 |
FACE_SWAPPER = None
|
| 67 |
FACE_ANALYSER = None
|
|
|
|
| 85 |
USE_CUDA = False
|
| 86 |
print("\n********** Running on CPU **********\n")
|
| 87 |
|
| 88 |
+
device = "cuda" if USE_CUDA else "cpu"
|
| 89 |
+
|
| 90 |
|
| 91 |
## ------------------------------ LOAD MODELS ------------------------------
|
| 92 |
|
|
|
|
| 117 |
global FACE_PARSER
|
| 118 |
path = os.path.join(os.path.abspath(os.path.dirname(__file__)), name)
|
| 119 |
if FACE_PARSER is None:
|
| 120 |
+
FACE_PARSER = init_parser(name, mode=device)
|
| 121 |
|
| 122 |
|
| 123 |
load_face_analyser_model()
|
|
|
|
| 140 |
distance,
|
| 141 |
face_enhance,
|
| 142 |
enable_face_parser,
|
| 143 |
+
mask_includes,
|
| 144 |
+
mask_soft_kernel,
|
| 145 |
+
mask_soft_iterations,
|
| 146 |
+
blur_amount,
|
| 147 |
*specifics,
|
| 148 |
):
|
| 149 |
global WORKSPACE
|
|
|
|
| 200 |
|
| 201 |
yield "### \n ⌛ Analysing Face...", *ui_before()
|
| 202 |
|
| 203 |
+
includes = mask_regions_to_list(mask_includes)
|
| 204 |
+
if mask_soft_iterations > 0:
|
| 205 |
+
smooth_mask = SoftErosion(kernel_size=17, threshold=0.9, iterations=int(mask_soft_iterations)).to(device)
|
| 206 |
+
else:
|
| 207 |
+
smooth_mask = None
|
| 208 |
+
|
| 209 |
models = {
|
| 210 |
"swap": FACE_SWAPPER,
|
| 211 |
"enhance": FACE_ENHANCER,
|
| 212 |
"enhance_sett": face_enhance,
|
| 213 |
"face_parser": FACE_PARSER,
|
| 214 |
+
"face_parser_sett": (enable_face_parser, includes, smooth_mask, int(blur_amount))
|
| 215 |
}
|
| 216 |
|
| 217 |
## ------------------------------ ANALYSE SOURCE & SPECIFIC ------------------------------
|
|
|
|
| 309 |
|
| 310 |
if condition == "Specific Face":
|
| 311 |
swapped = swap_specific(
|
|
|
|
|
|
|
| 312 |
analysed_source_specific,
|
| 313 |
+
analysed_target,
|
| 314 |
+
frame,
|
| 315 |
models,
|
| 316 |
threshold=distance,
|
| 317 |
)
|
|
|
|
| 389 |
|
| 390 |
if condition == "Specific Face":
|
| 391 |
swapped = swap_specific(
|
|
|
|
|
|
|
| 392 |
analysed_source_specific,
|
| 393 |
+
analysed_target,
|
| 394 |
+
target,
|
| 395 |
models,
|
| 396 |
threshold=distance,
|
| 397 |
)
|
|
|
|
| 644 |
label="Include",
|
| 645 |
interactive=True,
|
| 646 |
)
|
| 647 |
+
mask_soft_kernel = gr.Number(
|
| 648 |
+
label="Soft Erode Kernel",
|
| 649 |
+
value=MASK_SOFT_KERNEL,
|
| 650 |
+
minimum=3,
|
|
|
|
| 651 |
interactive=True,
|
| 652 |
+
visible = False
|
| 653 |
+
)
|
| 654 |
+
mask_soft_iterations = gr.Number(
|
| 655 |
+
label="Soft Erode Iterations",
|
| 656 |
+
value=MASK_SOFT_ITERATIONS,
|
| 657 |
+
minimum=0,
|
| 658 |
+
interactive=True,
|
| 659 |
+
|
| 660 |
)
|
| 661 |
+
blur_amount = gr.Number(
|
| 662 |
+
label="Mask Blur",
|
| 663 |
+
value=MASK_BLUR_AMOUNT,
|
| 664 |
minimum=0,
|
| 665 |
interactive=True,
|
| 666 |
)
|
|
|
|
| 842 |
enable_face_enhance,
|
| 843 |
enable_face_parser_mask,
|
| 844 |
mask_include,
|
| 845 |
+
mask_soft_kernel,
|
| 846 |
+
mask_soft_iterations,
|
| 847 |
+
blur_amount,
|
| 848 |
*src_specific_inputs,
|
| 849 |
]
|
| 850 |
|
face_parsing/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
from .swap import init_parser, swap_regions, mask_regions, mask_regions_to_list
|
|
|
|
| 1 |
+
from .swap import init_parser, swap_regions, mask_regions, mask_regions_to_list, SoftErosion
|
face_parsing/swap.py
CHANGED
|
@@ -1,4 +1,6 @@
|
|
| 1 |
import torch
|
|
|
|
|
|
|
| 2 |
import torchvision.transforms as transforms
|
| 3 |
import cv2
|
| 4 |
import numpy as np
|
|
@@ -27,15 +29,44 @@ mask_regions = {
|
|
| 27 |
"Hat":18
|
| 28 |
}
|
| 29 |
|
| 30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
n_classes = 19
|
| 37 |
net = BiSeNet(n_classes=n_classes)
|
| 38 |
-
if
|
| 39 |
net.cuda()
|
| 40 |
net.load_state_dict(torch.load(pth_path))
|
| 41 |
else:
|
|
@@ -55,8 +86,7 @@ def image_to_parsing(img, net):
|
|
| 55 |
img = torch.unsqueeze(img, 0)
|
| 56 |
|
| 57 |
with torch.no_grad():
|
| 58 |
-
|
| 59 |
-
img = img.cuda()
|
| 60 |
out = net(img)[0]
|
| 61 |
parsing = out.squeeze(0).cpu().numpy().argmax(0)
|
| 62 |
return parsing
|
|
@@ -68,20 +98,33 @@ def get_mask(parsing, classes):
|
|
| 68 |
res += parsing == val
|
| 69 |
return res
|
| 70 |
|
| 71 |
-
def swap_regions(source, target, net, includes=[1,2,3,4,5,10,11,12,13],
|
| 72 |
parsing = image_to_parsing(source, net)
|
|
|
|
| 73 |
if len(includes) == 0:
|
| 74 |
return source, np.zeros_like(source)
|
|
|
|
| 75 |
include_mask = get_mask(parsing, includes)
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
|
| 86 |
def mask_regions_to_list(values):
|
| 87 |
out_ids = []
|
|
|
|
| 1 |
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
import torch.nn.functional as F
|
| 4 |
import torchvision.transforms as transforms
|
| 5 |
import cv2
|
| 6 |
import numpy as np
|
|
|
|
| 29 |
"Hat":18
|
| 30 |
}
|
| 31 |
|
| 32 |
+
# Borrowed from simswap
|
| 33 |
+
# https://github.com/neuralchen/SimSwap/blob/26c84d2901bd56eda4d5e3c5ca6da16e65dc82a6/util/reverse2original.py#L30
|
| 34 |
+
class SoftErosion(nn.Module):
    """Soften a mask by repeatedly smoothing it with a cone-shaped kernel.

    Each pass convolves the mask with a normalized kernel whose weights fall
    off linearly with distance from the centre. After the final pass, values
    at or above ``threshold`` are set to 1.0 and the remaining values are
    rescaled so that their maximum becomes 1.0.

    Args:
        kernel_size: Side length of the square kernel. The padding is
            ``kernel_size // 2``, so only odd sizes keep the spatial size of
            the input unchanged.
        threshold: Smoothed values greater than or equal to this are treated
            as fully inside the mask.
        iterations: Number of smoothing passes. At least one convolution is
            always applied, even for values below 1.
    """

    def __init__(self, kernel_size=15, threshold=0.6, iterations=1):
        super(SoftErosion, self).__init__()
        r = kernel_size // 2
        self.padding = r
        self.iterations = iterations
        self.threshold = threshold

        # Build the distance-to-centre grid with broadcasting; this yields
        # the same values as a meshgrid of row/column indices without relying
        # on torch.meshgrid's version-dependent ``indexing`` default.
        offsets = torch.arange(0., kernel_size) - r
        dist = torch.sqrt(offsets.view(1, -1) ** 2 + offsets.view(-1, 1) ** 2)
        kernel = dist.max() - dist
        kernel /= kernel.sum()
        kernel = kernel.view(1, 1, *kernel.shape)
        # Registered as a buffer so that ``.to(device)`` moves it with the module.
        self.register_buffer('weight', kernel)

    def forward(self, x):
        """Return ``(soft_mask, hard_mask)`` for the input mask ``x``.

        Args:
            x: Mask tensor. The kernel has a single output channel, so the
                grouped convolution expects a single-channel 4-D input
                (presumably ``(N, 1, H, W)`` -- confirm against callers).

        Returns:
            A tuple ``(x, mask)`` where ``x`` is the softened float mask and
            ``mask`` is the boolean tensor of positions that reached
            ``threshold``.
        """
        x = x.float()
        # All passes but the last take the element-wise minimum with the
        # input, so the mask can only shrink.
        for _ in range(self.iterations - 1):
            x = torch.min(x, F.conv2d(x, weight=self.weight, groups=x.shape[1], padding=self.padding))
        x = F.conv2d(x, weight=self.weight, groups=x.shape[1], padding=self.padding)

        mask = x >= self.threshold
        x[mask] = 1.0

        # Rescale the soft border only when there is something to rescale:
        # an empty selection would make ``max()`` raise, and a zero peak
        # would turn the whole border into NaN (0 / 0).
        soft = ~mask
        if soft.any():
            peak = x[soft].max()
            if peak != 0:
                x[soft] /= peak

        return x, mask
|
| 61 |
+
|
| 62 |
+
device = "cpu"
|
| 63 |
+
|
| 64 |
+
def init_parser(pth_path, mode="cpu"):
|
| 65 |
+
global device
|
| 66 |
+
device = mode
|
| 67 |
n_classes = 19
|
| 68 |
net = BiSeNet(n_classes=n_classes)
|
| 69 |
+
if device == "cuda":
|
| 70 |
net.cuda()
|
| 71 |
net.load_state_dict(torch.load(pth_path))
|
| 72 |
else:
|
|
|
|
| 86 |
img = torch.unsqueeze(img, 0)
|
| 87 |
|
| 88 |
with torch.no_grad():
|
| 89 |
+
img = img.to(device)
|
|
|
|
| 90 |
out = net(img)[0]
|
| 91 |
parsing = out.squeeze(0).cpu().numpy().argmax(0)
|
| 92 |
return parsing
|
|
|
|
| 98 |
res += parsing == val
|
| 99 |
return res
|
| 100 |
|
| 101 |
+
def swap_regions(source, target, net, smooth_mask, includes=None, blur=10):
    """Blend selected face-parsing regions of ``source`` over ``target``.

    The parsing network segments ``source``; pixels whose class is listed in
    ``includes`` are taken from ``source`` and all others from ``target``.
    The mask can be softened with ``smooth_mask`` and a Gaussian blur before
    blending.

    Args:
        source: Image whose selected regions are kept (divided by 255 here,
            so presumably an 8-bit image -- confirm against callers).
        target: Image supplying every pixel outside the selected regions.
        net: Face-parsing network passed through to ``image_to_parsing``.
        smooth_mask: Optional callable returning ``(soft_mask, hard_mask)``
            for a ``(1, 1, H, W)`` tensor, or ``None`` to skip softening.
        includes: Parsing class ids to keep from ``source``. ``None`` selects
            the default set ``[1, 2, 3, 4, 5, 10, 11, 12, 13]``; an empty
            sequence disables the swap.
        blur: Gaussian sigma applied to the mask; values <= 0 skip the blur.

    Returns:
        The blended ``uint8`` image at the size of ``source``. When
        ``includes`` is empty, ``source`` is returned unchanged.
    """
    if includes is None:
        includes = [1, 2, 3, 4, 5, 10, 11, 12, 13]

    # Nothing selected: return a single image, like the normal path, and do
    # so before running the parsing network.
    if len(includes) == 0:
        return source

    parsing = image_to_parsing(source, net)
    include_mask = get_mask(parsing, includes)
    mask = np.repeat(include_mask[:, :, np.newaxis], 3, axis=2).astype("float32")

    if smooth_mask is not None:
        mask_tensor = torch.from_numpy(mask.copy().transpose((2, 0, 1))).float().to(device)
        # Channels 0 and 1 are copies of the same mask, so this doubles the
        # mask values before softening (kept as in the borrowed SimSwap code).
        face_mask_tensor = mask_tensor[0] + mask_tensor[1]
        soft_face_mask_tensor, _ = smooth_mask(face_mask_tensor.unsqueeze_(0).unsqueeze_(0))
        soft_face_mask_tensor.squeeze_()
        mask = np.repeat(soft_face_mask_tensor.cpu().numpy()[:, :, np.newaxis], 3, axis=2)

    if blur > 0:
        # A (0, 0) kernel size lets OpenCV derive the kernel from the sigma.
        mask = cv2.GaussianBlur(mask, (0, 0), blur)

    # Blend at the mask's own resolution instead of assuming a fixed size.
    mask_h, mask_w = mask.shape[:2]
    resized_source = cv2.resize((source / 255).astype("float32"), (mask_w, mask_h))
    resized_target = cv2.resize((target / 255).astype("float32"), (mask_w, mask_h))

    result = mask * resized_source + (1 - mask) * resized_target
    # Keep values in [0, 1] so the uint8 conversion cannot wrap around.
    result = np.clip(result, 0.0, 1.0)
    result = cv2.resize((result * 255).astype("uint8"), (source.shape[1], source.shape[0]))

    return result
|
| 128 |
|
| 129 |
def mask_regions_to_list(values):
|
| 130 |
out_ids = []
|
swapper.py
CHANGED
|
@@ -25,10 +25,10 @@ def swap_face(whole_img, target_face, source_face, models):
|
|
| 25 |
aimg, _ = face_align.norm_crop2(whole_img, target_face.kps, image_size=image_size)
|
| 26 |
|
| 27 |
if face_parser is not None:
|
| 28 |
-
fp_enable,
|
| 29 |
if fp_enable:
|
| 30 |
-
bgr_fake
|
| 31 |
-
bgr_fake, aimg, face_parser,
|
| 32 |
)
|
| 33 |
|
| 34 |
if fe_enable:
|
|
|
|
| 25 |
aimg, _ = face_align.norm_crop2(whole_img, target_face.kps, image_size=image_size)
|
| 26 |
|
| 27 |
if face_parser is not None:
|
| 28 |
+
fp_enable, includes, smooth_mask, blur_amount = models.get("face_parser_sett")
|
| 29 |
if fp_enable:
|
| 30 |
+
bgr_fake = swap_regions(
|
| 31 |
+
bgr_fake, aimg, face_parser, smooth_mask, includes=includes, blur=blur_amount
|
| 32 |
)
|
| 33 |
|
| 34 |
if fe_enable:
|