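# Spaces: Running
# (The line above appears to be a status banner captured from the hosting page;
# it is not part of the program.)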
import gradio as gr | |
import cv2 | |
import numpy as np | |
import mediapipe as mp | |
import time | |
import tempfile | |
import os | |
# --- MediaPipe Initialization ---
# One module-level FaceMesh instance is created here and reused by the
# face-detection helpers in this file. When set-up fails, `face_mesh` is left
# as None so callers can test for it.
try:
    mp_face_mesh = mp.solutions.face_mesh
    face_mesh = mp_face_mesh.FaceMesh(
        static_image_mode=True,
        max_num_faces=1,
        refine_landmarks=True,
        min_detection_confidence=0.4,
        min_tracking_confidence=0.4,
    )
except (ImportError, AttributeError):
    print("Error: Could not initialize MediaPipe Face Mesh. Is mediapipe installed correctly?")
    face_mesh = None
else:
    print("MediaPipe Face Mesh initialized successfully.")
# --- Helper Functions --- | |
def get_face_mask_box(img, feather_pct, padding_pct):
    """Build a face mask for ``img`` and the padded box that the mask covers.

    The face is located with the module-level ``face_mesh``. The convex hull of
    its landmarks is filled into a full-frame mask, the hull's bounding box is
    grown on every side by ``padding_pct`` of its larger dimension (clamped to
    the image), and the mask is cropped to that box and optionally blurred.

    Args:
        img: BGR image array (it is converted to RGB before detection).
        feather_pct: Gaussian kernel size expressed as a fraction of the
            smaller side of the padded box; ``0`` disables the blur.
        padding_pct: Padding expressed as a fraction of the larger side of the
            face bounding box.

    Returns:
        ``(mask_roi, (x, y, w, h))`` where ``mask_roi`` is a uint8 mask of
        shape ``(h, w)`` and the tuple is the padded box in image coordinates.
        ``(None, None)`` when ``img`` is None, the face mesh is unavailable,
        no face is detected, or the padded box does not overlap the image.
    """
    # Same guard as get_landmarks(): a missing image or a failed MediaPipe
    # initialisation means "no face" rather than an AttributeError.
    if img is None or face_mesh is None:
        return None, None

    h, w = img.shape[:2]
    results = face_mesh.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    if not results.multi_face_landmarks:
        return None, None

    landmarks = results.multi_face_landmarks[0].landmark
    pts = np.array([(int(p.x * w), int(p.y * h)) for p in landmarks], np.int32)
    hull = cv2.convexHull(pts)
    mask = np.zeros((h, w), dtype=np.uint8)
    cv2.fillConvexPoly(mask, hull, 255)
    x, y, bw, bh = cv2.boundingRect(hull)

    # Padding in pixels, then clamp the padded box to the image.
    pad = int(max(bw, bh) * padding_pct)
    x_pad = max(x - pad, 0)
    y_pad = max(y - pad, 0)
    x2 = min(x + bw + pad, w)
    y2 = min(y + bh + pad, h)

    # The hull can lie outside the frame. Without this check a negative x2/y2
    # would be read as a from-the-end slice index and the returned box would
    # have a non-positive size.
    if x2 <= x_pad or y2 <= y_pad:
        return None, None

    mask_roi = mask[y_pad:y2, x_pad:x2]

    # Feather: kernel proportional to the padded box size.
    if feather_pct > 0:
        k = int(min(mask_roi.shape[0], mask_roi.shape[1]) * feather_pct)
        if k % 2 == 0:
            k += 1  # GaussianBlur needs an odd kernel size
        mask_roi = cv2.GaussianBlur(mask_roi, (k, k), 0)

    return mask_roi, (x_pad, y_pad, x2 - x_pad, y2 - y_pad)
def cut_and_feather(img, feather):
    """Cut the detected face out of ``img`` with a feathered edge.

    The face is located with the module-level ``face_mesh``. The convex hull of
    its landmarks is filled into a mask, the mask is blurred with a Gaussian
    kernel of size ``2 * int(feather) + 1``, and the face bounding box
    (clamped to the image) is returned with every pixel scaled by the mask.

    Args:
        img: BGR image array (it is converted to RGB before detection).
        feather: Blur radius in pixels; values below 1 disable the blur.

    Returns:
        ``(out, mask_roi, (x, y, w, h))``: the masked uint8 face crop, the
        uint8 mask crop and the box in image coordinates. When no usable face
        is available (mesh unavailable, no detection, or the face box does not
        overlap the image) the result is ``(np.zeros_like(img), None, None)``.
        When ``img`` itself is None the result is ``(None, None, None)``.
    """
    if img is None:
        return None, None, None
    if face_mesh is None:
        return np.zeros_like(img), None, None

    h, w = img.shape[:2]
    results = face_mesh.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    if not results.multi_face_landmarks:
        return np.zeros_like(img), None, None

    landmarks = results.multi_face_landmarks[0].landmark
    pts = np.array([(int(p.x * w), int(p.y * h)) for p in landmarks], np.int32)
    hull = cv2.convexHull(pts)
    mask = np.zeros((h, w), dtype=np.uint8)
    cv2.fillConvexPoly(mask, hull, 255)

    # Bounding box, clamped to the frame: the hull may extend past the image,
    # and a negative start index would be read as a from-the-end slice.
    x, y, bw, bh = cv2.boundingRect(hull)
    x0, y0 = max(x, 0), max(y, 0)
    x1, y1 = min(x + bw, w), min(y + bh, h)
    if x1 <= x0 or y1 <= y0:
        return np.zeros_like(img), None, None

    # Feather the full-frame mask before cropping.
    k = int(feather)
    if k > 0:
        mask = cv2.GaussianBlur(mask, (k * 2 + 1, k * 2 + 1), 0)

    face_roi = img[y0:y1, x0:x1]
    mask_roi = mask[y0:y1, x0:x1]

    # Scaling by alpha already zeroes every pixel where the mask is 0, so no
    # separate bitwise masking step is needed.
    alpha = mask_roi.astype(np.float32) / 255.0
    out = (face_roi.astype(np.float32) * alpha[..., None]).astype(np.uint8)
    return out, mask_roi, (x0, y0, x1 - x0, y1 - y0)
def get_landmarks(img, landmark_step=1):
    """Return face landmark pixel coordinates followed by the image corners.

    Args:
        img: BGR image array with three dimensions (it is converted to RGB
            before detection).
        landmark_step: Keep every ``landmark_step``-th landmark when greater
            than 1; otherwise keep all of them.

    Returns:
        A float32 array of ``(x, y)`` rows: the kept landmarks of the first
        detected face, then the four image corners. ``None`` when ``img`` or
        the module-level ``face_mesh`` is missing, detection raises, no face
        is found, or a kept coordinate is not finite.
    """
    if img is None or face_mesh is None:
        return None

    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    try:
        detection = face_mesh.process(rgb)
    except Exception:
        # Best effort: a detector failure is reported as "no landmarks".
        return None

    faces = detection.multi_face_landmarks
    if not faces:
        return None

    height, width, _ = img.shape
    coords = np.array(
        [(lm.x * width, lm.y * height) for lm in faces[0].landmark],
        dtype=np.float32,
    )
    if landmark_step > 1:
        coords = coords[::landmark_step]
    if not np.isfinite(coords).all():
        return None

    # The four corners are appended after the face landmarks.
    corner_pts = np.array(
        [[0, 0], [width - 1, 0], [0, height - 1], [width - 1, height - 1]],
        dtype=np.float32,
    )
    return np.vstack((coords, corner_pts))
def calculate_delaunay_triangles(rect, points):
    """Triangulate ``points`` inside ``rect`` and return index triples.

    Args:
        rect: ``(x, y, width, height)`` bounds handed to ``cv2.Subdiv2D``.
        points: Array of ``(x, y)`` rows. NOTE: it is clamped to ``rect``
            IN PLACE, so the caller's array is modified.

    Returns:
        A list of ``[i, j, k]`` lists indexing into ``points``, one per
        triangle whose three vertices lie inside ``rect`` and map to three
        distinct inserted points. An empty list when ``points`` is None or
        has fewer than three entries.
    """
    if points is None or len(points) < 3:
        return []

    left, top = rect[0], rect[1]
    right, bottom = rect[0] + rect[2], rect[1] + rect[3]

    # In-place clamp so every point can be inserted into the subdivision.
    points[:, 0] = np.clip(points[:, 0], left, right - 1)
    points[:, 1] = np.clip(points[:, 1], top, bottom - 1)

    subdiv = cv2.Subdiv2D(rect)

    # Integer vertex -> index of the first point that produced that vertex.
    index_of = {}
    for idx, pt in enumerate(points):
        vertex = (int(pt[0]), int(pt[1]))
        if vertex in index_of:
            continue
        try:
            subdiv.insert(vertex)
        except cv2.error:
            continue
        index_of[vertex] = idx

    def inside(px, py):
        return left <= px < right and top <= py < bottom

    triangles = []
    for tri in subdiv.getTriangleList():
        corners = [(int(tri[k]), int(tri[k + 1])) for k in (0, 2, 4)]
        if not all(inside(px, py) for px, py in corners):
            continue
        ids = [index_of.get(corner) for corner in corners]
        if None in ids or len(set(ids)) != 3:
            continue
        triangles.append(ids)
    return triangles
def warp_triangle(img1, img2, t1, t2):
    """Affine-warp triangle ``t1`` of ``img1`` onto triangle ``t2`` of ``img2``.

    ``img2`` is modified in place: inside the bounding rectangle of ``t2`` the
    existing pixels are blended with the warped patch using a triangle mask.
    ``warped *= mask`` multiplies the warp result by a float32 mask in place,
    so this works when ``img1`` holds float data.

    Args:
        img1: Source image.
        img2: Destination image, updated in place.
        t1: Three ``(x, y)`` vertices in ``img1``.
        t2: Three ``(x, y)`` vertices in ``img2``.

    Returns:
        None. The call is a no-op when either triangle does not have exactly
        three vertices, a bounding rectangle is degenerate, or the source
        patch is empty.
    """
    if len(t1) != 3 or len(t2) != 3:
        return

    sx, sy, sw, sh = cv2.boundingRect(np.float32([t1]))
    dx, dy, dw, dh = cv2.boundingRect(np.float32([t2]))
    if min(sw, sh, dw, dh) <= 0:
        return

    src_patch = img1[sy:sy + sh, sx:sx + sw]
    if src_patch.size == 0:
        return

    # Triangle vertices relative to their own bounding rectangles.
    src_tri = [(pt[0] - sx, pt[1] - sy) for pt in t1]
    dst_tri = [(pt[0] - dx, pt[1] - dy) for pt in t2]

    # Anti-aliased triangle mask (cv2.LINE_AA == 16).
    mask = np.zeros((dh, dw, 3), dtype=np.float32)
    cv2.fillConvexPoly(mask, np.int32(dst_tri), (1, 1, 1), cv2.LINE_AA)

    affine = cv2.getAffineTransform(np.float32(src_tri), np.float32(dst_tri))
    warped = cv2.warpAffine(
        src_patch,
        affine,
        (dw, dh),
        flags=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_REFLECT_101,
    )
    warped *= mask

    target = img2[dy:dy + dh, dx:dx + dw]
    img2[dy:dy + dh, dx:dx + dw] = target * (1 - mask) + warped
def morph_faces(img1, img2, alpha, dim, step):
    """Morph ``img1`` towards ``img2`` on a ``dim`` x ``dim`` canvas.

    Both images are resized to ``(dim, dim)``. Their landmarks are interpolated
    with weight ``alpha``, the interpolated points are triangulated, and each
    triangle of both images is warped onto the interpolated shape before the
    two warped images are blended.

    Args:
        img1: First image (weight ``1 - alpha``).
        img2: Second image (weight ``alpha``).
        alpha: Blend weight for ``img2``.
        dim: Side length of the square working and output image.
        step: Landmark sub-sampling step forwarded to ``get_landmarks``.

    Returns:
        A uint8 ``(dim, dim, 3)`` image. All zeros when either input is None.
        A plain weighted blend of the resized inputs when landmarks are
        missing, their shapes differ, or no triangles are produced.
    """
    if img1 is None or img2 is None:
        return np.zeros((dim, dim, 3), dtype=np.uint8)

    size = (dim, dim)
    src = cv2.resize(img1, size)
    dst = cv2.resize(img2, size)

    def cross_dissolve():
        # Fallback used whenever a geometric morph is not possible.
        return cv2.addWeighted(src, 1 - alpha, dst, alpha, 0)

    src_pts = get_landmarks(src, step)
    dst_pts = get_landmarks(dst, step)
    if src_pts is None or dst_pts is None or src_pts.shape != dst_pts.shape:
        return cross_dissolve()

    mid_pts = (1 - alpha) * src_pts + alpha * dst_pts
    # calculate_delaunay_triangles clamps mid_pts in place; the clamped points
    # are the ones used as warp targets below.
    triangles = calculate_delaunay_triangles((0, 0, dim, dim), mid_pts)
    if not triangles:
        return cross_dissolve()

    src_f = src.astype(np.float32) / 255
    dst_f = dst.astype(np.float32) / 255
    warped_src = np.zeros_like(src_f)
    warped_dst = np.zeros_like(dst_f)
    for tri in triangles:
        warp_triangle(src_f, warped_src, src_pts[tri], mid_pts[tri])
        warp_triangle(dst_f, warped_dst, dst_pts[tri], mid_pts[tri])

    blended = (1 - alpha) * warped_src + alpha * warped_dst
    return (blended * 255).astype(np.uint8)
def process_video(video_path, ref_img, trans, res, step, feather_pct, padding_pct): | |
cap = cv2.VideoCapture(video_path) | |
fps = cap.get(cv2.CAP_PROP_FPS) or 24 | |
total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) | |
# Prepare masked reference | |
ref_bgr = cv2.cvtColor(ref_img, cv2.COLOR_RGB2BGR) | |
mask_ref, ref_box = get_face_mask_box(ref_bgr, feather_pct, padding_pct) | |
if mask_ref is None: | |
return None, None, None, None | |
x_r, y_r, w_r, h_r = ref_box | |
ref_cut = ref_bgr[y_r:y_r+h_r, x_r:x_r+w_r] | |
mask_ref_norm = mask_ref.astype(np.float32)[..., None] / 255.0 | |
ref_masked = (ref_cut.astype(np.float32) * mask_ref_norm).astype(np.uint8) | |
ref_morph = cv2.resize(ref_masked, (res, res)) | |
# Output video setup | |
w_o = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) | |
h_o = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) | |
tmp_vid = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4').name | |
out_vid = cv2.VideoWriter(tmp_vid, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w_o, h_o)) | |
first_crop = None | |
first_mask = None | |
first_ref = None | |
first_morphed = None | |
for i in range(total): | |
ret, frame = cap.read() | |
if not ret: break | |
mask_roi, box = get_face_mask_box(frame, feather_pct, padding_pct) | |
if mask_roi is None: | |
out_vid.write(frame) | |
continue | |
x, y, w, h = box | |
crop = frame[y:y+h, x:x+w] | |
crop_resized = cv2.resize(crop, (res, res)) | |
alpha = float(np.clip((trans+1)/2, 0, 1)) | |
mor = morph_faces(crop_resized, ref_morph, alpha, res, step) | |
if i == 0: | |
first_crop = crop_resized.copy() | |
first_ref = ref_morph.copy() | |
first_mask = cv2.resize(mask_roi, (res, res), interpolation=cv2.INTER_LINEAR) | |
first_morphed = mor.copy() | |
mor_back = cv2.resize(mor, (w, h)) | |
mask_n = (mask_roi.astype(np.float32)[..., None] / 255.0) | |
region = frame[y:y+h, x:x+w].astype(np.float32) | |
blended = region * (1-mask_n) + mor_back.astype(np.float32) * mask_n | |
frame[y:y+h, x:x+w] = blended.astype(np.uint8) | |
out_vid.write(frame) | |
cap.release(); out_vid.release() | |
if first_morphed is not None and first_mask is not None: | |
mask_n0 = first_mask.astype(np.float32)[..., None] / 255.0 | |
first_morphed = (first_morphed.astype(np.float32) * mask_n0).astype(np.uint8) | |
else: | |
first_morphed = np.zeros((res, res,3),dtype=np.uint8) | |
first_crop = first_crop if first_crop is not None else np.zeros((res, res,3),dtype=np.uint8) | |
first_ref = first_ref if first_ref is not None else ref_morph.copy() | |
# Convert for Gradio | |
return tmp_vid, cv2.cvtColor(first_crop, cv2.COLOR_BGR2RGB), cv2.cvtColor(first_ref, cv2.COLOR_BGR2RGB), cv2.cvtColor(first_morphed, cv2.COLOR_BGR2RGB) | |
# --- Gradio App ---
# NOTE(review): the row grouping below was reconstructed from source whose
# indentation was lost; confirm which controls belong inside each gr.Row().
css = """video, img { object-fit: contain !important; }"""

with gr.Blocks(css=css) as iface:
    gr.Markdown("# Morph with Face-Shaped Composite and Padding Percentage")

    # Inputs: the video to process and the reference face image.
    with gr.Row():
        vid = gr.Video(label='Input Video')
        ref = gr.Image(type='numpy', label='Reference Image')

    # Tuning controls; feather and padding are fractions, not pixels.
    with gr.Row():
        res = gr.Dropdown(choices=[256, 384, 512, 768], value=512, label='Resolution')
        step = gr.Slider(minimum=1, maximum=4, value=4, step=1, label='Landmark Sub-sampling')
        feather = gr.Slider(minimum=0.0, maximum=0.5, value=0.1, step=0.01, label='Feather (%)')
        padding = gr.Slider(minimum=0.0, maximum=0.5, value=0.24, step=0.01, label='Padding (%)')

    trans = gr.Slider(minimum=-1.0, maximum=1.0, value=-0.35, step=0.05, label='Transition Level')
    btn = gr.Button('Generate Morph π')

    # Outputs: the processed video and three preview images.
    out_vid = gr.Video(label='Morphed Video')
    out_crop = gr.Image(label='First Frame Crop')
    out_ref = gr.Image(label='Masked Reference')
    out_morph = gr.Image(label='Masked Morphed First Frame')

    # The input order must match the parameter order of process_video.
    btn.click(
        fn=process_video,
        inputs=[vid, ref, trans, res, step, feather, padding],
        outputs=[out_vid, out_crop, out_ref, out_morph],
        show_progress=True,
    )
    gr.Markdown("---\n*Default values set and feather/padding are now relative percentages.*")

if __name__ == '__main__':
    iface.launch(debug=True)