# ghostv1 / app.py
import os, sys, subprocess
import argparse
import time

import cv2
import numpy as np
import gradio as gr
import torch

# Project imports assume the GHOST (sber-swap) repository layout.
from insightface_func.face_detect_crop_multi import Face_detect_crop
from network.AEI_Net import AEI_Net
from arcface_model.iresnet import iresnet100
from coordinate_reg.image_infer import Handler
from models.config_sr import TestOptions
from models.pix2pix_model import Pix2PixModel
from utils.inference.image_processing import crop_face, get_final_image
from utils.inference.core import model_inference
def setup_dependencies():
    if not os.path.exists("./download_models.sh"):
        print("Error: download_models.sh script not found.")
        sys.exit(1)
    subprocess.run(["./download_models.sh"], check=True)
    pkg_dir = os.path.expanduser("~/.local/gpu_packages")
    os.makedirs(pkg_dir, exist_ok=True)
    # GPU wheels can alternatively be installed at runtime into pkg_dir:
    # subprocess.run([
    #     sys.executable, "-m", "pip", "install",
    #     "--upgrade", "--target", pkg_dir,
    #     "torch", "torchvision", "torchaudio",
    #     "--extra-index-url", "https://download.pytorch.org/whl/cu118",
    #     "mxnet-cu112", "onnxruntime-gpu==1.12", "Cython",
    #     "insightface==0.2.1", "kornia==0.5.4", "dill", "numpy",
    # ], check=True)
    sys.path.insert(0, pkg_dir)
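
# A small runtime-report sketch (added for clarity, not part of the original
# script): prints which torch build is active after pkg_dir is prepended to
# sys.path, since init_models() below requires a CUDA device for fp16 inference.
def report_torch_runtime():
    """Print the active torch version and whether a CUDA device is visible."""
    print(f"torch {torch.__version__}, CUDA available: {torch.cuda.is_available()}")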
def init_models(args):
    # Face detection / cropping model
    app = Face_detect_crop(name='antelope', root='./insightface_func/models')
    app.prepare(ctx_id=0, det_thresh=0.6, det_size=(640, 640))

    # Main generator for face swapping
    G = AEI_Net(args.backbone, num_blocks=args.num_blocks, c_id=512)
    G.eval()
    G.load_state_dict(torch.load(args.G_path, map_location=torch.device('cpu')))
    G = G.cuda()
    G = G.half()

    # ArcFace model for face-identity embeddings
    netArc = iresnet100(fp16=False)
    netArc.load_state_dict(torch.load('arcface_model/backbone.pth'))
    netArc = netArc.cuda()
    netArc.eval()

    # Model for 106-point face landmarks
    handler = Handler('./coordinate_reg/model/2d106det', 0, ctx_id=0, det_size=640)

    # Optional face super-resolution model, enabled with --use_sr
    if args.use_sr:
        os.environ['CUDA_VISIBLE_DEVICES'] = '0'
        torch.backends.cudnn.benchmark = True
        opt = TestOptions()
        # opt.which_epoch = '10_7'
        model = Pix2PixModel(opt)
        model.netG.train()
    else:
        model = None

    return app, G, netArc, handler, model
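
# Loading every network per request is expensive. A minimal caching sketch
# (get_models is a hypothetical helper, not part of the original GHOST code):
# initialize once on the first Gradio call and reuse the tuple afterwards.
_models_cache = None

def get_models(args):
    """Return the cached model tuple, initializing it on first use."""
    global _models_cache
    if _models_cache is None:
        _models_cache = init_models(args)
    return _models_cache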
def infer_faceswap(src, tgt):
    app, G, netArc, handler, model = get_models(args)

    # Gradio delivers PIL images (type="pil"); convert to BGR for OpenCV.
    src_bgr = cv2.cvtColor(np.array(src), cv2.COLOR_RGB2BGR)
    tgt_bgr = cv2.cvtColor(np.array(tgt), cv2.COLOR_RGB2BGR)

    # Crop the source face; model_inference expects RGB source crops.
    source = [crop_face(src_bgr, app, args.crop_size)[0][:, :, ::-1]]

    # Crop the target face and keep the full target frame for blending back.
    target = [crop_face(tgt_bgr, app, args.crop_size)[0]]
    full_frames = [tgt_bgr]

    start = time.time()
    final_frames_list, crop_frames_list, full_frames, tfm_array_list = model_inference(
        full_frames,
        source,
        target,
        netArc,
        G,
        app,
        True,
        similarity_th=args.similarity_th,
        crop_size=args.crop_size,
        BS=args.batch_size,
    )
    result = get_final_image(final_frames_list, crop_frames_list, full_frames[0], tfm_array_list, handler)
    cv2.imwrite(args.out_image_name, result)
    print(f'Swapped image saved to {args.out_image_name}')
    print('Total time: ', time.time() - start)

    # result is BGR (it is written with cv2.imwrite); return RGB for the PIL output.
    return cv2.cvtColor(result, cv2.COLOR_BGR2RGB)
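
# Quick local usage sketch (hypothetical example paths), mirroring what the
# Gradio components pass in: infer_faceswap takes PIL images.
# from PIL import Image
# swapped = infer_faceswap(Image.open('examples/images/mark.jpg'),
#                          Image.open('examples/images/beckham.jpg'))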
if __name__ == "__main__":
    setup_dependencies()
    report_torch_runtime()

    def str2bool(v):
        # argparse's type=bool treats any non-empty string (even "False") as
        # True, so boolean flags are parsed explicitly.
        return str(v).lower() in ('true', '1', 'yes')

    parser = argparse.ArgumentParser()
    # Generator params
    parser.add_argument('--G_path', default='weights/G_unet_2blocks.pth', type=str, help='Path to weights for G')
    parser.add_argument('--backbone', default='unet', const='unet', nargs='?', choices=['unet', 'linknet', 'resnet'], help='Backbone for the attribute encoder')
    parser.add_argument('--num_blocks', default=2, type=int, help='Number of AddBlocks at AddResblock')
    parser.add_argument('--batch_size', default=40, type=int)
    parser.add_argument('--crop_size', default=224, type=int, help="Don't change this")
    parser.add_argument('--use_sr', default=False, type=str2bool, help='True to apply super resolution to swapped faces')
    parser.add_argument('--similarity_th', default=0.15, type=float, help='Threshold for selecting a face similar to the target')
    parser.add_argument('--source_paths', default=['examples/images/mark.jpg', 'examples/images/elon_musk.jpg'], nargs='+')
    parser.add_argument('--target_faces_paths', default=[], nargs='+', help="Faces in the target video that the source face(s) should replace. If omitted, any face found in the target is swapped.")
    # Parameters for image-to-video swap
    parser.add_argument('--target_video', default='examples/videos/nggyup.mp4', type=str, help='Required for image-to-video swap')
    parser.add_argument('--out_video_name', default='examples/results/result.mp4', type=str, help='Required for image-to-video swap')
    # Parameters for image-to-image swap
    parser.add_argument('--image_to_image', default=True, type=str2bool, help='True for image-to-image swap, False for swap on video')
    parser.add_argument('--target_image', default='examples/images/beckham.jpg', type=str, help='Required for image-to-image swap')
    parser.add_argument('--out_image_name', default='examples/results/result.png', type=str, help='Required for image-to-image swap')
    args = parser.parse_args()
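
    # Note: infer_faceswap reads `args` as a module-level global, so the
    # defaults parsed above (crop_size, batch_size, out_image_name, ...)
    # also control the Gradio inference path.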
    with gr.Blocks() as demo:
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    with gr.Row(equal_height=True):
                        input_source = gr.Image(type="pil", label="Input Source")
                        input_target = gr.Image(type="pil", label="Input Target")
                    run_button = gr.Button("Generate")
                with gr.Column():
                    result = gr.Image(type="pil", label="Image Output")
            run_button.click(
                fn=infer_faceswap,
                inputs=[input_source, input_target],
                outputs=[result],
            )
    demo.launch(server_name="0.0.0.0", server_port=7860, share=True, debug=True)
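
    # Usage note: `python app.py` serves the demo on port 7860; share=True
    # additionally opens a temporary public gradio.live link.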