Update app.py
Browse files
app.py
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
import os
|
| 2 |
-
from flask import Flask, request, jsonify
|
| 3 |
from PIL import Image
|
| 4 |
from io import BytesIO
|
| 5 |
import torch
|
| 6 |
import base64
|
| 7 |
-
import io
|
| 8 |
import logging
|
| 9 |
import gradio as gr
|
| 10 |
import numpy as np
|
|
@@ -33,52 +33,48 @@ from torchvision.transforms.functional import to_pil_image
|
|
| 33 |
|
| 34 |
app = Flask(__name__)
|
| 35 |
|
| 36 |
-
|
| 37 |
-
unet_path = hf_hub_download(repo_id='yisol/IDM-VTON', subfolder='unet', filename='pytorch_model.bin')
|
| 38 |
-
unet_config_path = hf_hub_download(repo_id='yisol/IDM-VTON', subfolder='unet', filename='config.json')
|
| 39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
unet = UNet2DConditionModel.from_pretrained(
|
| 41 |
-
|
| 42 |
-
torch_dtype=torch.float16
|
| 43 |
-
force_download=False
|
| 44 |
)
|
| 45 |
unet.requires_grad_(False)
|
| 46 |
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
tokenizer_two_path = hf_hub_download(repo_id='yisol/IDM-VTON', subfolder='tokenizer_2', filename='tokenizer.json')
|
| 50 |
|
| 51 |
-
|
| 52 |
-
tokenizer_two = AutoTokenizer.from_pretrained(
|
| 53 |
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
noise_scheduler = DDPMScheduler.from_pretrained(os.path.dirname(noise_scheduler_path))
|
| 57 |
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
text_encoder_one = CLIPTextModel.from_pretrained(os.path.dirname(text_encoder_one_path), torch_dtype=torch.float16)
|
| 61 |
|
| 62 |
-
text_encoder_two_path =
|
| 63 |
-
text_encoder_two = CLIPTextModelWithProjection.from_pretrained(
|
| 64 |
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
image_encoder = CLIPVisionModelWithProjection.from_pretrained(os.path.dirname(image_encoder_path), torch_dtype=torch.float16)
|
| 68 |
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
vae = AutoencoderKL.from_pretrained(os.path.dirname(vae_path), torch_dtype=torch.float16)
|
| 72 |
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
UNet_Encoder = UNet2DConditionModel_ref.from_pretrained(os.path.dirname(unet_encoder_path), torch_dtype=torch.float16)
|
| 76 |
|
| 77 |
-
# Initialisation des autres modèles (parsing et openpose)
|
| 78 |
parsing_model = Parsing(0)
|
| 79 |
openpose_model = OpenPose(0)
|
| 80 |
|
| 81 |
-
#
|
| 82 |
UNet_Encoder.requires_grad_(False)
|
| 83 |
image_encoder.requires_grad_(False)
|
| 84 |
vae.requires_grad_(False)
|
|
@@ -86,17 +82,13 @@ unet.requires_grad_(False)
|
|
| 86 |
text_encoder_one.requires_grad_(False)
|
| 87 |
text_encoder_two.requires_grad_(False)
|
| 88 |
|
| 89 |
-
#
|
| 90 |
-
tensor_transfrom = transforms.Compose(
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
]
|
| 95 |
-
)
|
| 96 |
|
| 97 |
-
# Configuration du pipeline Tryon
|
| 98 |
pipe = TryonPipeline.from_pretrained(
|
| 99 |
-
'yisol/IDM-VTON',
|
| 100 |
unet=unet,
|
| 101 |
vae=vae,
|
| 102 |
feature_extractor=CLIPImageProcessor(),
|
|
@@ -106,11 +98,8 @@ pipe = TryonPipeline.from_pretrained(
|
|
| 106 |
tokenizer_2=tokenizer_two,
|
| 107 |
scheduler=noise_scheduler,
|
| 108 |
image_encoder=image_encoder,
|
| 109 |
-
torch_dtype=torch.float16
|
| 110 |
-
force_download=False
|
| 111 |
)
|
| 112 |
-
|
| 113 |
-
# Ajout du UNet Encoder dans le pipeline
|
| 114 |
pipe.unet_encoder = UNet_Encoder
|
| 115 |
|
| 116 |
def pil_to_binary_mask(pil_image, threshold=0):
|
|
@@ -259,7 +248,7 @@ def start_tryon(dict, garm_img, garment_des, is_checked, is_checked_crop, denois
|
|
| 259 |
ip_adapter_image=garm_img.resize((768, 1024)),
|
| 260 |
guidance_scale=2.0,
|
| 261 |
)[0]
|
| 262 |
-
|
| 263 |
if is_checked_crop:
|
| 264 |
out_img = images[0].resize(crop_size)
|
| 265 |
human_img_orig.paste(out_img, (int(left), int(top)))
|
|
@@ -295,7 +284,7 @@ def tryon():
|
|
| 295 |
'layers': [human_image] if not use_auto_mask else None,
|
| 296 |
'composite': None
|
| 297 |
}
|
| 298 |
-
clear_gpu_memory()
|
| 299 |
|
| 300 |
output_image, mask_image = start_tryon(human_dict, garment_image, description, use_auto_mask, use_auto_crop, denoise_steps, seed , categorie)
|
| 301 |
|
|
|
|
| 1 |
import os
|
| 2 |
+
from flask import Flask, request, jsonify, send_file
|
| 3 |
from PIL import Image
|
| 4 |
from io import BytesIO
|
| 5 |
import torch
|
| 6 |
import base64
|
| 7 |
+
import io
|
| 8 |
import logging
|
| 9 |
import gradio as gr
|
| 10 |
import numpy as np
|
|
|
|
| 33 |
|
| 34 |
app = Flask(__name__)
|
| 35 |
|
| 36 |
+
base_path = 'yisol/IDM-VTON'
|
|
|
|
|
|
|
| 37 |
|
| 38 |
+
# Téléchargez les fichiers nécessaires via huggingface_hub
|
| 39 |
+
def download_model_files(base_path, filename):
|
| 40 |
+
return hf_hub_download(repo_id=base_path, filename=filename)
|
| 41 |
+
|
| 42 |
+
# Téléchargement et chargement des fichiers de modèle
|
| 43 |
+
unet_path = download_model_files(base_path, "unet/pytorch_model.bin")
|
| 44 |
unet = UNet2DConditionModel.from_pretrained(
|
| 45 |
+
unet_path,
|
| 46 |
+
torch_dtype=torch.float16
|
|
|
|
| 47 |
)
|
| 48 |
unet.requires_grad_(False)
|
| 49 |
|
| 50 |
+
tokenizer_one_path = download_model_files(base_path, "tokenizer/config.json")
|
| 51 |
+
tokenizer_one = AutoTokenizer.from_pretrained(tokenizer_one_path, use_fast=False)
|
|
|
|
| 52 |
|
| 53 |
+
tokenizer_two_path = download_model_files(base_path, "tokenizer_2/config.json")
|
| 54 |
+
tokenizer_two = AutoTokenizer.from_pretrained(tokenizer_two_path, use_fast=False)
|
| 55 |
|
| 56 |
+
noise_scheduler_path = download_model_files(base_path, "scheduler/scheduler_config.json")
|
| 57 |
+
noise_scheduler = DDPMScheduler.from_pretrained(noise_scheduler_path)
|
|
|
|
| 58 |
|
| 59 |
+
text_encoder_one_path = download_model_files(base_path, "text_encoder/pytorch_model.bin")
|
| 60 |
+
text_encoder_one = CLIPTextModel.from_pretrained(text_encoder_one_path, torch_dtype=torch.float16)
|
|
|
|
| 61 |
|
| 62 |
+
text_encoder_two_path = download_model_files(base_path, "text_encoder_2/pytorch_model.bin")
|
| 63 |
+
text_encoder_two = CLIPTextModelWithProjection.from_pretrained(text_encoder_two_path, torch_dtype=torch.float16)
|
| 64 |
|
| 65 |
+
image_encoder_path = download_model_files(base_path, "image_encoder/pytorch_model.bin")
|
| 66 |
+
image_encoder = CLIPVisionModelWithProjection.from_pretrained(image_encoder_path, torch_dtype=torch.float16)
|
|
|
|
| 67 |
|
| 68 |
+
vae_path = download_model_files(base_path, "vae/pytorch_model.bin")
|
| 69 |
+
vae = AutoencoderKL.from_pretrained(vae_path, torch_dtype=torch.float16)
|
|
|
|
| 70 |
|
| 71 |
+
unet_encoder_path = download_model_files(base_path, "unet_encoder/pytorch_model.bin")
|
| 72 |
+
UNet_Encoder = UNet2DConditionModel_ref.from_pretrained(unet_encoder_path, torch_dtype=torch.float16)
|
|
|
|
| 73 |
|
|
|
|
| 74 |
parsing_model = Parsing(0)
|
| 75 |
openpose_model = OpenPose(0)
|
| 76 |
|
| 77 |
+
# Définir les modèles comme non entraînables
|
| 78 |
UNet_Encoder.requires_grad_(False)
|
| 79 |
image_encoder.requires_grad_(False)
|
| 80 |
vae.requires_grad_(False)
|
|
|
|
| 82 |
text_encoder_one.requires_grad_(False)
|
| 83 |
text_encoder_two.requires_grad_(False)
|
| 84 |
|
| 85 |
+
# Autres transformations et pipeline
|
| 86 |
+
tensor_transfrom = transforms.Compose([
|
| 87 |
+
transforms.ToTensor(),
|
| 88 |
+
transforms.Normalize([0.5], [0.5]),
|
| 89 |
+
])
|
|
|
|
|
|
|
| 90 |
|
|
|
|
| 91 |
pipe = TryonPipeline.from_pretrained(
|
|
|
|
| 92 |
unet=unet,
|
| 93 |
vae=vae,
|
| 94 |
feature_extractor=CLIPImageProcessor(),
|
|
|
|
| 98 |
tokenizer_2=tokenizer_two,
|
| 99 |
scheduler=noise_scheduler,
|
| 100 |
image_encoder=image_encoder,
|
| 101 |
+
torch_dtype=torch.float16
|
|
|
|
| 102 |
)
|
|
|
|
|
|
|
| 103 |
pipe.unet_encoder = UNet_Encoder
|
| 104 |
|
| 105 |
def pil_to_binary_mask(pil_image, threshold=0):
|
|
|
|
| 248 |
ip_adapter_image=garm_img.resize((768, 1024)),
|
| 249 |
guidance_scale=2.0,
|
| 250 |
)[0]
|
| 251 |
+
|
| 252 |
if is_checked_crop:
|
| 253 |
out_img = images[0].resize(crop_size)
|
| 254 |
human_img_orig.paste(out_img, (int(left), int(top)))
|
|
|
|
| 284 |
'layers': [human_image] if not use_auto_mask else None,
|
| 285 |
'composite': None
|
| 286 |
}
|
| 287 |
+
#clear_gpu_memory()
|
| 288 |
|
| 289 |
output_image, mask_image = start_tryon(human_dict, garment_image, description, use_auto_mask, use_auto_crop, denoise_steps, seed , categorie)
|
| 290 |
|