Update app.py
Browse files
app.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
|
2 |
import os
|
3 |
-
from flask import Flask, request, jsonify
|
4 |
from PIL import Image
|
5 |
from io import BytesIO
|
6 |
import torch
|
@@ -34,95 +34,84 @@ from torchvision.transforms.functional import to_pil_image
|
|
34 |
|
35 |
app = Flask(__name__)
|
36 |
|
37 |
-
#
|
38 |
-
|
39 |
-
|
40 |
-
example_path = os.path.join(os.path.dirname(__file__), 'example')
|
41 |
|
42 |
unet = UNet2DConditionModel.from_pretrained(
|
43 |
-
|
44 |
-
subfolder="unet",
|
45 |
torch_dtype=torch.float16,
|
46 |
force_download=False
|
47 |
)
|
48 |
unet.requires_grad_(False)
|
49 |
-
tokenizer_one = AutoTokenizer.from_pretrained(
|
50 |
-
base_path,
|
51 |
-
subfolder="tokenizer",
|
52 |
-
revision=None,
|
53 |
-
use_fast=False,
|
54 |
-
force_download=False
|
55 |
-
)
|
56 |
-
tokenizer_two = AutoTokenizer.from_pretrained(
|
57 |
-
base_path,
|
58 |
-
subfolder="tokenizer_2",
|
59 |
-
revision=None,
|
60 |
-
use_fast=False,
|
61 |
-
force_download=False
|
62 |
-
)
|
63 |
-
noise_scheduler = DDPMScheduler.from_pretrained(base_path, subfolder="scheduler")
|
64 |
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
torch_dtype=torch.float16,
|
69 |
-
force_download=False
|
70 |
-
)
|
71 |
-
text_encoder_two = CLIPTextModelWithProjection.from_pretrained(
|
72 |
-
base_path,
|
73 |
-
subfolder="text_encoder_2",
|
74 |
-
torch_dtype=torch.float16,
|
75 |
-
force_download=False
|
76 |
-
)
|
77 |
-
image_encoder = CLIPVisionModelWithProjection.from_pretrained(
|
78 |
-
base_path,
|
79 |
-
subfolder="image_encoder",
|
80 |
-
torch_dtype=torch.float16,
|
81 |
-
force_download=False
|
82 |
-
)
|
83 |
-
vae = AutoencoderKL.from_pretrained(base_path,
|
84 |
-
subfolder="vae",
|
85 |
-
torch_dtype=torch.float16,
|
86 |
-
force_download=False
|
87 |
-
)
|
88 |
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
|
|
|
|
|
|
|
|
|
|
|
96 |
parsing_model = Parsing(0)
|
97 |
openpose_model = OpenPose(0)
|
98 |
|
|
|
99 |
UNet_Encoder.requires_grad_(False)
|
100 |
image_encoder.requires_grad_(False)
|
101 |
vae.requires_grad_(False)
|
102 |
unet.requires_grad_(False)
|
103 |
text_encoder_one.requires_grad_(False)
|
104 |
text_encoder_two.requires_grad_(False)
|
|
|
|
|
105 |
tensor_transfrom = transforms.Compose(
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
|
|
|
112 |
pipe = TryonPipeline.from_pretrained(
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
)
|
|
|
|
|
126 |
pipe.unet_encoder = UNet_Encoder
|
127 |
|
128 |
def pil_to_binary_mask(pil_image, threshold=0):
|
@@ -271,7 +260,7 @@ def start_tryon(dict, garm_img, garment_des, is_checked, is_checked_crop, denois
|
|
271 |
ip_adapter_image=garm_img.resize((768, 1024)),
|
272 |
guidance_scale=2.0,
|
273 |
)[0]
|
274 |
-
|
275 |
if is_checked_crop:
|
276 |
out_img = images[0].resize(crop_size)
|
277 |
human_img_orig.paste(out_img, (int(left), int(top)))
|
@@ -307,7 +296,7 @@ def tryon():
|
|
307 |
'layers': [human_image] if not use_auto_mask else None,
|
308 |
'composite': None
|
309 |
}
|
310 |
-
|
311 |
|
312 |
output_image, mask_image = start_tryon(human_dict, garment_image, description, use_auto_mask, use_auto_crop, denoise_steps, seed , categorie)
|
313 |
|
|
|
1 |
|
2 |
import os
|
3 |
+
from flask import Flask, request, jsonify
|
4 |
from PIL import Image
|
5 |
from io import BytesIO
|
6 |
import torch
|
|
|
34 |
|
35 |
app = Flask(__name__)
|
36 |
|
37 |
+
# Load the UNet directly from the Hub repository.
# hf_hub_download fetches only the single file it is asked for, and a manual
# download of 'pytorch_model.bin' does not match the weight file name that
# diffusers resolves for a model folder. Passing the repo id together with
# `subfolder` lets from_pretrained locate, download and cache both the config
# and the weights on its own.
unet = UNet2DConditionModel.from_pretrained(
    'yisol/IDM-VTON',
    subfolder='unet',
    torch_dtype=torch.float16,
    force_download=False
)
# Inference only: no gradients are needed for the UNet.
unet.requires_grad_(False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
|
48 |
+
# Load the two tokenizers from their sub-folders of the Hub repository.
# Downloading only 'tokenizer.json' and pointing from_pretrained at the
# containing directory leaves the slow tokenizer (use_fast=False) without its
# vocabulary files; resolving through `subfolder` fetches whatever the
# tokenizer class actually needs.
tokenizer_one = AutoTokenizer.from_pretrained(
    'yisol/IDM-VTON',
    subfolder='tokenizer',
    use_fast=False,
)
tokenizer_two = AutoTokenizer.from_pretrained(
    'yisol/IDM-VTON',
    subfolder='tokenizer_2',
    use_fast=False,
)
|
54 |
+
|
55 |
+
# Load the noise scheduler from the 'scheduler' sub-folder of the Hub repo.
# The scheduler configuration file is not named 'scheduler.json', so the
# manual single-file download cannot work; from_pretrained resolves the
# correct config file itself.
noise_scheduler = DDPMScheduler.from_pretrained('yisol/IDM-VTON', subfolder='scheduler')
|
58 |
+
|
59 |
+
# Load both text encoders from the Hub repository.
# A local model directory must contain config.json next to the weights;
# downloading only the weight file leaves that directory incomplete, so let
# from_pretrained resolve every required file through `subfolder`.
text_encoder_one = CLIPTextModel.from_pretrained(
    'yisol/IDM-VTON',
    subfolder='text_encoder',
    torch_dtype=torch.float16,
)

text_encoder_two = CLIPTextModelWithProjection.from_pretrained(
    'yisol/IDM-VTON',
    subfolder='text_encoder_2',
    torch_dtype=torch.float16,
)
|
65 |
+
|
66 |
+
# Load the image encoder from the Hub repository.
# Resolving through `subfolder` fetches the config as well as the weights,
# which the previous single-file download did not do.
image_encoder = CLIPVisionModelWithProjection.from_pretrained(
    'yisol/IDM-VTON',
    subfolder='image_encoder',
    torch_dtype=torch.float16,
)
|
69 |
+
|
70 |
+
# Load the VAE from the Hub repository.
# from_pretrained with `subfolder` downloads the config and the weight file
# under the names diffusers expects, instead of relying on a manually
# fetched 'pytorch_model.bin' with no config beside it.
vae = AutoencoderKL.from_pretrained(
    'yisol/IDM-VTON',
    subfolder='vae',
    torch_dtype=torch.float16,
)
|
73 |
|
74 |
+
# Load the UNet encoder from the 'unet_encoder' sub-folder of the Hub repo.
# As with the other components, from_pretrained resolves config and weights
# itself; a lone weight file in the cache directory is not a loadable model.
UNet_Encoder = UNet2DConditionModel_ref.from_pretrained(
    'yisol/IDM-VTON',
    subfolder='unet_encoder',
    torch_dtype=torch.float16,
)
|
77 |
+
|
78 |
+
# Initialize the other models (parsing and OpenPose)
|
79 |
parsing_model = Parsing(0)
|
80 |
openpose_model = OpenPose(0)
|
81 |
|
82 |
+
# Disable gradients on every loaded model
|
83 |
UNet_Encoder.requires_grad_(False)
|
84 |
image_encoder.requires_grad_(False)
|
85 |
vae.requires_grad_(False)
|
86 |
unet.requires_grad_(False)
|
87 |
text_encoder_one.requires_grad_(False)
|
88 |
text_encoder_two.requires_grad_(False)
|
89 |
+
|
90 |
+
# Image-to-tensor transform
|
91 |
tensor_transfrom = transforms.Compose(
|
92 |
+
[
|
93 |
+
transforms.ToTensor(),
|
94 |
+
transforms.Normalize([0.5], [0.5]),
|
95 |
+
]
|
96 |
+
)
|
97 |
|
98 |
+
# Build the try-on pipeline from the components loaded above
|
99 |
pipe = TryonPipeline.from_pretrained(
|
100 |
+
'yisol/IDM-VTON',
|
101 |
+
unet=unet,
|
102 |
+
vae=vae,
|
103 |
+
feature_extractor=CLIPImageProcessor(),
|
104 |
+
text_encoder=text_encoder_one,
|
105 |
+
text_encoder_2=text_encoder_two,
|
106 |
+
tokenizer=tokenizer_one,
|
107 |
+
tokenizer_2=tokenizer_two,
|
108 |
+
scheduler=noise_scheduler,
|
109 |
+
image_encoder=image_encoder,
|
110 |
+
torch_dtype=torch.float16,
|
111 |
+
force_download=False
|
112 |
)
|
113 |
+
|
114 |
+
# Attach the UNet encoder to the pipeline
|
115 |
pipe.unet_encoder = UNet_Encoder
|
116 |
|
117 |
def pil_to_binary_mask(pil_image, threshold=0):
|
|
|
260 |
ip_adapter_image=garm_img.resize((768, 1024)),
|
261 |
guidance_scale=2.0,
|
262 |
)[0]
|
263 |
+
# Drop the intermediate tensors, but keep `images`: it is read immediately below.
del prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds, generator
|
264 |
if is_checked_crop:
|
265 |
out_img = images[0].resize(crop_size)
|
266 |
human_img_orig.paste(out_img, (int(left), int(top)))
|
|
|
296 |
'layers': [human_image] if not use_auto_mask else None,
|
297 |
'composite': None
|
298 |
}
|
299 |
+
clear_gpu_memory()
|
300 |
|
301 |
output_image, mask_image = start_tryon(human_dict, garment_image, description, use_auto_mask, use_auto_crop, denoise_steps, seed , categorie)
|
302 |
|