Update app.py
Browse files
app.py
CHANGED
@@ -1,10 +1,10 @@
|
|
1 |
import os
|
2 |
-
from flask import Flask, request, jsonify
|
3 |
from PIL import Image
|
4 |
from io import BytesIO
|
5 |
import torch
|
6 |
import base64
|
7 |
-
import io
|
8 |
import logging
|
9 |
import gradio as gr
|
10 |
import numpy as np
|
@@ -33,52 +33,48 @@ from torchvision.transforms.functional import to_pil_image
|
|
33 |
|
34 |
app = Flask(__name__)
|
35 |
|
36 |
-
|
37 |
-
unet_path = hf_hub_download(repo_id='yisol/IDM-VTON', subfolder='unet', filename='pytorch_model.bin')
|
38 |
-
unet_config_path = hf_hub_download(repo_id='yisol/IDM-VTON', subfolder='unet', filename='config.json')
|
39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
unet = UNet2DConditionModel.from_pretrained(
|
41 |
-
|
42 |
-
torch_dtype=torch.float16
|
43 |
-
force_download=False
|
44 |
)
|
45 |
unet.requires_grad_(False)
|
46 |
|
47 |
-
|
48 |
-
|
49 |
-
tokenizer_two_path = hf_hub_download(repo_id='yisol/IDM-VTON', subfolder='tokenizer_2', filename='tokenizer.json')
|
50 |
|
51 |
-
|
52 |
-
tokenizer_two = AutoTokenizer.from_pretrained(
|
53 |
|
54 |
-
|
55 |
-
|
56 |
-
noise_scheduler = DDPMScheduler.from_pretrained(os.path.dirname(noise_scheduler_path))
|
57 |
|
58 |
-
|
59 |
-
|
60 |
-
text_encoder_one = CLIPTextModel.from_pretrained(os.path.dirname(text_encoder_one_path), torch_dtype=torch.float16)
|
61 |
|
62 |
-
text_encoder_two_path =
|
63 |
-
text_encoder_two = CLIPTextModelWithProjection.from_pretrained(
|
64 |
|
65 |
-
|
66 |
-
|
67 |
-
image_encoder = CLIPVisionModelWithProjection.from_pretrained(os.path.dirname(image_encoder_path), torch_dtype=torch.float16)
|
68 |
|
69 |
-
|
70 |
-
|
71 |
-
vae = AutoencoderKL.from_pretrained(os.path.dirname(vae_path), torch_dtype=torch.float16)
|
72 |
|
73 |
-
|
74 |
-
|
75 |
-
UNet_Encoder = UNet2DConditionModel_ref.from_pretrained(os.path.dirname(unet_encoder_path), torch_dtype=torch.float16)
|
76 |
|
77 |
-
# Initialisation des autres modèles (parsing et openpose)
|
78 |
parsing_model = Parsing(0)
|
79 |
openpose_model = OpenPose(0)
|
80 |
|
81 |
-
#
|
82 |
UNet_Encoder.requires_grad_(False)
|
83 |
image_encoder.requires_grad_(False)
|
84 |
vae.requires_grad_(False)
|
@@ -86,17 +82,13 @@ unet.requires_grad_(False)
|
|
86 |
text_encoder_one.requires_grad_(False)
|
87 |
text_encoder_two.requires_grad_(False)
|
88 |
|
89 |
-
#
|
90 |
-
tensor_transfrom = transforms.Compose(
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
]
|
95 |
-
)
|
96 |
|
97 |
-
# Configuration du pipeline Tryon
|
98 |
pipe = TryonPipeline.from_pretrained(
|
99 |
-
'yisol/IDM-VTON',
|
100 |
unet=unet,
|
101 |
vae=vae,
|
102 |
feature_extractor=CLIPImageProcessor(),
|
@@ -106,11 +98,8 @@ pipe = TryonPipeline.from_pretrained(
|
|
106 |
tokenizer_2=tokenizer_two,
|
107 |
scheduler=noise_scheduler,
|
108 |
image_encoder=image_encoder,
|
109 |
-
torch_dtype=torch.float16
|
110 |
-
force_download=False
|
111 |
)
|
112 |
-
|
113 |
-
# Ajout du UNet Encoder dans le pipeline
|
114 |
pipe.unet_encoder = UNet_Encoder
|
115 |
|
116 |
def pil_to_binary_mask(pil_image, threshold=0):
|
@@ -259,7 +248,7 @@ def start_tryon(dict, garm_img, garment_des, is_checked, is_checked_crop, denois
|
|
259 |
ip_adapter_image=garm_img.resize((768, 1024)),
|
260 |
guidance_scale=2.0,
|
261 |
)[0]
|
262 |
-
|
263 |
if is_checked_crop:
|
264 |
out_img = images[0].resize(crop_size)
|
265 |
human_img_orig.paste(out_img, (int(left), int(top)))
|
@@ -295,7 +284,7 @@ def tryon():
|
|
295 |
'layers': [human_image] if not use_auto_mask else None,
|
296 |
'composite': None
|
297 |
}
|
298 |
-
clear_gpu_memory()
|
299 |
|
300 |
output_image, mask_image = start_tryon(human_dict, garment_image, description, use_auto_mask, use_auto_crop, denoise_steps, seed , categorie)
|
301 |
|
|
|
1 |
import os
|
2 |
+
from flask import Flask, request, jsonify, send_file
|
3 |
from PIL import Image
|
4 |
from io import BytesIO
|
5 |
import torch
|
6 |
import base64
|
7 |
+
import io
|
8 |
import logging
|
9 |
import gradio as gr
|
10 |
import numpy as np
|
|
|
33 |
|
34 |
app = Flask(__name__)
|
35 |
|
36 |
+
base_path = 'yisol/IDM-VTON'
|
|
|
|
|
37 |
|
38 |
+
# Download the required files via huggingface_hub
|
39 |
+
def download_model_files(base_path, filename):
    """Fetch a single file from a Hugging Face Hub repository.

    Args:
        base_path: Repository id, forwarded to ``hf_hub_download`` as ``repo_id``.
        filename: Path of the wanted file inside the repository
            (e.g. ``"unet/pytorch_model.bin"``).

    Returns:
        The value returned by ``hf_hub_download`` for that file, i.e. the
        local path of the downloaded (cached) copy.
    """
    local_file = hf_hub_download(repo_id=base_path, filename=filename)
    return local_file
|
41 |
+
|
42 |
+
# Download and load the model files
|
43 |
+
# Load every model component directly from the Hub repository named by
# `base_path`.
#
# `from_pretrained` expects a repo id or a model *directory*. Handing it the
# path of one downloaded file (e.g. ".../unet/pytorch_model.bin") makes the
# loader try to read that file as the component's configuration, which fails.
# Passing the repo id together with `subfolder=` lets each library resolve,
# download and cache the config / weight / vocabulary files it needs, so no
# file name has to be hard-coded here.
unet = UNet2DConditionModel.from_pretrained(
    base_path,
    subfolder="unet",
    torch_dtype=torch.float16,
)
unet.requires_grad_(False)

# Slow (pure-Python) tokenizers are requested explicitly via use_fast=False.
tokenizer_one = AutoTokenizer.from_pretrained(
    base_path,
    subfolder="tokenizer",
    use_fast=False,
)

tokenizer_two = AutoTokenizer.from_pretrained(
    base_path,
    subfolder="tokenizer_2",
    use_fast=False,
)

noise_scheduler = DDPMScheduler.from_pretrained(base_path, subfolder="scheduler")

text_encoder_one = CLIPTextModel.from_pretrained(
    base_path,
    subfolder="text_encoder",
    torch_dtype=torch.float16,
)

text_encoder_two = CLIPTextModelWithProjection.from_pretrained(
    base_path,
    subfolder="text_encoder_2",
    torch_dtype=torch.float16,
)

image_encoder = CLIPVisionModelWithProjection.from_pretrained(
    base_path,
    subfolder="image_encoder",
    torch_dtype=torch.float16,
)

vae = AutoencoderKL.from_pretrained(
    base_path,
    subfolder="vae",
    torch_dtype=torch.float16,
)

# Reference ("garment") UNet; attached to the pipeline after it is built.
UNet_Encoder = UNet2DConditionModel_ref.from_pretrained(
    base_path,
    subfolder="unet_encoder",
    torch_dtype=torch.float16,
)
|
|
|
73 |
|
|
|
74 |
parsing_model = Parsing(0)
|
75 |
openpose_model = OpenPose(0)
|
76 |
|
77 |
+
# Freeze the models: disable gradient tracking on their parameters
|
78 |
UNet_Encoder.requires_grad_(False)
|
79 |
image_encoder.requires_grad_(False)
|
80 |
vae.requires_grad_(False)
|
|
|
82 |
text_encoder_one.requires_grad_(False)
|
83 |
text_encoder_two.requires_grad_(False)
|
84 |
|
85 |
+
# Image preprocessing transform and try-on pipeline setup
|
86 |
+
# Preprocessing applied to input images: convert to a tensor, then normalize
# with mean 0.5 and std 0.5.
# NOTE: the name is misspelled ("transfrom"), but it is kept as-is because
# code outside this view may refer to it.
tensor_transfrom = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize([0.5], [0.5]),
    ]
)
|
|
|
|
|
90 |
|
|
|
91 |
pipe = TryonPipeline.from_pretrained(
|
|
|
92 |
unet=unet,
|
93 |
vae=vae,
|
94 |
feature_extractor=CLIPImageProcessor(),
|
|
|
98 |
tokenizer_2=tokenizer_two,
|
99 |
scheduler=noise_scheduler,
|
100 |
image_encoder=image_encoder,
|
101 |
+
torch_dtype=torch.float16
|
|
|
102 |
)
|
|
|
|
|
103 |
pipe.unet_encoder = UNet_Encoder
|
104 |
|
105 |
def pil_to_binary_mask(pil_image, threshold=0):
|
|
|
248 |
ip_adapter_image=garm_img.resize((768, 1024)),
|
249 |
guidance_scale=2.0,
|
250 |
)[0]
|
251 |
+
|
252 |
if is_checked_crop:
|
253 |
out_img = images[0].resize(crop_size)
|
254 |
human_img_orig.paste(out_img, (int(left), int(top)))
|
|
|
284 |
'layers': [human_image] if not use_auto_mask else None,
|
285 |
'composite': None
|
286 |
}
|
287 |
+
#clear_gpu_memory()
|
288 |
|
289 |
output_image, mask_image = start_tryon(human_dict, garment_image, description, use_auto_mask, use_auto_crop, denoise_steps, seed , categorie)
|
290 |
|