Update app.py
Browse files
app.py
CHANGED
@@ -1,17 +1,17 @@
|
|
|
|
1 |
import os
|
2 |
-
from flask import Flask, request, jsonify,
|
3 |
from PIL import Image
|
4 |
from io import BytesIO
|
5 |
import torch
|
6 |
import base64
|
7 |
-
import io
|
8 |
import logging
|
9 |
import gradio as gr
|
10 |
import numpy as np
|
11 |
import spaces
|
12 |
import uuid
|
13 |
import random
|
14 |
-
from huggingface_hub import hf_hub_download
|
15 |
from src.tryon_pipeline import StableDiffusionXLInpaintPipeline as TryonPipeline
|
16 |
from src.unet_hacked_garmnet import UNet2DConditionModel as UNet2DConditionModel_ref
|
17 |
from src.unet_hacked_tryon import UNet2DConditionModel
|
@@ -34,71 +34,91 @@ from torchvision.transforms.functional import to_pil_image
|
|
34 |
app = Flask(__name__)
|
35 |
|
36 |
base_path = 'yisol/IDM-VTON'
|
|
|
37 |
|
38 |
-
# Download the required files via huggingface_hub
|
39 |
-
def download_model_files(base_path, filename):
|
40 |
-
return hf_hub_download(repo_id=base_path, filename=filename)
|
41 |
-
|
42 |
-
# Download and load the model files
|
43 |
-
unet_path = download_model_files(base_path, "unet/pytorch_model.bin")
|
44 |
unet = UNet2DConditionModel.from_pretrained(
|
45 |
-
|
46 |
-
|
|
|
|
|
47 |
)
|
48 |
unet.requires_grad_(False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
vae
|
|
|
|
|
|
|
70 |
|
71 |
-
|
72 |
-
|
|
|
|
|
|
|
|
|
73 |
|
74 |
parsing_model = Parsing(0)
|
75 |
openpose_model = OpenPose(0)
|
76 |
|
77 |
-
# Set the models as non-trainable
|
78 |
UNet_Encoder.requires_grad_(False)
|
79 |
image_encoder.requires_grad_(False)
|
80 |
vae.requires_grad_(False)
|
81 |
unet.requires_grad_(False)
|
82 |
text_encoder_one.requires_grad_(False)
|
83 |
text_encoder_two.requires_grad_(False)
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
|
91 |
pipe = TryonPipeline.from_pretrained(
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
|
|
|
|
102 |
)
|
103 |
pipe.unet_encoder = UNet_Encoder
|
104 |
|
@@ -284,7 +304,7 @@ def tryon():
|
|
284 |
'layers': [human_image] if not use_auto_mask else None,
|
285 |
'composite': None
|
286 |
}
|
287 |
-
|
288 |
|
289 |
output_image, mask_image = start_tryon(human_dict, garment_image, description, use_auto_mask, use_auto_crop, denoise_steps, seed , categorie)
|
290 |
|
|
|
1 |
+
|
2 |
import os
|
3 |
+
from flask import Flask, request, jsonify,send_file
|
4 |
from PIL import Image
|
5 |
from io import BytesIO
|
6 |
import torch
|
7 |
import base64
|
8 |
+
import io
|
9 |
import logging
|
10 |
import gradio as gr
|
11 |
import numpy as np
|
12 |
import spaces
|
13 |
import uuid
|
14 |
import random
|
|
|
15 |
from src.tryon_pipeline import StableDiffusionXLInpaintPipeline as TryonPipeline
|
16 |
from src.unet_hacked_garmnet import UNet2DConditionModel as UNet2DConditionModel_ref
|
17 |
from src.unet_hacked_tryon import UNet2DConditionModel
|
|
|
34 |
app = Flask(__name__)
|
35 |
|
36 |
base_path = 'yisol/IDM-VTON'
|
37 |
+
example_path = os.path.join(os.path.dirname(__file__), 'example')
|
38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
unet = UNet2DConditionModel.from_pretrained(
|
40 |
+
base_path,
|
41 |
+
subfolder="unet",
|
42 |
+
torch_dtype=torch.float16,
|
43 |
+
force_download=False
|
44 |
)
|
45 |
unet.requires_grad_(False)
|
46 |
+
tokenizer_one = AutoTokenizer.from_pretrained(
|
47 |
+
base_path,
|
48 |
+
subfolder="tokenizer",
|
49 |
+
revision=None,
|
50 |
+
use_fast=False,
|
51 |
+
force_download=False
|
52 |
+
)
|
53 |
+
tokenizer_two = AutoTokenizer.from_pretrained(
|
54 |
+
base_path,
|
55 |
+
subfolder="tokenizer_2",
|
56 |
+
revision=None,
|
57 |
+
use_fast=False,
|
58 |
+
force_download=False
|
59 |
+
)
|
60 |
+
noise_scheduler = DDPMScheduler.from_pretrained(base_path, subfolder="scheduler")
|
61 |
|
62 |
+
text_encoder_one = CLIPTextModel.from_pretrained(
|
63 |
+
base_path,
|
64 |
+
subfolder="text_encoder",
|
65 |
+
torch_dtype=torch.float16,
|
66 |
+
force_download=False
|
67 |
+
)
|
68 |
+
text_encoder_two = CLIPTextModelWithProjection.from_pretrained(
|
69 |
+
base_path,
|
70 |
+
subfolder="text_encoder_2",
|
71 |
+
torch_dtype=torch.float16,
|
72 |
+
force_download=False
|
73 |
+
)
|
74 |
+
image_encoder = CLIPVisionModelWithProjection.from_pretrained(
|
75 |
+
base_path,
|
76 |
+
subfolder="image_encoder",
|
77 |
+
torch_dtype=torch.float16,
|
78 |
+
force_download=False
|
79 |
+
)
|
80 |
+
vae = AutoencoderKL.from_pretrained(base_path,
|
81 |
+
subfolder="vae",
|
82 |
+
torch_dtype=torch.float16,
|
83 |
+
force_download=False
|
84 |
+
)
|
85 |
|
86 |
+
UNet_Encoder = UNet2DConditionModel_ref.from_pretrained(
|
87 |
+
base_path,
|
88 |
+
subfolder="unet_encoder",
|
89 |
+
torch_dtype=torch.float16,
|
90 |
+
force_download=False
|
91 |
+
)
|
92 |
|
93 |
parsing_model = Parsing(0)
|
94 |
openpose_model = OpenPose(0)
|
95 |
|
|
|
96 |
UNet_Encoder.requires_grad_(False)
|
97 |
image_encoder.requires_grad_(False)
|
98 |
vae.requires_grad_(False)
|
99 |
unet.requires_grad_(False)
|
100 |
text_encoder_one.requires_grad_(False)
|
101 |
text_encoder_two.requires_grad_(False)
|
102 |
+
tensor_transfrom = transforms.Compose(
|
103 |
+
[
|
104 |
+
transforms.ToTensor(),
|
105 |
+
transforms.Normalize([0.5], [0.5]),
|
106 |
+
]
|
107 |
+
)
|
108 |
|
109 |
pipe = TryonPipeline.from_pretrained(
|
110 |
+
base_path,
|
111 |
+
unet=unet,
|
112 |
+
vae=vae,
|
113 |
+
feature_extractor= CLIPImageProcessor(),
|
114 |
+
text_encoder = text_encoder_one,
|
115 |
+
text_encoder_2 = text_encoder_two,
|
116 |
+
tokenizer = tokenizer_one,
|
117 |
+
tokenizer_2 = tokenizer_two,
|
118 |
+
scheduler = noise_scheduler,
|
119 |
+
image_encoder=image_encoder,
|
120 |
+
torch_dtype=torch.float16,
|
121 |
+
force_download=False
|
122 |
)
|
123 |
pipe.unet_encoder = UNet_Encoder
|
124 |
|
|
|
304 |
'layers': [human_image] if not use_auto_mask else None,
|
305 |
'composite': None
|
306 |
}
|
307 |
+
clear_gpu_memory()
|
308 |
|
309 |
output_image, mask_image = start_tryon(human_dict, garment_image, description, use_auto_mask, use_auto_crop, denoise_steps, seed , categorie)
|
310 |
|