Saad0KH committed 9c9e9a9 (verified) · 1 parent: 3e1e1da

Update app.py

Files changed (1): app.py (+59, -70)
app.py CHANGED
@@ -1,6 +1,6 @@
 
 import os
-from flask import Flask, request, jsonify,send_file
+from flask import Flask, request, jsonify
 from PIL import Image
 from io import BytesIO
 import torch
@@ -34,95 +34,84 @@ from torchvision.transforms.functional import to_pil_image
 
 app = Flask(__name__)
 
-#base_path = 'yisol/IDM-VTON'
-base_path = huggingface_hub.hf_hub_download("yisol/IDM-VTON")
-
-example_path = os.path.join(os.path.dirname(__file__), 'example')
+# Download the UNet model
+unet_path = hf_hub_download(repo_id='yisol/IDM-VTON', subfolder='unet', filename='pytorch_model.bin')
+unet_config_path = hf_hub_download(repo_id='yisol/IDM-VTON', subfolder='unet', filename='config.json')
 
 unet = UNet2DConditionModel.from_pretrained(
-    base_path,
-    subfolder="unet",
+    pretrained_model_name_or_path=os.path.dirname(unet_path),
     torch_dtype=torch.float16,
     force_download=False
 )
 unet.requires_grad_(False)
-tokenizer_one = AutoTokenizer.from_pretrained(
-    base_path,
-    subfolder="tokenizer",
-    revision=None,
-    use_fast=False,
-    force_download=False
-)
-tokenizer_two = AutoTokenizer.from_pretrained(
-    base_path,
-    subfolder="tokenizer_2",
-    revision=None,
-    use_fast=False,
-    force_download=False
-)
-noise_scheduler = DDPMScheduler.from_pretrained(base_path, subfolder="scheduler")
+
+# Download the tokenizers
+tokenizer_one_path = hf_hub_download(repo_id='yisol/IDM-VTON', subfolder='tokenizer', filename='tokenizer.json')
+tokenizer_two_path = hf_hub_download(repo_id='yisol/IDM-VTON', subfolder='tokenizer_2', filename='tokenizer.json')
+
+tokenizer_one = AutoTokenizer.from_pretrained(os.path.dirname(tokenizer_one_path), use_fast=False)
+tokenizer_two = AutoTokenizer.from_pretrained(os.path.dirname(tokenizer_two_path), use_fast=False)
+
+# Download the noise scheduler
+noise_scheduler_path = hf_hub_download(repo_id='yisol/IDM-VTON', subfolder='scheduler', filename='scheduler.json')
+noise_scheduler = DDPMScheduler.from_pretrained(os.path.dirname(noise_scheduler_path))
 
-text_encoder_one = CLIPTextModel.from_pretrained(
-    base_path,
-    subfolder="text_encoder",
-    torch_dtype=torch.float16,
-    force_download=False
-)
-text_encoder_two = CLIPTextModelWithProjection.from_pretrained(
-    base_path,
-    subfolder="text_encoder_2",
-    torch_dtype=torch.float16,
-    force_download=False
-)
-image_encoder = CLIPVisionModelWithProjection.from_pretrained(
-    base_path,
-    subfolder="image_encoder",
-    torch_dtype=torch.float16,
-    force_download=False
-)
-vae = AutoencoderKL.from_pretrained(base_path,
-    subfolder="vae",
-    torch_dtype=torch.float16,
-    force_download=False
-)
+# Download and load the text encoders
+text_encoder_one_path = hf_hub_download(repo_id='yisol/IDM-VTON', subfolder='text_encoder', filename='pytorch_model.bin')
+text_encoder_one = CLIPTextModel.from_pretrained(os.path.dirname(text_encoder_one_path), torch_dtype=torch.float16)
+
+text_encoder_two_path = hf_hub_download(repo_id='yisol/IDM-VTON', subfolder='text_encoder_2', filename='pytorch_model.bin')
+text_encoder_two = CLIPTextModelWithProjection.from_pretrained(os.path.dirname(text_encoder_two_path), torch_dtype=torch.float16)
+
+# Download and load the image encoder
+image_encoder_path = hf_hub_download(repo_id='yisol/IDM-VTON', subfolder='image_encoder', filename='pytorch_model.bin')
+image_encoder = CLIPVisionModelWithProjection.from_pretrained(os.path.dirname(image_encoder_path), torch_dtype=torch.float16)
+
+# Download and load the VAE
+vae_path = hf_hub_download(repo_id='yisol/IDM-VTON', subfolder='vae', filename='pytorch_model.bin')
+vae = AutoencoderKL.from_pretrained(os.path.dirname(vae_path), torch_dtype=torch.float16)
 
-UNet_Encoder = UNet2DConditionModel_ref.from_pretrained(
-    base_path,
-    subfolder="unet_encoder",
-    torch_dtype=torch.float16,
-    force_download=False
-)
+# Download and load the UNet encoder
+unet_encoder_path = hf_hub_download(repo_id='yisol/IDM-VTON', subfolder='unet_encoder', filename='pytorch_model.bin')
+UNet_Encoder = UNet2DConditionModel_ref.from_pretrained(os.path.dirname(unet_encoder_path), torch_dtype=torch.float16)
+
+# Initialize the other models (parsing and openpose)
 parsing_model = Parsing(0)
 openpose_model = OpenPose(0)
 
+# Disable gradients
 UNet_Encoder.requires_grad_(False)
 image_encoder.requires_grad_(False)
 vae.requires_grad_(False)
 unet.requires_grad_(False)
 text_encoder_one.requires_grad_(False)
 text_encoder_two.requires_grad_(False)
+
+# Tensor transforms
 tensor_transfrom = transforms.Compose(
-    [
-        transforms.ToTensor(),
-        transforms.Normalize([0.5], [0.5]),
-    ]
-)
+    [
+        transforms.ToTensor(),
+        transforms.Normalize([0.5], [0.5]),
+    ]
+)
 
+# Configure the Tryon pipeline
 pipe = TryonPipeline.from_pretrained(
-    base_path,
-    unet=unet,
-    vae=vae,
-    feature_extractor= CLIPImageProcessor(),
-    text_encoder = text_encoder_one,
-    text_encoder_2 = text_encoder_two,
-    tokenizer = tokenizer_one,
-    tokenizer_2 = tokenizer_two,
-    scheduler = noise_scheduler,
-    image_encoder=image_encoder,
-    torch_dtype=torch.float16,
-    force_download=False
+    'yisol/IDM-VTON',
+    unet=unet,
+    vae=vae,
+    feature_extractor=CLIPImageProcessor(),
+    text_encoder=text_encoder_one,
+    text_encoder_2=text_encoder_two,
+    tokenizer=tokenizer_one,
+    tokenizer_2=tokenizer_two,
+    scheduler=noise_scheduler,
+    image_encoder=image_encoder,
+    torch_dtype=torch.float16,
+    force_download=False
 )
+
+# Attach the UNet encoder to the pipeline
 pipe.unet_encoder = UNet_Encoder
 
 def pil_to_binary_mask(pil_image, threshold=0):
@@ -271,7 +260,7 @@ def start_tryon(dict, garm_img, garment_des, is_checked, is_checked_crop, denois
         ip_adapter_image=garm_img.resize((768, 1024)),
         guidance_scale=2.0,
     )[0]
-
+    del prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds, generator
     if is_checked_crop:
        out_img = images[0].resize(crop_size)
        human_img_orig.paste(out_img, (int(left), int(top)))
@@ -307,7 +296,7 @@ def tryon():
        'layers': [human_image] if not use_auto_mask else None,
        'composite': None
    }
-    #clear_gpu_memory()
+    clear_gpu_memory()
 
    output_image, mask_image = start_tryon(human_dict, garment_image, description, use_auto_mask, use_auto_crop, denoise_steps, seed, categorie)
 
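A note on the loading change above: hf_hub_download fetches exactly one file and returns its local cache path, so the new code recovers each component folder with os.path.dirname and points from_pretrained at it. The sketch below illustrates that path behavior, plus an alternative using snapshot_download with allow_patterns that avoids hard-coding weight filenames such as pytorch_model.bin; the alternative is illustrative only and is not part of this commit.

# How the per-file approach resolves paths: hf_hub_download returns the
# cached file's local path, and dirname gives the component folder.
import os
from huggingface_hub import hf_hub_download, snapshot_download

config_path = hf_hub_download(repo_id='yisol/IDM-VTON', subfolder='unet', filename='config.json')
unet_dir = os.path.dirname(config_path)  # .../snapshots/<revision>/unet

# Illustrative alternative (not in this commit): download the whole unet/
# folder in one call, so from_pretrained can pick up whichever weight format
# the repo actually ships (pytorch_model.bin or model.safetensors).
local_root = snapshot_download(repo_id='yisol/IDM-VTON', allow_patterns=['unet/*'])
# unet = UNet2DConditionModel.from_pretrained(local_root, subfolder='unet', torch_dtype=torch.float16)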
 
 
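The re-enabled clear_gpu_memory() call and the new del of the prompt embeddings both aim to keep GPU memory flat across requests. The helper's definition sits outside this diff; the following is a minimal sketch of the usual pattern, an assumption rather than the Space's actual implementation.

import gc
import torch

# Hypothetical body for clear_gpu_memory(); the real definition is not shown
# in this diff.
def clear_gpu_memory():
    # Collect unreachable Python objects so their CUDA tensors can be freed,
    gc.collect()
    # then return cached but unused CUDA blocks to the driver.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

Note that del only removes names; the underlying tensors are reclaimed once no references remain, so a cache flush like the one above is what actually returns memory to the allocator's free pool.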