
Saad0KH committed · Commit 90382c3 · verified · 1 Parent(s): 8658745

Update app.py

Files changed (1)
  1. app.py +36 -47
app.py CHANGED
@@ -1,10 +1,10 @@
 import os
-from flask import Flask, request, jsonify
+from flask import Flask, request, jsonify, send_file
 from PIL import Image
 from io import BytesIO
 import torch
 import base64
 import io
 import logging
 import gradio as gr
 import numpy as np
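
Note: the only import change here brings Flask's send_file into scope. A minimal sketch of how send_file is typically used to return a PIL image from a route; the helper name below is hypothetical, and the route that uses it sits outside this diff:

    from io import BytesIO
    from flask import send_file

    def image_response(img):
        # Serialize the PIL image to an in-memory PNG and stream it back.
        buf = BytesIO()
        img.save(buf, format='PNG')
        buf.seek(0)
        return send_file(buf, mimetype='image/png')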
@@ -33,52 +33,48 @@ from torchvision.transforms.functional import to_pil_image
 
 app = Flask(__name__)
 
-# Download the UNet model
-unet_path = hf_hub_download(repo_id='yisol/IDM-VTON', subfolder='unet', filename='pytorch_model.bin')
-unet_config_path = hf_hub_download(repo_id='yisol/IDM-VTON', subfolder='unet', filename='config.json')
+base_path = 'yisol/IDM-VTON'
+
+# Download the required files via huggingface_hub
+def download_model_files(base_path, filename):
+    return hf_hub_download(repo_id=base_path, filename=filename)
+
+# Download and load the model files
+unet_path = download_model_files(base_path, "unet/pytorch_model.bin")
 unet = UNet2DConditionModel.from_pretrained(
-    pretrained_model_name_or_path=os.path.dirname(unet_path),
-    torch_dtype=torch.float16,
-    force_download=False
+    unet_path,
+    torch_dtype=torch.float16
 )
 unet.requires_grad_(False)
 
-# Download the tokenizers
-tokenizer_one_path = hf_hub_download(repo_id='yisol/IDM-VTON', subfolder='tokenizer', filename='tokenizer.json')
-tokenizer_two_path = hf_hub_download(repo_id='yisol/IDM-VTON', subfolder='tokenizer_2', filename='tokenizer.json')
-
-tokenizer_one = AutoTokenizer.from_pretrained(os.path.dirname(tokenizer_one_path), use_fast=False)
-tokenizer_two = AutoTokenizer.from_pretrained(os.path.dirname(tokenizer_two_path), use_fast=False)
+tokenizer_one_path = download_model_files(base_path, "tokenizer/config.json")
+tokenizer_one = AutoTokenizer.from_pretrained(tokenizer_one_path, use_fast=False)
+
+tokenizer_two_path = download_model_files(base_path, "tokenizer_2/config.json")
+tokenizer_two = AutoTokenizer.from_pretrained(tokenizer_two_path, use_fast=False)
 
-# Download the noise scheduler
-noise_scheduler_path = hf_hub_download(repo_id='yisol/IDM-VTON', subfolder='scheduler', filename='scheduler.json')
-noise_scheduler = DDPMScheduler.from_pretrained(os.path.dirname(noise_scheduler_path))
+noise_scheduler_path = download_model_files(base_path, "scheduler/scheduler_config.json")
+noise_scheduler = DDPMScheduler.from_pretrained(noise_scheduler_path)
 
-# Download and load the text encoders
-text_encoder_one_path = hf_hub_download(repo_id='yisol/IDM-VTON', subfolder='text_encoder', filename='pytorch_model.bin')
-text_encoder_one = CLIPTextModel.from_pretrained(os.path.dirname(text_encoder_one_path), torch_dtype=torch.float16)
+text_encoder_one_path = download_model_files(base_path, "text_encoder/pytorch_model.bin")
+text_encoder_one = CLIPTextModel.from_pretrained(text_encoder_one_path, torch_dtype=torch.float16)
 
-text_encoder_two_path = hf_hub_download(repo_id='yisol/IDM-VTON', subfolder='text_encoder_2', filename='pytorch_model.bin')
-text_encoder_two = CLIPTextModelWithProjection.from_pretrained(os.path.dirname(text_encoder_two_path), torch_dtype=torch.float16)
+text_encoder_two_path = download_model_files(base_path, "text_encoder_2/pytorch_model.bin")
+text_encoder_two = CLIPTextModelWithProjection.from_pretrained(text_encoder_two_path, torch_dtype=torch.float16)
 
-# Download and load the image encoder
-image_encoder_path = hf_hub_download(repo_id='yisol/IDM-VTON', subfolder='image_encoder', filename='pytorch_model.bin')
-image_encoder = CLIPVisionModelWithProjection.from_pretrained(os.path.dirname(image_encoder_path), torch_dtype=torch.float16)
+image_encoder_path = download_model_files(base_path, "image_encoder/pytorch_model.bin")
+image_encoder = CLIPVisionModelWithProjection.from_pretrained(image_encoder_path, torch_dtype=torch.float16)
 
-# Download and load the VAE
-vae_path = hf_hub_download(repo_id='yisol/IDM-VTON', subfolder='vae', filename='pytorch_model.bin')
-vae = AutoencoderKL.from_pretrained(os.path.dirname(vae_path), torch_dtype=torch.float16)
+vae_path = download_model_files(base_path, "vae/pytorch_model.bin")
+vae = AutoencoderKL.from_pretrained(vae_path, torch_dtype=torch.float16)
 
-# Download and load the UNet encoder
-unet_encoder_path = hf_hub_download(repo_id='yisol/IDM-VTON', subfolder='unet_encoder', filename='pytorch_model.bin')
-UNet_Encoder = UNet2DConditionModel_ref.from_pretrained(os.path.dirname(unet_encoder_path), torch_dtype=torch.float16)
+unet_encoder_path = download_model_files(base_path, "unet_encoder/pytorch_model.bin")
+UNet_Encoder = UNet2DConditionModel_ref.from_pretrained(unet_encoder_path, torch_dtype=torch.float16)
 
-# Initialize the other models (parsing and openpose)
 parsing_model = Parsing(0)
 openpose_model = OpenPose(0)
 
-# Disable gradients
+# Mark the models as non-trainable
 UNet_Encoder.requires_grad_(False)
 image_encoder.requires_grad_(False)
 vae.requires_grad_(False)
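
One caveat with this refactor: hf_hub_download(filename="unet/pytorch_model.bin") returns the path of a single file, while the from_pretrained methods in diffusers and transformers expect a repo id or a directory holding both the config and the weights, which is why the previous revision wrapped each path in os.path.dirname. A minimal alternative sketch, assuming the standard layout of the yisol/IDM-VTON repo and using the stock diffusers class for illustration (app.py may import its own UNet variant):

    import torch
    from transformers import AutoTokenizer
    from diffusers import UNet2DConditionModel

    # Let the libraries resolve the subfolder, download what is
    # missing, and cache it; no manual path handling needed.
    unet = UNet2DConditionModel.from_pretrained(
        'yisol/IDM-VTON', subfolder='unet', torch_dtype=torch.float16)
    tokenizer_one = AutoTokenizer.from_pretrained(
        'yisol/IDM-VTON', subfolder='tokenizer', use_fast=False)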
@@ -86,17 +82,13 @@ unet.requires_grad_(False)
 text_encoder_one.requires_grad_(False)
 text_encoder_two.requires_grad_(False)
 
-# Tensor transforms
-tensor_transfrom = transforms.Compose(
-    [
-        transforms.ToTensor(),
-        transforms.Normalize([0.5], [0.5]),
-    ]
-)
+# Other transforms and the pipeline
+tensor_transfrom = transforms.Compose([
+    transforms.ToTensor(),
+    transforms.Normalize([0.5], [0.5]),
+])
 
-# Configure the Tryon pipeline
 pipe = TryonPipeline.from_pretrained(
-    'yisol/IDM-VTON',
     unet=unet,
     vae=vae,
     feature_extractor=CLIPImageProcessor(),
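
For reference, the reshaped tensor_transfrom maps a PIL image to a CHW float tensor in [-1, 1], the usual input range for diffusion models:

    from PIL import Image
    from torchvision import transforms

    tensor_transfrom = transforms.Compose([
        transforms.ToTensor(),               # uint8 HWC [0, 255] -> float CHW [0, 1]
        transforms.Normalize([0.5], [0.5]),  # (x - 0.5) / 0.5 -> [-1, 1]
    ])
    x = tensor_transfrom(Image.new('RGB', (768, 1024)))  # values in [-1, 1]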
@@ -106,11 +98,8 @@ pipe = TryonPipeline.from_pretrained(
     tokenizer_2=tokenizer_two,
     scheduler=noise_scheduler,
     image_encoder=image_encoder,
-    torch_dtype=torch.float16,
-    force_download=False
+    torch_dtype=torch.float16
 )
-
-# Attach the UNet encoder to the pipeline
 pipe.unet_encoder = UNet_Encoder
 
 def pil_to_binary_mask(pil_image, threshold=0):
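
Note that this revision also drops the 'yisol/IDM-VTON' positional argument from TryonPipeline.from_pretrained (the removed line in the previous hunk). Diffusers-style from_pretrained calls require pretrained_model_name_or_path to locate any components not passed explicitly, so the call most likely still needs it. A sketch keeping only the arguments visible in this diff, with the hidden keyword arguments left elided:

    pipe = TryonPipeline.from_pretrained(
        'yisol/IDM-VTON',  # repo id (or local path) is still required
        unet=unet,
        vae=vae,
        feature_extractor=CLIPImageProcessor(),
        # ... keyword arguments elided by the diff context ...
        tokenizer_2=tokenizer_two,
        scheduler=noise_scheduler,
        image_encoder=image_encoder,
        torch_dtype=torch.float16,
    )
    pipe.unet_encoder = UNet_Encoder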
@@ -259,7 +248,7 @@ def start_tryon(dict, garm_img, garment_des, is_checked, is_checked_crop, denois
         ip_adapter_image=garm_img.resize((768, 1024)),
         guidance_scale=2.0,
     )[0]
-    del prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds, generator, images
+
     if is_checked_crop:
         out_img = images[0].resize(crop_size)
         human_img_orig.paste(out_img, (int(left), int(top)))
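
Dropping the del line fixes a real bug: the old code deleted images and then immediately read images[0] in the is_checked_crop branch, which would raise a NameError. If the explicit cleanup is still wanted, a sketch that frees the embeddings while keeping images alive (variable names as in start_tryon):

    del prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, \
        negative_pooled_prompt_embeds, generator
    torch.cuda.empty_cache()  # optionally release cached CUDA blocks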
@@ -295,7 +284,7 @@ def tryon():
         'layers': [human_image] if not use_auto_mask else None,
         'composite': None
     }
-    clear_gpu_memory()
+    #clear_gpu_memory()
 
     output_image, mask_image = start_tryon(human_dict, garment_image, description, use_auto_mask, use_auto_crop, denoise_steps, seed , categorie)
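
clear_gpu_memory() is commented out here rather than removed. Its definition is not shown in this diff; such helpers commonly look like the sketch below, which is an assumption about the shape of the function, not the app's actual implementation:

    import gc
    import torch

    def clear_gpu_memory():
        # Drop unreachable Python objects, then hand cached
        # CUDA memory back to the driver.
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()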