Keshabwi66 committed on
Commit 07e3b9e · verified · Parent(s): 2de4d81

Update app.py

Files changed (1): app.py (+17, -20)

app.py CHANGED
@@ -40,11 +40,9 @@ class Args:
         self.seed = 42
         self.guidance_scale = 2.0
         self.mixed_precision = None
-
-# Determine the device to be used for computations (CUDA if available)
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
-logger = get_logger(__name__, log_level="INFO")
+device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
+
 
 def pil_to_tensor(images):
     images = np.array(images).astype(np.float32) / 255.0
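A side note on the device change in this hunk: the removed `torch.device(...)` object and the added `'cuda:0'` string are interchangeable targets for `.to()`; the string form just pins GPU 0 explicitly. A minimal sketch, assuming only `torch` is installed:

import torch

# Both forms are valid arguments to Module.to() / Tensor.to().
device_obj = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device_str = "cuda:0" if torch.cuda.is_available() else "cpu"

x = torch.zeros(1).to(device_str)  # on a single-GPU machine, same as .to(device_obj)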
@@ -56,44 +54,46 @@ def pil_to_tensor(images):
 args = Args()
 
 # Define the data type for model weights
-weight_dtype = torch.float32
+weight_dtype = torch.float16
 
 if args.seed is not None:
     set_seed(args.seed)
 
 
 # Load scheduler, tokenizer and models.
+
 noise_scheduler = DDPMScheduler.from_pretrained(args.pretrained_model_name_or_path, subfolder="scheduler")
 vae = AutoencoderKL.from_pretrained(
     args.pretrained_model_name_or_path,
     subfolder="vae",
-    torch_dtype=torch.float32,
+    torch_dtype=torch.float16,
 )
 unet = UNet2DConditionModel.from_pretrained(
     args.pretrained_model_name_or_path,
     subfolder="unet",
-    torch_dtype=torch.float32,
+    torch_dtype=torch.float16,
 )
 image_encoder = CLIPVisionModelWithProjection.from_pretrained(
     args.pretrained_model_name_or_path,
     subfolder="image_encoder",
-    torch_dtype=torch.float32,
+    torch_dtype=torch.float16,
 )
 unet_encoder = UNet2DConditionModel_ref.from_pretrained(
     args.pretrained_model_name_or_path,
     subfolder="unet_encoder",
-    torch_dtype=torch.float32,
+    torch_dtype=torch.float16,
 )
 text_encoder_one = CLIPTextModel.from_pretrained(
     args.pretrained_model_name_or_path,
     subfolder="text_encoder",
-    torch_dtype=torch.float32,
+    torch_dtype=torch.float16,
 )
 text_encoder_two = CLIPTextModelWithProjection.from_pretrained(
     args.pretrained_model_name_or_path,
     subfolder="text_encoder_2",
-    torch_dtype=torch.float32,
+    torch_dtype=torch.float16,
 )
+
 tokenizer_one = AutoTokenizer.from_pretrained(
     args.pretrained_model_name_or_path,
     subfolder="tokenizer",
@@ -113,9 +113,8 @@ image_encoder.requires_grad_(False)
 unet_encoder.requires_grad_(False)
 text_encoder_one.requires_grad_(False)
 text_encoder_two.requires_grad_(False)
-unet_encoder.to(device, weight_dtype)
-unet.eval()
-unet_encoder.eval()
+unet_encoder.requires_grad_(False)
+
 
 pipe = TryonPipeline.from_pretrained(
     args.pretrained_model_name_or_path,
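One caveat worth flagging on this hunk: the added line duplicates the `unet_encoder.requires_grad_(False)` already present three lines up, and `requires_grad_(False)` is not a substitute for `.eval()`. Dropping the `.eval()` calls is likely harmless here, since Hugging Face `from_pretrained` returns models in eval mode, but the two switches are independent:

# Freezing parameters only disables gradient tracking; it does not
# change the train/eval behavior of layers such as dropout.
unet_encoder.requires_grad_(False)  # no gradients will be computed
unet_encoder.eval()                 # deterministic inference behavior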
@@ -129,13 +128,11 @@ pipe = TryonPipeline.from_pretrained(
     scheduler = noise_scheduler,
     image_encoder=image_encoder,
     unet_encoder = unet_encoder,
-    torch_dtype=torch.float32,
-).to(device)
-# pipe.enable_sequential_cpu_offload()
-# pipe.enable_model_cpu_offload()
-# pipe.enable_vae_slicing()
-# Function to generate the image based on inputs
+    torch_dtype=torch.float16,
+)
+
 def generate_virtual_try_on(person_image, cloth_image, mask_image, pose_image,cloth_des):
+    pipe.to(device)
     # Prepare the input images as tensors
     person_image = person_image.resize((args.width, args.height))
     cloth_image = cloth_image.resize((args.width, args.height))
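Deferring `pipe.to(device)` into the handler, as this hunk does, is a common pattern on Hugging Face Spaces, where the GPU may only be attached while a request is running (e.g., ZeroGPU). A hypothetical sketch of that pattern, assuming the `spaces` package is available; the commit itself shows only the bare `pipe.to(device)` call:

import spaces  # assumption: ZeroGPU-style Space; not shown in this commit

@spaces.GPU
def generate_virtual_try_on(person_image, cloth_image, mask_image, pose_image, cloth_des):
    pipe.to(device)  # move weights to the GPU attached for this call
    ...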
 