Spaces: Saad0KH / Running on Zero

Saad0KH committed · verified
Commit 85e57bc · 1 Parent(s): eb7b596

Update app.py

Files changed (1):
  1. app.py +140 -35
app.py CHANGED
@@ -123,42 +123,140 @@ def pil_to_binary_mask(pil_image, threshold=0):
     return output_mask
 
 @spaces.GPU
-def start_tryon_full_body(tops_image: Image.Image, bottoms_image: Image.Image, model_parse_tops: Image.Image, model_parse_bottoms: Image.Image, keypoints_tops: dict, keypoints_bottoms: dict):
-    """
-    Combines tops and bottoms images into a single output image after processing with get_mask_location.
-    """
-
-    # Get mask for the tops (upper body)
-    mask_tops, _ = get_mask_location('hd', "upper_body", model_parse_tops, keypoints_tops)
-
-    # Get mask for the bottoms (lower body)
-    mask_bottoms, _ = get_mask_location('hd', "lower_body", model_parse_bottoms, keypoints_bottoms)
-
-    # Convert masks to NumPy arrays
-    mask_tops_np = np.array(mask_tops)
-    mask_bottoms_np = np.array(mask_bottoms)
-
-    # Convert tops and bottoms images to NumPy arrays
-    tops_np = np.array(tops_image)
-    bottoms_np = np.array(bottoms_image)
-
-    # Ensure that tops and bottoms images have the same dimensions as their masks
-    tops_resized = cv2.resize(tops_np, (mask_tops_np.shape[1], mask_tops_np.shape[0]))
-    bottoms_resized = cv2.resize(bottoms_np, (mask_bottoms_np.shape[1], mask_bottoms_np.shape[0]))
-
-    # Create a blank canvas for the final output image
-    combined_image = np.zeros_like(tops_resized)
-
-    # Apply the tops mask to the combined image
-    combined_image[mask_tops_np > 0] = tops_resized[mask_tops_np > 0]
-
-    # Apply the bottoms mask to the combined image
-    combined_image[mask_bottoms_np > 0] = bottoms_resized[mask_bottoms_np > 0]
-
-    # Convert the final combined image back to a PIL image
-    combined_image_pil = Image.fromarray(combined_image)
-
-    return combined_image_pil
+def start_tryon_full_body(tops_img, bottoms_img, model_parse_tops, model_parse_bottoms, is_checked, is_checked_crop, denoise_steps, seed):
+    device = "cuda"
+    openpose_model.preprocessor.body_estimation.model.to(device)
+    pipe.to(device)
+    pipe.unet_encoder.to(device)
+
+    # Convert and resize the garment images
+    tops_img = tops_img.convert("RGB").resize((768, 1024))
+    bottoms_img = bottoms_img.convert("RGB").resize((768, 1024))
+
+    # Process the tops image
+    human_img_orig_tops = model_parse_tops.convert("RGB").resize((768, 1024))
+
+    if is_checked:
+        # Automatically generate the upper-body mask for the tops
+        mask_tops, _ = get_mask_location('hd', "upper_body", model_parse_tops, {})
+        mask_tops = mask_tops.resize((768, 1024))
+    else:
+        mask_tops = pil_to_binary_mask(model_parse_tops.convert("RGB").resize((768, 1024)))
+
+    mask_gray_tops = (1 - transforms.ToTensor()(mask_tops)) * tensor_transfrom(human_img_orig_tops)
+    mask_gray_tops = to_pil_image((mask_gray_tops + 1.0) / 2.0)
+
+    human_img_arg_tops = _apply_exif_orientation(human_img_orig_tops.resize((384, 512)))
+    human_img_arg_tops = convert_PIL_to_numpy(human_img_arg_tops, format="BGR")
+
+    # Run DensePose to build the pose conditioning image
+    args = apply_net.create_argument_parser().parse_args(('show', './configs/densepose_rcnn_R_50_FPN_s1x.yaml', './ckpt/densepose/model_final_162be9.pkl', 'dp_segm', '-v', '--opts', 'MODEL.DEVICE', 'cuda'))
+    pose_img_tops = args.func(args, human_img_arg_tops)
+    pose_img_tops = pose_img_tops[:, :, ::-1]
+    pose_img_tops = Image.fromarray(pose_img_tops).resize((768, 1024))
+
+    # Process the bottoms image
+    human_img_orig_bottoms = model_parse_bottoms.convert("RGB").resize((768, 1024))
+
+    if is_checked:
+        # Automatically generate the lower-body mask for the bottoms
+        mask_bottoms, _ = get_mask_location('hd', "lower_body", model_parse_bottoms, {})
+        mask_bottoms = mask_bottoms.resize((768, 1024))
+    else:
+        mask_bottoms = pil_to_binary_mask(model_parse_bottoms.convert("RGB").resize((768, 1024)))
+
+    mask_gray_bottoms = (1 - transforms.ToTensor()(mask_bottoms)) * tensor_transfrom(human_img_orig_bottoms)
+    mask_gray_bottoms = to_pil_image((mask_gray_bottoms + 1.0) / 2.0)
+
+    human_img_arg_bottoms = _apply_exif_orientation(human_img_orig_bottoms.resize((384, 512)))
+    human_img_arg_bottoms = convert_PIL_to_numpy(human_img_arg_bottoms, format="BGR")
+
+    pose_img_bottoms = args.func(args, human_img_arg_bottoms)
+    pose_img_bottoms = pose_img_bottoms[:, :, ::-1]
+    pose_img_bottoms = Image.fromarray(pose_img_bottoms).resize((768, 1024))
+
+    with torch.no_grad():
+        with torch.cuda.amp.autocast():
+            prompt_tops = "model is wearing tops"
+            negative_prompt_tops = "monochrome, lowres, bad anatomy, worst quality, low quality"
+            prompt_bottoms = "model is wearing bottoms"
+            negative_prompt_bottoms = "monochrome, lowres, bad anatomy, worst quality, low quality"
+
+            # Encode prompts
+            (
+                prompt_embeds_tops,
+                negative_prompt_embeds_tops,
+                pooled_prompt_embeds_tops,
+                negative_pooled_prompt_embeds_tops,
+            ) = pipe.encode_prompt(
+                prompt_tops,
+                num_images_per_prompt=1,
+                do_classifier_free_guidance=True,
+                negative_prompt=negative_prompt_tops,
+            )
+
+            (
+                prompt_embeds_bottoms,
+                negative_prompt_embeds_bottoms,
+                pooled_prompt_embeds_bottoms,
+                negative_pooled_prompt_embeds_bottoms,
+            ) = pipe.encode_prompt(
+                prompt_bottoms,
+                num_images_per_prompt=1,
+                do_classifier_free_guidance=True,
+                negative_prompt=negative_prompt_bottoms,
+            )
+
+            pose_img_tops = tensor_transfrom(pose_img_tops).unsqueeze(0).to(device, torch.float16)
+            pose_img_bottoms = tensor_transfrom(pose_img_bottoms).unsqueeze(0).to(device, torch.float16)
+            garm_tensor_tops = tensor_transfrom(tops_img).unsqueeze(0).to(device, torch.float16)
+            garm_tensor_bottoms = tensor_transfrom(bottoms_img).unsqueeze(0).to(device, torch.float16)
+
+            generator = torch.Generator(device).manual_seed(seed) if seed is not None else None
+            images_tops = pipe(
+                prompt_embeds=prompt_embeds_tops.to(device, torch.float16),
+                negative_prompt_embeds=negative_prompt_embeds_tops.to(device, torch.float16),
+                pooled_prompt_embeds=pooled_prompt_embeds_tops.to(device, torch.float16),
+                negative_pooled_prompt_embeds=negative_pooled_prompt_embeds_tops.to(device, torch.float16),
+                num_inference_steps=denoise_steps,
+                generator=generator,
+                strength=1.0,
+                pose_img=pose_img_tops.to(device, torch.float16),
+                text_embeds_cloth=prompt_embeds_tops.to(device, torch.float16),
+                cloth=garm_tensor_tops.to(device, torch.float16),
+                mask_image=mask_tops,
+                image=human_img_orig_tops,
+                height=1024,
+                width=768,
+                ip_adapter_image=tops_img.resize((768, 1024)),
+                guidance_scale=2.0,
+            )[0]
+
+            images_bottoms = pipe(
+                prompt_embeds=prompt_embeds_bottoms.to(device, torch.float16),
+                negative_prompt_embeds=negative_prompt_embeds_bottoms.to(device, torch.float16),
+                pooled_prompt_embeds=pooled_prompt_embeds_bottoms.to(device, torch.float16),
+                negative_pooled_prompt_embeds=negative_pooled_prompt_embeds_bottoms.to(device, torch.float16),
+                num_inference_steps=denoise_steps,
+                generator=generator,
+                strength=1.0,
+                pose_img=pose_img_bottoms.to(device, torch.float16),
+                text_embeds_cloth=prompt_embeds_bottoms.to(device, torch.float16),
+                cloth=garm_tensor_bottoms.to(device, torch.float16),
+                mask_image=mask_bottoms,
+                image=human_img_orig_bottoms,
+                height=1024,
+                width=768,
+                ip_adapter_image=bottoms_img.resize((768, 1024)),
+                guidance_scale=2.0,
+            )[0]
+
+    # Combine images; pipe() returns a list of PIL images, so take the first of each
+    combined_img = Image.new("RGB", (768, 2048))  # Height is 2x the original to accommodate both images
+    combined_img.paste(images_tops[0], (0, 0))
+    combined_img.paste(images_bottoms[0], (0, 1024))  # Paste bottoms image below tops
+
+    return combined_img, mask_gray_tops  # Or another mask, depending on your needs
 
 
 @spaces.GPU
@@ -313,33 +411,40 @@ def tryon():
 def tryon_full():
     data = request.json
 
-    # Decode input images
+    # Decode images
     tops_image = decode_image_from_base64(data['tops_image'])
     bottoms_image = decode_image_from_base64(data['bottoms_image'])
     model_parse_tops = decode_image_from_base64(data['model_parse_tops'])
     model_parse_bottoms = decode_image_from_base64(data['model_parse_bottoms'])
 
-    # Decode keypoints
-    keypoints_tops = data.get('keypoints_tops', {})
-    keypoints_bottoms = data.get('keypoints_bottoms', {})
+    # Retrieve additional parameters
+    is_checked = data.get('use_auto_mask', True)
+    is_checked_crop = data.get('use_auto_crop', False)
+    denoise_steps = int(data.get('denoise_steps', 30))
+    seed = int(data.get('seed', 42))
 
-    # Call the start_tryon function
-    output_image = start_tryon_full_body(
+    # Call the start_tryon_full_body function
+    output_image, mask_image = start_tryon_full_body(
        tops_image,
        bottoms_image,
        model_parse_tops,
        model_parse_bottoms,
-       keypoints_tops,
-       keypoints_bottoms
+       is_checked,
+       is_checked_crop,
+       denoise_steps,
+       seed
    )
 
     # Convert output image to base64
     output_base64 = encode_image_to_base64(output_image)
+    mask_base64 = encode_image_to_base64(mask_image)
 
     return jsonify({
-        'output_image': output_base64
+        'output_image': output_base64,
+        'mask_image': mask_base64
    })
 
+
 if __name__ == "__main__":
     app.run(debug=True, host="0.0.0.0", port=7860)
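
For reference, a minimal client-side sketch of how the updated handler could be exercised once this commit is deployed. It assumes the handler is exposed at /tryon_full (the route decorator is not shown in this diff) and that the server is reachable locally on the port configured in app.run; the JSON keys mirror exactly what tryon_full reads from request.json, and the file names are placeholders.

import base64

import requests


def encode_file(path: str) -> str:
    # Read an image file and return its contents as a base64 string,
    # matching what decode_image_from_base64 is assumed to expect.
    with open(path, "rb") as f:
        return base64.b64encode(f.read()).decode("utf-8")


payload = {
    "tops_image": encode_file("tops.jpg"),
    "bottoms_image": encode_file("bottoms.jpg"),
    "model_parse_tops": encode_file("model_tops.jpg"),
    "model_parse_bottoms": encode_file("model_bottoms.jpg"),
    "use_auto_mask": True,     # is_checked
    "use_auto_crop": False,    # is_checked_crop (unused by the new function body)
    "denoise_steps": 30,
    "seed": 42,
}

# Endpoint path and port are assumptions based on the handler and app.run above.
resp = requests.post("http://localhost:7860/tryon_full", json=payload)
resp.raise_for_status()
data = resp.json()

# The handler returns both the stacked try-on image and the gray tops mask;
# writing the decoded bytes straight to .png assumes encode_image_to_base64
# serializes PNG data.
for key in ("output_image", "mask_image"):
    with open(f"{key}.png", "wb") as f:
        f.write(base64.b64decode(data[key]))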