Spaces:
Saad0KH
/
Running on Zero

Saad0KH committed on
Commit
f0876aa
·
verified ·
1 Parent(s): 5cda59f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +172 -157
app.py CHANGED
@@ -123,142 +123,25 @@ def pil_to_binary_mask(pil_image, threshold=0):
123
  output_mask = Image.fromarray(mask)
124
  return output_mask
125
 
126
def _prepare_tryon_inputs(model_img, category, is_checked, densepose_args):
    """Build the person image, mask, mask preview and DensePose image for one garment.

    Args:
        model_img: PIL image of the model for this garment category.
        category: Category string forwarded to ``get_mask_location``
            (``"upper_body"`` or ``"lower_body"``).
        is_checked: When true, the mask comes from ``get_mask_location``;
            otherwise ``model_img`` itself is binarised with
            ``pil_to_binary_mask``.
        densepose_args: Parsed ``apply_net`` arguments, built once by the caller.

    Returns:
        ``(human_img, mask, mask_gray, pose_img)``, all at 768x1024.
    """
    human_img = model_img.convert("RGB").resize((768, 1024))

    if is_checked:
        # NOTE(review): the raw model image and an empty keypoint dict are passed
        # here, while other call sites in this file pass the parsing_model output
        # and OpenPose keypoints - confirm this call is valid.
        mask, _ = get_mask_location('hd', category, model_img, {})
        mask = mask.resize((768, 1024))
    else:
        mask = pil_to_binary_mask(model_img.convert("RGB").resize((768, 1024)))

    # Preview of the person image with the masked region blanked out.
    mask_gray = (1 - transforms.ToTensor()(mask)) * tensor_transfrom(human_img)
    mask_gray = to_pil_image((mask_gray + 1.0) / 2.0)

    densepose_input = _apply_exif_orientation(human_img.resize((384, 512)))
    densepose_input = convert_PIL_to_numpy(densepose_input, format="BGR")

    pose_img = densepose_args.func(densepose_args, densepose_input)
    pose_img = pose_img[:, :, ::-1]  # reverse the channel axis
    pose_img = Image.fromarray(pose_img).resize((768, 1024))

    return human_img, mask, mask_gray, pose_img


def _run_tryon_pass(prompt, garment_img, human_img, mask, pose_img, denoise_steps, generator, device):
    """Encode ``prompt`` and run the pipeline once; return the first generated image."""
    negative_prompt = "monochrome, lowres, bad anatomy, worst quality, low quality"

    (
        prompt_embeds,
        negative_prompt_embeds,
        pooled_prompt_embeds,
        negative_pooled_prompt_embeds,
    ) = pipe.encode_prompt(
        prompt,
        num_images_per_prompt=1,
        do_classifier_free_guidance=True,
        negative_prompt=negative_prompt,
    )

    pose_tensor = tensor_transfrom(pose_img).unsqueeze(0).to(device, torch.float16)
    garm_tensor = tensor_transfrom(garment_img).unsqueeze(0).to(device, torch.float16)

    images = pipe(
        prompt_embeds=prompt_embeds.to(device, torch.float16),
        negative_prompt_embeds=negative_prompt_embeds.to(device, torch.float16),
        pooled_prompt_embeds=pooled_prompt_embeds.to(device, torch.float16),
        negative_pooled_prompt_embeds=negative_pooled_prompt_embeds.to(device, torch.float16),
        num_inference_steps=denoise_steps,
        generator=generator,
        strength=1.0,
        pose_img=pose_tensor,
        text_embeds_cloth=prompt_embeds.to(device, torch.float16),
        cloth=garm_tensor,
        mask_image=mask,
        image=human_img,
        height=1024,
        width=768,
        ip_adapter_image=garment_img.resize((768, 1024)),
        guidance_scale=2.0,
    )[0]

    # The pipeline output is indexed twice elsewhere in this file
    # (``pipe(...)[0]`` then ``images[0]``), so the first image is returned here.
    return images[0]


@spaces.GPU
def start_tryon_full_body(tops_img, bottoms_img, model_parse_tops, model_parse_bottoms, is_checked, is_checked_crop, denoise_steps, seed):
    """Run separate tops and bottoms try-on passes and stack the two results.

    Args:
        tops_img: PIL image of the upper garment.
        bottoms_img: PIL image of the lower garment.
        model_parse_tops: PIL model image used for the tops pass.
        model_parse_bottoms: PIL model image used for the bottoms pass.
        is_checked: When true, masks come from ``get_mask_location``; otherwise
            the model images are binarised with ``pil_to_binary_mask``.
        is_checked_crop: Accepted for interface compatibility; not used here.
        denoise_steps: Number of inference steps for each pipeline call.
        seed: Seed for the shared torch generator, or ``None``.

    Returns:
        ``(combined_img, mask_gray_tops)``: a 768x2048 RGB image with the tops
        result above the bottoms result, and the mask preview of the tops pass.
    """
    device = "cuda"
    openpose_model.preprocessor.body_estimation.model.to(device)
    pipe.to(device)
    pipe.unet_encoder.to(device)

    # Convert and resize the garment images.
    tops_img = tops_img.convert("RGB").resize((768, 1024))
    bottoms_img = bottoms_img.convert("RGB").resize((768, 1024))

    # Parsed once and shared by both DensePose runs.
    densepose_args = apply_net.create_argument_parser().parse_args(['show', './configs/densepose_rcnn_R_50_FPN_s1x.yaml', './ckpt/densepose/model_final_162be9.pkl', 'dp_segm', '-v', '--opts', 'MODEL.DEVICE', 'cuda'])

    human_tops, mask_tops, mask_gray_tops, pose_tops = _prepare_tryon_inputs(
        model_parse_tops, "upper_body", is_checked, densepose_args
    )
    human_bottoms, mask_bottoms, _, pose_bottoms = _prepare_tryon_inputs(
        model_parse_bottoms, "lower_body", is_checked, densepose_args
    )

    with torch.no_grad(), torch.cuda.amp.autocast():
        # One generator is shared, so the bottoms pass continues the RNG stream
        # left by the tops pass.
        generator = torch.Generator(device).manual_seed(seed) if seed is not None else None
        image_tops = _run_tryon_pass(
            "model is wearing tops", tops_img, human_tops, mask_tops, pose_tops,
            denoise_steps, generator, device,
        )
        image_bottoms = _run_tryon_pass(
            "model is wearing bottoms", bottoms_img, human_bottoms, mask_bottoms, pose_bottoms,
            denoise_steps, generator, device,
        )

    # Stack the two 768x1024 results vertically.
    combined_img = Image.new("RGB", (768, 2048))
    combined_img.paste(image_tops, (0, 0))
    combined_img.paste(image_bottoms, (0, 1024))

    return combined_img, mask_gray_tops
260
 
 
 
 
 
 
 
 
 
261
 
 
 
 
 
 
 
 
 
 
262
 
263
  @spaces.GPU
264
  def start_tryon(dict, garm_img, garment_des, is_checked, is_checked_crop, denoise_steps, seed):
@@ -368,24 +251,7 @@ def start_tryon(dict, garm_img, garment_des, is_checked, is_checked_crop, denois
368
  return images[0], mask_gray
369
 
370
 
371
def decode_image_from_base64(base64_str):
    """Decode a base64-encoded image into a PIL image.

    Args:
        base64_str: Base64 payload. A ``data:<mime>;base64,`` prefix, as sent
            by browsers, is accepted and stripped before decoding.

    Returns:
        The image opened with ``Image.open`` on the decoded bytes.

    Raises:
        Exception: Any decoding or image-opening error is logged with its
            traceback and re-raised unchanged.
    """
    try:
        if isinstance(base64_str, str) and base64_str.startswith("data:"):
            # Plain base64 never contains a comma, so everything up to the first
            # comma is the data-URL header and can be dropped.
            _, sep, payload = base64_str.partition(",")
            if sep:
                base64_str = payload
        img_data = base64.b64decode(base64_str)
        return Image.open(BytesIO(img_data))
    except Exception as e:
        # logging.exception keeps the traceback that logging.error would drop.
        logging.exception("Error decoding image: %s", e)
        raise
379
 
380
def encode_image_to_base64(img):
    """Serialise an image to PNG and return it as a base64 string.

    Args:
        img: Object exposing ``save(fp, format=...)``, such as a PIL image.

    Returns:
        The base64 text of the PNG bytes written by ``img.save``.

    Raises:
        Exception: Any error while saving or encoding is logged with its
            traceback and re-raised unchanged.
    """
    try:
        buffered = BytesIO()
        img.save(buffered, format="PNG")
        return base64.b64encode(buffered.getvalue()).decode("utf-8")
    except Exception as e:
        # logging.exception keeps the traceback that logging.error would drop.
        logging.exception("Error encoding image: %s", e)
        raise
389
 
390
  @app.route('/tryon', methods=['POST'])
391
  def tryon():
@@ -416,24 +282,167 @@ def tryon():
416
  'mask_image': mask_base64
417
  })
418
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
419
  @app.route('/tryon-full', methods=['POST'])
420
  def tryon_full():
421
  try:
422
  data = request.json
423
 
424
- # Decode images
425
  tops_image = decode_image_from_base64(data['tops_image'])
426
  bottoms_image = decode_image_from_base64(data['bottoms_image'])
427
  model_parse_tops = decode_image_from_base64(data['model_parse_tops'])
428
  model_parse_bottoms = decode_image_from_base64(data['model_parse_bottoms'])
429
 
430
- # Retrieve additional parameters
431
  is_checked = data.get('use_auto_mask', True)
432
  is_checked_crop = data.get('use_auto_crop', False)
433
  denoise_steps = int(data.get('denoise_steps', 30))
434
  seed = int(data.get('seed', 42))
435
 
436
- # Call the start_tryon_full_body function
437
  output_image, mask_image = start_tryon_full_body(
438
  tops_image,
439
  bottoms_image,
@@ -445,7 +454,7 @@ def tryon_full():
445
  seed
446
  )
447
 
448
- # Convert output image to base64
449
  output_base64 = encode_image_to_base64(output_image)
450
  mask_base64 = encode_image_to_base64(mask_image)
451
 
@@ -453,10 +462,16 @@ def tryon_full():
453
  'output_image': output_base64,
454
  'mask_image': mask_base64
455
  })
456
-
 
 
 
 
 
 
457
  except Exception as e:
458
  logging.error(f"Error in /tryon-full: {e}")
459
- return jsonify({'error': str(e)}), 200
460
 
461
  if __name__ == "__main__":
462
  app.run(debug=True, host="0.0.0.0", port=7860)
 
123
  output_mask = Image.fromarray(mask)
124
  return output_mask
125
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
 
127
def decode_image_from_base64(base64_str):
    """Decode a base64-encoded image into a PIL image.

    Args:
        base64_str: Base64 payload. A ``data:<mime>;base64,`` prefix, as sent
            by browsers, is accepted and stripped before decoding.

    Returns:
        The image opened with ``Image.open`` on the decoded bytes.

    Raises:
        Exception: Any decoding or image-opening error is logged with its
            traceback and re-raised unchanged.
    """
    try:
        if isinstance(base64_str, str) and base64_str.startswith("data:"):
            # Plain base64 never contains a comma, so everything up to the first
            # comma is the data-URL header and can be dropped.
            _, sep, payload = base64_str.partition(",")
            if sep:
                base64_str = payload
        img_data = base64.b64decode(base64_str)
        return Image.open(BytesIO(img_data))
    except Exception as e:
        # logging.exception keeps the traceback that logging.error would drop.
        logging.exception("Error decoding image: %s", e)
        raise
135
 
136
def encode_image_to_base64(img):
    """Serialise an image to PNG and return it as a base64 string.

    Args:
        img: Object exposing ``save(fp, format=...)``, such as a PIL image.

    Returns:
        The base64 text of the PNG bytes written by ``img.save``.

    Raises:
        Exception: Any error while saving or encoding is logged with its
            traceback and re-raised unchanged.
    """
    try:
        buffered = BytesIO()
        img.save(buffered, format="PNG")
        return base64.b64encode(buffered.getvalue()).decode("utf-8")
    except Exception as e:
        # logging.exception keeps the traceback that logging.error would drop.
        logging.exception("Error encoding image: %s", e)
        raise
145
 
146
  @spaces.GPU
147
  def start_tryon(dict, garm_img, garment_des, is_checked, is_checked_crop, denoise_steps, seed):
 
251
  return images[0], mask_gray
252
 
253
 
 
 
 
 
 
 
 
 
254
 
 
 
 
 
 
 
 
 
 
255
 
256
  @app.route('/tryon', methods=['POST'])
257
  def tryon():
 
282
  'mask_image': mask_base64
283
  })
284
 
285
+
286
def combine_images_with_masks(tops_image, bottoms_image, mask, is_checked_crop, crop_size):
    """Composite the bottoms image onto the lower half of the tops image.

    Args:
        tops_image: PIL image pasted at the top-left of the canvas; the canvas
            takes its size.
        bottoms_image: PIL image pasted at ``(0, tops_image.height // 2)``.
        mask: Accepted for interface compatibility; not used.
        is_checked_crop: When true, both images are first resized to
            ``crop_size``.
        crop_size: ``(width, height)`` used only when ``is_checked_crop`` is true.

    Returns:
        A new RGB image the size of the (possibly resized) tops image.

    Raises:
        ValueError: Wraps any failure; the original error is chained as
            ``__cause__``.
    """
    try:
        if is_checked_crop:
            tops_image = tops_image.resize(crop_size)
            bottoms_image = bottoms_image.resize(crop_size)

        # NOTE(review): the canvas is only as tall as the tops image, so the
        # bottoms image covers its lower half and is itself cut off at the canvas
        # edge - confirm this overlap is intended rather than a vertical stack.
        combined_image = Image.new('RGB', (tops_image.width, tops_image.height))
        combined_image.paste(tops_image, (0, 0))
        combined_image.paste(bottoms_image, (0, tops_image.height // 2))
        return combined_image
    except Exception as e:
        raise ValueError(f"Error combining images with masks: {e}") from e
304
+
305
@spaces.GPU
def start_tryon_full_body(tops_image, bottoms_image, model_parse_tops, model_parse_bottoms, is_checked, is_checked_crop, denoise_steps, seed, garment_des="tops and bottoms"):
    """Run one try-on pass using the tops and bottoms garments composited together.

    Args:
        tops_image: PIL image of the upper garment. It is also used as the
            person/background image fed to the pipeline (see the NOTE below).
        bottoms_image: PIL image of the lower garment.
        model_parse_tops: PIL image binarised with ``pil_to_binary_mask`` to
            form the mask when ``is_checked`` is false.
        model_parse_bottoms: Accepted for interface compatibility; not used.
        is_checked: When true, the mask is derived from OpenPose keypoints and
            the parsing model instead of ``model_parse_tops``.
        is_checked_crop: When true, the person image is centre-cropped to a 3:4
            box before inference and the result is pasted back afterwards.
        denoise_steps: Number of inference steps passed to the pipeline.
        seed: Seed for the torch generator, or ``None`` for no fixed generator.
        garment_des: Garment description inserted into both text prompts.

    Returns:
        ``(output_image, mask_gray)``: the generated image (pasted back into
        the person image when cropping is enabled) and the mask preview.

    Raises:
        ValueError: Wraps any exception raised during processing; the original
            error is chained as ``__cause__``.
    """
    try:
        device = "cuda" if torch.cuda.is_available() else "cpu"
        openpose_model.preprocessor.body_estimation.model.to(device)
        pipe.to(device)
        pipe.unet_encoder.to(device)

        # Tops garment.
        tops_image = tops_image.convert("RGB").resize((768, 1024))
        # NOTE(review): the tops garment is used as the person/background image;
        # no separate model photo is fed to the pipeline - confirm this is
        # intended.
        human_img_orig = tops_image

        if is_checked_crop:
            width, height = human_img_orig.size
            target_width = int(min(width, height * (3 / 4)))
            target_height = int(min(height, width * (4 / 3)))
            left = (width - target_width) / 2
            top = (height - target_height) / 2
            right = (width + target_width) / 2
            bottom = (height + target_height) / 2
            cropped_img = human_img_orig.crop((left, top, right, bottom))
            crop_size = cropped_img.size
            human_img = cropped_img.resize((768, 1024))
        else:
            # crop_size is still passed to combine_images_with_masks below, so it
            # must be bound on this path too.
            crop_size = human_img_orig.size
            human_img = human_img_orig.resize((768, 1024))

        if is_checked:
            keypoints = openpose_model(human_img.resize((384, 512)))
            model_parse, _ = parsing_model(human_img.resize((384, 512)))
            mask, mask_gray = get_mask_location('hd', "full_body", model_parse, keypoints)
            mask = mask.resize((768, 1024))
        else:
            mask = pil_to_binary_mask(model_parse_tops.convert("RGB").resize((768, 1024)))
        # Preview of the person image with the masked region blanked out.
        mask_gray = (1 - transforms.ToTensor()(mask)) * tensor_transfrom(human_img)
        mask_gray = to_pil_image((mask_gray + 1.0) / 2.0)

        human_img_arg = _apply_exif_orientation(human_img.resize((384, 512)))
        human_img_arg = convert_PIL_to_numpy(human_img_arg, format="BGR")

        # Run DensePose on the same device as the rest of the models.
        args = apply_net.create_argument_parser().parse_args(('show', './configs/densepose_rcnn_R_50_FPN_s1x.yaml', './ckpt/densepose/model_final_162be9.pkl', 'dp_segm', '-v', '--opts', 'MODEL.DEVICE', device))
        pose_img = args.func(args, human_img_arg)
        pose_img = pose_img[:, :, ::-1]  # reverse the channel axis
        pose_img = Image.fromarray(pose_img).resize((768, 1024))

        # Bottoms garment.
        bottoms_image = bottoms_image.convert("RGB").resize((768, 1024))

        # Composite the tops and bottoms garments into one cloth image.
        combined_image = combine_images_with_masks(tops_image, bottoms_image, mask, is_checked_crop, crop_size)

        with torch.no_grad(), torch.cuda.amp.autocast():
            prompt = "model is wearing " + garment_des
            negative_prompt = "monochrome, lowres, bad anatomy, worst quality, low quality"
            with torch.inference_mode():
                (
                    prompt_embeds,
                    negative_prompt_embeds,
                    pooled_prompt_embeds,
                    negative_pooled_prompt_embeds,
                ) = pipe.encode_prompt(
                    prompt,
                    num_images_per_prompt=1,
                    do_classifier_free_guidance=True,
                    negative_prompt=negative_prompt,
                )

            # Second prompt describing the garment alone, encoded without
            # classifier-free guidance.
            cloth_prompt = ["a photo of " + garment_des]
            cloth_negative_prompt = [negative_prompt]
            with torch.inference_mode():
                (
                    prompt_embeds_c,
                    _,
                    _,
                    _,
                ) = pipe.encode_prompt(
                    cloth_prompt,
                    num_images_per_prompt=1,
                    do_classifier_free_guidance=False,
                    negative_prompt=cloth_negative_prompt,
                )

            pose_img = tensor_transfrom(pose_img).unsqueeze(0).to(device, torch.float16)
            garm_tensor = tensor_transfrom(combined_image).unsqueeze(0).to(device, torch.float16)
            generator = torch.Generator(device).manual_seed(seed) if seed is not None else None
            images = pipe(
                prompt_embeds=prompt_embeds.to(device, torch.float16),
                negative_prompt_embeds=negative_prompt_embeds.to(device, torch.float16),
                pooled_prompt_embeds=pooled_prompt_embeds.to(device, torch.float16),
                negative_pooled_prompt_embeds=negative_pooled_prompt_embeds.to(device, torch.float16),
                num_inference_steps=denoise_steps,
                generator=generator,
                strength=1.0,
                pose_img=pose_img.to(device, torch.float16),
                text_embeds_cloth=prompt_embeds_c.to(device, torch.float16),
                cloth=garm_tensor.to(device, torch.float16),
                mask_image=mask,
                image=human_img,
                height=1024,
                width=768,
                ip_adapter_image=combined_image.resize((768, 1024)),
                guidance_scale=2.0,
            )[0]

        if is_checked_crop:
            # Scale the result back to the crop box and paste it into the person
            # image (this modifies human_img_orig in place).
            out_img = images[0].resize(crop_size)
            human_img_orig.paste(out_img, (int(left), int(top)))
            return human_img_orig, mask_gray
        return images[0], mask_gray

    except Exception as e:
        # ValueError is kept because callers catch it; chaining preserves the
        # original traceback.
        raise ValueError(f"Error in start_tryon_full_body: {e}") from e
424
+
425
+
426
+
427
+
428
  @app.route('/tryon-full', methods=['POST'])
429
  def tryon_full():
430
  try:
431
  data = request.json
432
 
433
+ # Décoder les images
434
  tops_image = decode_image_from_base64(data['tops_image'])
435
  bottoms_image = decode_image_from_base64(data['bottoms_image'])
436
  model_parse_tops = decode_image_from_base64(data['model_parse_tops'])
437
  model_parse_bottoms = decode_image_from_base64(data['model_parse_bottoms'])
438
 
439
+ # Récupérer les paramètres supplémentaires
440
  is_checked = data.get('use_auto_mask', True)
441
  is_checked_crop = data.get('use_auto_crop', False)
442
  denoise_steps = int(data.get('denoise_steps', 30))
443
  seed = int(data.get('seed', 42))
444
 
445
+ # Appeler la fonction principale
446
  output_image, mask_image = start_tryon_full_body(
447
  tops_image,
448
  bottoms_image,
 
454
  seed
455
  )
456
 
457
+ # Convertir les images en base64
458
  output_base64 = encode_image_to_base64(output_image)
459
  mask_base64 = encode_image_to_base64(mask_image)
460
 
 
462
  'output_image': output_base64,
463
  'mask_image': mask_base64
464
  })
465
+
466
+ except KeyError as e:
467
+ logging.error(f"KeyError in /tryon-full: {e}")
468
+ return jsonify({'error': f"KeyError: {str(e)}"}), 400
469
+ except ValueError as e:
470
+ logging.error(f"ValueError in /tryon-full: {e}")
471
+ return jsonify({'error': f"ValueError: {str(e)}"}), 400
472
  except Exception as e:
473
  logging.error(f"Error in /tryon-full: {e}")
474
+ return jsonify({'error': f"Internal server error: {str(e)}"}), 500
475
 
476
  if __name__ == "__main__":
477
  app.run(debug=True, host="0.0.0.0", port=7860)