Update app.py
app.py
CHANGED
@@ -123,42 +123,140 @@ def pil_to_binary_mask(pil_image, threshold=0):
     return output_mask
 
 @spaces.GPU
-def start_tryon_full_body(
-    mask_bottoms_np = np.array(mask_bottoms)
-    return combined_image_pil
+def start_tryon_full_body(tops_img, bottoms_img, model_parse_tops, model_parse_bottoms, is_checked, is_checked_crop, denoise_steps, seed):
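+    # Move the OpenPose body model, the try-on pipeline, and its UNet encoder onto the GPU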
+    device = "cuda"
+    openpose_model.preprocessor.body_estimation.model.to(device)
+    pipe.to(device)
+    pipe.unet_encoder.to(device)
 
+    # Convert and resize images
+    tops_img = tops_img.convert("RGB").resize((768, 1024))
+    bottoms_img = bottoms_img.convert("RGB").resize((768, 1024))
 
+    # Process tops image
+    human_img_orig_tops = model_parse_tops.convert("RGB").resize((768, 1024))
+
+    if is_checked:
+        # Automatic mask generation for the tops
+        mask_tops, _ = get_mask_location('hd', "upper_body", model_parse_tops, {})
+        mask_tops = mask_tops.resize((768, 1024))
+    else:
+        mask_tops = pil_to_binary_mask(model_parse_tops.convert("RGB").resize((768, 1024)))
+
+    mask_gray_tops = (1 - transforms.ToTensor()(mask_tops)) * tensor_transfrom(human_img_orig_tops)
+    mask_gray_tops = to_pil_image((mask_gray_tops + 1.0) / 2.0)
 
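+    # Estimate a DensePose map with detectron2's apply_net to condition the try-on pass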
+    human_img_arg_tops = _apply_exif_orientation(human_img_orig_tops.resize((384, 512)))
+    human_img_arg_tops = convert_PIL_to_numpy(human_img_arg_tops, format="BGR")
 
+    args = apply_net.create_argument_parser().parse_args(('show', './configs/densepose_rcnn_R_50_FPN_s1x.yaml', './ckpt/densepose/model_final_162be9.pkl', 'dp_segm', '-v', '--opts', 'MODEL.DEVICE', 'cuda'))
+    pose_img_tops = args.func(args, human_img_arg_tops)
+    pose_img_tops = pose_img_tops[:, :, ::-1]  # BGR -> RGB
+    pose_img_tops = Image.fromarray(pose_img_tops).resize((768, 1024))
 
+    # Process bottoms image
+    human_img_orig_bottoms = model_parse_bottoms.convert("RGB").resize((768, 1024))
+
+    if is_checked:
+        # Automatic mask generation for the bottoms
+        mask_bottoms, _ = get_mask_location('hd', "lower_body", model_parse_bottoms, {})
+        mask_bottoms = mask_bottoms.resize((768, 1024))
+    else:
+        mask_bottoms = pil_to_binary_mask(model_parse_bottoms.convert("RGB").resize((768, 1024)))
 
+    mask_gray_bottoms = (1 - transforms.ToTensor()(mask_bottoms)) * tensor_transfrom(human_img_orig_bottoms)
+    mask_gray_bottoms = to_pil_image((mask_gray_bottoms + 1.0) / 2.0)
 
+    human_img_arg_bottoms = _apply_exif_orientation(human_img_orig_bottoms.resize((384, 512)))
+    human_img_arg_bottoms = convert_PIL_to_numpy(human_img_arg_bottoms, format="BGR")
 
+    pose_img_bottoms = args.func(args, human_img_arg_bottoms)
+    pose_img_bottoms = pose_img_bottoms[:, :, ::-1]  # BGR -> RGB
+    pose_img_bottoms = Image.fromarray(pose_img_bottoms).resize((768, 1024))
 
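+    # Run both try-on passes without gradients, in mixed precision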
+    with torch.no_grad():
+        with torch.cuda.amp.autocast():
+            prompt_tops = "model is wearing tops"
+            negative_prompt_tops = "monochrome, lowres, bad anatomy, worst quality, low quality"
+            prompt_bottoms = "model is wearing bottoms"
+            negative_prompt_bottoms = "monochrome, lowres, bad anatomy, worst quality, low quality"
+
+            # Encode prompts
+            (
+                prompt_embeds_tops,
+                negative_prompt_embeds_tops,
+                pooled_prompt_embeds_tops,
+                negative_pooled_prompt_embeds_tops,
+            ) = pipe.encode_prompt(
+                prompt_tops,
+                num_images_per_prompt=1,
+                do_classifier_free_guidance=True,
+                negative_prompt=negative_prompt_tops,
+            )
+
+            (
+                prompt_embeds_bottoms,
+                negative_prompt_embeds_bottoms,
+                pooled_prompt_embeds_bottoms,
+                negative_pooled_prompt_embeds_bottoms,
+            ) = pipe.encode_prompt(
+                prompt_bottoms,
+                num_images_per_prompt=1,
+                do_classifier_free_guidance=True,
+                negative_prompt=negative_prompt_bottoms,
+            )
+
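+            # Batch the pose maps and garment images as fp16 tensors on the GPU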
+            pose_img_tops = tensor_transfrom(pose_img_tops).unsqueeze(0).to(device, torch.float16)
+            pose_img_bottoms = tensor_transfrom(pose_img_bottoms).unsqueeze(0).to(device, torch.float16)
+            garm_tensor_tops = tensor_transfrom(tops_img).unsqueeze(0).to(device, torch.float16)
+            garm_tensor_bottoms = tensor_transfrom(bottoms_img).unsqueeze(0).to(device, torch.float16)
+
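+            # Seed once so both passes draw from the same generator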
+            generator = torch.Generator(device).manual_seed(seed) if seed is not None else None
+            images_tops = pipe(
+                prompt_embeds=prompt_embeds_tops.to(device, torch.float16),
+                negative_prompt_embeds=negative_prompt_embeds_tops.to(device, torch.float16),
+                pooled_prompt_embeds=pooled_prompt_embeds_tops.to(device, torch.float16),
+                negative_pooled_prompt_embeds=negative_pooled_prompt_embeds_tops.to(device, torch.float16),
+                num_inference_steps=denoise_steps,
+                generator=generator,
+                strength=1.0,
+                pose_img=pose_img_tops.to(device, torch.float16),
+                text_embeds_cloth=prompt_embeds_tops.to(device, torch.float16),
+                cloth=garm_tensor_tops.to(device, torch.float16),
+                mask_image=mask_tops,
+                image=human_img_orig_tops,
+                height=1024,
+                width=768,
+                ip_adapter_image=tops_img.resize((768, 1024)),
+                guidance_scale=2.0,
+            )[0]
+
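+            # Second pass: same pipeline, conditioned on the bottoms garment and lower-body mask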
+            images_bottoms = pipe(
+                prompt_embeds=prompt_embeds_bottoms.to(device, torch.float16),
+                negative_prompt_embeds=negative_prompt_embeds_bottoms.to(device, torch.float16),
+                pooled_prompt_embeds=pooled_prompt_embeds_bottoms.to(device, torch.float16),
+                negative_pooled_prompt_embeds=negative_pooled_prompt_embeds_bottoms.to(device, torch.float16),
+                num_inference_steps=denoise_steps,
+                generator=generator,
+                strength=1.0,
+                pose_img=pose_img_bottoms.to(device, torch.float16),
+                text_embeds_cloth=prompt_embeds_bottoms.to(device, torch.float16),
+                cloth=garm_tensor_bottoms.to(device, torch.float16),
+                mask_image=mask_bottoms,
+                image=human_img_orig_bottoms,
+                height=1024,
+                width=768,
+                ip_adapter_image=bottoms_img.resize((768, 1024)),
+                guidance_scale=2.0,
+            )[0]
+
+    # Combine images (the pipeline returns a list of PIL images; take the first of each)
+    combined_img = Image.new("RGB", (768, 2048))  # Height is 2x the original to accommodate both images
+    combined_img.paste(images_tops[0], (0, 0))
+    combined_img.paste(images_bottoms[0], (0, 1024))  # Paste bottoms image below tops
+
+    return combined_img, mask_gray_tops  # Or another mask, depending on your needs
 
 
 @spaces.GPU
@@ -313,33 +411,40 @@ def tryon():
 def tryon_full():
     data = request.json
 
-    # Decode
+    # Decode images
     tops_image = decode_image_from_base64(data['tops_image'])
     bottoms_image = decode_image_from_base64(data['bottoms_image'])
     model_parse_tops = decode_image_from_base64(data['model_parse_tops'])
     model_parse_bottoms = decode_image_from_base64(data['model_parse_bottoms'])
 
+    # Retrieve additional parameters
+    is_checked = data.get('use_auto_mask', True)
+    is_checked_crop = data.get('use_auto_crop', False)
+    denoise_steps = int(data.get('denoise_steps', 30))
+    seed = int(data.get('seed', 42))
 
-    # Call the
-    output_image = start_tryon_full_body(
+    # Call the start_tryon_full_body function
+    output_image, mask_image = start_tryon_full_body(
         tops_image,
         bottoms_image,
         model_parse_tops,
         model_parse_bottoms,
+        is_checked,
+        is_checked_crop,
+        denoise_steps,
+        seed
     )
 
     # Convert output image to base64
     output_base64 = encode_image_to_base64(output_image)
+    mask_base64 = encode_image_to_base64(mask_image)
 
     return jsonify({
-        'output_image': output_base64
+        'output_image': output_base64,
+        'mask_image': mask_base64
     })
 
+
 if __name__ == "__main__":
     app.run(debug=True, host="0.0.0.0", port=7860)
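
For reference, a minimal client for this endpoint might look like the sketch below. The route path and host are assumptions (the @app.route decorator is outside this diff), and the file names are placeholders; the JSON keys match the handler above.

import base64
import requests

def encode_file(path):
    # The endpoint expects base64-encoded image strings
    with open(path, "rb") as f:
        return base64.b64encode(f.read()).decode("utf-8")

payload = {
    "tops_image": encode_file("tops.jpg"),
    "bottoms_image": encode_file("bottoms.jpg"),
    "model_parse_tops": encode_file("person.jpg"),
    "model_parse_bottoms": encode_file("person.jpg"),
    "use_auto_mask": True,
    "denoise_steps": 30,
    "seed": 42,
}

resp = requests.post("http://localhost:7860/tryon_full", json=payload)  # assumed route
result = resp.json()
with open("output.png", "wb") as f:
    f.write(base64.b64decode(result["output_image"]))
with open("mask.png", "wb") as f:
    f.write(base64.b64decode(result["mask_image"]))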