Spaces:
Running
on
Zero
Running
on
Zero
Upload app.py
Browse files
app.py
CHANGED
@@ -17,6 +17,8 @@ import tempfile
|
|
17 |
from PIL import Image
|
18 |
import subprocess
|
19 |
|
|
|
|
|
20 |
import torch
|
21 |
import gradio as gr
|
22 |
import string
|
@@ -62,6 +64,7 @@ num_inputs = 6
|
|
62 |
def clear_images():
|
63 |
return [None, ]*num_inputs
|
64 |
|
|
|
65 |
def det_seg_img(image, label):
|
66 |
if isinstance(image, str):
|
67 |
image = Image.open(image).convert("RGB")
|
@@ -70,6 +73,7 @@ def det_seg_img(image, label):
|
|
70 |
ins, bbox = merge_instances(image, indices, instance_result_dict["instance_bboxes"], instance_result_dict["instance_images"])
|
71 |
return ins
|
72 |
|
|
|
73 |
def crop_face_img(image):
|
74 |
if isinstance(image, str):
|
75 |
image = Image.open(image).convert("RGB")
|
@@ -83,6 +87,7 @@ def crop_face_img(image):
|
|
83 |
face = image.crop(face_bbox)
|
84 |
return face
|
85 |
|
|
|
86 |
def vlm_img_caption(image):
|
87 |
if isinstance(image, str):
|
88 |
image = Image.open(image).convert("RGB")
|
@@ -360,204 +365,207 @@ def update_inputs(is_open, index, state: list):
|
|
360 |
print(indexs)
|
361 |
return indexs, is_open
|
362 |
|
363 |
-
|
364 |
|
365 |
-
|
366 |
-
|
367 |
-
|
368 |
-
|
369 |
-
|
370 |
-
|
371 |
-
|
372 |
-
|
373 |
-
with gr.Row():
|
374 |
-
for i in range(num_inputs):
|
375 |
-
image, caption, face_btn, det_btn, vlm_btn, accordion_state, accordion, id_ip_checkbox = create_image_input(i, open=i<2, indexs_state=indexs_state)
|
376 |
-
images.append(image)
|
377 |
-
idip_checkboxes.append(id_ip_checkbox)
|
378 |
-
captions.append(caption)
|
379 |
-
face_btns.append(face_btn)
|
380 |
-
det_btns.append(det_btn)
|
381 |
-
vlm_btns.append(vlm_btn)
|
382 |
-
accordion_states.append(accordion_state)
|
383 |
-
|
384 |
-
accordions.append(accordion)
|
385 |
-
|
386 |
-
# 将其他设置参数压缩到 Advanced Accordion 内
|
387 |
-
with gr.Accordion("Advanced", open=False):
|
388 |
-
# 使用 Row 和 Column 来布局四个图像和描述
|
389 |
-
with gr.Row():
|
390 |
-
target_height = gr.Slider(512, 1024, step=128, value=768, label="Generated Height", info="")
|
391 |
-
target_width = gr.Slider(512, 1024, step=128, value=768, label="Generated Width", info="")
|
392 |
-
cond_size = gr.Slider(256, 384, step=128, value=256, label="Condition Size", info="")
|
393 |
-
with gr.Row():
|
394 |
-
# 修改 weight_id_ip_str 为两个 Slider
|
395 |
-
weight_id = gr.Slider(0.1, 5, step=0.1, value=3, label="weight_id")
|
396 |
-
weight_ip = gr.Slider(0.1, 5, step=0.1, value=5, label="weight_ip")
|
397 |
-
with gr.Row():
|
398 |
-
# 修改 ip_scale_str 为 Slider,并添加 Textbox 显示转换后的格式
|
399 |
-
ip_scale_str = gr.Slider(0.5, 1.5, step=0.01, value=0.85, label="latent_lora_scale")
|
400 |
-
vae_lora_scale = gr.Slider(0.5, 1.5, step=0.01, value=1.3, label="vae_lora_scale")
|
401 |
-
with gr.Row():
|
402 |
-
# 修改 vae_skip_iter 为两个 Slider
|
403 |
-
vae_skip_iter_s1 = gr.Slider(0, 1, step=0.01, value=0.05, label="vae_skip_iter_before")
|
404 |
-
vae_skip_iter_s2 = gr.Slider(0, 1, step=0.01, value=0.8, label="vae_skip_iter_after")
|
405 |
|
|
|
406 |
with gr.Row():
|
407 |
-
|
408 |
-
|
409 |
-
|
410 |
-
|
411 |
-
|
412 |
-
|
413 |
-
|
414 |
-
|
415 |
-
|
416 |
-
|
417 |
-
|
418 |
-
lambda s1, s2: f"0-1:1/{s1}/{s2}",
|
419 |
-
inputs=[weight_id, weight_ip],
|
420 |
-
outputs=weight_id_ip_str
|
421 |
-
)
|
422 |
-
vae_skip_iter = gr.Textbox(
|
423 |
-
value="0-0.05:1,0.8-1:1",
|
424 |
-
label="vae_skip_iter",
|
425 |
-
interactive=False, visible=False
|
426 |
-
)
|
427 |
-
vae_skip_iter_s1.change(
|
428 |
-
lambda s1, s2: f"0-{s1}:1,{s2}-1:1",
|
429 |
-
inputs=[vae_skip_iter_s1, vae_skip_iter_s2],
|
430 |
-
outputs=vae_skip_iter
|
431 |
-
)
|
432 |
-
vae_skip_iter_s2.change(
|
433 |
-
lambda s1, s2: f"0-{s1}:1,{s2}-1:1",
|
434 |
-
inputs=[vae_skip_iter_s1, vae_skip_iter_s2],
|
435 |
-
outputs=vae_skip_iter
|
436 |
-
)
|
437 |
|
438 |
-
|
439 |
-
|
440 |
-
|
441 |
-
|
442 |
-
|
443 |
-
|
444 |
-
|
445 |
-
|
446 |
-
|
447 |
-
|
448 |
-
|
449 |
-
|
450 |
-
|
451 |
-
label="
|
452 |
-
|
453 |
-
)
|
454 |
-
|
455 |
-
|
456 |
-
|
457 |
-
|
|
|
|
|
|
|
|
|
|
|
458 |
)
|
459 |
-
|
460 |
-
lambda
|
461 |
-
inputs=
|
462 |
-
outputs=
|
463 |
)
|
464 |
-
|
465 |
-
|
466 |
-
|
467 |
-
|
468 |
-
|
469 |
-
|
470 |
-
|
471 |
-
|
472 |
-
|
473 |
-
|
474 |
-
|
475 |
-
|
476 |
-
|
477 |
-
|
478 |
-
|
479 |
-
|
480 |
-
|
481 |
-
|
482 |
-
|
483 |
-
|
484 |
-
|
485 |
-
|
486 |
-
|
487 |
-
|
488 |
-
|
489 |
-
|
490 |
-
|
491 |
-
|
492 |
-
|
493 |
-
|
494 |
-
|
495 |
-
|
496 |
-
|
497 |
-
|
498 |
-
|
499 |
-
|
500 |
-
|
501 |
-
|
502 |
-
|
503 |
-
|
504 |
-
|
505 |
-
|
506 |
-
|
507 |
-
|
508 |
-
|
509 |
-
|
510 |
-
|
511 |
-
|
512 |
-
|
513 |
-
|
514 |
-
|
515 |
-
|
516 |
-
|
517 |
-
|
518 |
-
|
519 |
-
|
520 |
-
|
521 |
-
|
522 |
-
|
523 |
-
|
524 |
-
|
525 |
-
|
526 |
-
|
527 |
-
|
528 |
-
|
529 |
-
|
530 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
531 |
],
|
532 |
-
[
|
533 |
-
|
534 |
-
|
535 |
-
|
536 |
-
|
537 |
-
|
538 |
-
|
539 |
-
|
540 |
-
|
|
|
|
|
|
|
|
|
|
|
541 |
],
|
542 |
-
|
543 |
-
|
544 |
-
|
545 |
-
|
546 |
-
target_height,
|
547 |
-
target_width,
|
548 |
-
weight_id,
|
549 |
-
weight_ip,
|
550 |
-
ip_scale_str,
|
551 |
-
vae_lora_scale,
|
552 |
-
vae_skip_iter_s1,
|
553 |
-
vae_skip_iter_s2,
|
554 |
-
*images,
|
555 |
-
*captions,
|
556 |
-
*idip_checkboxes
|
557 |
-
],
|
558 |
-
outputs=accordion_states,
|
559 |
-
fn=open_accordion_on_example_selection,
|
560 |
-
run_on_click=True
|
561 |
-
)
|
562 |
|
563 |
-
demo.queue()
|
|
|
|
17 |
from PIL import Image
|
18 |
import subprocess
|
19 |
|
20 |
+
import spaces
|
21 |
+
|
22 |
import torch
|
23 |
import gradio as gr
|
24 |
import string
|
|
|
64 |
def clear_images():
|
65 |
return [None, ]*num_inputs
|
66 |
|
67 |
+
@spaces.GPU()
|
68 |
def det_seg_img(image, label):
|
69 |
if isinstance(image, str):
|
70 |
image = Image.open(image).convert("RGB")
|
|
|
73 |
ins, bbox = merge_instances(image, indices, instance_result_dict["instance_bboxes"], instance_result_dict["instance_images"])
|
74 |
return ins
|
75 |
|
76 |
+
@spaces.GPU()
|
77 |
def crop_face_img(image):
|
78 |
if isinstance(image, str):
|
79 |
image = Image.open(image).convert("RGB")
|
|
|
87 |
face = image.crop(face_bbox)
|
88 |
return face
|
89 |
|
90 |
+
@spaces.GPU()
|
91 |
def vlm_img_caption(image):
|
92 |
if isinstance(image, str):
|
93 |
image = Image.open(image).convert("RGB")
|
|
|
365 |
print(indexs)
|
366 |
return indexs, is_open
|
367 |
|
368 |
+
if __name__ == "__main__":
|
369 |
|
370 |
+
with gr.Blocks() as demo:
|
371 |
+
|
372 |
+
indexs_state = gr.State([0, 1]) # 添加状态来存储 indexs
|
373 |
+
|
374 |
+
gr.Markdown("### XVerse Demo")
|
375 |
+
with gr.Row():
|
376 |
+
with gr.Column():
|
377 |
+
prompt = gr.Textbox(label="Prompt", value="")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
378 |
|
379 |
+
clear_btn = gr.Button("清空输入图像")
|
380 |
with gr.Row():
|
381 |
+
for i in range(num_inputs):
|
382 |
+
image, caption, face_btn, det_btn, vlm_btn, accordion_state, accordion, id_ip_checkbox = create_image_input(i, open=i<2, indexs_state=indexs_state)
|
383 |
+
images.append(image)
|
384 |
+
idip_checkboxes.append(id_ip_checkbox)
|
385 |
+
captions.append(caption)
|
386 |
+
face_btns.append(face_btn)
|
387 |
+
det_btns.append(det_btn)
|
388 |
+
vlm_btns.append(vlm_btn)
|
389 |
+
accordion_states.append(accordion_state)
|
390 |
+
|
391 |
+
accordions.append(accordion)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
392 |
|
393 |
+
# 将其他设置参数压缩到 Advanced Accordion 内
|
394 |
+
with gr.Accordion("Advanced", open=False):
|
395 |
+
# 使用 Row 和 Column 来布局四个图像和描述
|
396 |
+
with gr.Row():
|
397 |
+
target_height = gr.Slider(512, 1024, step=128, value=768, label="Generated Height", info="")
|
398 |
+
target_width = gr.Slider(512, 1024, step=128, value=768, label="Generated Width", info="")
|
399 |
+
cond_size = gr.Slider(256, 384, step=128, value=256, label="Condition Size", info="")
|
400 |
+
with gr.Row():
|
401 |
+
# 修改 weight_id_ip_str 为两个 Slider
|
402 |
+
weight_id = gr.Slider(0.1, 5, step=0.1, value=3, label="weight_id")
|
403 |
+
weight_ip = gr.Slider(0.1, 5, step=0.1, value=5, label="weight_ip")
|
404 |
+
with gr.Row():
|
405 |
+
# 修改 ip_scale_str 为 Slider,并添加 Textbox 显示转换后的格式
|
406 |
+
ip_scale_str = gr.Slider(0.5, 1.5, step=0.01, value=0.85, label="latent_lora_scale")
|
407 |
+
vae_lora_scale = gr.Slider(0.5, 1.5, step=0.01, value=1.3, label="vae_lora_scale")
|
408 |
+
with gr.Row():
|
409 |
+
# 修改 vae_skip_iter 为两个 Slider
|
410 |
+
vae_skip_iter_s1 = gr.Slider(0, 1, step=0.01, value=0.05, label="vae_skip_iter_before")
|
411 |
+
vae_skip_iter_s2 = gr.Slider(0, 1, step=0.01, value=0.8, label="vae_skip_iter_after")
|
412 |
+
|
413 |
+
with gr.Row():
|
414 |
+
weight_id_ip_str = gr.Textbox(
|
415 |
+
value="0-1:1/3/5",
|
416 |
+
label="weight_id_ip_str",
|
417 |
+
interactive=False, visible=False
|
418 |
)
|
419 |
+
weight_id.change(
|
420 |
+
lambda s1, s2: f"0-1:1/{s1}/{s2}",
|
421 |
+
inputs=[weight_id, weight_ip],
|
422 |
+
outputs=weight_id_ip_str
|
423 |
)
|
424 |
+
weight_ip.change(
|
425 |
+
lambda s1, s2: f"0-1:1/{s1}/{s2}",
|
426 |
+
inputs=[weight_id, weight_ip],
|
427 |
+
outputs=weight_id_ip_str
|
428 |
+
)
|
429 |
+
vae_skip_iter = gr.Textbox(
|
430 |
+
value="0-0.05:1,0.8-1:1",
|
431 |
+
label="vae_skip_iter",
|
432 |
+
interactive=False, visible=False
|
433 |
+
)
|
434 |
+
vae_skip_iter_s1.change(
|
435 |
+
lambda s1, s2: f"0-{s1}:1,{s2}-1:1",
|
436 |
+
inputs=[vae_skip_iter_s1, vae_skip_iter_s2],
|
437 |
+
outputs=vae_skip_iter
|
438 |
+
)
|
439 |
+
vae_skip_iter_s2.change(
|
440 |
+
lambda s1, s2: f"0-{s1}:1,{s2}-1:1",
|
441 |
+
inputs=[vae_skip_iter_s1, vae_skip_iter_s2],
|
442 |
+
outputs=vae_skip_iter
|
443 |
+
)
|
444 |
+
|
445 |
+
with gr.Row():
|
446 |
+
db_latent_lora_scale_str = gr.Textbox(
|
447 |
+
value="0-1:0.85",
|
448 |
+
label="db_latent_lora_scale_str",
|
449 |
+
interactive=False, visible=False
|
450 |
+
)
|
451 |
+
sb_latent_lora_scale_str = gr.Textbox(
|
452 |
+
value="0-1:0.85",
|
453 |
+
label="sb_latent_lora_scale_str",
|
454 |
+
interactive=False, visible=False
|
455 |
+
)
|
456 |
+
vae_lora_scale_str = gr.Textbox(
|
457 |
+
value="0-1:1.3",
|
458 |
+
label="vae_lora_scale_str",
|
459 |
+
interactive=False, visible=False
|
460 |
+
)
|
461 |
+
vae_lora_scale.change(
|
462 |
+
lambda s: f"0-1:{s}",
|
463 |
+
inputs=vae_lora_scale,
|
464 |
+
outputs=vae_lora_scale_str
|
465 |
+
)
|
466 |
+
ip_scale_str.change(
|
467 |
+
lambda s: [f"0-1:{s}", f"0-1:{s}"],
|
468 |
+
inputs=ip_scale_str,
|
469 |
+
outputs=[db_latent_lora_scale_str, sb_latent_lora_scale_str]
|
470 |
+
)
|
471 |
+
|
472 |
+
with gr.Row():
|
473 |
+
double_attention = gr.Checkbox(value=False, label="Double Attention", visible=False)
|
474 |
+
single_attention = gr.Checkbox(value=True, label="Single Attention", visible=False)
|
475 |
+
|
476 |
+
with gr.Column():
|
477 |
+
output = gr.Image(label="生成的图像")
|
478 |
+
seed = gr.Number(value=42, label="Seed", info="")
|
479 |
+
gen_btn = gr.Button("生成图像")
|
480 |
+
|
481 |
+
gr.Markdown("### Examples")
|
482 |
+
gen_btn.click(
|
483 |
+
generate_image,
|
484 |
+
inputs=[
|
485 |
+
prompt, cond_size, target_height, target_width, seed,
|
486 |
+
vae_skip_iter, weight_id_ip_str,
|
487 |
+
double_attention, single_attention,
|
488 |
+
db_latent_lora_scale_str, sb_latent_lora_scale_str, vae_lora_scale_str,
|
489 |
+
indexs_state, # 传递 indexs 状态
|
490 |
+
*images,
|
491 |
+
*captions,
|
492 |
+
*idip_checkboxes,
|
493 |
+
],
|
494 |
+
outputs=output
|
495 |
+
)
|
496 |
+
|
497 |
+
# 修改清空函数的输出参数
|
498 |
+
clear_btn.click(clear_images, outputs=images)
|
499 |
+
|
500 |
+
# 循环绑定 Det & Seg 和 Auto Caption 按钮的点击事件
|
501 |
+
for i in range(num_inputs):
|
502 |
+
face_btns[i].click(crop_face_img, inputs=[images[i]], outputs=[images[i]])
|
503 |
+
det_btns[i].click(det_seg_img, inputs=[images[i], captions[i]], outputs=[images[i]])
|
504 |
+
vlm_btns[i].click(vlm_img_caption, inputs=[images[i]], outputs=[captions[i]])
|
505 |
+
accordion_states[i].change(fn=lambda x, state, index=i: change_accordion(x, index, state), inputs=[accordion_states[i], indexs_state], outputs=[accordions[i], indexs_state])
|
506 |
+
|
507 |
+
examples = gr.Examples(
|
508 |
+
examples=[
|
509 |
+
[
|
510 |
+
"ENT1 wearing a tiny hat",
|
511 |
+
42, 256, 768, 768,
|
512 |
+
3, 5,
|
513 |
+
0.85, 1.3,
|
514 |
+
0.05, 0.8,
|
515 |
+
"sample/hamster.jpg", None, None, None, None, None,
|
516 |
+
"a hamster", None, None, None, None, None,
|
517 |
+
False, False, False, False, False, False
|
518 |
+
],
|
519 |
+
[
|
520 |
+
"ENT1 in a red dress is smiling",
|
521 |
+
42, 256, 768, 768,
|
522 |
+
3, 5,
|
523 |
+
0.85, 1.3,
|
524 |
+
0.05, 0.8,
|
525 |
+
"sample/woman.jpg", None, None, None, None, None,
|
526 |
+
"a woman", None, None, None, None, None,
|
527 |
+
True, False, False, False, False, False
|
528 |
+
],
|
529 |
+
[
|
530 |
+
"ENT1 and ENT2 standing together in a park.",
|
531 |
+
42, 256, 768, 768,
|
532 |
+
2, 5,
|
533 |
+
0.85, 1.3,
|
534 |
+
0.05, 0.8,
|
535 |
+
"sample/woman.jpg", "sample/girl.jpg", None, None, None, None,
|
536 |
+
"a woman", "a girl", None, None, None, None,
|
537 |
+
True, True, False, False, False, False
|
538 |
+
],
|
539 |
+
[
|
540 |
+
"ENT1, ENT2, and ENT3 standing together in a park.",
|
541 |
+
42, 256, 768, 768,
|
542 |
+
2.5, 5,
|
543 |
+
0.8, 1.2,
|
544 |
+
0.05, 0.8,
|
545 |
+
"sample/woman.jpg", "sample/girl.jpg", "sample/old_man.jpg", None, None, None,
|
546 |
+
"a woman", "a girl", "an old man", None, None, None,
|
547 |
+
True, True, True, False, False, False
|
548 |
+
],
|
549 |
],
|
550 |
+
inputs=[
|
551 |
+
prompt, seed,
|
552 |
+
cond_size,
|
553 |
+
target_height,
|
554 |
+
target_width,
|
555 |
+
weight_id,
|
556 |
+
weight_ip,
|
557 |
+
ip_scale_str,
|
558 |
+
vae_lora_scale,
|
559 |
+
vae_skip_iter_s1,
|
560 |
+
vae_skip_iter_s2,
|
561 |
+
*images,
|
562 |
+
*captions,
|
563 |
+
*idip_checkboxes
|
564 |
],
|
565 |
+
outputs=accordion_states,
|
566 |
+
fn=open_accordion_on_example_selection,
|
567 |
+
run_on_click=True
|
568 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
569 |
|
570 |
+
demo.queue()
|
571 |
+
demo.launch()
|