Spaces: Running on Zero
lzyhha committed · Commit 42002e4 · 1 Parent(s): 808cfcf

Commit message: space

Files changed:
- app.py (+6 -11)
- visualcloze.py (+5 -4)
app.py CHANGED

@@ -71,10 +71,6 @@ def create_demo(model):
         """)

         gr.Markdown(GUIDANCE)
-
-        # gr.Markdown("<div style='font-size: 24px; font-weight: bold; color: #FF9999;'>" +
-        #             "Note: Click the task button in the right bottom to acquire examples of tasks." +
-        #             "</div>", )

         # Pre-create all possible image components
         all_image_inputs = []
@@ -82,9 +78,8 @@ def create_demo(model):
         row_texts = []
         with gr.Row():

-            # Left column: image grid and prompt input
             with gr.Column(scale=2):
-                #
+                # Image grid
                 for i in range(max_grid_h):
                     # Add row label before each row
                     row_texts.append(gr.Markdown(
@@ -106,7 +101,7 @@ def create_demo(model):
                     )
                     all_image_inputs.append(img_input)

-                #
+                # Prompts
                 layout_prompt = gr.Textbox(
                     label="Layout Description (Auto-filled, Read-only)",
                     placeholder="Layout description will be automatically filled based on grid size...",
@@ -143,17 +138,17 @@ def create_demo(model):

                 gr.Markdown(CITATION)

-            #
+            # Output
             with gr.Column(scale=2):
                 output_gallery = gr.Gallery(
                     label="Generated Results",
                     show_label=True,
                     elem_id="output_gallery",
-                    columns=None,
-                    rows=None,
+                    columns=None,
+                    rows=None,
                     height="auto",
                     allow_preview=True,
-                    object_fit="contain"
+                    object_fit="contain"
                 )

                 gr.Markdown("# Task Examples")
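The `# Pre-create all possible image components` comment in the diff reflects a standard Gradio pattern: components cannot be added to a running Blocks app, so the UI builds every cell of the largest supported grid up front and toggles visibility when the user changes the grid size. A minimal, self-contained sketch of that pattern, not this Space's actual code; `MAX_H`, `MAX_W`, and the `toggle` handler are illustrative assumptions:

```python
import gradio as gr

MAX_H, MAX_W = 5, 5  # assumed upper bounds; the Space's real max_grid_h/max_grid_w may differ

with gr.Blocks() as demo:
    grid_h = gr.Slider(1, MAX_H, value=1, step=1, label="Rows")
    grid_w = gr.Slider(1, MAX_W, value=1, step=1, label="Columns")

    # Pre-create every cell once; visibility is toggled later instead of
    # rebuilding the layout, since Blocks cannot add components at runtime.
    all_image_inputs = []
    for i in range(MAX_H):
        with gr.Row():
            for j in range(MAX_W):
                img = gr.Image(label=f"({i}, {j})", type="pil",
                               visible=(i == 0 and j == 0))
                all_image_inputs.append(img)

    def toggle(h, w):
        # One gr.update per pre-created component, in row-major order.
        return [gr.update(visible=(i < h and j < w))
                for i in range(MAX_H) for j in range(MAX_W)]

    grid_h.change(toggle, inputs=[grid_h, grid_w], outputs=all_image_inputs)
    grid_w.change(toggle, inputs=[grid_h, grid_w], outputs=all_image_inputs)

if __name__ == "__main__":
    demo.launch()
```

Returning one `gr.update(visible=...)` per pre-created component keeps the layout stable while only the requested cells are shown, which is also why the diff collects `all_image_inputs` as a flat list.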
visualcloze.py CHANGED

@@ -241,7 +241,8 @@ class VisualClozeModel:
         return output_image

     def process_images(
-        self, images: list[list[Image.Image]],
+        self, images: list[list[Image.Image]],
+        prompts: list[str],
         seed: int = 0,
         cfg: int = 30,
         steps: int = 30,
@@ -256,7 +257,7 @@ class VisualClozeModel:
             images (list[list[Image.Image]]): A grid-layout image collection, each row represents an in-context example or the current query,
                 where the current query should be placed in the last row.
                 The target image can be None in the input. The other images should be the PIL Image class (Image.Image).
-
+            prompts (list[str]): Three prompts, representing the layout prompt, task prompt, and content prompt respectively.
             seed (int): A fixed integer seed to ensure reproducibility of the random elements in the processing.
             cfg (int): The strength of Classifier-Free Diffusion Guidance.
             steps (int): The number of sampling steps.
@@ -388,7 +389,7 @@ class VisualClozeModel:
         x = [noise]

         with torch.no_grad():
-            inp = prepare_modified(t5=self.t5, clip=self.clip, img=x, prompt=[' '.join(
+            inp = prepare_modified(t5=self.t5, clip=self.clip, img=x, prompt=[' '.join(prompts)], proportion_empty_prompts=0.0)

             model_kwargs = dict(
                 txt=inp["txt"],
@@ -445,7 +446,7 @@ class VisualClozeModel:
                 upsampling_steps=upsampling_steps,
                 upsampling_noise=upsampling_noise,
                 generator=rng,
-                content_prompt=
+                content_prompt=prompts[2])
            ret.append(upsampled)

         return ret
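The signature change is the substance of this commit's visualcloze.py edit: `process_images` now takes the three prompts explicitly, space-joins them into the conditioning prompt passed to `prepare_modified`, and forwards `prompts[2]` (the content prompt) to the upsampling call. A hedged usage sketch of the new signature; the model construction, file names, and prompt strings are illustrative assumptions, not part of the diff:

```python
from PIL import Image

from visualcloze import VisualClozeModel

# Constructor arguments are not shown in this diff; illustrative only.
model = VisualClozeModel()

# A 2x2 grid: the first row is an in-context example, the last row is the
# query. The unknown target in the query row is passed as None, per the docstring.
images = [
    [Image.open("example_input.png"), Image.open("example_output.png")],
    [Image.open("query_input.png"), None],
]

# prompts[0] = layout prompt, prompts[1] = task prompt, prompts[2] = content prompt.
prompts = [
    "A grid layout with 2 rows and 2 columns.",
    "Translate an edge map into a photo.",
    "A red sports car parked on a street.",
]

results = model.process_images(images=images, prompts=prompts,
                               seed=0, cfg=30, steps=30)
```

Per the diff, the three prompts are encoded as a single string (`' '.join(prompts)`), while the content prompt alone is reused as `content_prompt` during the upsampling stage.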