Spaces:

omer11a
/

bounded-attention

Runtime error

App Files Files Community

omer11a committed on Apr 3, 2024

Commit

f42e996

1 Parent(s): 49a7542

Fixed errors

Browse files

Files changed (1) hide show

app.py +148 -145

app.py CHANGED Viewed

@@ -13,11 +13,51 @@ from pytorch_lightning import seed_everything
 from functools import partial
 RESOLUTION = 256
 MIN_SIZE = 0.01
 WHITE = 255
 COLORS = ["red", "blue", "green", "orange", "purple", "turquoise", "olive"]
 DESCRIPTION = """
     <p style="text-align: center; font-weight: bold;">
     <span style="font-size: 28px">Bounded Attention</span>
@@ -72,14 +112,8 @@ FOOTNOTE = """
 """
-MODEL_PATH = "stabilityai/stable-diffusion-xl-base-1.0"
-scheduler = DDIMScheduler(beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear", clip_sample=False, set_alpha_to_one=False)
-model = StableDiffusionXLPipeline.from_pretrained(MODEL_PATH, scheduler=scheduler, torch_dtype=torch.float16)
-model.unet.set_default_attn_processor()
-model.enable_sequential_cpu_offload()
 def inference(
     boxes,
     prompts,
     subject_token_indices,
@@ -125,14 +159,15 @@ def inference(
     )
     register_attention_editor_diffusers(model, editor)
     images = model(prompts, latents=start_code, guidance_scale=classifier_free_guidance_scale).images
     unregister_attention_editor_diffusers(model)
     model.to(torch.device("cpu"))
 @spaces.GPU(duration=300)
 def generate(
     prompt,
     subject_token_indices,
     filter_token_indices,
@@ -162,7 +197,7 @@ def generate(
     prompts = [prompt.strip(".").strip(",").strip()] * batch_size
     images = inference(
-        boxes, prompts, subject_token_indices, filter_token_indices, num_tokens, init_step_size,
         final_step_size, num_clusters_per_subject, cross_loss_scale, self_loss_scale, classifier_free_guidance_scale,
         num_iterations, loss_threshold, num_guidance_steps, seed)
@@ -217,139 +252,107 @@ def clear(batch_size):
 def main():
-    css = """
-    #paper-info a {
-        color:#008AD7;
-        text-decoration: none;
-    }
-    #paper-info a:hover {
-        cursor: pointer;
-        text-decoration: none;
-    }
-    .tooltip {
-        color: #555;
-        position: relative;
-        display: inline-block;
-        cursor: pointer;
-    }
-    .tooltip .tooltiptext {
-        visibility: hidden;
-        width: 400px;
-        background-color: #555;
-        color: #fff;
-        text-align: center;
-        padding: 5px;
-        border-radius: 5px;
-        position: absolute;
-        z-index: 1; /* Set z-index to 1 */
-        left: 10px;
-        top: 100%;
-        opacity: 0;
-        transition: opacity 0.3s;
-    }
-    .tooltip:hover .tooltiptext {
-        visibility: visible;
-        opacity: 1;
-        z-index: 9999; /* Set a high z-index value when hovering */
-    }
-    """
-nltk.download("averaged_perceptron_tagger")
-with gr.Blocks(
-        css=css,
-        title="Bounded Attention demo",
-) as demo:
-    gr.HTML(DESCRIPTION)
-    gr.HTML(COPY_LINK)
-    with gr.Column():
-        gr.HTML("Scroll down to see examples of the required input format.")
-        prompt = gr.Textbox(
-            label="Text prompt",
-        )
-        subject_token_indices = gr.Textbox(
-            label="The token indices of each subject (separate indices for the same subject with commas, and for different subjects with semicolons)",
-        )
-        filter_token_indices = gr.Textbox(
-            label="Optional: The token indices to filter, i.e. conjunctions, numbers, postional relations, etc. (if left empty, this will be automatically inferred)",
-        )
-        num_tokens = gr.Textbox(
-            label="Optional: The number of tokens in the prompt (We use this to verify your input, as sometimes rare words are split into more than one token)",
-        )
-        with gr.Row():
-            sketchpad = gr.Sketchpad(label="Sketch Pad (draw each bounding box in a different layer)")
-            layout_image = gr.Image(type="pil", label="Bounding Boxes", interactive=False)
-        with gr.Row():
-            clear_button = gr.Button(value="Clear")
-            generate_layout_button = gr.Button(value="Generate layout")
-            generate_image_button = gr.Button(value="Generate image")
-        with gr.Row():
-            out_images = gr.Gallery(type="pil", label="Generated Images", interactive=False)
-        with gr.Accordion("Advanced Options", open=False):
-            with gr.Column():
-                gr.HTML(ADVANCED_OPTION_DESCRIPTION)
-                batch_size = gr.Slider(minimum=1, maximum=5, step=1, value=1, label="Number of samples (limited to one sample on current space)")
-                num_guidance_steps = gr.Slider(minimum=5, maximum=20, step=1, value=8, label="Number of timesteps to perform guidance")
-                init_step_size = gr.Slider(minimum=0, maximum=50, step=0.5, value=25, label="Initial step size")
-                final_step_size = gr.Slider(minimum=0, maximum=20, step=0.5, value=10, label="Final step size")
-                num_clusters_per_subject = gr.Slider(minimum=0, maximum=5, step=0.5, value=3, label="Number of clusters per subject")
-                cross_loss_scale = gr.Slider(minimum=0, maximum=2, step=0.1, value=1, label="Cross-attention loss scale factor")
-                self_loss_scale = gr.Slider(minimum=0, maximum=2, step=0.1, value=1, label="Self-attention loss scale factor")
-                num_iterations = gr.Slider(minimum=0, maximum=10, step=1, value=5, label="Number of Gradient Descent iterations")
-                loss_threshold = gr.Slider(minimum=0, maximum=1, step=0.1, value=0.2, label="Loss threshold")
-                classifier_free_guidance_scale = gr.Slider(minimum=0, maximum=50, step=0.5, value=7.5, label="Classifier-free guidance Scale")
-                seed = gr.Slider(minimum=0, maximum=1000, step=1, value=445, label="Random Seed")
-        boxes = gr.State([])
-        clear_button.click(
-            clear,
-            inputs=[batch_size],
-            outputs=[boxes, sketchpad, layout_image, out_images],
-            queue=False,
-        )
-        generate_layout_button.click(
-            draw,
-            inputs=[sketchpad],
-            outputs=[boxes, layout_image],
-            queue=False,
-        )
-        generate_image_button.click(
-            fn=generate,
-            inputs=[
-                prompt, subject_token_indices, filter_token_indices, num_tokens,
-                init_step_size, final_step_size, num_clusters_per_subject, cross_loss_scale, self_loss_scale,
-                classifier_free_guidance_scale, batch_size, num_iterations, loss_threshold, num_guidance_steps,
-                seed,
-                boxes,
-            ],
-            outputs=[out_images],
-            queue=True,
-        )
-    with gr.Column():
-        gr.Examples(
-            examples=[
-                ["a ginger kitten and a gray puppy in a yard", "2,3;6,7", "1,4,5,8,9", "10"],
-                ["a realistic photo of a highway with a semi trailer and a concrete mixer and a helicopter", "9,10;13,14;17", "1,4,5,7,8,11,12,15,16", "17"],
-            ],
-            inputs=[prompt, subject_token_indices, filter_token_indices, num_tokens],
-        )
-    gr.HTML(FOOTNOTE)
-demo.launch(show_api=False, show_error=True)

 from functools import partial
+MODEL_PATH = "stabilityai/stable-diffusion-xl-base-1.0"
 RESOLUTION = 256
 MIN_SIZE = 0.01
 WHITE = 255
 COLORS = ["red", "blue", "green", "orange", "purple", "turquoise", "olive"]
+CSS = """
+#paper-info a {
+    color:#008AD7;
+    text-decoration: none;
+}
+#paper-info a:hover {
+    cursor: pointer;
+    text-decoration: none;
+}
+.tooltip {
+    color: #555;
+    position: relative;
+    display: inline-block;
+    cursor: pointer;
+}
+.tooltip .tooltiptext {
+    visibility: hidden;
+    width: 400px;
+    background-color: #555;
+    color: #fff;
+    text-align: center;
+    padding: 5px;
+    border-radius: 5px;
+    position: absolute;
+    z-index: 1; /* Set z-index to 1 */
+    left: 10px;
+    top: 100%;
+    opacity: 0;
+    transition: opacity 0.3s;
+}
+.tooltip:hover .tooltiptext {
+    visibility: visible;
+    opacity: 1;
+    z-index: 9999; /* Set a high z-index value when hovering */
+}
+"""
 DESCRIPTION = """
     <p style="text-align: center; font-weight: bold;">
     <span style="font-size: 28px">Bounded Attention</span>
 """
 def inference(
+    model,
     boxes,
     prompts,
     subject_token_indices,
     )
     register_attention_editor_diffusers(model, editor)
     images = model(prompts, latents=start_code, guidance_scale=classifier_free_guidance_scale).images
     unregister_attention_editor_diffusers(model)
     model.to(torch.device("cpu"))
+    return images
 @spaces.GPU(duration=300)
 def generate(
+    model,
     prompt,
     subject_token_indices,
     filter_token_indices,
     prompts = [prompt.strip(".").strip(",").strip()] * batch_size
     images = inference(
+        model, boxes, prompts, subject_token_indices, filter_token_indices, num_tokens, init_step_size,
         final_step_size, num_clusters_per_subject, cross_loss_scale, self_loss_scale, classifier_free_guidance_scale,
         num_iterations, loss_threshold, num_guidance_steps, seed)
 def main():
+    nltk.download("averaged_perceptron_tagger")
+    scheduler = DDIMScheduler(beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear", clip_sample=False, set_alpha_to_one=False)
+    model = StableDiffusionXLPipeline.from_pretrained(MODEL_PATH, scheduler=scheduler, torch_dtype=torch.float16)
+    model.unet.set_default_attn_processor()
+    model.enable_sequential_cpu_offload()
+    with gr.Blocks(
+            css=CSS,
+            title="Bounded Attention demo",
+    ) as demo:
+        gr.HTML(DESCRIPTION)
+        gr.HTML(COPY_LINK)
+        with gr.Column():
+            gr.HTML("Scroll down to see examples of the required input format.")
+            prompt = gr.Textbox(
+                label="Text prompt",
+            )
+            subject_token_indices = gr.Textbox(
+                label="The token indices of each subject (separate indices for the same subject with commas, and for different subjects with semicolons)",
+            )
+            filter_token_indices = gr.Textbox(
+                label="Optional: The token indices to filter, i.e. conjunctions, numbers, postional relations, etc. (if left empty, this will be automatically inferred)",
+            )
+            num_tokens = gr.Textbox(
+                label="Optional: The number of tokens in the prompt (We use this to verify your input, as sometimes rare words are split into more than one token)",
+            )
+            with gr.Row():
+                sketchpad = gr.Sketchpad(label="Sketch Pad (draw each bounding box in a different layer)")
+                layout_image = gr.Image(type="pil", label="Bounding Boxes", interactive=False)
+            with gr.Row():
+                clear_button = gr.Button(value="Clear")
+                generate_layout_button = gr.Button(value="Generate layout")
+                generate_image_button = gr.Button(value="Generate image")
+            with gr.Row():
+                out_images = gr.Gallery(type="pil", label="Generated Images", interactive=False)
+            with gr.Accordion("Advanced Options", open=False):
+                with gr.Column():
+                    gr.HTML(ADVANCED_OPTION_DESCRIPTION)
+                    batch_size = gr.Slider(minimum=1, maximum=5, step=1, value=1, label="Number of samples (limited to one sample on current space)")
+                    num_guidance_steps = gr.Slider(minimum=5, maximum=20, step=1, value=8, label="Number of timesteps to perform guidance")
+                    init_step_size = gr.Slider(minimum=0, maximum=50, step=0.5, value=25, label="Initial step size")
+                    final_step_size = gr.Slider(minimum=0, maximum=20, step=0.5, value=10, label="Final step size")
+                    num_clusters_per_subject = gr.Slider(minimum=0, maximum=5, step=0.5, value=3, label="Number of clusters per subject")
+                    cross_loss_scale = gr.Slider(minimum=0, maximum=2, step=0.1, value=1, label="Cross-attention loss scale factor")
+                    self_loss_scale = gr.Slider(minimum=0, maximum=2, step=0.1, value=1, label="Self-attention loss scale factor")
+                    num_iterations = gr.Slider(minimum=0, maximum=10, step=1, value=5, label="Number of Gradient Descent iterations")
+                    loss_threshold = gr.Slider(minimum=0, maximum=1, step=0.1, value=0.2, label="Loss threshold")
+                    classifier_free_guidance_scale = gr.Slider(minimum=0, maximum=50, step=0.5, value=7.5, label="Classifier-free guidance Scale")
+                    seed = gr.Slider(minimum=0, maximum=1000, step=1, value=445, label="Random Seed")
+            boxes = gr.State([])
+            clear_button.click(
+                clear,
+                inputs=[batch_size],
+                outputs=[boxes, sketchpad, layout_image, out_images],
+                queue=False,
+            )
+            generate_layout_button.click(
+                draw,
+                inputs=[sketchpad],
+                outputs=[boxes, layout_image],
+                queue=False,
+            )
+            generate_image_button.click(
+                fn=partial(generate, model),
+                inputs=[
+                    prompt, subject_token_indices, filter_token_indices, num_tokens,
+                    init_step_size, final_step_size, num_clusters_per_subject, cross_loss_scale, self_loss_scale,
+                    classifier_free_guidance_scale, batch_size, num_iterations, loss_threshold, num_guidance_steps,
+                    seed,
+                    boxes,
+                ],
+                outputs=[out_images],
+                queue=True,
+            )
+        with gr.Column():
+            gr.Examples(
+                examples=[
+                    ["a ginger kitten and a gray puppy in a yard", "2,3;6,7", "1,4,5,8,9", "10"],
+                    ["a realistic photo of a highway with a semi trailer and a concrete mixer and a helicopter", "9,10;13,14;17", "1,4,5,7,8,11,12,15,16", "17"],
+                ],
+                inputs=[prompt, subject_token_indices, filter_token_indices, num_tokens],
+            )
+        gr.HTML(FOOTNOTE)
+    demo.launch(show_api=False, show_error=True)
+if __name__ == "__main__":
+    main()