Spaces:

OzzyGT
/

diffusers-fast-inpaint

Running on Zero

App Files Files Community

OzzyGT HF Staff committed 8 days ago

Commit

1fcbe69

1 Parent(s): 2100afd

fix

Browse files

Files changed (3) hide show

README.md +0 -1
app.py +77 -59
requirements.txt +3 -4

README.md CHANGED Viewed

@@ -4,7 +4,6 @@ emoji: 👀
 colorFrom: pink
 colorTo: gray
 sdk: gradio
-sdk_version: 4.42.0
 app_file: app.py
 pinned: false
 license: apache-2.0

 colorFrom: pink
 colorTo: gray
 sdk: gradio
 app_file: app.py
 pinned: false
 license: apache-2.0

app.py CHANGED Viewed

@@ -1,58 +1,66 @@
 import gradio as gr
 import spaces
 import torch
-from diffusers import AutoencoderKL, TCDScheduler
-from diffusers.models.model_loading_utils import load_state_dict
-from gradio_imageslider import ImageSlider
-from huggingface_hub import hf_hub_download
-from controlnet_union import ControlNetModel_Union
-from pipeline_fill_sd_xl import StableDiffusionXLFillPipeline
 MODELS = {
     "RealVisXL V5.0 Lightning": "SG161222/RealVisXL_V5.0_Lightning",
 }
-config_file = hf_hub_download(
-    "xinsir/controlnet-union-sdxl-1.0",
-    filename="config_promax.json",
-)
-config = ControlNetModel_Union.load_config(config_file)
-controlnet_model = ControlNetModel_Union.from_config(config)
-model_file = hf_hub_download(
-    "xinsir/controlnet-union-sdxl-1.0",
-    filename="diffusion_pytorch_model_promax.safetensors",
-)
-state_dict = load_state_dict(model_file)
-model, _, _, _, _ = ControlNetModel_Union._load_pretrained_model(
-    controlnet_model, state_dict, model_file, "xinsir/controlnet-union-sdxl-1.0"
 )
-model.to(device="cuda", dtype=torch.float16)
-vae = AutoencoderKL.from_pretrained(
-    "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16
-).to("cuda")
-pipe = StableDiffusionXLFillPipeline.from_pretrained(
     "SG161222/RealVisXL_V5.0_Lightning",
     torch_dtype=torch.float16,
     vae=vae,
-    controlnet=model,
-    variant="fp16",
 ).to("cuda")
 pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)
 @spaces.GPU(duration=24)
-def fill_image(prompt, image, model_selection, paste_back):
     (
         prompt_embeds,
         negative_prompt_embeds,
         pooled_prompt_embeds,
         negative_pooled_prompt_embeds,
-    ) = pipe.encode_prompt(prompt, "cuda", True)
     source = image["background"]
     mask = image["layers"][0]
@@ -62,17 +70,25 @@ def fill_image(prompt, image, model_selection, paste_back):
     cnet_image = source.copy()
     cnet_image.paste(0, (0, 0), binary_mask)
-    for image in pipe(
         prompt_embeds=prompt_embeds,
         negative_prompt_embeds=negative_prompt_embeds,
         pooled_prompt_embeds=pooled_prompt_embeds,
         negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
-        image=cnet_image,
-    ):
-        yield image, cnet_image
-    print(f"{model_selection=}")
-    print(f"{paste_back=}")
     if paste_back:
         image = image.convert("RGBA")
@@ -87,10 +103,8 @@ def clear_result():
     return gr.update(value=None)
-title = """<h1 align="center">Diffusers Fast Inpaint</h1>
 <div align="center">Draw the mask over the subject you want to erase or change and write what you want to inpaint it with.</div>
-<div align="center">This is a lighting model with almost no CFG and 12 steps, so don't expect high quality generations.</div>
-<div align="center">This space is a PoC made for the guide <a href='https://huggingface.co/blog/OzzyGT/diffusers-image-fill'>Diffusers Image Fill</a>.</div>
 """
 with gr.Blocks() as demo:
@@ -99,41 +113,45 @@ with gr.Blocks() as demo:
         with gr.Column():
             prompt = gr.Textbox(
                 label="Prompt",
-                info="Describe what to inpaint the mask with",
-                lines=3,
             )
         with gr.Column():
-            model_selection = gr.Dropdown(
-                choices=list(MODELS.keys()),
-                value="RealVisXL V5.0 Lightning",
-                label="Model",
-            )
             with gr.Row():
-                with gr.Column():
-                    run_button = gr.Button("Generate")
-                with gr.Column():
-                    paste_back = gr.Checkbox(True, label="Paste back original")
     with gr.Row():
         input_image = gr.ImageMask(
-            type="pil", label="Input Image", crop_size=(1024, 1024), layers=False
         )
-        result = ImageSlider(
             interactive=False,
             label="Generated Image",
         )
     use_as_input_button = gr.Button("Use as Input Image", visible=False)
     def use_output_as_input(output_image):
         return gr.update(value=output_image[1])
-    use_as_input_button.click(
-        fn=use_output_as_input, inputs=[result], outputs=[input_image]
-    )
     run_button.click(
         fn=clear_result,
@@ -145,7 +163,7 @@ with gr.Blocks() as demo:
         outputs=use_as_input_button,
     ).then(
         fn=fill_image,
-        inputs=[prompt, input_image, model_selection, paste_back],
         outputs=result,
     ).then(
         fn=lambda: gr.update(visible=True),
@@ -163,7 +181,7 @@ with gr.Blocks() as demo:
         outputs=use_as_input_button,
     ).then(
         fn=fill_image,
-        inputs=[prompt, input_image, model_selection, paste_back],
         outputs=result,
     ).then(
         fn=lambda: gr.update(visible=True),

 import gradio as gr
 import spaces
 import torch
+from diffusers import AutoencoderKL, ControlNetUnionModel, DiffusionPipeline, TCDScheduler
+def callback_cfg_cutoff(pipeline, step_index, timestep, callback_kwargs):
+    if step_index == int(pipeline.num_timesteps * 0.2):
+        prompt_embeds = callback_kwargs["prompt_embeds"]
+        prompt_embeds = prompt_embeds[-1:]
+        add_text_embeds = callback_kwargs["add_text_embeds"]
+        add_text_embeds = add_text_embeds[-1:]
+        add_time_ids = callback_kwargs["add_time_ids"]
+        add_time_ids = add_time_ids[-1:]
+        control_image = callback_kwargs["control_image"]
+        control_image[0] = control_image[0][-1:]
+        control_type = callback_kwargs["control_type"]
+        control_type = control_type[-1:]
+        pipeline._guidance_scale = 0.0
+        callback_kwargs["prompt_embeds"] = prompt_embeds
+        callback_kwargs["add_text_embeds"] = add_text_embeds
+        callback_kwargs["add_time_ids"] = add_time_ids
+        callback_kwargs["control_image"] = control_image
+        callback_kwargs["control_type"] = control_type
+    return callback_kwargs
 MODELS = {
     "RealVisXL V5.0 Lightning": "SG161222/RealVisXL_V5.0_Lightning",
 }
+controlnet_model = ControlNetUnionModel.from_pretrained(
+    "OzzyGT/controlnet-union-promax-sdxl-1.0", variant="fp16", torch_dtype=torch.float16
 )
+controlnet_model.to(device="cuda", dtype=torch.float16)
+vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16).to("cuda")
+pipe = DiffusionPipeline.from_pretrained(
     "SG161222/RealVisXL_V5.0_Lightning",
     torch_dtype=torch.float16,
     vae=vae,
+    controlnet=controlnet_model,
+    custom_pipeline="OzzyGT/custom_sdxl_cnet_union",
 ).to("cuda")
 pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)
 @spaces.GPU(duration=24)
+def fill_image(prompt, negative_prompt, image, model_selection, paste_back):
     (
         prompt_embeds,
         negative_prompt_embeds,
         pooled_prompt_embeds,
         negative_pooled_prompt_embeds,
+    ) = pipe.encode_prompt(prompt, device="cuda", negative_prompt=negative_prompt)
     source = image["background"]
     mask = image["layers"][0]
     cnet_image = source.copy()
     cnet_image.paste(0, (0, 0), binary_mask)
+    image = pipe(
         prompt_embeds=prompt_embeds,
         negative_prompt_embeds=negative_prompt_embeds,
         pooled_prompt_embeds=pooled_prompt_embeds,
         negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
+        control_image=[cnet_image],
+        controlnet_conditioning_scale=[1.0],
+        control_mode=[7],
+        num_inference_steps=8,
+        guidance_scale=1.5,
+        callback_on_step_end=callback_cfg_cutoff,
+        callback_on_step_end_tensor_inputs=[
+            "prompt_embeds",
+            "add_text_embeds",
+            "add_time_ids",
+            "control_image",
+            "control_type",
+        ],
+    ).images[0]
     if paste_back:
         image = image.convert("RGBA")
     return gr.update(value=None)
+title = """<h2 align="center">Diffusers Fast Inpaint</h2>
 <div align="center">Draw the mask over the subject you want to erase or change and write what you want to inpaint it with.</div>
 """
 with gr.Blocks() as demo:
         with gr.Column():
             prompt = gr.Textbox(
                 label="Prompt",
+                lines=1,
             )
         with gr.Column():
             with gr.Row():
+                negative_prompt = gr.Textbox(
+                    label="Negative Prompt",
+                    lines=1,
+                )
+    with gr.Row():
+        with gr.Column():
+            run_button = gr.Button("Generate")
+        with gr.Column():
+            paste_back = gr.Checkbox(True, label="Paste back original")
     with gr.Row():
         input_image = gr.ImageMask(
+            type="pil",
+            label="Input Image",
+            crop_size=(1024, 1024),
+            canvas_size=(1024, 1024),
+            layers=False,
+            height=512,
         )
+        result = gr.ImageSlider(
             interactive=False,
             label="Generated Image",
         )
     use_as_input_button = gr.Button("Use as Input Image", visible=False)
+    model_selection = gr.Dropdown(choices=list(MODELS.keys()), value="RealVisXL V5.0 Lightning", label="Model")
     def use_output_as_input(output_image):
         return gr.update(value=output_image[1])
+    use_as_input_button.click(fn=use_output_as_input, inputs=[result], outputs=[input_image])
     run_button.click(
         fn=clear_result,
         outputs=use_as_input_button,
     ).then(
         fn=fill_image,
+        inputs=[prompt, negative_prompt, input_image, model_selection, paste_back],
         outputs=result,
     ).then(
         fn=lambda: gr.update(visible=True),
         outputs=use_as_input_button,
     ).then(
         fn=fill_image,
+        inputs=[prompt, negative_prompt, input_image, model_selection, paste_back],
         outputs=result,
     ).then(
         fn=lambda: gr.update(visible=True),

requirements.txt CHANGED Viewed

@@ -1,10 +1,9 @@
 torch
 spaces
-gradio==4.42.0
-gradio-imageslider
-numpy==1.26.4
 transformers
 accelerate
 diffusers
-fastapi<0.113.0
 opencv-python

 torch
 spaces
+gradio
+numpy
 transformers
 accelerate
 diffusers
+fastapi
 opencv-python