Gemini899 committed on
Commit e05e986 · verified · 1 Parent(s): 8bd5dc7

Update app.py

Files changed (1):
  1. app.py (+118 −201)

app.py CHANGED
@@ -3,34 +3,42 @@ import gradio as gr
  import re
  from PIL import Image
  import os
  import numpy as np
  import torch
-
- # We'll lazy-load FluxImg2ImgPipeline
  from diffusers import FluxImg2ImgPipeline

- ###############################################################################
- # GLOBAL PIPELINE REFERENCE (start as None, so we only load on first inference)
- ###############################################################################
  pipe = None

- ###############################################################################
- # HELPER FUNCTIONS
- ###############################################################################
  def sanitize_prompt(prompt):
-     # Allow only alphanumeric characters, spaces, and basic punctuation
-     allowed_chars = re.compile(r"[^a-zA-Z0-9\s.,!?-]")
-     return allowed_chars.sub("", prompt)

- def convert_to_fit_size(original_width_and_height, maximum_size=512):
-     """
-     Resizes the image so its largest dimension = maximum_size (default 512).
-     Lower resolution => less VRAM usage.
-     """
      width, height = original_width_and_height
      if width <= maximum_size and height <= maximum_size:
          return width, height
-
      if width > height:
          scaling_factor = maximum_size / width
      else:
@@ -41,123 +49,72 @@ def convert_to_fit_size(original_width_and_height, maximum_size=512):
      return new_width, new_height

  def adjust_to_multiple_of_32(width: int, height: int):
-     """
-     Snap dimensions down to multiples of 32 (common for diffusion pipelines).
-     """
      width = width - (width % 32)
      height = height - (height % 32)
-     return max(width, 32), max(height, 32)
-
- def load_flux_pipeline():
-     """
-     Lazy-load the FluxImg2ImgPipeline in float16 with memory-saving features.
-     """
-     global pipe
-     if pipe is not None:
-         return pipe  # Already loaded
-
-     print("Loading FluxImg2ImgPipeline in float16...")
-
-     # 1) Load the pipeline using float16
-     local_pipe = FluxImg2ImgPipeline.from_pretrained(
-         "black-forest-labs/FLUX.1-schnell",
-         torch_dtype=torch.float16,  # IMPORTANT: no bfloat16
-         low_cpu_mem_usage=True
-     )
-     local_pipe.to("cuda")
-
-     # 2) Enable memory-efficient attention (xFormers), if installed
-     try:
-         local_pipe.enable_xformers_memory_efficient_attention()
-         print("xFormers memory efficient attention enabled.")
-     except Exception as e:
-         print("Could not enable xFormers:", e)
-
-     # 3) CPU offload (keeps only active layers on GPU)
-     try:
-         local_pipe.enable_model_cpu_offload()
-         print("CPU offload enabled.")
-     except Exception as e:
-         print("Could not enable model_cpu_offload:", e)
-
-     # 4) VAE slicing reduces peak memory usage
-     local_pipe.enable_vae_slicing()
-
-     # 5) Optionally set max sequence length (like your original code)
-     local_pipe.max_sequence_length = 256
-
-     pipe = local_pipe
-     print("Flux pipeline loaded successfully (float16).")
-     return pipe
-
- ###############################################################################
- # MAIN INFERENCE FUNCTION
- ###############################################################################
  @spaces.GPU(duration=120)
- def process_images(
-     image,
-     prompt="a girl",
-     strength=0.75,
-     seed=0,
-     inference_step=4,
-     progress=gr.Progress(track_tqdm=True)
- ):
      progress(0, desc="Starting")

-     # 1) Lazy-load the pipeline
-     local_pipe = load_flux_pipeline()
-
-     # 2) If no image provided
-     if image is None:
-         print("No input image provided.")
-         return None
-
-     # 3) Resize input to reduce VRAM usage
-     fit_width, fit_height = convert_to_fit_size(image.size, maximum_size=512)
-     width, height = adjust_to_multiple_of_32(fit_width, fit_height)
-
-     # Use high-quality Lanczos resizing
-     image = image.resize((width, height), Image.LANCZOS)
-
-     # 4) Create generator for reproducibility
-     generator = torch.Generator("cuda").manual_seed(seed)
-
-     # 5) Actually run flux img2img
-     progress(50, desc="Running flux img2img")
-     print(f"Prompt: {prompt}, strength={strength}, steps={inference_step}")
-
-     output = local_pipe(
-         prompt=prompt,
-         image=image,
-         generator=generator,
-         strength=strength,
-         guidance_scale=0,  # same as your original code
-         num_inference_steps=inference_step,
-         # We don't explicitly pass width & height. If you want, remove them or keep them:
-         # width=width,
-         # height=height,
-     )
-
-     pil_image = output.images[0]
-
-     # 6) If the new image was forcibly changed shape by the model,
-     #    we can re-resize back to (fit_width, fit_height).
-     #    Usually not necessary with flux, but keep the logic if you want.
-     new_w, new_h = pil_image.size
-     if (new_w != fit_width) or (new_h != fit_height):
-         pil_image = pil_image.resize((fit_width, fit_height), Image.LANCZOS)
-
-     progress(100, desc="Done")
-     return pil_image
-
- ###############################################################################
- # GRADIO APP
- ###############################################################################
  def read_file(path: str) -> str:
      with open(path, 'r', encoding='utf-8') as f:
-         return f.read()

- css = """
  #col-left {
      margin: 0 auto;
      max-width: 640px;
@@ -172,100 +129,60 @@ css = """
      justify-content: center;
      gap:10px
  }
  .image {
      width: 128px;
      height: 128px;
      object-fit: cover;
  }
  .text {
      font-size: 16px;
  }
  """

  with gr.Blocks(css=css, elem_id="demo-container") as demo:
-     # Optionally load some HTML from files
-     try:
          gr.HTML(read_file("demo_header.html"))
-     except:
-         pass
-     try:
          gr.HTML(read_file("demo_tools.html"))
-     except:
-         pass
-
      with gr.Row():
-         with gr.Column():
-             image = gr.Image(
-                 height=800,
-                 sources=['upload', 'clipboard'],
-                 image_mode='RGB',
-                 elem_id="image_upload",
-                 type="pil",
-                 label="Upload"
-             )
-             with gr.Row(elem_id="prompt-container", equal_height=False):
-                 prompt = gr.Textbox(
-                     label="Prompt",
-                     value="a woman",
-                     placeholder="Enter your prompt here",
-                     elem_id="prompt"
-                 )
-                 btn = gr.Button("Img2Img", elem_id="run_button", variant="primary")
-
-             with gr.Accordion(label="Advanced Settings", open=False):
-                 with gr.Row(equal_height=True):
-                     strength = gr.Number(
-                         value=0.75,
-                         minimum=0,
-                         maximum=0.75,
-                         step=0.01,
-                         label="strength"
-                     )
-                     seed = gr.Number(
-                         value=100,
-                         minimum=0,
-                         step=1,
-                         label="seed"
-                     )
-                     inference_step = gr.Number(
-                         value=4,
-                         minimum=1,
-                         step=1,
-                         label="inference_step"
-                     )
-                 id_input = gr.Text(label="Name", visible=False)
-
-         with gr.Column():
-             image_out = gr.Image(
-                 height=800,
-                 sources=[],
-                 label="Output",
-                 elem_id="output-img",
-                 format="jpg"
-             )
-
-     # Provide example inputs if desired
      gr.Examples(
-         examples=[
-             ["examples/draw_input.jpg", None, "a woman, eyes closed, mouth opened"],
-             ["examples/gimp_input.jpg", None, "a woman, hand on neck"]
-         ],
-         inputs=[image, image_out, prompt],
      )
-
-     # Possibly load a footer HTML
-     try:
-         gr.HTML(read_file("demo_footer.html"))
-     except:
-         pass
-
-     # Link UI events to process_images
      gr.on(
          triggers=[btn.click, prompt.submit],
-         fn=process_images,
-         inputs=[image, prompt, strength, seed, inference_step],
-         outputs=[image_out]
      )

  if __name__ == "__main__":
-     demo.launch(share=True, show_error=True)
  import re
  from PIL import Image
  import os
+
+ # Set memory optimization flags
+ os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
+
  import numpy as np
  import torch
  from diffusers import FluxImg2ImgPipeline

+ # Global pipe variable for lazy loading
  pipe = None

+ # Use float16 instead of bfloat16 for T4 compatibility
+ dtype = torch.float16
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+
+ def get_pipe():
+     global pipe
+     if pipe is None:
+         pipe = FluxImg2ImgPipeline.from_pretrained(
+             "black-forest-labs/FLUX.1-schnell",
+             torch_dtype=torch.float16,
+             variant="fp16"
+         ).to(device)
+     return pipe
+
  def sanitize_prompt(prompt):
+     # Allow only alphanumeric characters, spaces, and basic punctuation
+     allowed_chars = re.compile(r"[^a-zA-Z0-9\s.,!?-]")
+     sanitized_prompt = allowed_chars.sub("", prompt)
+     return sanitized_prompt

+ def convert_to_fit_size(original_width_and_height, maximum_size=1024):
      width, height = original_width_and_height
      if width <= maximum_size and height <= maximum_size:
          return width, height
+
      if width > height:
          scaling_factor = maximum_size / width
      else:

      return new_width, new_height

  def adjust_to_multiple_of_32(width: int, height: int):
      width = width - (width % 32)
      height = height - (height % 32)
+     return width, height
+
+ def resize_image(image: Image.Image, max_dim: int = 512) -> Image.Image:
+     """Resizes image to fit within max_dim while preserving aspect ratio"""
+     w, h = image.size
+     ratio = min(max_dim / w, max_dim / h)
+     if ratio < 1.0:
+         new_w = int(w * ratio)
+         new_h = int(h * ratio)
+         image = image.resize((new_w, new_h), Image.LANCZOS)
+     return image
  @spaces.GPU(duration=120)
+ def process_images(image, prompt="a girl", strength=0.75, seed=0, inference_step=4, progress=gr.Progress(track_tqdm=True)):
      progress(0, desc="Starting")
+
+     # Get the model using lazy loading
+     model = get_pipe()
+
+     def process_img2img(image, prompt="a person", strength=0.75, seed=0, num_inference_steps=4):
+         if image is None:
+             print("empty input image returned")
+             return None
+
+         # Resize image to reduce memory usage
+         image = resize_image(image, max_dim=512)
+
+         generator = torch.Generator(device).manual_seed(seed)
+         fit_width, fit_height = convert_to_fit_size(image.size, maximum_size=512)
+         width, height = adjust_to_multiple_of_32(fit_width, fit_height)
+         image = image.resize((width, height), Image.LANCZOS)
+
+         # Use autocast for better memory efficiency
+         with torch.cuda.amp.autocast(dtype=torch.float16):
+             with torch.no_grad():
+                 output = model(
+                     prompt=prompt,
+                     image=image,
+                     generator=generator,
+                     strength=strength,
+                     width=width,
+                     height=height,
+                     guidance_scale=0,
+                     num_inference_steps=num_inference_steps,
+                     max_sequence_length=256
+                 )
+
+         pil_image = output.images[0]
+         new_width, new_height = pil_image.size
+
+         if (new_width != fit_width) or (new_height != fit_height):
+             resized_image = pil_image.resize((fit_width, fit_height), Image.LANCZOS)
+             return resized_image
+         return pil_image
+
+     output = process_img2img(image, prompt, strength, seed, inference_step)
+     return output
  def read_file(path: str) -> str:
      with open(path, 'r', encoding='utf-8') as f:
+         content = f.read()
+         return content

+ css = """
  #col-left {
      margin: 0 auto;
      max-width: 640px;

      justify-content: center;
      gap:10px
  }
+
  .image {
      width: 128px;
      height: 128px;
      object-fit: cover;
  }
+
  .text {
      font-size: 16px;
  }
  """

  with gr.Blocks(css=css, elem_id="demo-container") as demo:
+     with gr.Column():
          gr.HTML(read_file("demo_header.html"))
          gr.HTML(read_file("demo_tools.html"))
      with gr.Row():
+         with gr.Column():
+             image = gr.Image(height=800, sources=['upload', 'clipboard'], image_mode='RGB', elem_id="image_upload", type="pil", label="Upload")
+             with gr.Row(elem_id="prompt-container", equal_height=False):
+                 with gr.Row():
+                     prompt = gr.Textbox(label="Prompt", value="a woman", placeholder="Your prompt (what you want in place of what is erased)", elem_id="prompt")
+
+             btn = gr.Button("Img2Img", elem_id="run_button", variant="primary")
+
+             with gr.Accordion(label="Advanced Settings", open=False):
+                 with gr.Row(equal_height=True):
+                     strength = gr.Number(value=0.75, minimum=0, maximum=0.75, step=0.01, label="strength")
+                     seed = gr.Number(value=100, minimum=0, step=1, label="seed")
+                     inference_step = gr.Number(value=4, minimum=1, step=4, label="inference_step")
+                 id_input = gr.Text(label="Name", visible=False)
+
+         with gr.Column():
+             image_out = gr.Image(height=800, sources=[], label="Output", elem_id="output-img", format="jpg")
+
      gr.Examples(
+         examples=[
+             ["examples/draw_input.jpg", "examples/draw_output.jpg", "a woman, eyes closed, mouth opened"],
+             ["examples/draw-gimp_input.jpg", "examples/draw-gimp_output.jpg", "a woman, eyes closed, mouth opened"],
+             ["examples/gimp_input.jpg", "examples/gimp_output.jpg", "a woman, hand on neck"],
+             ["examples/inpaint_input.jpg", "examples/inpaint_output.jpg", "a woman, hand on neck"]
+         ],
+         inputs=[image, image_out, prompt],
      )
+     gr.HTML(read_file("demo_footer.html"))
      gr.on(
          triggers=[btn.click, prompt.submit],
+         fn=process_images,
+         inputs=[image, prompt, strength, seed, inference_step],
+         outputs=[image_out]
      )

  if __name__ == "__main__":
+     demo.launch(share=True, show_error=True)
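
The sizing helpers this commit keeps (convert_to_fit_size, adjust_to_multiple_of_32) and the new resize_image are plain Python, so the dimension math can be sanity-checked without the Flux weights or a GPU. A minimal sketch, assuming only Pillow is installed; the two helpers below are a rough re-implementation of the ones above (the pipeline call itself is omitted):

```python
from PIL import Image

def convert_to_fit_size(original_width_and_height, maximum_size=1024):
    # Scale the longer edge down to maximum_size, keeping the aspect ratio.
    width, height = original_width_and_height
    if width <= maximum_size and height <= maximum_size:
        return width, height
    scaling_factor = maximum_size / max(width, height)
    return int(width * scaling_factor), int(height * scaling_factor)

def adjust_to_multiple_of_32(width: int, height: int):
    # Snap both dimensions down to the nearest multiple of 32,
    # which diffusion pipelines generally expect.
    return width - (width % 32), height - (height % 32)

# A 3:2 photo fits to 512x341, then snaps to 512x320 before inference.
img = Image.new("RGB", (1024, 683))
fit_w, fit_h = convert_to_fit_size(img.size, maximum_size=512)
w, h = adjust_to_multiple_of_32(fit_w, fit_h)
print((fit_w, fit_h), (w, h))  # (512, 341) (512, 320)
```

In process_images above, the pipeline runs at the snapped size and the output is resized back to (fit_width, fit_height) whenever the returned image has different dimensions.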