Gemini899 committed on
Commit
9a1289b
·
verified ·
1 Parent(s): b1bb2b0

Update flux1_img2img.py

Files changed (1):
  1. flux1_img2img.py +158 -41
flux1_img2img.py CHANGED
@@ -1,18 +1,81 @@
+import os
+import re
+import sys
 import torch
-from diffusers import FluxImg2ImgPipeline
+import gradio as gr
 from PIL import Image
-import sys
+
 import spaces
+from diffusers import FluxImg2ImgPipeline
 
-def resize_image(image, max_res=512):
+###############################################################################
+# GLOBAL PIPE VARIABLE (lazy-loaded so the Space can start without OOM)
+###############################################################################
+pipe = None  # We will load this when the user triggers an inference
+
+###############################################################################
+# OPTIONAL: Resize Helper for Lower VRAM Usage
+###############################################################################
+def resize_image(image, max_size=512):
+    """
+    Resizes the image so that the max dimension is 'max_size',
+    which helps reduce GPU memory usage on a T4.
+    """
     w, h = image.size
-    ratio = min(max_res / w, max_res / h)
+    ratio = min(max_size / w, max_size / h)
     if ratio < 1.0:
         new_w = int(w * ratio)
         new_h = int(h * ratio)
         image = image.resize((new_w, new_h), Image.LANCZOS)
     return image
 
+###############################################################################
+# PIPELINE LOADER: Loads FLUX.1-schnell with memory-saving features
+###############################################################################
+def load_flux_pipeline():
+    """
+    Lazily loads the FluxImg2ImgPipeline with float16,
+    CPU offload, xFormers (if installed), etc.
+    """
+    global pipe
+    if pipe is not None:
+        return  # Already loaded
+
+    print("Loading FluxImg2ImgPipeline in float16 mode ...")
+    # Use float16 for T4
+    pipe_local = FluxImg2ImgPipeline.from_pretrained(
+        "black-forest-labs/FLUX.1-schnell",
+        torch_dtype=torch.float16,
+        low_cpu_mem_usage=True
+    )
+
+    # Move to GPU (model CPU offload below re-manages placement if it succeeds)
+    pipe_local.to("cuda")
+
+    # Try enabling xFormers for memory-efficient attention
+    try:
+        pipe_local.enable_xformers_memory_efficient_attention()
+        print("Enabled xFormers memory efficient attention.")
+    except Exception as e:
+        print("Could not enable xFormers:", e)
+
+    # Offload model chunks to CPU if VRAM is tight
+    try:
+        pipe_local.enable_model_cpu_offload()
+        print("Enabled model CPU offload.")
+    except Exception as e:
+        print("Could not enable model_cpu_offload:", e)
+
+    # VAE slicing can reduce peak memory usage
+    pipe_local.enable_vae_slicing()
+
+    print("Flux pipeline loaded successfully.")
+    pipe = pipe_local
+
+###############################################################################
+# MAIN INFERENCE FUNCTION
+###############################################################################
 @spaces.GPU
 def process_image(
     image,
@@ -21,57 +84,111 @@ def process_image(
     model_id="black-forest-labs/FLUX.1-schnell",
     strength=0.75,
     seed=0,
-    num_inference_steps=4
+    num_inference_steps=4,
+    progress=gr.Progress(track_tqdm=True)
 ):
-    print("start process image process_image")
+    """
+    Runs Flux Img2Img with memory-optimized loading.
+    'mask_image' is not currently used.
+    """
+
+    # Let Gradio show progress
+    progress(0, desc="Starting Inference")
+
     if image is None:
-        print("empty input image returned")
+        print("No input image provided.")
         return None
 
-    # Try resizing input to reduce VRAM usage
-    image = resize_image(image, 512)
-
-    # Load with float16
-    pipe = FluxImg2ImgPipeline.from_pretrained(
-        model_id,
-        torch_dtype=torch.float16
-    ).to("cuda")
+    # 1) Load pipeline if not loaded
+    load_flux_pipeline()
 
-    # If xFormers installed, enable memory efficient attention
-    try:
-        pipe.enable_xformers_memory_efficient_attention()
-        print("Enabled xFormers memory efficient attention.")
-    except Exception as e:
-        print("Could not enable xFormers:", e)
-
-    # Enable CPU offload to reduce VRAM usage
-    # (Pick either model_cpu_offload or sequential_cpu_offload)
-    try:
-        pipe.enable_model_cpu_offload()
-    except Exception as e:
-        print("Could not enable model_cpu_offload:", e)
-
-    # Optional: enable VAE slicing
-    pipe.enable_vae_slicing()
+    # 2) Resize input to reduce VRAM usage
+    image = resize_image(image, max_size=512)
 
+    # 3) Prepare generator for reproducible results
     generator = torch.Generator("cuda").manual_seed(seed)
-
-    print(f"Prompt: {prompt}")
+
+    # 4) Actually run the pipeline
+    print(f"Running Flux with prompt: '{prompt}' (strength={strength}, steps={num_inference_steps})")
     output = pipe(
         prompt=prompt,
         image=image,
         generator=generator,
         strength=strength,
-        guidance_scale=0,
-        num_inference_steps=num_inference_steps,
-        max_sequence_length=256
+        guidance_scale=0,  # unchanged from the original code
+        num_inference_steps=num_inference_steps,
+        max_sequence_length=256  # kept from the original code; this is a per-call argument
     )
+    progress(1.0, desc="Done")
 
     return output.images[0]
 
+###############################################################################
+# BUILD THE GRADIO UI
+###############################################################################
+css = """
+#col-left {
+    margin: 0 auto;
+    max-width: 640px;
+}
+#col-right {
+    margin: 0 auto;
+    max-width: 640px;
+}
+"""
+
+with gr.Blocks(css=css) as demo:
+    gr.Markdown("## Flux Img2Img - Memory-Optimized for T4")
+
+    with gr.Row():
+        with gr.Column():
+            image_input = gr.Image(
+                label="Input Image (Img2Img)",
+                type="pil",
+                image_mode="RGB",
+                height=512
+            )
+            # The mask is not used yet, but it stays in the signature
+            mask_input = gr.Image(
+                label="Mask (unused)",
+                type="pil",
+                image_mode="RGB",
+                height=512
+            )
+            prompt_input = gr.Textbox(label="Prompt", value="a person")
+            strength_slider = gr.Slider(
+                minimum=0.0,
+                maximum=1.0,
+                value=0.75,
+                step=0.05,
+                label="Strength"
+            )
+            seed_box = gr.Number(label="Seed", value=0)
+            steps_box = gr.Slider(
+                minimum=1,
+                maximum=50,
+                value=4,
+                step=1,
+                label="Inference Steps"
+            )
+            run_button = gr.Button("Run Flux Img2Img")
+
+        with gr.Column():
+            output_image = gr.Image(label="Output", height=512)
+
+    # Gradio passes `inputs` positionally, so a hidden textbox keeps the
+    # default model_id aligned with process_image's signature.
+    model_id_box = gr.Textbox(value="black-forest-labs/FLUX.1-schnell", visible=False)
+
+    # Connect button -> process_image
+    run_button.click(
+        fn=process_image,
+        inputs=[
+            image_input,
+            mask_input,
+            prompt_input,
+            model_id_box,
+            strength_slider,
+            seed_box,
+            steps_box
+        ],
+        outputs=[output_image]
+    )
+
 if __name__ == "__main__":
-    image = Image.open(sys.argv[1]).convert("RGB")
-    mask = Image.open(sys.argv[2]).convert("RGB")  # unused
-    result = process_image(image, mask)
-    if result:
-        result.save(sys.argv[3])
+    demo.launch(share=True)
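
For a quick sanity check outside the Space, the updated module can be driven directly from Python. This is a minimal sketch, not part of the commit: it assumes the file is importable as flux1_img2img, that a CUDA GPU is available, and that the input/output paths (which are hypothetical) exist; outside Spaces the @spaces.GPU decorator and the gr.Progress default are expected to be inert.

    # Minimal local smoke test for the updated module (hypothetical paths).
    from PIL import Image

    import flux1_img2img as flux

    img = Image.open("input.jpg").convert("RGB")   # hypothetical sample image
    result = flux.process_image(
        img,
        None,            # mask_image: accepted but unused
        "a person",      # prompt
        strength=0.6,
        seed=42,
        num_inference_steps=4,
    )
    if result is not None:
        result.save("output.png")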
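
The resize helper itself is pure PIL, so its downscale-only contract can be checked without a GPU. The assertions below are new, with the expected sizes worked out from ratio = min(max_size/w, max_size/h); they assume flux1_img2img.py is on the import path (importing it builds, but does not launch, the Gradio UI).

    # Quick check of resize_image's downscale-only behavior (no GPU needed).
    from PIL import Image

    from flux1_img2img import resize_image

    big = Image.new("RGB", (1024, 768))
    small = Image.new("RGB", (300, 200))

    # ratio = min(512/1024, 512/768) = 0.5, so 1024x768 -> 512x384
    assert resize_image(big, max_size=512).size == (512, 384)
    # ratio = min(512/300, 512/200) > 1.0, so the image is returned unchanged
    assert resize_image(small, max_size=512).size == (300, 200)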