Gemini899 committed
Commit 1ac594b · verified · 1 Parent(s): 9a1289b

Update flux1_img2img.py

Files changed (1):
  1. flux1_img2img.py +64 -80
flux1_img2img.py CHANGED
@@ -1,6 +1,4 @@
 import os
-import re
-import sys
 import torch
 import gradio as gr
 from PIL import Image
@@ -9,20 +7,20 @@ import spaces
 from diffusers import FluxImg2ImgPipeline
 
 ###############################################################################
-# GLOBAL PIPE VARIABLE (lazy-loaded so the Space can start without OOM)
+# GLOBALS
 ###############################################################################
-pipe = None  # We will load this when the user triggers an inference
+pipe = None  # We'll load it lazily to avoid OOM during space startup
 
 ###############################################################################
-# OPTIONAL: Resize Helper for Lower VRAM Usage
+# Helper: Resize the input image
 ###############################################################################
-def resize_image(image, max_size=512):
+def resize_image(image: Image.Image, max_dim: int = 512) -> Image.Image:
     """
-    Resizes the image so that the max dimension is 'max_size',
-    which helps reduce GPU memory usage on a T4.
+    Resizes 'image' so that its largest dimension <= max_dim,
+    preserving aspect ratio. This helps reduce VRAM usage on T4.
     """
     w, h = image.size
-    ratio = min(max_size / w, max_size / h)
+    ratio = min(max_dim / w, max_dim / h)
     if ratio < 1.0:
         new_w = int(w * ratio)
         new_h = int(h * ratio)
@@ -30,96 +28,93 @@ def resize_image(image, max_size=512):
     return image
 
 ###############################################################################
-# PIPELINE LOADER: Loads FLUX.1-schnell with memory-saving features
+# Lazy-load function for FLUX.1-schnell pipeline in float16
 ###############################################################################
 def load_flux_pipeline():
-    """
-    Lazily loads the FluxImg2ImgPipeline with float16,
-    CPU offload, xFormers (if installed), etc.
-    """
     global pipe
     if pipe is not None:
         return  # Already loaded
-
-    print("Loading FluxImg2ImgPipeline in float16 mode ...")
-    # Use float16 for T4
+
+    print("Loading FLUX.1-schnell with float16 on T4...")
+
+    # 1) Load in float16 (NOT bfloat16)
     pipe_local = FluxImg2ImgPipeline.from_pretrained(
         "black-forest-labs/FLUX.1-schnell",
-        torch_dtype=torch.float16,
+        torch_dtype=torch.float16,  # crucial for T4
         low_cpu_mem_usage=True
     )
 
-    # Move to GPU
+    # 2) Move to GPU
     pipe_local.to("cuda")
 
-    # Try enabling xFormers for memory-efficient attention
+    # 3) Memory Efficient Attention (xFormers)
     try:
         pipe_local.enable_xformers_memory_efficient_attention()
-        print("Enabled xFormers memory efficient attention.")
+        print("xFormers memory efficient attention enabled.")
     except Exception as e:
         print("Could not enable xFormers:", e)
 
-    # Offload model chunks to CPU if VRAM is tight
+    # 4) CPU offload (keeps only active layers on GPU)
     try:
         pipe_local.enable_model_cpu_offload()
-        print("Enabled model CPU offload.")
+        print("Model CPU offload enabled.")
     except Exception as e:
         print("Could not enable model_cpu_offload:", e)
 
-    # VAE slicing can reduce peak memory usage
+    # 5) VAE slicing reduces peak memory usage
     pipe_local.enable_vae_slicing()
 
-    pipe_local.max_sequence_length = 256  # same as your original code suggestion
-    print("Flux pipeline loaded successfully.")
+    # Save to global
+    pipe_local.max_sequence_length = 256
     pipe = pipe_local
+    print("Flux pipeline loaded successfully.")
 
 ###############################################################################
-# MAIN INFERENCE FUNCTION
+# Main inference function
 ###############################################################################
 @spaces.GPU
 def process_image(
-    image,
-    mask_image,
-    prompt="a person",
-    model_id="black-forest-labs/FLUX.1-schnell",
+    image: Image.Image,
+    mask_image: Image.Image,
+    prompt="A person",
     strength=0.75,
     seed=0,
     num_inference_steps=4,
     progress=gr.Progress(track_tqdm=True)
 ):
     """
-    Runs Flux Img2Img with memory-optimized loading.
-    'mask_image' is not currently used.
+    Loads the pipeline if needed, resizes the input image,
+    then runs Flux Img2Img with minimal VRAM usage strategies.
     """
+    progress(0, desc="Preparing model")
 
-    # Let Gradio show progress
-    progress(0, desc="Starting Inference")
+    # 1) Ensure pipeline is loaded
+    load_flux_pipeline()
 
+    progress(20, desc="Resizing input image")
     if image is None:
         print("No input image provided.")
         return None
 
-    # 1) Load pipeline if not loaded
-    load_flux_pipeline()
-
-    # 2) Resize input to reduce VRAM usage
-    image = resize_image(image, max_size=512)
+    # 2) Resize the input image to reduce VRAM usage
+    image = resize_image(image, max_dim=512)
 
-    # 3) Prepare generator for reproducible results
+    # 3) Set up generator for reproducible results
     generator = torch.Generator("cuda").manual_seed(seed)
-
-    # 4) Actually run the pipeline
-    print(f"Running Flux with prompt: '{prompt}' (strength={strength}, steps={num_inference_steps})")
+
+    # 4) Run the pipeline
+    progress(50, desc="Running Flux Inference")
+    print(f"Prompt: {prompt} | Strength: {strength} | Steps: {num_inference_steps}")
    output = pipe(
         prompt=prompt,
         image=image,
         generator=generator,
         strength=strength,
-        guidance_scale=0,  # same as your original code
+        guidance_scale=0,  # matches your original code
         num_inference_steps=num_inference_steps
     )
-    progress(100, desc="Done")
 
+    progress(100, desc="Done")
     return output.images[0]
 
 ###############################################################################
@@ -137,57 +132,46 @@ css = """
 """
 
 with gr.Blocks(css=css) as demo:
-    gr.Markdown("## Flux Img2Img - Memory-Optimized for T4")
+    gr.Markdown("## FLUX Img2Img Memory-Optimized for T4\n"
+                "Using float16, CPU offload, xFormers, and image resizing to reduce VRAM usage.")
 
     with gr.Row():
         with gr.Column():
-            image_input = gr.Image(
+            # The main input image
+            input_image = gr.Image(
                 label="Input Image (Img2Img)",
                 type="pil",
                 image_mode="RGB",
                 height=512
             )
-            # The mask is not used in your original code, but we keep it in signature
-            mask_input = gr.Image(
+
+            # Mask is not used in your code, but we keep it to match your function signature
+            mask_image = gr.Image(
                 label="Mask (unused)",
                 type="pil",
                 image_mode="RGB",
-                height=512
-            )
-            prompt_input = gr.Textbox(label="Prompt", value="a person")
-            strength_slider = gr.Slider(
-                minimum=0.0,
-                maximum=1.0,
-                value=0.75,
-                step=0.05,
-                label="Strength"
-            )
-            seed_box = gr.Number(label="Seed", value=0)
-            steps_box = gr.Slider(
-                minimum=1,
-                maximum=50,
-                value=4,
-                step=1,
-                label="Inference Steps"
+                height=200
             )
-            run_button = gr.Button("Run Flux Img2Img")
+
+            prompt = gr.Textbox(label="Prompt", value="A person")
+            strength_slider = gr.Slider(0.0, 1.0, value=0.75, step=0.05, label="Strength")
+            seed_box = gr.Number(value=0, label="Seed", precision=0)
+            steps_box = gr.Slider(1, 50, value=4, step=1, label="Inference Steps")
+
+            run_button = gr.Button("Generate")
 
         with gr.Column():
-            output_image = gr.Image(label="Output", height=512)
+            result_image = gr.Image(
+                label="Output",
+                type="pil",
+                height=512
+            )
 
-    # Connect button -> process_image
+    # Tie the button to our inference function
     run_button.click(
         fn=process_image,
-        inputs=[
-            image_input,
-            mask_input,
-            prompt_input,
-            # model_id is default, so we won't pass it from UI
-            strength_slider,
-            seed_box,
-            steps_box
-        ],
-        outputs=[output_image]
+        inputs=[input_image, mask_image, prompt, strength_slider, seed_box, steps_box],
+        outputs=result_image
     )
 
 if __name__ == "__main__":
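
Review note: enable_model_cpu_offload() manages device placement on its own (via accelerate hooks), so calling pipe_local.to("cuda") first is at best redundant and can conflict with the offload on recent diffusers releases. A minimal sketch of the intended order, assuming the same pipeline as in this commit:

    import torch
    from diffusers import FluxImg2ImgPipeline

    pipe = FluxImg2ImgPipeline.from_pretrained(
        "black-forest-labs/FLUX.1-schnell",
        torch_dtype=torch.float16,
        low_cpu_mem_usage=True,
    )
    # Skip pipe.to("cuda"): the offload hooks move each submodule to the GPU
    # only while it runs, which is the point of offloading on a 16 GB T4.
    pipe.enable_model_cpu_offload()
    pipe.enable_vae_slicing()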
 
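Review note: the line pipe_local.max_sequence_length = 256 sets a plain attribute that FluxImg2ImgPipeline never reads; in diffusers, max_sequence_length is an argument of the pipeline call itself (it caps the T5 prompt tokens). A hedged sketch of passing it per call, reusing the names from this commit:

    output = pipe(
        prompt=prompt,
        image=image,
        generator=generator,
        strength=strength,
        guidance_scale=0,
        num_inference_steps=num_inference_steps,
        max_sequence_length=256,  # shorter T5 sequence -> lower memory use
    )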
 
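Review note: on PyTorch 2.x, diffusers routes attention through torch.nn.functional.scaled_dot_product_attention by default, so the xFormers call mostly matters on older PyTorch builds (the try/except already covers the case where xFormers is absent). A sketch that only reaches for xFormers when SDPA is unavailable:

    import torch

    # PyTorch >= 2.0 ships a memory-efficient SDPA kernel that diffusers
    # uses automatically; fall back to xFormers only without it.
    if not hasattr(torch.nn.functional, "scaled_dot_product_attention"):
        try:
            pipe.enable_xformers_memory_efficient_attention()
        except Exception as e:
            print("Could not enable xFormers:", e)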
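Review note: in diffusers img2img pipelines, strength also scales how many of the requested steps actually run (roughly int(num_inference_steps * strength)), so the defaults here execute about 3 denoising steps rather than 4. A small worked example under that assumption:

    num_inference_steps = 4
    strength = 0.75

    # Standard img2img timestep truncation in diffusers (hedged for Flux):
    init_timestep = min(int(num_inference_steps * strength), num_inference_steps)  # 3
    t_start = max(num_inference_steps - init_timestep, 0)                          # 1
    effective_steps = num_inference_steps - t_start                                # 3 steps run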