Gemini899 committed
Commit c20ce4a · verified · Parent: e05e986

Update flux1_img2img.py

Files changed (1)
  1. flux1_img2img.py +47 -151
flux1_img2img.py CHANGED
@@ -1,24 +1,18 @@
 import os
 import torch
-import gradio as gr
+from diffusers import FluxImg2ImgPipeline
 from PIL import Image
-
+import sys
 import spaces
-from diffusers import FluxImg2ImgPipeline
 
-###############################################################################
-# GLOBALS
-###############################################################################
-pipe = None  # We'll load it lazily to avoid OOM during space startup
+# Set memory optimization flags
+os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
+
+# Global pipe variable for lazy loading
+pipe = None
 
-###############################################################################
-# Helper: Resize the input image
-###############################################################################
 def resize_image(image: Image.Image, max_dim: int = 512) -> Image.Image:
-    """
-    Resizes 'image' so that its largest dimension <= max_dim,
-    preserving aspect ratio. This helps reduce VRAM usage on T4.
-    """
+    """Resizes image to fit within max_dim while preserving aspect ratio"""
     w, h = image.size
     ratio = min(max_dim / w, max_dim / h)
     if ratio < 1.0:
@@ -27,152 +21,54 @@ def resize_image(image: Image.Image, max_dim: int = 512) -> Image.Image:
         image = image.resize((new_w, new_h), Image.LANCZOS)
     return image
 
-###############################################################################
-# Lazy-load function for FLUX.1-schnell pipeline in float16
-###############################################################################
-def load_flux_pipeline():
+def get_pipe(model_id="black-forest-labs/FLUX.1-schnell"):
     global pipe
-    if pipe is not None:
-        return  # Already loaded
-
-    print("Loading FLUX.1-schnell with float16 on T4...")
-
-    # 1) Load in float16 (NOT bfloat16)
-    pipe_local = FluxImg2ImgPipeline.from_pretrained(
-        "black-forest-labs/FLUX.1-schnell",
-        torch_dtype=torch.float16,  # crucial for T4
-        low_cpu_mem_usage=True
-    )
-
-    # 2) Move to GPU
-    pipe_local.to("cuda")
+    if pipe is None:
+        pipe = FluxImg2ImgPipeline.from_pretrained(
+            model_id,
+            torch_dtype=torch.float16,
+            variant="fp16"
+        ).to("cuda")
+    return pipe
 
-    # 3) Memory Efficient Attention (xFormers)
-    try:
-        pipe_local.enable_xformers_memory_efficient_attention()
-        print("xFormers memory efficient attention enabled.")
-    except Exception as e:
-        print("Could not enable xFormers:", e)
-
-    # 4) CPU offload (keeps only active layers on GPU)
-    try:
-        pipe_local.enable_model_cpu_offload()
-        print("Model CPU offload enabled.")
-    except Exception as e:
-        print("Could not enable model_cpu_offload:", e)
-
-    # 5) VAE slicing reduces peak memory usage
-    pipe_local.enable_vae_slicing()
-
-    # Save to global
-    pipe_local.max_sequence_length = 256
-    pipe = pipe_local
-    print("Flux pipeline loaded successfully.")
-
-###############################################################################
-# Main inference function
-###############################################################################
 @spaces.GPU
-def process_image(
-    image: Image.Image,
-    mask_image: Image.Image,
-    prompt="A person",
-    strength=0.75,
-    seed=0,
-    num_inference_steps=4,
-    progress=gr.Progress(track_tqdm=True)
-):
-    """
-    Loads the pipeline if needed, resizes the input image,
-    then runs Flux Img2Img with minimal VRAM usage strategies.
-    """
-    progress(0, desc="Preparing model")
-
-    # 1) Ensure pipeline is loaded
-    load_flux_pipeline()
-
-    progress(20, desc="Resizing input image")
+def process_image(image, mask_image, prompt="a person", model_id="black-forest-labs/FLUX.1-schnell", strength=0.75, seed=0, num_inference_steps=4):
+    print("start process image process_image")
     if image is None:
-        print("No input image provided.")
+        print("empty input image returned")
         return None
-
-    # 2) Resize the input image to reduce VRAM usage
+
+    # Resize image to reduce memory usage
     image = resize_image(image, max_dim=512)
+
+    # Get model using lazy loading
+    model = get_pipe(model_id)
 
-    # 3) Set up generator for reproducible results
+    generators = []
     generator = torch.Generator("cuda").manual_seed(seed)
-
-    # 4) Run the pipeline
-    progress(50, desc="Running Flux Inference")
-    print(f"Prompt: {prompt} | Strength: {strength} | Steps: {num_inference_steps}")
-    output = pipe(
-        prompt=prompt,
-        image=image,
-        generator=generator,
-        strength=strength,
-        guidance_scale=0,  # matches your original code
-        num_inference_steps=num_inference_steps
-    )
-
-    progress(100, desc="Done")
-    return output.images[0]
-
-###############################################################################
-# BUILD THE GRADIO UI
-###############################################################################
-css = """
-#col-left {
-    margin: 0 auto;
-    max-width: 640px;
-}
-#col-right {
-    margin: 0 auto;
-    max-width: 640px;
-}
-"""
-
-with gr.Blocks(css=css) as demo:
-    gr.Markdown("## FLUX Img2Img — Memory-Optimized for T4\n"
-                "Using float16, CPU offload, xFormers, and image resizing to reduce VRAM usage.")
-
-    with gr.Row():
-        with gr.Column():
-            # The main input image
-            input_image = gr.Image(
-                label="Input Image (Img2Img)",
-                type="pil",
-                image_mode="RGB",
-                height=512
+    generators.append(generator)
+
+    # Use autocast for better memory efficiency
+    with torch.cuda.amp.autocast(dtype=torch.float16):
+        with torch.no_grad():
+            # more parameter see https://huggingface.co/docs/diffusers/api/pipelines/flux#diffusers.FluxInpaintPipeline
+            print(prompt)
+            output = model(
+                prompt=prompt,
+                image=image,
+                generator=generator,
+                strength=strength,
+                guidance_scale=0,
+                num_inference_steps=num_inference_steps,
+                max_sequence_length=256
            )
 
-            # Mask is not used in your code, but we keep it to match your function signature
-            mask_image = gr.Image(
-                label="Mask (unused)",
-                type="pil",
-                image_mode="RGB",
-                height=200
-            )
-
-            prompt = gr.Textbox(label="Prompt", value="A person")
-            strength_slider = gr.Slider(0.0, 1.0, value=0.75, step=0.05, label="Strength")
-            seed_box = gr.Number(value=0, label="Seed", precision=0)
-            steps_box = gr.Slider(1, 50, value=4, step=1, label="Inference Steps")
-
-            run_button = gr.Button("Generate")
-
-        with gr.Column():
-            result_image = gr.Image(
-                label="Output",
-                type="pil",
-                height=512
-            )
-
-    # Tie the button to our inference function
-    run_button.click(
-        fn=process_image,
-        inputs=[input_image, mask_image, prompt, strength_slider, seed_box, steps_box],
-        outputs=result_image
-    )
+    # TODO support mask
+    return output.images[0]
 
 if __name__ == "__main__":
-    demo.launch(share=True)
+    #args input-image input-mask output
+    image = Image.open(sys.argv[1])
+    mask = Image.open(sys.argv[2])
+    output = process_image(image, mask)
+    output.save(sys.argv[3])
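The commit replaces the Gradio UI with a plain command-line entry point: run directly, the script reads an input image, a mask, and an output path from sys.argv. A minimal sketch of driving the rewritten file from Python follows; the file paths and the module import are placeholders, not part of the commit.

# Sketch: calling the rewritten module programmatically.
# "input.png", "mask.png", "output.png" are hypothetical paths.
from PIL import Image

from flux1_img2img import process_image  # assumes the file is importable as a module

init_image = Image.open("input.png")  # image to transform
mask_image = Image.open("mask.png")   # accepted but currently unused ("TODO support mask")
result = process_image(init_image, mask_image, prompt="a person",
                       strength=0.75, seed=0, num_inference_steps=4)
if result is not None:  # process_image returns None when no input image is given
    result.save("output.png")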
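One caveat in the new get_pipe: variant="fp16" asks diffusers for weight files explicitly tagged fp16, and from_pretrained raises if the repository publishes no such variant. A defensive loading sketch, assuming a fallback of loading the default checkpoint directly in float16 is acceptable (the helper name and the fallback are suggestions, not part of the commit):

import torch
from diffusers import FluxImg2ImgPipeline

def get_pipe_with_fallback(model_id="black-forest-labs/FLUX.1-schnell"):
    # Hypothetical helper: try fp16-tagged weight files first, then fall
    # back to the default checkpoint cast to float16 at load time.
    try:
        pipe = FluxImg2ImgPipeline.from_pretrained(
            model_id, torch_dtype=torch.float16, variant="fp16"
        )
    except (OSError, ValueError):  # repo does not ship an fp16 variant
        pipe = FluxImg2ImgPipeline.from_pretrained(
            model_id, torch_dtype=torch.float16
        )
    return pipe.to("cuda")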
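Recent PyTorch releases deprecate torch.cuda.amp.autocast in favor of torch.amp.autocast with an explicit device type. A sketch of the equivalent inference block, pulled into a hypothetical helper so it is self-contained (run_inference is not a name from the commit; the call arguments mirror process_image above):

import torch

def run_inference(model, prompt, image, generator, strength, num_inference_steps):
    # torch.amp.autocast("cuda", ...) is the non-deprecated spelling of
    # torch.cuda.amp.autocast(...); no_grad disables autograd bookkeeping.
    with torch.amp.autocast("cuda", dtype=torch.float16), torch.no_grad():
        return model(
            prompt=prompt,
            image=image,
            generator=generator,
            strength=strength,
            guidance_scale=0,
            num_inference_steps=num_inference_steps,
            max_sequence_length=256,
        ).images[0]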