Gemini899 committed · verified
Commit fc388c8 · 1 Parent(s): a7d573f

Update flux1_img2img.py

Files changed (1)
  1. flux1_img2img.py +49 -56
flux1_img2img.py CHANGED
@@ -1,58 +1,51 @@
- import os
  import torch
- from diffusers import StableDiffusionImg2ImgPipeline, AutoencoderKL
- from PIL import Image
 
- # Set environment variable for better CUDA memory management
- os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
-
- # Select device
- device = "cuda" if torch.cuda.is_available() else "cpu"
-
- # Load a lightweight VAE (Tiny VAE) to reduce memory consumption.
- vae = AutoencoderKL.from_pretrained(
-     "madebyollin/taesdxl",  # Replace with your chosen Tiny VAE model ID if different.
-     torch_dtype=torch.float16
- ).to(device)
-
- # Use a lightweight model variant (e.g. FLUX.1-schnell or any lighter SD variant)
- model_id = "black-forest-labs/FLUX.1-schnell"  # Example lightweight model
- pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
-     model_id,
-     torch_dtype=torch.float16,
-     vae=vae,
-     use_safetensors=True
- )
-
- # Enable memory optimizations
- pipe.enable_model_cpu_offload()
- pipe.enable_attention_slicing()
-
- # Load and prepare the initial image (resize to lower resolution for speed)
- init_image = Image.open("input.png").convert("RGB")
- init_image = init_image.resize((256, 256), Image.LANCZOS)
-
- # Define your prompt and parameters
- prompt = "A person in a surreal landscape"
- strength = 0.75  # How much noise is added (0.0 to 1.0)
- num_inference_steps = 25  # Lower steps for faster inference (adjust as needed)
- guidance_scale = 7.5  # How closely to follow the prompt
- seed = 42
-
- # Setup a random generator for reproducibility
- generator = torch.Generator(device=device).manual_seed(seed)
-
- # Run the pipeline
- output = pipe(
-     prompt=prompt,
-     image=init_image,
-     strength=strength,
-     num_inference_steps=num_inference_steps,
-     guidance_scale=guidance_scale,
-     generator=generator
- )
-
- # Save the output image
- output_image = output.images[0]
- output_image.save("output.png")
- print("Output image saved as output.png")
 
 
  import torch
+ from diffusers import StableDiffusionImg2ImgPipeline
 
+ from PIL import Image
+ import sys
+ import spaces
+
+ # Defaulting to Stable Diffusion v1.5 here. Adjust model_id as you like.
+ @spaces.GPU
+ def process_image(
+     image,
+     mask_image,
+     prompt="a person",
+     model_id="runwayml/stable-diffusion-v1-5",
+     strength=0.75,
+     seed=0,
+     num_inference_steps=4
+ ):
+     print("start process_image")
+     if image is None:
+         print("empty input image returned")
+         return None
+
+     # Load the img2img pipeline in half precision
+     pipe = StableDiffusionImg2ImgPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
+     pipe.to("cuda")
+
+     # Create a generator for reproducible results
+     generator = torch.Generator("cuda").manual_seed(seed)
+
+     # The mask is not currently used in this snippet (TODO).
+     # To use the mask, switch to an inpainting pipeline or handle it in code.
+
+     print(prompt)
+     output = pipe(
+         prompt=prompt,
+         image=image,
+         generator=generator,
+         strength=strength,
+         guidance_scale=0,
+         num_inference_steps=num_inference_steps
+     )
+
+     return output.images[0]
+
+ if __name__ == "__main__":
+     # args: input-image input-mask output
+     image = Image.open(sys.argv[1]).convert("RGB")
+     mask = Image.open(sys.argv[2])
+     output = process_image(image, mask)
+     output.save(sys.argv[3])
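
The new process_image accepts a mask_image but, as its inline comment notes, never uses it. A minimal sketch of the inpainting route that comment points to, assuming diffusers' StableDiffusionInpaintPipeline and the runwayml/stable-diffusion-inpainting checkpoint (neither is part of this commit):

import torch
from diffusers import StableDiffusionInpaintPipeline

def inpaint_image(image, mask_image, prompt="a person", seed=0):
    # Hypothetical counterpart to process_image that actually consumes the mask.
    # Convention: white mask pixels are repainted, black pixels are kept.
    pipe = StableDiffusionInpaintPipeline.from_pretrained(
        "runwayml/stable-diffusion-inpainting",  # assumed checkpoint, not from the commit
        torch_dtype=torch.float16,
    )
    pipe.to("cuda")
    generator = torch.Generator("cuda").manual_seed(seed)
    output = pipe(
        prompt=prompt,
        image=image.convert("RGB"),
        mask_image=mask_image.convert("L"),  # single-channel mask
        generator=generator,
    )
    return output.images[0]

The __main__ block already loads both images, so swapping process_image for a function like this is the smallest change that would make the second CLI argument meaningful.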
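Separately, the removed code pointed StableDiffusionImg2ImgPipeline at black-forest-labs/FLUX.1-schnell, but FLUX checkpoints do not load into Stable Diffusion pipeline classes. A rough sketch of what the file name suggests was intended, assuming a diffusers release that ships FluxImg2ImgPipeline:

import torch
from diffusers import FluxImg2ImgPipeline
from PIL import Image

# FLUX.1-schnell is distilled for few steps and runs without classifier-free guidance.
pipe = FluxImg2ImgPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-schnell",
    torch_dtype=torch.bfloat16,
)
pipe.enable_model_cpu_offload()  # same memory-saving call the removed script used

init_image = Image.open("input.png").convert("RGB")
output = pipe(
    prompt="A person in a surreal landscape",
    image=init_image,
    strength=0.75,
    guidance_scale=0.0,
    num_inference_steps=4,
).images[0]
output.save("output.png")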