Gemini899 committed · verified
Commit fc388c8 · 1 Parent(s): a7d573f

Update flux1_img2img.py

Files changed (1)
  1. flux1_img2img.py +49 -56
flux1_img2img.py CHANGED
@@ -1,58 +1,51 @@
- import os
  import torch
- from diffusers import StableDiffusionImg2ImgPipeline, AutoencoderKL
- from PIL import Image
 
- # Set environment variable for better CUDA memory management
- os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
-
- # Select device
- device = "cuda" if torch.cuda.is_available() else "cpu"
-
- # Load a lightweight VAE (Tiny VAE) to reduce memory consumption.
- vae = AutoencoderKL.from_pretrained(
-     "madebyollin/taesdxl",  # Replace with your chosen Tiny VAE model ID if different.
-     torch_dtype=torch.float16
- ).to(device)
-
- # Use a lightweight model variant (e.g. FLUX.1-schnell or any lighter SD variant)
- model_id = "black-forest-labs/FLUX.1-schnell"  # Example lightweight model
- pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
-     model_id,
-     torch_dtype=torch.float16,
-     vae=vae,
-     use_safetensors=True
- )
-
- # Enable memory optimizations
- pipe.enable_model_cpu_offload()
- pipe.enable_attention_slicing()
-
- # Load and prepare the initial image (resize to lower resolution for speed)
- init_image = Image.open("input.png").convert("RGB")
- init_image = init_image.resize((256, 256), Image.LANCZOS)
-
- # Define your prompt and parameters
- prompt = "A person in a surreal landscape"
- strength = 0.75  # How much noise is added (0.0 to 1.0)
- num_inference_steps = 25  # Lower steps for faster inference (adjust as needed)
- guidance_scale = 7.5  # How closely to follow the prompt
- seed = 42
-
- # Setup a random generator for reproducibility
- generator = torch.Generator(device=device).manual_seed(seed)
-
- # Run the pipeline
- output = pipe(
-     prompt=prompt,
-     image=init_image,
-     strength=strength,
-     num_inference_steps=num_inference_steps,
-     guidance_scale=guidance_scale,
-     generator=generator
- )
-
- # Save the output image
- output_image = output.images[0]
- output_image.save("output.png")
- print("Output image saved as output.png")
 
 
  import torch
+ from diffusers import StableDiffusionImg2ImgPipeline
 
+ from PIL import Image
+ import sys
+ import spaces
+
+ # Defaulting to Stable Diffusion v1.5 here. Adjust model_id as you like.
+ @spaces.GPU
+ def process_image(
+     image,
+     mask_image,
+     prompt="a person",
+     model_id="runwayml/stable-diffusion-v1-5",
+     strength=0.75,
+     seed=0,
+     num_inference_steps=4
+ ):
+     print("start process_image")
+     if image is None:
+         print("empty input image returned")
+         return None
+
+     # Load the img2img pipeline in half precision
+     pipe = StableDiffusionImg2ImgPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
+     pipe.to("cuda")
+
+     # Create a generator for reproducible results
+     generator = torch.Generator("cuda").manual_seed(seed)
+
+     # The mask is not currently used in this snippet (TODO).
+     # To use the mask, switch to an inpainting pipeline or handle it in code.
+
+     print(prompt)
+     output = pipe(
+         prompt=prompt,
+         image=image,
+         generator=generator,
+         strength=strength,
+         guidance_scale=0,
+         num_inference_steps=num_inference_steps
+     )
+
+     return output.images[0]
+
+ if __name__ == "__main__":
+     # args: input-image input-mask output
+     image = Image.open(sys.argv[1]).convert("RGB")
+     mask = Image.open(sys.argv[2])
+     output = process_image(image, mask)
+     output.save(sys.argv[3])
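
The new process_image accepts a mask_image but, as its inline comment notes, never uses it. A minimal sketch of the inpainting route that comment points to, assuming diffusers' StableDiffusionInpaintPipeline and the runwayml/stable-diffusion-inpainting checkpoint (neither is part of this commit):

import torch
from diffusers import StableDiffusionInpaintPipeline

def inpaint_image(image, mask_image, prompt="a person", seed=0):
    # Hypothetical counterpart to process_image that actually consumes the mask.
    # Convention: white mask pixels are repainted, black pixels are kept.
    pipe = StableDiffusionInpaintPipeline.from_pretrained(
        "runwayml/stable-diffusion-inpainting",  # assumed checkpoint, not from the commit
        torch_dtype=torch.float16,
    )
    pipe.to("cuda")
    generator = torch.Generator("cuda").manual_seed(seed)
    output = pipe(
        prompt=prompt,
        image=image.convert("RGB"),
        mask_image=mask_image.convert("L"),  # single-channel mask
        generator=generator,
    )
    return output.images[0]

The __main__ block already loads both images, so swapping process_image for a function like this is the smallest change that would make the second CLI argument meaningful.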
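Separately, the removed code pointed StableDiffusionImg2ImgPipeline at black-forest-labs/FLUX.1-schnell, but FLUX checkpoints do not load into Stable Diffusion pipeline classes. A rough sketch of what the file name suggests was intended, assuming a diffusers release that ships FluxImg2ImgPipeline:

import torch
from diffusers import FluxImg2ImgPipeline
from PIL import Image

# FLUX.1-schnell is distilled for few steps and runs without classifier-free guidance.
pipe = FluxImg2ImgPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-schnell",
    torch_dtype=torch.bfloat16,
)
pipe.enable_model_cpu_offload()  # same memory-saving call the removed script used

init_image = Image.open("input.png").convert("RGB")
output = pipe(
    prompt="A person in a surreal landscape",
    image=init_image,
    strength=0.75,
    guidance_scale=0.0,
    num_inference_steps=4,
).images[0]
output.save("output.png")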