import torch
from diffusers import FluxPipeline

# Memory-efficient configuration for Apple Silicon (MPS).
# Load the pipeline in bfloat16, which performs well on MPS
# (requires a recent PyTorch and macOS release).
pipe = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-schnell",
    torch_dtype=torch.bfloat16,
    use_safetensors=True,
)
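
# Sanity check (an added hedge, not in the original snippet):
# torch.backends.mps.is_available() reports whether the MPS backend is usable.
if not torch.backends.mps.is_available():
    raise RuntimeError("MPS backend not available; this script targets Apple Silicon")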
# Move to the MPS device for GPU acceleration on Apple Silicon.
pipe.to("mps")

# Apple Silicon memory optimizations.
pipe.enable_attention_slicing()  # compute attention in slices to reduce peak memory
pipe.vae.enable_slicing()        # slice VAE decoding; FluxPipeline has no enable_vae_slicing()
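# Optional (assumption: the pipeline's AutoencoderKL supports tiling):
# tiled VAE decoding further reduces peak memory at high resolutions.
# pipe.vae.enable_tiling()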
# Optional: enable model CPU offload if memory is tight (slower, but lighter).
# pipe.enable_model_cpu_offload()

# Try to compile the transformer for speed (FLUX uses a transformer, not a UNet).
# Note: torch.compile support on MPS is limited, and compilation errors often
# surface only on the first call, so this try/except is best-effort.
try:
    pipe.transformer = torch.compile(pipe.transformer, mode="reduce-overhead", fullgraph=True)
except Exception:
    print("torch.compile not supported, proceeding without compilation")
prompt = "A cat holding a sign that says hello world"

# Generate the image with settings suited to FLUX.1-schnell.
with torch.inference_mode():
    out = pipe(
        prompt=prompt,
        guidance_scale=0.0,        # FLUX.1-schnell is distilled for guidance_scale=0
        height=768,
        width=1360,
        num_inference_steps=4,     # schnell is optimized for ~4 steps
        max_sequence_length=256,   # schnell supports at most 256 text tokens
        generator=torch.Generator("cpu").manual_seed(42),  # CPU generator for reproducible seeds
    ).images[0]

# Save the generated image.
out.save("image.png")
print("Image generated and saved as 'image.png'")
print("Optimizations applied: MPS device, bfloat16 precision, attention slicing, VAE slicing")