import torch
from diffusers import FluxPipeline

# Memory-efficient configuration for Apple Silicon (MPS)
# Load the pipeline in bfloat16 for better MPS performance
pipe = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-schnell",
    torch_dtype=torch.bfloat16,
    use_safetensors=True,
)

# Move to the MPS device for GPU acceleration on Apple Silicon
pipe.to("mps")

# Apple Silicon memory optimizations
pipe.enable_attention_slicing()  # slice attention where supported (may be a no-op for Flux's transformer)
pipe.vae.enable_slicing()        # decode the VAE in slices to lower peak memory

# Optional: enable model CPU offload if memory is tight
# pipe.enable_model_cpu_offload()

# FLUX uses a transformer backbone rather than a UNet; compile it for speed if
# supported. Note that torch.compile is lazy, so compilation errors may only
# surface on the first pipeline call rather than here.
try:
    pipe.transformer = torch.compile(pipe.transformer, mode="reduce-overhead", fullgraph=True)
except Exception:
    print("torch.compile not supported, proceeding without compilation")

prompt = "A cat holding a sign that says hello world"

# Generate the image with settings tuned for FLUX.1-schnell
with torch.inference_mode():
    out = pipe(
        prompt=prompt,
        guidance_scale=0.0,       # FLUX.1-schnell is guidance-distilled; run it with guidance_scale=0
        height=768,
        width=1360,
        num_inference_steps=4,    # FLUX.1-schnell is optimized for 4 steps
        max_sequence_length=256,  # shorter text sequence length for memory efficiency
        generator=torch.Generator(device="mps").manual_seed(42),  # reproducible results
    ).images[0]

# Save the generated image
out.save("image.png")
print("Image generated and saved as 'image.png'")
print("Optimizations applied: MPS device, bfloat16 precision, attention slicing, VAE slicing")
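
# --- Optional additions (minimal sketches, not part of the original script) ---

# Report how much memory the MPS allocator is holding after generation, which
# helps when tuning the slicing options above. Assumes PyTorch >= 2.0, where
# torch.mps.current_allocated_memory() is available.
if torch.backends.mps.is_available():
    allocated_gb = torch.mps.current_allocated_memory() / 1e9
    print(f"MPS memory currently allocated: {allocated_gb:.2f} GB")

# For portability to machines without Apple Silicon, the hard-coded
# pipe.to("mps") above could be replaced with a device fallback, e.g.:
#   device = "mps" if torch.backends.mps.is_available() else "cpu"
#   pipe.to(device)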