from diffusers import StableDiffusionPipeline
import torch
from PIL import Image
import io


class ImageGenerator:
    def __init__(self, model_name="CompVis/stable-diffusion-v1-4"):
        # Explicit GPU detection and setup
        if torch.cuda.is_available():
            self.device = torch.device("cuda")
            print(f"Image Generator: Using GPU - {torch.cuda.get_device_name(0)}")
            print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB")
        else:
            self.device = torch.device("cpu")
            print("Image Generator: Using CPU")

        print(f"Loading model {model_name}...")
        self.pipe = StableDiffusionPipeline.from_pretrained(
            model_name,
            torch_dtype=torch.float16 if self.device.type == "cuda" else torch.float32,
            safety_checker=None,  # Disable safety checker for better performance
            variant="fp16" if self.device.type == "cuda" else None,  # Use fp16 weights on GPU
        ).to(self.device)
        print(f"Model loaded and moved to {self.device}")

    def generate_image(self, prompt, num_inference_steps=30, guidance_scale=7.0):
        """
        Generate an image based on the given prompt.

        Args:
            prompt (str): The text prompt to generate from
            num_inference_steps (int): Number of denoising steps
            guidance_scale (float): Scale for classifier-free guidance

        Returns:
            PIL.Image: Generated image, or an error string if generation fails
        """
        try:
            print(f"Generating image on {self.device}...")

            # Add quality prompts
            enhanced_prompt = f"{prompt}, high quality, detailed, 4k, professional photography"

            image = self.pipe(
                enhanced_prompt,
                num_inference_steps=num_inference_steps,
                guidance_scale=guidance_scale,
                negative_prompt="blurry, low quality, distorted, deformed",
                width=512,   # Reduced resolution for faster generation
                height=512,  # Reduced resolution for faster generation
            ).images[0]

            return image
        except Exception as e:
            return f"Error generating image: {str(e)}"
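

# --- Minimal usage sketch (illustrative, not part of the class above).
# The prompt and output filename are assumptions; running this requires the
# diffusers/transformers/accelerate packages and downloads the model weights
# on first use. ---
if __name__ == "__main__":
    generator = ImageGenerator()
    result = generator.generate_image("a lighthouse at sunset", num_inference_steps=30)
    if isinstance(result, Image.Image):
        # Success: persist the generated image to disk
        result.save("output.png")
        print("Saved generated image to output.png")
    else:
        # generate_image returns an error string on failure
        print(result)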