# Import spaces before any CUDA/torch imports
import spaces

# Other imports below
import torch
import numpy as np
from PIL import Image
from transformers import pipeline
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler
from diffusers.utils import load_image
import os
import huggingface_hub
import config

class ControlNetPipeline:
    """Depth-conditioned Stable Diffusion generation with lazily loaded models.

    Nothing is loaded at construction time. The first call to
    ``initialize``, ``process_image`` or ``generate`` loads the depth
    estimator, the ControlNet weights and the Stable Diffusion pipeline.

    The public methods are wrapped in ``@spaces.GPU``; the actual work lives
    in private, undecorated helpers so that one public method never re-enters
    the GPU wrapper of another from inside its own wrapped call.

    NOTE(review): lazy loading relies on attributes set on ``self`` surviving
    between ``@spaces.GPU`` calls. Confirm this holds on the target Space
    hardware; if the wrapper runs calls in a separate worker process, the
    models may be reloaded on every request.
    """

    def __init__(self) -> None:
        """Create an empty pipeline; no model is loaded until first use."""
        self.depth_estimator = None
        self.pipe = None
        self.controlnet = None
        self.is_initialized = False

    def _load_models(self) -> None:
        """Load all models once; do nothing when already initialized."""
        if self.is_initialized:
            return

        # Depth estimator with the library's default model for the task.
        self.depth_estimator = pipeline('depth-estimation')

        # ControlNet weights in half precision.
        self.controlnet = ControlNetModel.from_pretrained(
            config.CONTROLNET_MODEL,
            torch_dtype=torch.float16,
        )

        # Stable Diffusion pipeline driven by the ControlNet above; the
        # safety checker is explicitly disabled.
        self.pipe = StableDiffusionControlNetPipeline.from_pretrained(
            config.BASE_MODEL,
            controlnet=self.controlnet,
            safety_checker=None,
            torch_dtype=torch.float16,
        )

        # Swap in the UniPC multistep scheduler, reusing the existing config.
        self.pipe.scheduler = UniPCMultistepScheduler.from_config(
            self.pipe.scheduler.config
        )

        # xformers attention is a best-effort optimization. Catch Exception
        # rather than using a bare ``except`` so KeyboardInterrupt and
        # SystemExit still propagate.
        try:
            self.pipe.enable_xformers_memory_efficient_attention()
        except Exception:
            print("xformers not available, using default attention mechanism")

        self.pipe.enable_model_cpu_offload()

        # Set last so a failed load is retried on the next call.
        self.is_initialized = True

    def _estimate_depth(self, image):
        """Return a 3-channel depth image for ``image``; models must be loaded."""
        depth = self.depth_estimator(image)['depth']
        # Assumes ``depth`` converts to a 2-D (H, W) array of a dtype that
        # ``Image.fromarray`` accepts for 3-channel data -- TODO confirm.
        depth_array = np.array(depth)
        # Replicate the single channel three times along a new last axis.
        depth_rgb = np.repeat(depth_array[:, :, None], 3, axis=2)
        return Image.fromarray(depth_rgb)

    @spaces.GPU
    def initialize(self) -> None:
        """Load the models if they have not been loaded yet."""
        self._load_models()

    @spaces.GPU
    def process_image(self, image):
        """Generate a depth map for ``image``.

        Loads the models on first use.

        Args:
            image: Input forwarded unchanged to the depth-estimation pipeline.

        Returns:
            A PIL image built from the estimated depth, with the depth channel
            repeated three times.
        """
        self._load_models()
        return self._estimate_depth(image)

    @spaces.GPU
    def generate(self, prompt, image, negative_prompt=None, guidance_scale=7.5, num_inference_steps=20):
        """Generate an image from ``prompt`` conditioned on the depth of ``image``.

        Loads the models on first use.

        Args:
            prompt: Text prompt passed to the diffusion pipeline.
            image: Input image from which the depth map is derived.
            negative_prompt: Optional negative prompt passed to the pipeline.
            guidance_scale: Guidance scale; converted with ``float()``.
            num_inference_steps: Number of steps; converted with ``int()``.

        Returns:
            The first entry of the pipeline output's ``images``.
        """
        self._load_models()

        # Depth map used as the ControlNet conditioning image.
        depth_image = self._estimate_depth(image)

        output = self.pipe(
            prompt=prompt,
            image=depth_image,
            negative_prompt=negative_prompt,
            guidance_scale=float(guidance_scale),
            num_inference_steps=int(num_inference_steps),
        )

        return output.images[0]