import cv2
import numpy as np
import gradio as gr
from ultralytics import YOLO
from transformers import AutoImageProcessor, AutoModelForDepthEstimation
from PIL import Image
import torch  # Used for no_grad inference and depth map interpolation

# Load YOLO model for tree detection
# Replace with your model path (local or Hugging Face Hub)
yolo_model = YOLO("./data/best.pt")  # Update with your YOLO model path

# Load depth estimation model and processor from Hugging Face
processor = AutoImageProcessor.from_pretrained("Intel/dpt-large")
depth_model = AutoModelForDepthEstimation.from_pretrained("Intel/dpt-large")


# Function to process image and estimate tree heights
def process_image(image, focal_length_mm=3.6, sensor_height_mm=4.8, depth_scale=100):
    """
    Process an input image to detect trees and estimate their heights.

    Args:
        image: PIL Image from Gradio
        focal_length_mm: Camera focal length in millimeters (default: 3.6)
        sensor_height_mm: Camera sensor height in millimeters (default: 4.8)
        depth_scale: Scaling factor to convert the relative depth map to centimeters (default: 100)

    Returns:
        Annotated image and JSON with tree heights
    """
    # Convert PIL image to OpenCV format (BGR)
    image_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    image_height = image_cv.shape[0]  # Image height in pixels

    # Step 1: Run YOLO to detect trees
    results = yolo_model(image_cv)
    boxes = results[0].boxes.xyxy.cpu().numpy()  # Bounding boxes [x_min, y_min, x_max, y_max]

    # Step 2: Prepare image for depth estimation
    # Convert OpenCV image (BGR) back to PIL (RGB) for transformers
    image_pil = Image.fromarray(cv2.cvtColor(image_cv, cv2.COLOR_BGR2RGB))
    # Preprocess image for the depth model
    inputs = processor(images=image_pil, return_tensors="pt")

    # Step 3: Run depth estimation
    with torch.no_grad():
        outputs = depth_model(**inputs)
        predicted_depth = outputs.predicted_depth

    # Resize depth map to match the input image size
    depth_map = torch.nn.functional.interpolate(
        predicted_depth.unsqueeze(1),
        size=(image_cv.shape[0], image_cv.shape[1]),
        mode="bicubic",
        align_corners=False,
    ).squeeze().cpu().numpy()

    # Step 4: Process each detected tree
    output = []
    for box in boxes:
        x_min, y_min, x_max, y_max = map(int, box)
        h_pixel = y_max - y_min  # Bounding box height in pixels

        # Extract depth for the tree's bounding box
        depth_region = depth_map[y_min:y_max, x_min:x_max]
        avg_depth = np.mean(depth_region)  # Average depth (relative units)

        # Convert depth to centimeters using the scaling factor
        distance_cm = avg_depth * depth_scale  # Tune depth_scale based on testing

        # Calculate tree height in centimeters (pinhole camera model)
        # Formula: H = (h_pixel * D * sensor_height) / (focal_length * image_height)
        tree_height_cm = (h_pixel * distance_cm * sensor_height_mm) / (focal_length_mm * image_height)
        tree_height_cm = round(float(tree_height_cm), 2)  # Plain Python float, rounded for display/JSON

        output.append({
            "box": (x_min, y_min, x_max, y_max),
            "height_cm": tree_height_cm
        })

    # Step 5: Draw results on the image
    for item in output:
        x_min, y_min, x_max, y_max = item["box"]
        # Draw bounding box
        cv2.rectangle(image_cv, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)
        # Add height text
        cv2.putText(image_cv, f"Height: {item['height_cm']} cm", (x_min, y_min - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

    # Convert back to RGB for Gradio
    image_rgb = cv2.cvtColor(image_cv, cv2.COLOR_BGR2RGB)
    return image_rgb, output


# Create Gradio interface
iface = gr.Interface(
    fn=process_image,
    inputs=[
        gr.Image(type="pil", label="Upload Image"),
        gr.Number(label="Focal Length (mm)", value=3.6),
        gr.Number(label="Sensor Height (mm)", value=4.8),
        gr.Number(label="Depth Scale Factor", value=100)
    ],
    outputs=[
        gr.Image(label="Detected Trees with Heights"),
        gr.JSON(label="Tree Heights (cm)")
    ],
    title="Tree Detection and Height Estimation",
    description="Upload an image to detect trees and estimate their heights in centimeters. Adjust camera parameters and depth scale as needed."
)

# Launch the interface locally
iface.launch(server_name="0.0.0.0", server_port=7860)