import cv2
import numpy as np
import gradio as gr
from mtcnn import MTCNN
from tensorflow.keras.models import load_model
from tensorflow.keras.applications.xception import preprocess_input as xcp_pre
from tensorflow.keras.applications.efficientnet import preprocess_input as eff_pre
from huggingface_hub import hf_hub_download


# Ensemble weights: both checkpoints are fetched from the same Hugging Face
# Hub repository and then deserialized with Keras.
_MODEL_REPO_ID = "Zeyadd-Mostaffa/deepfake-image-detector_final"

xcp_path = hf_hub_download(
    repo_id=_MODEL_REPO_ID,
    filename="xception_model.h5",
)
eff_path = hf_hub_download(
    repo_id=_MODEL_REPO_ID,
    filename="efficientnet_model.h5",
)
xcp_model = load_model(xcp_path)
eff_model = load_model(eff_path)


# Face detector shared by every request handled by predict().
detector = MTCNN()

def expand_box(x, y, w, h, scale=1.5, img_shape=None):
    """Expand a face bounding box about its centre and clip it to the image.

    Args:
        x, y: Top-left corner of the box, in pixels.
        w, h: Width and height of the box, in pixels.
        scale: Factor applied to both the width and the height.
        img_shape: Optional ``(height, width, ...)`` shape of the image the
            box belongs to. When given, the right/bottom edges are clipped
            to it. When ``None`` only the lower bound of 0 is enforced.

    Returns:
        Tuple ``(x1, y1, x2, y2)`` with the corners of the expanded box.
    """
    cx, cy = x + w // 2, y + h // 2
    half_w, half_h = int(w * scale) // 2, int(h * scale) // 2

    x1, y1 = max(0, cx - half_w), max(0, cy - half_h)
    x2, y2 = cx + half_w, cy + half_h

    # The default ``img_shape=None`` used to raise TypeError on indexing;
    # without a shape there is simply no upper bound to clip against.
    if img_shape is not None:
        x2 = min(img_shape[1], x2)
        y2 = min(img_shape[0], y2)
    return x1, y1, x2, y2

_FACE_MARGIN = 0.2        # fraction of the detected box added on every side
_REAL_THRESHOLD = 0.41    # ensemble scores above this are labelled "Real"
_REAL_COLOR = (0, 255, 0)  # green
_FAKE_COLOR = (255, 0, 0)  # red in RGB channel order (the Gradio array is RGB)


def _padded_face_box(box, img_shape, margin=_FACE_MARGIN):
    """Return ``(x1, y1, x2, y2)`` for *box* grown by *margin* and clipped to the image."""
    x, y, w, h = box
    img_h, img_w = img_shape[:2]
    # Work out all four edges before clamping. Clamping the origin first and
    # then adding the padded size would push the far edge past the intended
    # margin whenever the face touches the left/top border.
    left = int(x - w * margin)
    top = int(y - h * margin)
    right = left + int(w * (1 + 2 * margin))
    bottom = top + int(h * (1 + 2 * margin))
    return max(0, left), max(0, top), min(img_w, right), min(img_h, bottom)


def predict(image):
    """Classify every detected face in *image* as real or fake.

    Each face is cropped with a margin, resized for each model, scored by
    the Xception and EfficientNet models, and the two scores are averaged.
    The average is treated as the confidence that the face is real.

    Args:
        image: Image array as delivered by the Gradio ``Image`` input, or
            ``None`` when the user submitted without uploading anything.

    Returns:
        Tuple ``(text, annotated_image)``: a newline-separated report with
        one line per face, and a copy of the input with a labelled box drawn
        around each face. When no face is found the input image is returned
        unchanged; when no image was supplied the image slot is ``None``.
    """
    if image is None:
        return "No image provided", None

    faces = detector.detect_faces(image)
    if not faces:
        return "No face detected", image

    output_image = image.copy()
    results = []

    for idx, face in enumerate(faces, start=1):
        x1, y1, x2, y2 = _padded_face_box(face['box'], image.shape)
        if x2 <= x1 or y2 <= y1:
            # A degenerate or fully out-of-frame box yields an empty crop,
            # which cv2.resize rejects; report it instead of crashing.
            results.append(f"Face {idx}: skipped (bounding box lies outside the image)")
            continue

        face_img = image[y1:y2, x1:x2]

        # Resize + preprocess: each network gets its own input size and
        # its own Keras preprocessing function.
        face_xcp = cv2.resize(face_img, (299, 299))
        face_eff = cv2.resize(face_img, (224, 224))
        xcp_tensor = xcp_pre(face_xcp.astype(np.float32))[np.newaxis, ...]
        eff_tensor = eff_pre(face_eff.astype(np.float32))[np.newaxis, ...]

        # Predictions
        pred_xcp = xcp_model.predict(xcp_tensor, verbose=0).flatten()[0]
        pred_eff = eff_model.predict(eff_tensor, verbose=0).flatten()[0]
        avg = (pred_xcp + pred_eff) / 2  # Real confidence

        if avg > _REAL_THRESHOLD:
            label, confidence, color = "Real", avg, _REAL_COLOR
        else:
            # Report confidence in the predicted class, i.e. in "Fake".
            label, confidence, color = "Fake", 1 - avg, _FAKE_COLOR

        # Annotate image with percentage confidence. Keep the caption inside
        # the canvas when the face sits at the very top of the image.
        text_y = max(y1 - 10, 15)
        cv2.rectangle(output_image, (x1, y1), (x2, y2), color, 2)
        cv2.putText(output_image, f"{label} ({confidence * 100:.2f}%)", (x1, text_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)

        # Save results
        results.append(
            f"Face {idx}: {label} (Confidence: {confidence * 100:.2f}%, Avg Real: {avg * 100:.2f}%, XCP: {pred_xcp * 100:.2f}%, EFF: {pred_eff * 100:.2f}%)"
        )

    return "\n".join(results), output_image


# Gradio UI: one image in, a text report plus the annotated image out.
_image_input = gr.Image(type="numpy", label="Upload Image")
_prediction_outputs = [
    gr.Textbox(label="Predictions"),
    gr.Image(type="numpy", label="Annotated Image"),
]

interface = gr.Interface(
    fn=predict,
    inputs=_image_input,
    outputs=_prediction_outputs,
    title="Deepfake Detector (Multi-Face Ensemble)",
    description="Detects all faces in an image and classifies each one as real or fake using Xception and EfficientNetB4 ensemble.",
)

# Start the web server as soon as the module is executed.
interface.launch()