import os
import io

import cv2
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
import torch
from torchvision import transforms
from facenet_pytorch import MTCNN
import gradio as gr


class EmotionModel(torch.nn.Module):
    """Small CNN over 48x48 grayscale face crops, 7 emotion classes."""

    def __init__(self):
        super().__init__()
        self.net = torch.nn.Sequential(
            torch.nn.Conv2d(1, 32, 3, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(2),   # 48x48 -> 24x24
            torch.nn.Conv2d(32, 64, 3, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(2),   # 24x24 -> 12x12
            torch.nn.Flatten(),
            torch.nn.Linear(64 * 12 * 12, 128),
            torch.nn.ReLU(),
            torch.nn.Linear(128, 7)  # 7 emotion classes
        )

    def forward(self, x):
        return self.net(x)


class EmotionDetector:
    def __init__(self, device='cpu'):
        self.device = device
        self.model = EmotionModel().to(self.device)
        self.model.eval()
        self.emotions = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral']
        self.face_detector = MTCNN(keep_all=False, device=self.device)
        self.transform = transforms.Compose([
            transforms.Resize((48, 48))
        ])
        self.softmax = torch.nn.Softmax(dim=1)
        # Load pre-trained weights here if available:
        # self.model.load_state_dict(torch.load("emotion_model.pt", map_location=self.device))

    def detect_emotions_video(self, video_path, sample_rate=30, max_size_mb=50):
        try:
            if video_path is None:
                return None, "No video provided"
            if os.path.getsize(video_path) / (1024 * 1024) > max_size_mb:
                return None, f"File too large (>{max_size_mb} MB)."

            cap = cv2.VideoCapture(video_path)
            fps = cap.get(cv2.CAP_PROP_FPS) or 30.0  # guard against missing FPS metadata
            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            if frame_count <= 0:
                cap.release()
                return None, "Invalid video file"

            frame_indices = range(0, frame_count, sample_rate)
            emotions_over_time = []
            for frame_idx in frame_indices:
                cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
                ret, frame = cap.read()
                if not ret:
                    continue
                # OpenCV decodes to BGR; MTCNN expects RGB
                img_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                img_pil = Image.fromarray(img_rgb)
                face_tensor = self.face_detector(img_pil)  # cropped face tensor, or None if no face found
                if face_tensor is None:
                    continue
                face_tensor = self.transform(face_tensor)               # resize to 48x48
                face_tensor = face_tensor.mean(dim=0, keepdim=True)     # RGB -> single grayscale channel
                face_tensor = face_tensor.unsqueeze(0).to(self.device)  # add batch dimension
                with torch.no_grad():
                    output = self.model(face_tensor)
                    probs = self.softmax(output).cpu().numpy()[0]
                emotion_data = {self.emotions[i]: float(probs[i]) * 100 for i in range(len(self.emotions))}
                emotion_data['timestamp'] = frame_idx / fps
                emotions_over_time.append(emotion_data)

            cap.release()
            if not emotions_over_time:
                return None, "No emotions detected."
            df = pd.DataFrame(emotions_over_time)

            # Plot each emotion's confidence over time
            plt.figure(figsize=(12, 8))
            for emotion in self.emotions:
                if emotion in df.columns:
                    plt.plot(df['timestamp'], df[emotion], label=emotion.title(), linewidth=2)
            plt.xlabel('Time (seconds)')
            plt.ylabel('Confidence (%)')
            plt.title('Emotions Over Time')
            plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
            plt.grid(True)
            plt.tight_layout()

            # Render the chart to a PIL image for Gradio
            img_buf = io.BytesIO()
            plt.savefig(img_buf, format='png', dpi=150, bbox_inches='tight')
            img_buf.seek(0)
            plt.close()
            chart_image = Image.open(img_buf)

            avg_emotions = df[self.emotions].mean().sort_values(ascending=False)
            result_text = "**Video Analysis Complete**\n"
            result_text += f"**Frames Analyzed:** {len(emotions_over_time)}\n"
            result_text += f"**Duration:** {df['timestamp'].max():.1f} seconds\n\n"
            result_text += "**Average Emotions:**\n"
            for emotion, confidence in avg_emotions.items():
                result_text += f"• {emotion.title()}: {confidence:.1f}%\n"

            return chart_image, result_text
        except Exception as e:
            return None, f"Error: {str(e)}"


def create_interface():
    detector = EmotionDetector()

    def process(video_path, sample_rate):
        if video_path is None or not os.path.exists(video_path):
            return None, "Invalid video path or no video uploaded."
        return detector.detect_emotions_video(video_path, int(sample_rate))

    return gr.Interface(
        fn=process,
        inputs=[
            gr.Video(label="Upload Video"),
            gr.Slider(minimum=1, maximum=60, step=1, value=30,
                      label="Sample Rate (analyze every Nth frame)")
        ],
        outputs=[
            gr.Image(type="pil", label="Emotion Chart"),
            gr.Textbox(label="Analysis Summary")
        ],
        title="AI Emotion Detection",
        description="Upload a video to analyze emotions over time."
    )


if __name__ == "__main__":
    create_interface().launch()
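
# --- Usage sketch (illustrative, outside the Gradio app) ---
# A minimal programmatic run, assuming a local clip at "sample.mp4"
# (hypothetical path). Note that EmotionModel starts with random
# weights, so predictions are meaningless until "emotion_model.pt"
# is loaded in EmotionDetector.__init__.
#
#   detector = EmotionDetector(device='cpu')
#   chart, summary = detector.detect_emotions_video("sample.mp4", sample_rate=15)
#   if chart is not None:
#       chart.save("emotion_chart.png")
#   print(summary)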