Spaces:

yunusajib
/

Video-Emotional-Detection

Sleeping

App Files Community

yunusajib committed on Jun 5

Commit

d4c1346

verified ·

1 Parent(s): 0262090

app upgrade

Browse files

upgrade some features

Files changed (1) hide show

app.py +97 -68

app.py CHANGED Viewed

@@ -1,104 +1,133 @@
 import os
 import cv2
-import torch
-import tempfile
 import io
 import numpy as np
 import pandas as pd
 import matplotlib.pyplot as plt
 from PIL import Image
 from torchvision import transforms
 from facenet_pytorch import MTCNN
 import gradio as gr
-# Dummy emotion model (replace with trained PyTorch model)
-class SimpleEmotionClassifier(torch.nn.Module):
-    def __init__(self, num_classes=7):
-        super(SimpleEmotionClassifier, self).__init__()
-        self.model = torch.nn.Sequential(
             torch.nn.Flatten(),
-            torch.nn.Linear(3 * 48 * 48, 256),
             torch.nn.ReLU(),
-            torch.nn.Linear(256, num_classes)
         )
     def forward(self, x):
-        return self.model(x)
 class EmotionDetector:
-    def __init__(self):
-        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         self.emotions = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral']
-        # Load models
         self.face_detector = MTCNN(keep_all=False, device=self.device)
-        self.emotion_model = SimpleEmotionClassifier(num_classes=len(self.emotions)).to(self.device)
-        self.emotion_model.eval()
-        # Dummy weights — you can replace this with trained weights
-        # torch.load('emotion_model.pth') to load a trained one
         self.transform = transforms.Compose([
-            transforms.Resize((48, 48)),
-            transforms.ToTensor(),
         ])
-    def detect_emotions_video(self, video_path, sample_rate=30):
-        cap = cv2.VideoCapture(video_path)
-        fps = cap.get(cv2.CAP_PROP_FPS)
-        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-        frame_indices = range(0, frame_count, sample_rate)
-        emotions_over_time = []
-        for idx in frame_indices:
-            cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
-            ret, frame = cap.read()
-            if not ret:
-                continue
-            img_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-            face = self.face_detector(Image.fromarray(img_rgb))
-            if face is not None:
-                face = self.transform(face).unsqueeze(0).to(self.device)
                 with torch.no_grad():
-                    logits = self.emotion_model(face)
-                    probs = torch.nn.functional.softmax(logits, dim=1)[0].cpu().numpy()
-                emotion_data = {emotion: float(probs[i]) * 100 for i, emotion in enumerate(self.emotions)}
-                emotion_data['timestamp'] = idx / fps
                 emotions_over_time.append(emotion_data)
-        cap.release()
-        if not emotions_over_time:
-            return None, "No faces/emotions detected."
-        df = pd.DataFrame(emotions_over_time)
-        # Plot
-        plt.figure(figsize=(12, 6))
-        for emotion in self.emotions:
-            plt.plot(df['timestamp'], df[emotion], label=emotion)
-        plt.xlabel("Time (s)")
-        plt.ylabel("Emotion Confidence (%)")
-        plt.legend()
-        plt.tight_layout()
-        buf = io.BytesIO()
-        plt.savefig(buf, format='png')
-        buf.seek(0)
-        chart_img = Image.open(buf)
-        plt.close('all')
-        # Text Summary
-        avg_emotions = df[self.emotions].mean().sort_values(ascending=False)
-        result_text = "**Average Emotions:**\n"
-        for emo, val in avg_emotions.items():
-            result_text += f"• {emo.title()}: {val:.1f}%\n"
-        return chart_img, result_text
 def create_interface():
     detector = EmotionDetector()
@@ -112,14 +141,14 @@ def create_interface():
         fn=process,
         inputs=[
             gr.Video(label="Upload Video"),
-            gr.Slider(1, 60, step=1, value=30, label="Sample Rate (Frames)")
         ],
         outputs=[
             gr.Image(type="pil", label="Emotion Chart"),
-            gr.Textbox(label="Summary")
         ],
-        title="AI Video Emotion Detector",
-        description="Detects facial emotions in a video you provided"
     )
 if __name__ == "__main__":

 import os
 import cv2
 import io
+import tempfile
 import numpy as np
 import pandas as pd
 import matplotlib.pyplot as plt
 from PIL import Image
+import torch
+import torch.nn.functional as F
 from torchvision import transforms
 from facenet_pytorch import MTCNN
 import gradio as gr
+class EmotionModel(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.net = torch.nn.Sequential(
+            torch.nn.Conv2d(1, 32, 3, padding=1),
+            torch.nn.ReLU(),
+            torch.nn.MaxPool2d(2),
+            torch.nn.Conv2d(32, 64, 3, padding=1),
+            torch.nn.ReLU(),
+            torch.nn.MaxPool2d(2),
             torch.nn.Flatten(),
+            torch.nn.Linear(64 * 12 * 12, 128),
             torch.nn.ReLU(),
+            torch.nn.Linear(128, 7)  # 7 emotion classes
         )
     def forward(self, x):
+        return self.net(x)
 class EmotionDetector:
+    def __init__(self, device='cpu'):
+        self.device = device
+        self.model = EmotionModel().to(self.device)
+        self.model.eval()
         self.emotions = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral']
         self.face_detector = MTCNN(keep_all=False, device=self.device)
         self.transform = transforms.Compose([
+            transforms.Resize((48, 48))
+            # No ToTensor() here – face already comes as tensor
         ])
+        self.softmax = torch.nn.Softmax(dim=1)
+        # OPTIONAL: load pre-trained weights if available
+        # self.model.load_state_dict(torch.load("emotion_model.pt", map_location=self.device))
+    def detect_emotions_video(self, video_path, sample_rate=30, max_size_mb=50):
+        try:
+            if video_path is None:
+                return None, "No video provided"
+            if os.path.getsize(video_path) / (1024 * 1024) > max_size_mb:
+                return None, f"File too large (>{max_size_mb} MB)."
+            cap = cv2.VideoCapture(video_path)
+            fps = int(cap.get(cv2.CAP_PROP_FPS))
+            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+            if frame_count == 0:
+                return None, "Invalid video file"
+            frame_indices = range(0, frame_count, sample_rate)
+            emotions_over_time = []
+            for frame_idx in frame_indices:
+                cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
+                ret, frame = cap.read()
+                if not ret:
+                    continue
+                img_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+                img_pil = Image.fromarray(img_rgb)
+                face_tensor = self.face_detector(img_pil)
+                if face_tensor is None:
+                    continue
+                face_tensor = self.transform(face_tensor)  # Resize
+                face_tensor = face_tensor.mean(dim=0, keepdim=True)  # convert to grayscale
+                face_tensor = face_tensor.unsqueeze(0).to(self.device)  # Add batch + channel
                 with torch.no_grad():
+                    output = self.model(face_tensor)
+                    probs = self.softmax(output).cpu().numpy()[0]
+                emotion_data = {self.emotions[i]: float(probs[i]) * 100 for i in range(len(self.emotions))}
+                emotion_data['timestamp'] = frame_idx / fps
                 emotions_over_time.append(emotion_data)
+            cap.release()
+            if not emotions_over_time:
+                return None, "No emotions detected."
+            df = pd.DataFrame(emotions_over_time)
+            plt.figure(figsize=(12, 8))
+            for emotion in self.emotions:
+                if emotion in df.columns:
+                    plt.plot(df['timestamp'], df[emotion], label=emotion.title(), linewidth=2)
+            plt.xlabel('Time (seconds)')
+            plt.ylabel('Confidence (%)')
+            plt.title('Emotions Over Time')
+            plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
+            plt.grid(True)
+            plt.tight_layout()
+            img_buf = io.BytesIO()
+            plt.savefig(img_buf, format='png', dpi=150, bbox_inches='tight')
+            img_buf.seek(0)
+            plt.close()
+            chart_image = Image.open(img_buf)
+            avg_emotions = df[self.emotions].mean().sort_values(ascending=False)
+            result_text = f"**Video Analysis Complete**\n"
+            result_text += f"**Frames Analyzed:** {len(emotions_over_time)}\n"
+            result_text += f"**Duration:** {df['timestamp'].max():.1f} seconds\n\n"
+            result_text += "**Average Emotions:**\n"
+            for emotion, confidence in avg_emotions.items():
+                result_text += f"• {emotion.title()}: {confidence:.1f}%\n"
+            return chart_image, result_text
+        except Exception as e:
+            return None, f"Error: {str(e)}"
 def create_interface():
     detector = EmotionDetector()
         fn=process,
         inputs=[
             gr.Video(label="Upload Video"),
+            gr.Slider(minimum=1, maximum=60, step=1, value=30, label="Sample Rate (Frames)")
         ],
         outputs=[
             gr.Image(type="pil", label="Emotion Chart"),
+            gr.Textbox(label="Analysis Summary")
         ],
+        title="PyTorch-Only Emotion Detection",
+        description="Analyze emotions from a video using PyTorch and facenet-pytorch."
     )
 if __name__ == "__main__":