app update
app.py CHANGED
@@ -11,41 +11,90 @@ import requests
 import os
 from sklearn.preprocessing import StandardScaler
 
-#
+# Constants
 MODEL_URL = "https://github.com/onnx/models/raw/main/vision/body_analysis/emotion_ferplus/model/emotion-ferplus-8.onnx"
 MODEL_PATH = "emotion-ferplus-8.onnx"
+MODEL_CHECKSUM_SIZE = 2483870  # Expected file size in bytes for verification
 
+class EmotionModel:
+    def __init__(self):
+        self.session = None
+        self.labels = ['neutral', 'happy', 'surprise', 'sad', 'angry', 'disgust', 'fear', 'contempt']
+        self.load_model()
+
+    def download_model(self):
+        try:
+            print("Downloading emotion recognition model...")
+            response = requests.get(MODEL_URL, stream=True, timeout=30)
+            response.raise_for_status()
+
+            with open(MODEL_PATH, "wb") as f:
+                for chunk in response.iter_content(chunk_size=8192):
+                    if chunk:
+                        f.write(chunk)
+
+            # Verify download
+            if os.path.exists(MODEL_PATH):
+                actual_size = os.path.getsize(MODEL_PATH)
+                if actual_size != MODEL_CHECKSUM_SIZE:
+                    print(f"Warning: Downloaded file size {actual_size} doesn't match expected size {MODEL_CHECKSUM_SIZE}")
+                return True
+            return False
+        except Exception as e:
+            print(f"Download failed: {str(e)}")
+            return False
+
+    def load_model(self):
+        if not os.path.exists(MODEL_PATH):
+            if not self.download_model():
+                print("Using dummy emotion model")
+                self.session = DummyEmotionSession()
+                return
+
+        try:
+            so = ort.SessionOptions()
+            so.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
+            self.session = ort.InferenceSession(MODEL_PATH, so)
+            print("Emotion model loaded successfully")
+        except Exception as e:
+            print(f"Failed to load ONNX model: {str(e)}")
+            print("Using dummy emotion model")
+            self.session = DummyEmotionSession()
+
+    def predict(self, frame):
+        return self.session.run(None, {'Input3': frame})[0]
 
+class DummyEmotionSession:
+    def run(self, *args, **kwargs):
+        # Return mostly neutral with slight random variations
+        base = np.array([0.8] + [0.1]*7)
+        variation = np.random.normal(0, 0.01, size=8)
+        return [np.clip(base + variation, 0, 1).reshape(1, -1)]
 
-# Simple voice emotion classifier
 class VoiceEmotionClassifier:
     def __init__(self):
        self.scaler = StandardScaler()
+        # Initialize with dummy data for scaling
+        dummy_features = np.random.randn(100, 13)
+        self.scaler.fit(dummy_features)
 
     def extract_features(self, audio):
-
-        # Convert to mono if stereo
-        if len(y.shape) > 1:
-            y = np.mean(y, axis=0)
-
-        # Resample to 16kHz if needed
-        if sr != 16000:
-            y = librosa.resample(y, orig_sr=sr, target_sr=16000)
-            sr = 16000
+        try:
+            sr, y = audio
+            y = y.astype(np.float32)
+
+            if len(y.shape) > 1:  # Convert stereo to mono
+                y = np.mean(y, axis=0)
+
+            if sr != 16000:  # Resample if needed
+                y = librosa.resample(y, orig_sr=sr, target_sr=16000)
+                sr = 16000
+
+            mfcc_features = mfcc(y, sr, numcep=13)
+            return np.mean(mfcc_features, axis=0)
+        except Exception as e:
+            print(f"Feature extraction error: {str(e)}")
+            return np.zeros(13)
 
     def predict(self, audio):
         try:
@@ -53,17 +102,20 @@ class VoiceEmotionClassifier:
             features = self.scaler.transform(features)
 
             # Simple rule-based classifier (replace with actual trained model)
-            if features[0, 0] > 0
+            if features[0, 0] > 1.0:
                 return "happy", [{"label": "happy", "score": 0.8}]
-            elif features[0, 0] < -0
+            elif features[0, 0] < -1.0:
                 return "sad", [{"label": "sad", "score": 0.7}]
+            elif abs(features[0, 1]) > 0.8:
+                return "angry", [{"label": "angry", "score": 0.6}]
             else:
                 return "neutral", [{"label": "neutral", "score": 0.9}]
         except Exception as e:
-            print(f"Voice
+            print(f"Voice prediction error: {str(e)}")
             return "neutral", [{"label": "neutral", "score": 1.0}]
 
 # Initialize models
+emotion_model = EmotionModel()
 voice_classifier = VoiceEmotionClassifier()
 
 # Global variables to store results
@@ -74,7 +126,6 @@ last_update_time = time.time()
 def analyze_face(frame):
     """Analyze facial expressions in the frame using ONNX model"""
     try:
-        # Preprocess frame
         gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
         face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
         faces = face_cascade.detectMultiScale(gray, 1.3, 5)
@@ -84,26 +135,19 @@ def analyze_face(frame):
             face_roi = gray[y:y+h, x:x+w]
             face_roi = cv2.resize(face_roi, (64, 64))
             face_roi = face_roi.astype('float32') / 255.0
-            face_roi = np.expand_dims(face_roi, axis=0)
-            face_roi = np.expand_dims(face_roi, axis=0)
+            face_roi = np.expand_dims(face_roi, axis=(0, 1))
 
-            input_name = emotion_session.get_inputs()[0].name
-            output_name = emotion_session.get_outputs()[0].name
-            results = emotion_session.run([output_name], {input_name: face_roi})[0]
-            # Get emotion probabilities
+            results = emotion_model.predict(face_roi)
             emotion_probs = results[0]
-            dominant_emotion =
+            dominant_emotion = emotion_model.labels[np.argmax(emotion_probs)]
 
-            emotions = {label: float(prob) for label, prob in zip(emotion_labels, emotion_probs)}
+            emotions = {label: float(prob) for label, prob in zip(emotion_model.labels, emotion_probs)}
             return dominant_emotion, emotions
 
-        return "neutral", {label: 0.0 for label in
+        return "neutral", {label: 0.0 for label in emotion_model.labels}
     except Exception as e:
-        print(f"Face analysis error: {e}")
-        return "neutral", {label: 0.0 for label in
+        print(f"Face analysis error: {str(e)}")
+        return "neutral", {label: 0.0 for label in emotion_model.labels}
 
 def analyze_voice(audio):
     """Analyze voice tone from audio"""
@@ -114,24 +158,16 @@ def update_emotion_history(face_emotion, voice_emotion):
     global current_emotions, emotion_history, last_update_time
 
     current_time = datetime.now().strftime("%H:%M:%S")
-
-    # Update current emotions
     current_emotions = {
         "face": face_emotion,
         "voice": voice_emotion,
         "timestamp": current_time
     }
 
-    # Add to history (every 5 seconds or when emotion changes significantly)
     if (time.time() - last_update_time) > 5 or not emotion_history:
-        emotion_history.append(
-            "timestamp": current_time,
-            "face": face_emotion,
-            "voice": voice_emotion
-        })
+        emotion_history.append(current_emotions.copy())
         last_update_time = time.time()
 
-    # Keep only last 20 entries
     if len(emotion_history) > 20:
         emotion_history = emotion_history[-20:]
 
@@ -188,13 +224,11 @@ def process_input(video, audio):
         else:
             voice_emotion, voice_details = "neutral", {}
 
-        # Update history and get outputs
         update_emotion_history(face_emotion, voice_emotion)
         timeline_df = get_emotion_timeline()
        advice = get_practitioner_advice(face_emotion, voice_emotion)
 
-
-        outputs = {
+        return {
             "current_face": face_emotion,
             "current_voice": voice_emotion,
             "timeline": timeline_df,
@@ -202,10 +236,8 @@ def process_input(video, audio):
             "face_details": str(face_details),
             "voice_details": str(voice_details)
         }
-
-        return outputs
     except Exception as e:
-        print(f"Processing error: {e}")
+        print(f"Processing error: {str(e)}")
         return {
             "current_face": "Error",
             "current_voice": "Error",
@@ -256,4 +288,4 @@ with gr.Blocks(title="Patient Emotion Recognition", theme="soft") as demo:
     )
 
 if __name__ == "__main__":
-    demo.launch(debug=True)
+    demo.launch(debug=True, server_name="0.0.0.0", server_port=7860)