yunusajib committed
Commit 5eff629 · verified · 1 Parent(s): 655839f

Modify some app features

Files changed (1)
  1. app.py +48 -91
app.py CHANGED
@@ -4,27 +4,70 @@ import cv2
 import pandas as pd
 from datetime import datetime
 import time
-from transformers import pipeline
 import librosa
 from python_speech_features import mfcc
 import onnxruntime as ort
 import requests
 import os
+from sklearn.preprocessing import StandardScaler
+import joblib
 
 # Download emotion recognition ONNX model
 MODEL_URL = "https://github.com/onnx/models/raw/main/vision/body_analysis/emotion_ferplus/model/emotion-ferplus-8.onnx"
 MODEL_PATH = "emotion-ferplus-8.onnx"
 
 if not os.path.exists(MODEL_PATH):
+    print("Downloading emotion recognition model...")
     response = requests.get(MODEL_URL)
     with open(MODEL_PATH, "wb") as f:
         f.write(response.content)
 
-# Initialize models
-voice_classifier = pipeline("audio-classification", model="superb/hubert-base-superb-er")
+# Initialize face emotion detection
 emotion_session = ort.InferenceSession(MODEL_PATH)
 emotion_labels = ['neutral', 'happy', 'surprise', 'sad', 'angry', 'disgust', 'fear', 'contempt']
 
+# Simple voice emotion classifier (replace with your own trained model if needed)
+class VoiceEmotionClassifier:
+    def __init__(self):
+        self.scaler = StandardScaler()
+
+    def extract_features(self, audio):
+        sr, y = audio
+        y = y.astype(np.float32)
+
+        # Convert to mono if stereo
+        if len(y.shape) > 1:
+            y = np.mean(y, axis=0)
+
+        # Resample to 16kHz if needed
+        if sr != 16000:
+            y = librosa.resample(y, orig_sr=sr, target_sr=16000)
+            sr = 16000
+
+        # Extract MFCC features
+        mfcc_features = mfcc(y, sr, numcep=13)
+        return np.mean(mfcc_features, axis=0)
+
+    def predict(self, audio):
+        try:
+            features = self.extract_features(audio).reshape(1, -1)
+            features = self.scaler.transform(features)
+
+            # Simple rule-based classifier (replace with actual trained model)
+            # This is just a placeholder - you should train a proper model
+            if features[0, 0] > 0.5:
+                return "happy", [{"label": "happy", "score": 0.8}]
+            elif features[0, 0] < -0.5:
+                return "sad", [{"label": "sad", "score": 0.7}]
+            else:
+                return "neutral", [{"label": "neutral", "score": 0.9}]
+        except Exception as e:
+            print(f"Voice analysis error: {e}")
+            return "neutral", [{"label": "neutral", "score": 1.0}]
+
+# Initialize models
+voice_classifier = VoiceEmotionClassifier()
+
 # Global variables to store results
 emotion_history = []
 current_emotions = {"face": "neutral", "voice": "neutral"}
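Note on this hunk: the new VoiceEmotionClassifier calls self.scaler.transform() on a StandardScaler that is never fitted, so predict() will raise inside the try block and fall back to "neutral" as committed, and the newly added joblib import does not appear to be used anywhere in this diff. A minimal sketch of how a fitted scaler and classifier trained offline on the same 13-dimensional MFCC-mean features could be plugged in; the file names and the joblib-based loading are assumptions, not part of this commit:

# Sketch only (not part of this commit). Assumes the files below exist and were
# produced by an offline training run on MFCC-mean features.
import joblib
import numpy as np

class TrainedVoiceEmotionClassifier(VoiceEmotionClassifier):
    def __init__(self, scaler_path="voice_scaler.joblib", model_path="voice_model.joblib"):
        super().__init__()
        self.scaler = joblib.load(scaler_path)  # fitted sklearn StandardScaler
        self.model = joblib.load(model_path)    # e.g. a fitted LogisticRegression

    def predict(self, audio):
        try:
            features = self.extract_features(audio).reshape(1, -1)
            features = self.scaler.transform(features)
            label = str(self.model.predict(features)[0])
            score = float(np.max(self.model.predict_proba(features)))
            return label, [{"label": label, "score": score}]
        except Exception as e:
            print(f"Voice analysis error: {e}")
            return "neutral", [{"label": "neutral", "score": 1.0}]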
@@ -66,26 +109,7 @@ def analyze_face(frame):
 
 def analyze_voice(audio):
     """Analyze voice tone from audio"""
-    try:
-        sr, y = audio
-        y = y.astype(np.float32)
-
-        # Convert to mono if stereo
-        if len(y.shape) > 1:
-            y = np.mean(y, axis=0)
-
-        # Resample to 16kHz if needed
-        if sr != 16000:
-            y = librosa.resample(y, orig_sr=sr, target_sr=16000)
-            sr = 16000
-
-        # Classify emotion
-        result = voice_classifier({"sampling_rate": sr, "raw": y})
-        dominant_emotion = result[0]['label']
-        return dominant_emotion, result
-    except Exception as e:
-        print(f"Voice analysis error: {e}")
-        return "neutral", [{"label": "neutral", "score": 1.0}]
+    return voice_classifier.predict(audio)
 
 def update_emotion_history(face_emotion, voice_emotion):
     """Update the emotion history and current emotions"""
@@ -167,71 +191,4 @@ def process_input(video, audio):
         voice_emotion, voice_details = "neutral", {}
 
         # Update history and get outputs
-        update_emotion_history(face_emotion, voice_emotion)
-        timeline_df = get_emotion_timeline()
-        advice = get_practitioner_advice(face_emotion, voice_emotion)
-
-        # Prepare outputs
-        outputs = {
-            "current_face": face_emotion,
-            "current_voice": voice_emotion,
-            "timeline": timeline_df,
-            "advice": advice,
-            "face_details": str(face_details),
-            "voice_details": str(voice_details)
-        }
-
-        return outputs
-    except Exception as e:
-        print(f"Processing error: {e}")
-        return {
-            "current_face": "Error",
-            "current_voice": "Error",
-            "timeline": pd.DataFrame(),
-            "advice": "System error occurred",
-            "face_details": "",
-            "voice_details": ""
-        }
-
-# Gradio interface
-with gr.Blocks(title="Patient Emotion Recognition", theme="soft") as demo:
-    gr.Markdown("# Real-Time Patient Emotion Recognition")
-    gr.Markdown("Analyze facial expressions and voice tone during medical consultations")
-
-    with gr.Row():
-        with gr.Column():
-            video_input = gr.Image(label="Live Camera Feed", source="webcam", streaming=True)
-            audio_input = gr.Audio(label="Voice Input", source="microphone", type="numpy")
-            submit_btn = gr.Button("Analyze Emotions")
-
-        with gr.Column():
-            current_face = gr.Textbox(label="Current Facial Emotion")
-            current_voice = gr.Textbox(label="Current Voice Emotion")
-            advice_output = gr.Textbox(label="Practitioner Suggestions", lines=3)
-            timeline_output = gr.Dataframe(label="Emotion Timeline", interactive=False)
-            face_details = gr.Textbox(label="Face Analysis Details", visible=False)
-            voice_details = gr.Textbox(label="Voice Analysis Details", visible=False)
-
-    # Live processing
-    video_input.change(
-        process_input,
-        inputs=[video_input, audio_input],
-        outputs=[current_face, current_voice, timeline_output, advice_output, face_details, voice_details],
-        show_progress="hidden"
-    )
-
-    audio_input.change(
-        process_input,
-        inputs=[video_input, audio_input],
-        outputs=[current_face, current_voice, timeline_output, advice_output, face_details, voice_details],
-        show_progress="hidden"
-    )
-
-    submit_btn.click(
-        process_input,
-        inputs=[video_input, audio_input],
-        outputs=[current_face, current_voice, timeline_output, advice_output, face_details, voice_details]
-    )
-
-if __name__ == "__main__":
-    demo.launch(debug=True)
+        update_em
 