app
Browse files
app.py
CHANGED
@@ -12,80 +12,85 @@ import requests
|
|
12 |
import os
|
13 |
from sklearn.preprocessing import StandardScaler
|
14 |
|
15 |
-
# Constants
|
16 |
-
|
|
|
|
|
|
|
17 |
MODEL_PATH = "emotion-ferplus-8.onnx"
|
18 |
-
|
19 |
-
|
20 |
-
VOICE_SCALER_PATH = "voice_scaler.pkl" # Pretrained voice scaler
|
21 |
|
22 |
class EmotionModel:
|
23 |
def __init__(self):
|
24 |
self.session = None
|
25 |
self.labels = ['neutral', 'happy', 'surprise', 'sad', 'angry', 'disgust', 'fear', 'contempt']
|
26 |
-
self.emotion_buffer = []
|
27 |
self.load_model()
|
28 |
|
29 |
def download_model(self):
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
print(f"Download failed: {str(e)}")
|
49 |
-
return False
|
50 |
|
51 |
def load_model(self):
|
52 |
if not os.path.exists(MODEL_PATH):
|
53 |
if not self.download_model():
|
54 |
-
|
|
|
|
|
55 |
|
56 |
try:
|
57 |
so = ort.SessionOptions()
|
58 |
so.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
|
59 |
self.session = ort.InferenceSession(MODEL_PATH, so)
|
60 |
-
|
61 |
-
# Test the model with dummy input
|
62 |
-
dummy_input = np.random.rand(1, 1, 64, 64).astype(np.float32)
|
63 |
-
self.session.run(None, {'Input3': dummy_input})
|
64 |
-
print("Emotion model loaded and verified")
|
65 |
except Exception as e:
|
66 |
-
|
|
|
67 |
|
68 |
def softmax(self, x):
|
69 |
e_x = np.exp(x - np.max(x))
|
70 |
return e_x / e_x.sum()
|
71 |
|
72 |
def predict(self, frame):
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
if len(self.emotion_buffer) > 5:
|
79 |
-
self.emotion_buffer = self.emotion_buffer[-5:]
|
80 |
|
81 |
-
|
82 |
-
|
83 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
84 |
|
85 |
class VoiceEmotionClassifier:
|
86 |
def __init__(self):
|
87 |
try:
|
88 |
-
# Load pretrained models if available
|
89 |
if os.path.exists(VOICE_MODEL_PATH) and os.path.exists(VOICE_SCALER_PATH):
|
90 |
self.model = joblib.load(VOICE_MODEL_PATH)
|
91 |
self.scaler = joblib.load(VOICE_SCALER_PATH)
|
@@ -98,7 +103,6 @@ class VoiceEmotionClassifier:
|
|
98 |
print("Using limited rule-based voice analysis")
|
99 |
self.model = None
|
100 |
self.scaler = StandardScaler()
|
101 |
-
# Initialize with dummy data for scaling
|
102 |
dummy_features = np.random.randn(100, 18)
|
103 |
self.scaler.fit(dummy_features)
|
104 |
self.labels = ['neutral', 'happy', 'sad', 'angry', 'fear']
|
@@ -108,24 +112,21 @@ class VoiceEmotionClassifier:
|
|
108 |
y, sr = audio
|
109 |
features = []
|
110 |
|
111 |
-
if len(y.shape) > 1:
|
112 |
y = np.mean(y, axis=0)
|
113 |
|
114 |
-
if sr != 16000:
|
115 |
y = librosa.resample(y, orig_sr=sr, target_sr=16000)
|
116 |
sr = 16000
|
117 |
|
118 |
-
# MFCC features
|
119 |
mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
|
120 |
features.extend(np.mean(mfccs, axis=1))
|
121 |
features.extend(np.std(mfccs, axis=1))
|
122 |
|
123 |
-
# Pitch features
|
124 |
pitches = librosa.yin(y, fmin=80, fmax=400)
|
125 |
features.append(np.nanmean(pitches))
|
126 |
features.append(np.nanstd(pitches))
|
127 |
|
128 |
-
# Spectral features
|
129 |
spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
|
130 |
features.append(np.mean(spectral_centroid))
|
131 |
|
@@ -144,7 +145,6 @@ class VoiceEmotionClassifier:
|
|
144 |
emotion = self.labels[np.argmax(probs)]
|
145 |
details = [{"label": l, "score": p} for l, p in zip(self.labels, probs)]
|
146 |
else:
|
147 |
-
# Fallback rule-based classifier
|
148 |
if features[0, 0] > 1.0:
|
149 |
emotion = "happy"
|
150 |
details = [{"label": "happy", "score": 0.8}]
|
@@ -167,6 +167,7 @@ class VoiceEmotionClassifier:
|
|
167 |
emotion_model = EmotionModel()
|
168 |
voice_classifier = VoiceEmotionClassifier()
|
169 |
|
|
|
170 |
# Global variables to store results
|
171 |
emotion_history = []
|
172 |
current_emotions = {"face": "neutral", "voice": "neutral"}
|
|
|
12 |
import os
|
13 |
from sklearn.preprocessing import StandardScaler
|
14 |
|
15 |
+
# Constants - Updated with alternative model sources
|
16 |
+
MODEL_URLS = [
|
17 |
+
"https://github.com/onnx/models/raw/main/vision/body_analysis/emotion_ferplus/model/emotion-ferplus-8.onnx",
|
18 |
+
"https://www.dropbox.com/s/7mswy6h0k3f8ydo/emotion-ferplus-8.onnx?dl=1"
|
19 |
+
]
|
20 |
MODEL_PATH = "emotion-ferplus-8.onnx"
|
21 |
+
VOICE_MODEL_PATH = "voice_emotion_model.pkl"
|
22 |
+
VOICE_SCALER_PATH = "voice_scaler.pkl"
|
|
|
23 |
|
24 |
class EmotionModel:
    """Facial-emotion classifier wrapping the FER+ ONNX model.

    The model file is looked up at ``MODEL_PATH`` and, when absent, fetched
    from the first working entry of ``MODEL_URLS``.  If the model can be
    neither downloaded nor loaded, ``self.session`` stays ``None`` and
    :meth:`predict` returns placeholder scores instead of raising.
    """

    # Number of most recent raw predictions averaged by predict().
    SMOOTHING_WINDOW = 5

    def __init__(self):
        """Initialise labels and the smoothing buffer, then try to load the model."""
        # ONNX Runtime inference session; None while no model is available.
        self.session = None
        self.labels = ['neutral', 'happy', 'surprise', 'sad', 'angry', 'disgust', 'fear', 'contempt']
        # Recent raw model outputs, newest last (at most SMOOTHING_WINDOW kept).
        self.emotion_buffer = []
        self.load_model()

    def download_model(self):
        """Download the model file, trying each URL in ``MODEL_URLS`` in order.

        The payload is streamed into a temporary sibling file and only moved
        to ``MODEL_PATH`` after the transfer finished, so an interrupted
        download never leaves a truncated file where ``load_model`` would
        find it and skip re-downloading.

        Returns:
            bool: True once a download succeeded, False if every URL failed.
        """
        tmp_path = MODEL_PATH + ".part"
        for url in MODEL_URLS:
            try:
                print(f"Attempting to download model from: {url}")
                # The context manager releases the streamed connection even
                # when the transfer is aborted part-way through.
                with requests.get(url, stream=True, timeout=30) as response:
                    response.raise_for_status()
                    with open(tmp_path, "wb") as f:
                        for chunk in response.iter_content(chunk_size=8192):
                            if chunk:
                                f.write(chunk)

                if os.path.getsize(tmp_path) == 0:
                    raise ValueError("Downloaded file is empty")

                # Atomic promotion: MODEL_PATH only ever holds a complete file.
                os.replace(tmp_path, MODEL_PATH)
                print(f"Successfully downloaded model from {url}")
                return True
            except Exception as e:
                # Best effort: report the failure and fall through to the next URL.
                print(f"Download attempt failed from {url}: {str(e)}")
                try:
                    os.remove(tmp_path)
                except OSError:
                    pass  # nothing was written, or it is already gone

        return False

    def load_model(self):
        """Create the inference session, downloading the model first if needed.

        On any failure a message is printed and ``self.session`` is left as
        ``None``; no exception propagates to the caller.
        """
        if not os.path.exists(MODEL_PATH):
            if not self.download_model():
                print("Warning: Could not download emotion model. Using simple face detection only.")
                self.session = None
                return

        try:
            so = ort.SessionOptions()
            so.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
            self.session = ort.InferenceSession(MODEL_PATH, so)
            print("Emotion model loaded successfully")
        except Exception as e:
            print(f"Failed to load ONNX model: {str(e)}")
            self.session = None

    def softmax(self, x):
        """Return the softmax of ``x`` computed over all of its elements."""
        # Subtracting the maximum keeps np.exp from overflowing.
        e_x = np.exp(x - np.max(x))
        return e_x / e_x.sum()

    def predict(self, frame):
        """Return emotion scores for ``frame``, smoothed over recent calls.

        Args:
            frame: Input fed to the model under the name ``'Input3'``
                (presumably a float32 array shaped (1, 1, 64, 64) -- TODO
                confirm against the preprocessing code).

        Returns:
            With a loaded model: an ndarray of shape (1, 8) holding the
            softmax of the mean of the last ``SMOOTHING_WINDOW`` raw outputs.
            Without a model, or when inference fails: a one-element list
            wrapping a (1, 8) array of placeholder scores dominated by the
            first label.

        NOTE(review): the fallback paths return a list while the normal path
        returns a bare array, and the placeholder scores do not sum to 1.
        Both are kept as-is because the callers are not visible here; confirm
        what they index before unifying.
        """
        if self.session is None:
            # Placeholder scores with slight jitter when no model is available.
            base = np.array([0.7] + [0.1] * 7)
            variation = np.random.normal(0, 0.01, size=8)
            return [np.clip(base + variation, 0, 1).reshape(1, -1)]

        try:
            raw_prediction = self.session.run(None, {'Input3': frame})[0][0]
            self.emotion_buffer.append(raw_prediction)

            # Keep only the most recent predictions for temporal smoothing.
            if len(self.emotion_buffer) > self.SMOOTHING_WINDOW:
                self.emotion_buffer = self.emotion_buffer[-self.SMOOTHING_WINDOW:]

            smoothed_probs = np.mean(self.emotion_buffer, axis=0)
            return self.softmax(smoothed_probs).reshape(1, -1)
        except Exception as e:
            print(f"Prediction error: {str(e)}")
            return [np.array([[0.8] + [0.1] * 7])]  # Mostly neutral fallback
|
90 |
|
91 |
class VoiceEmotionClassifier:
|
92 |
def __init__(self):
|
93 |
try:
|
|
|
94 |
if os.path.exists(VOICE_MODEL_PATH) and os.path.exists(VOICE_SCALER_PATH):
|
95 |
self.model = joblib.load(VOICE_MODEL_PATH)
|
96 |
self.scaler = joblib.load(VOICE_SCALER_PATH)
|
|
|
103 |
print("Using limited rule-based voice analysis")
|
104 |
self.model = None
|
105 |
self.scaler = StandardScaler()
|
|
|
106 |
dummy_features = np.random.randn(100, 18)
|
107 |
self.scaler.fit(dummy_features)
|
108 |
self.labels = ['neutral', 'happy', 'sad', 'angry', 'fear']
|
|
|
112 |
y, sr = audio
|
113 |
features = []
|
114 |
|
115 |
+
if len(y.shape) > 1:
|
116 |
y = np.mean(y, axis=0)
|
117 |
|
118 |
+
if sr != 16000:
|
119 |
y = librosa.resample(y, orig_sr=sr, target_sr=16000)
|
120 |
sr = 16000
|
121 |
|
|
|
122 |
mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
|
123 |
features.extend(np.mean(mfccs, axis=1))
|
124 |
features.extend(np.std(mfccs, axis=1))
|
125 |
|
|
|
126 |
pitches = librosa.yin(y, fmin=80, fmax=400)
|
127 |
features.append(np.nanmean(pitches))
|
128 |
features.append(np.nanstd(pitches))
|
129 |
|
|
|
130 |
spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
|
131 |
features.append(np.mean(spectral_centroid))
|
132 |
|
|
|
145 |
emotion = self.labels[np.argmax(probs)]
|
146 |
details = [{"label": l, "score": p} for l, p in zip(self.labels, probs)]
|
147 |
else:
|
|
|
148 |
if features[0, 0] > 1.0:
|
149 |
emotion = "happy"
|
150 |
details = [{"label": "happy", "score": 0.8}]
|
|
|
167 |
emotion_model = EmotionModel()
|
168 |
voice_classifier = VoiceEmotionClassifier()
|
169 |
|
170 |
+
|
171 |
# Global variables to store results
|
172 |
emotion_history = []
|
173 |
current_emotions = {"face": "neutral", "voice": "neutral"}
|