Modify some app features
app.py
CHANGED
@@ -4,27 +4,70 @@ import cv2
 import pandas as pd
 from datetime import datetime
 import time
-from transformers import pipeline
 import librosa
 from python_speech_features import mfcc
 import onnxruntime as ort
 import requests
 import os
+from sklearn.preprocessing import StandardScaler
+import joblib
 
 # Download emotion recognition ONNX model
 MODEL_URL = "https://github.com/onnx/models/raw/main/vision/body_analysis/emotion_ferplus/model/emotion-ferplus-8.onnx"
 MODEL_PATH = "emotion-ferplus-8.onnx"
 
 if not os.path.exists(MODEL_PATH):
+    print("Downloading emotion recognition model...")
     response = requests.get(MODEL_URL)
     with open(MODEL_PATH, "wb") as f:
         f.write(response.content)
 
-# Initialize
-voice_classifier = pipeline("audio-classification", model="superb/hubert-base-superb-er")
+# Initialize face emotion detection
 emotion_session = ort.InferenceSession(MODEL_PATH)
 emotion_labels = ['neutral', 'happy', 'surprise', 'sad', 'angry', 'disgust', 'fear', 'contempt']
 
+# Simple voice emotion classifier (replace with your own trained model if needed)
+class VoiceEmotionClassifier:
+    def __init__(self):
+        self.scaler = StandardScaler()
+
+    def extract_features(self, audio):
+        sr, y = audio
+        y = y.astype(np.float32)
+
+        # Convert to mono if stereo
+        if len(y.shape) > 1:
+            y = np.mean(y, axis=0)
+
+        # Resample to 16kHz if needed
+        if sr != 16000:
+            y = librosa.resample(y, orig_sr=sr, target_sr=16000)
+            sr = 16000
+
+        # Extract MFCC features
+        mfcc_features = mfcc(y, sr, numcep=13)
+        return np.mean(mfcc_features, axis=0)
+
+    def predict(self, audio):
+        try:
+            features = self.extract_features(audio).reshape(1, -1)
+            features = self.scaler.transform(features)
+
+            # Simple rule-based classifier (replace with actual trained model)
+            # This is just a placeholder - you should train a proper model
+            if features[0, 0] > 0.5:
+                return "happy", [{"label": "happy", "score": 0.8}]
+            elif features[0, 0] < -0.5:
+                return "sad", [{"label": "sad", "score": 0.7}]
+            else:
+                return "neutral", [{"label": "neutral", "score": 0.9}]
+        except Exception as e:
+            print(f"Voice analysis error: {e}")
+            return "neutral", [{"label": "neutral", "score": 1.0}]
+
+# Initialize models
+voice_classifier = VoiceEmotionClassifier()
+
 # Global variables to store results
 emotion_history = []
 current_emotions = {"face": "neutral", "voice": "neutral"}
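Note on the `VoiceEmotionClassifier` added above: `predict()` calls `self.scaler.transform(features)`, but the `StandardScaler` created in `__init__` is never fitted, so scikit-learn raises `NotFittedError` on the first call; the `except` branch then swallows it and every prediction comes back "neutral". Since `joblib` is now imported, the likely intent is to load a scaler that was fitted offline. A minimal sketch, assuming a hypothetical `voice_scaler.pkl` artifact (the filename and the pass-through fallback are not part of this commit):

import os
import numpy as np
import joblib

SCALER_PATH = "voice_scaler.pkl"  # hypothetical: saved offline via joblib.dump(fitted_scaler, SCALER_PATH)

def scale_features(features, path=SCALER_PATH):
    """Apply a previously fitted scaler if one exists; otherwise pass features through unscaled."""
    if os.path.exists(path):
        scaler = joblib.load(path)         # a StandardScaler fitted on training MFCCs
        return scaler.transform(features)  # safe: mean_ and scale_ are already set
    return features                        # fallback so predict() still gets usable values

# Example with 13 MFCC means shaped (1, 13), as in predict()
print(scale_features(np.zeros((1, 13), dtype=np.float32)))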
@@ -66,26 +109,7 @@ def analyze_face(frame):
 
 def analyze_voice(audio):
     """Analyze voice tone from audio"""
-    try:
-        sr, y = audio
-        y = y.astype(np.float32)
-
-        # Convert to mono if stereo
-        if len(y.shape) > 1:
-            y = np.mean(y, axis=0)
-
-        # Resample to 16kHz if needed
-        if sr != 16000:
-            y = librosa.resample(y, orig_sr=sr, target_sr=16000)
-            sr = 16000
-
-        # Classify emotion
-        result = voice_classifier({"sampling_rate": sr, "raw": y})
-        dominant_emotion = result[0]['label']
-        return dominant_emotion, result
-    except Exception as e:
-        print(f"Voice analysis error: {e}")
-        return "neutral", [{"label": "neutral", "score": 1.0}]
+    return voice_classifier.predict(audio)
 
 def update_emotion_history(face_emotion, voice_emotion):
     """Update the emotion history and current emotions"""
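With this hunk, `analyze_voice` is a thin wrapper over `voice_classifier.predict`. A quick way to exercise the new path outside the UI: `gr.Audio(type="numpy")` hands the handler a `(sample_rate, samples)` tuple, which is exactly what `extract_features` unpacks. A smoke-test sketch (assumes the definitions from this `app.py` are in scope; the sine tone is a stand-in for microphone input):

import numpy as np

# One second of a 440 Hz tone in place of real microphone audio
sr = 44100
t = np.linspace(0.0, 1.0, sr, endpoint=False)
samples = (0.1 * np.sin(2 * np.pi * 440.0 * t)).astype(np.float32)

emotion, details = analyze_voice((sr, samples))  # same tuple shape Gradio passes
print(emotion, details)  # stays "neutral" until the unfitted-scaler issue above is fixed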
@@ -167,71 +191,4 @@ def process_input(video, audio):
         voice_emotion, voice_details = "neutral", {}
 
         # Update history and get outputs
-        update_emotion_history(face_emotion, voice_emotion)
-        timeline_df = get_emotion_timeline()
-        advice = get_practitioner_advice(face_emotion, voice_emotion)
-
-        # Prepare outputs
-        outputs = {
-            "current_face": face_emotion,
-            "current_voice": voice_emotion,
-            "timeline": timeline_df,
-            "advice": advice,
-            "face_details": str(face_details),
-            "voice_details": str(voice_details)
-        }
-
-        return outputs
-    except Exception as e:
-        print(f"Processing error: {e}")
-        return {
-            "current_face": "Error",
-            "current_voice": "Error",
-            "timeline": pd.DataFrame(),
-            "advice": "System error occurred",
-            "face_details": "",
-            "voice_details": ""
-        }
-
-# Gradio interface
-with gr.Blocks(title="Patient Emotion Recognition", theme="soft") as demo:
-    gr.Markdown("# Real-Time Patient Emotion Recognition")
-    gr.Markdown("Analyze facial expressions and voice tone during medical consultations")
-
-    with gr.Row():
-        with gr.Column():
-            video_input = gr.Image(label="Live Camera Feed", source="webcam", streaming=True)
-            audio_input = gr.Audio(label="Voice Input", source="microphone", type="numpy")
-            submit_btn = gr.Button("Analyze Emotions")
-
-        with gr.Column():
-            current_face = gr.Textbox(label="Current Facial Emotion")
-            current_voice = gr.Textbox(label="Current Voice Emotion")
-            advice_output = gr.Textbox(label="Practitioner Suggestions", lines=3)
-            timeline_output = gr.Dataframe(label="Emotion Timeline", interactive=False)
-            face_details = gr.Textbox(label="Face Analysis Details", visible=False)
-            voice_details = gr.Textbox(label="Voice Analysis Details", visible=False)
-
-    # Live processing
-    video_input.change(
-        process_input,
-        inputs=[video_input, audio_input],
-        outputs=[current_face, current_voice, timeline_output, advice_output, face_details, voice_details],
-        show_progress="hidden"
-    )
-
-    audio_input.change(
-        process_input,
-        inputs=[video_input, audio_input],
-        outputs=[current_face, current_voice, timeline_output, advice_output, face_details, voice_details],
-        show_progress="hidden"
-    )
-
-    submit_btn.click(
-        process_input,
-        inputs=[video_input, audio_input],
-        outputs=[current_face, current_voice, timeline_output, advice_output, face_details, voice_details]
-    )
-
-if __name__ == "__main__":
-    demo.launch(debug=True)
+        update_em
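The hunk header `@@ -167,71 +191,4 @@` is worth flagging: the new file ends at line 194 with the bare token `update_em`, so the upload appears to have been cut off mid-line. The rest of `process_input`, its `except` handler, the entire `gr.Blocks` interface, and `demo.launch` are deleted without replacement, apparently leaving the function's `try:` block with no matching `except`, which would make the file fail to parse at all. Judging from the comment on line 193 and the deleted side of this hunk, the truncated line was presumably meant to be the history update (an assumption; the commit itself ends here):

# Likely continuation, reconstructed from the deleted lines above, not from the commit:
update_emotion_history(face_emotion, voice_emotion)
timeline_df = get_emotion_timeline()
advice = get_practitioner_advice(face_emotion, voice_emotion)
# ...plus the remainder of the deleted block (outputs dict, Gradio UI, demo.launch)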