yunusajib committed
Commit 5eff629 · verified · 1 Parent(s): 655839f

Modify some app features

Files changed (1)
  1. app.py +48 -91
app.py CHANGED
@@ -4,27 +4,70 @@ import cv2
 import pandas as pd
 from datetime import datetime
 import time
-from transformers import pipeline
 import librosa
 from python_speech_features import mfcc
 import onnxruntime as ort
 import requests
 import os
+from sklearn.preprocessing import StandardScaler
+import joblib
 
 # Download emotion recognition ONNX model
 MODEL_URL = "https://github.com/onnx/models/raw/main/vision/body_analysis/emotion_ferplus/model/emotion-ferplus-8.onnx"
 MODEL_PATH = "emotion-ferplus-8.onnx"
 
 if not os.path.exists(MODEL_PATH):
+    print("Downloading emotion recognition model...")
     response = requests.get(MODEL_URL)
     with open(MODEL_PATH, "wb") as f:
         f.write(response.content)
 
-# Initialize models
-voice_classifier = pipeline("audio-classification", model="superb/hubert-base-superb-er")
+# Initialize face emotion detection
 emotion_session = ort.InferenceSession(MODEL_PATH)
 emotion_labels = ['neutral', 'happy', 'surprise', 'sad', 'angry', 'disgust', 'fear', 'contempt']
 
+# Simple voice emotion classifier (replace with your own trained model if needed)
+class VoiceEmotionClassifier:
+    def __init__(self):
+        self.scaler = StandardScaler()
+
+    def extract_features(self, audio):
+        sr, y = audio
+        y = y.astype(np.float32)
+
+        # Convert to mono if stereo
+        if len(y.shape) > 1:
+            y = np.mean(y, axis=0)
+
+        # Resample to 16kHz if needed
+        if sr != 16000:
+            y = librosa.resample(y, orig_sr=sr, target_sr=16000)
+            sr = 16000
+
+        # Extract MFCC features
+        mfcc_features = mfcc(y, sr, numcep=13)
+        return np.mean(mfcc_features, axis=0)
+
+    def predict(self, audio):
+        try:
+            features = self.extract_features(audio).reshape(1, -1)
+            features = self.scaler.transform(features)
+
+            # Simple rule-based classifier (replace with actual trained model)
+            # This is just a placeholder - you should train a proper model
+            if features[0, 0] > 0.5:
+                return "happy", [{"label": "happy", "score": 0.8}]
+            elif features[0, 0] < -0.5:
+                return "sad", [{"label": "sad", "score": 0.7}]
+            else:
+                return "neutral", [{"label": "neutral", "score": 0.9}]
+        except Exception as e:
+            print(f"Voice analysis error: {e}")
+            return "neutral", [{"label": "neutral", "score": 1.0}]
+
+# Initialize models
+voice_classifier = VoiceEmotionClassifier()
+
 # Global variables to store results
 emotion_history = []
 current_emotions = {"face": "neutral", "voice": "neutral"}
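Note on this hunk: the new VoiceEmotionClassifier calls self.scaler.transform() on a StandardScaler that is never fitted, so predict() will raise inside the try block and fall back to "neutral" as committed, and the newly added joblib import does not appear to be used anywhere in this diff. A minimal sketch of how a fitted scaler and classifier trained offline on the same 13-dimensional MFCC-mean features could be plugged in; the file names and the joblib-based loading are assumptions, not part of this commit:

# Sketch only (not part of this commit). Assumes the files below exist and were
# produced by an offline training run on MFCC-mean features.
import joblib
import numpy as np

class TrainedVoiceEmotionClassifier(VoiceEmotionClassifier):
    def __init__(self, scaler_path="voice_scaler.joblib", model_path="voice_model.joblib"):
        super().__init__()
        self.scaler = joblib.load(scaler_path)  # fitted sklearn StandardScaler
        self.model = joblib.load(model_path)    # e.g. a fitted LogisticRegression

    def predict(self, audio):
        try:
            features = self.extract_features(audio).reshape(1, -1)
            features = self.scaler.transform(features)
            label = str(self.model.predict(features)[0])
            score = float(np.max(self.model.predict_proba(features)))
            return label, [{"label": label, "score": score}]
        except Exception as e:
            print(f"Voice analysis error: {e}")
            return "neutral", [{"label": "neutral", "score": 1.0}]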
@@ -66,26 +109,7 @@ def analyze_face(frame):
 
 def analyze_voice(audio):
     """Analyze voice tone from audio"""
-    try:
-        sr, y = audio
-        y = y.astype(np.float32)
-
-        # Convert to mono if stereo
-        if len(y.shape) > 1:
-            y = np.mean(y, axis=0)
-
-        # Resample to 16kHz if needed
-        if sr != 16000:
-            y = librosa.resample(y, orig_sr=sr, target_sr=16000)
-            sr = 16000
-
-        # Classify emotion
-        result = voice_classifier({"sampling_rate": sr, "raw": y})
-        dominant_emotion = result[0]['label']
-        return dominant_emotion, result
-    except Exception as e:
-        print(f"Voice analysis error: {e}")
-        return "neutral", [{"label": "neutral", "score": 1.0}]
+    return voice_classifier.predict(audio)
 
 def update_emotion_history(face_emotion, voice_emotion):
     """Update the emotion history and current emotions"""
@@ -167,71 +191,4 @@ def process_input(video, audio):
         voice_emotion, voice_details = "neutral", {}
 
         # Update history and get outputs
-        update_emotion_history(face_emotion, voice_emotion)
-        timeline_df = get_emotion_timeline()
-        advice = get_practitioner_advice(face_emotion, voice_emotion)
-
-        # Prepare outputs
-        outputs = {
-            "current_face": face_emotion,
-            "current_voice": voice_emotion,
-            "timeline": timeline_df,
-            "advice": advice,
-            "face_details": str(face_details),
-            "voice_details": str(voice_details)
-        }
-
-        return outputs
-    except Exception as e:
-        print(f"Processing error: {e}")
-        return {
-            "current_face": "Error",
-            "current_voice": "Error",
-            "timeline": pd.DataFrame(),
-            "advice": "System error occurred",
-            "face_details": "",
-            "voice_details": ""
-        }
-
-# Gradio interface
-with gr.Blocks(title="Patient Emotion Recognition", theme="soft") as demo:
-    gr.Markdown("# Real-Time Patient Emotion Recognition")
-    gr.Markdown("Analyze facial expressions and voice tone during medical consultations")
-
-    with gr.Row():
-        with gr.Column():
-            video_input = gr.Image(label="Live Camera Feed", source="webcam", streaming=True)
-            audio_input = gr.Audio(label="Voice Input", source="microphone", type="numpy")
-            submit_btn = gr.Button("Analyze Emotions")
-
-        with gr.Column():
-            current_face = gr.Textbox(label="Current Facial Emotion")
-            current_voice = gr.Textbox(label="Current Voice Emotion")
-            advice_output = gr.Textbox(label="Practitioner Suggestions", lines=3)
-            timeline_output = gr.Dataframe(label="Emotion Timeline", interactive=False)
-            face_details = gr.Textbox(label="Face Analysis Details", visible=False)
-            voice_details = gr.Textbox(label="Voice Analysis Details", visible=False)
-
-    # Live processing
-    video_input.change(
-        process_input,
-        inputs=[video_input, audio_input],
-        outputs=[current_face, current_voice, timeline_output, advice_output, face_details, voice_details],
-        show_progress="hidden"
-    )
-
-    audio_input.change(
-        process_input,
-        inputs=[video_input, audio_input],
-        outputs=[current_face, current_voice, timeline_output, advice_output, face_details, voice_details],
-        show_progress="hidden"
-    )
-
-    submit_btn.click(
-        process_input,
-        inputs=[video_input, audio_input],
-        outputs=[current_face, current_voice, timeline_output, advice_output, face_details, voice_details]
-    )
-
-if __name__ == "__main__":
-    demo.launch(debug=True)
+        update_em
 