yunusajib committed on
Commit cf17ab8 · verified · 1 Parent(s): d287980

app changes

Files changed (1)
  1. app.py +135 -239
app.py CHANGED
@@ -1,268 +1,164 @@
  import gradio as gr
  import numpy as np
- import pandas as pd
- import plotly.graph_objects as go
- import plotly.express as px
- from datetime import datetime, timedelta
- import threading
- import queue
- import time
- from collections import deque
- import warnings
  import traceback
- warnings.filterwarnings("ignore")

- # Audio processing imports with fallbacks
- AUDIO_AVAILABLE = True
- try:
-     import soundfile as sf
-     import librosa
-     LIBROSA_AVAILABLE = True
- except ImportError:
-     LIBROSA_AVAILABLE = False
-     print("Librosa not available - using basic audio processing")
-
- # Image processing imports with fallbacks
- CV2_AVAILABLE = True
- try:
-     import cv2
- except ImportError:
-     CV2_AVAILABLE = False
-     print("OpenCV not available - using PIL for image processing")
-
- try:
-     from PIL import Image, ImageDraw, ImageFont
-     PIL_AVAILABLE = True
- except ImportError:
-     PIL_AVAILABLE = False
-     print("PIL not available - limited image processing")
-
- # AI model imports with fallbacks
- HF_AVAILABLE = True
- try:
-     from transformers import pipeline
-     import torch
- except ImportError:
-     HF_AVAILABLE = False
-     print("Transformers not available - using mock emotion detection")
-
- class EmotionRecognitionSystem:
      def __init__(self):
-         self.emotion_history = deque(maxlen=100)
-         self.audio_queue = queue.Queue()
-         self.video_queue = queue.Queue()
-         self.setup_models()
-
-         self.alert_thresholds = {
-             'stress': 0.7,
-             'anxiety': 0.6,
-             'pain': 0.8,
-             'confusion': 0.5
-         }

-     def setup_models(self):
-         """Initialize emotion recognition models with better error handling"""
-         self.models_loaded = False
-
-         if not HF_AVAILABLE:
-             print("Skipping model loading - transformers not available")
-             return
-
          try:
-             # Facial emotion recognition
-             self.face_emotion_pipeline = pipeline(
-                 "image-classification",
-                 model="j-hartmann/emotion-english-distilroberta-base",
-                 device=0 if torch.cuda.is_available() else -1
-             )
-
-             # Audio emotion recognition
-             self.audio_emotion_pipeline = pipeline(
-                 "audio-classification",
-                 model="ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition",
-                 device=0 if torch.cuda.is_available() else -1
-             )
-             self.models_loaded = True
-         except Exception as e:
-             print(f"Error loading models: {e}")
-             print(traceback.format_exc())
-             self.models_loaded = False
-
-     def validate_audio_input(self, audio_data):
-         """Validate and standardize audio input format"""
-         if audio_data is None:
-             return None

          try:
-             # Handle different audio input formats
-             if isinstance(audio_data, tuple):
-                 audio_array, sample_rate = audio_data
-             else:
-                 # Try to read audio file if not in tuple format
-                 if isinstance(audio_data, str):
-                     if LIBROSA_AVAILABLE:
-                         audio_array, sample_rate = librosa.load(audio_data, sr=None)
-                     else:
-                         # Fallback for when librosa is not available
-                         import wave
-                         with wave.open(audio_data, 'rb') as wf:
-                             sample_rate = wf.getframerate()
-                             audio_array = np.frombuffer(wf.readframes(wf.getnframes()), dtype=np.int16)
-                             audio_array = audio_array.astype(np.float32) / 32768.0
-                 else:
-                     return None
-
-             # Resample if needed
-             target_rate = 16000
-             if sample_rate != target_rate:
-                 if LIBROSA_AVAILABLE:
-                     audio_array = librosa.resample(audio_array, orig_sr=sample_rate, target_sr=target_rate)
-                 else:
-                     # Simple downsampling fallback
-                     step = int(sample_rate / target_rate)
-                     if step > 1:
-                         audio_array = audio_array[::step]
-                 sample_rate = target_rate

-             return (audio_array, sample_rate)

-         except Exception as e:
-             print(f"Audio validation error: {e}")
-             return None

-     def detect_face_emotion(self, frame):
-         """Detect emotions from facial expressions with better error handling"""
-         if not self.models_loaded:
-             # Mock emotion detection for demo
-             emotions = ['neutral', 'happy', 'sad', 'angry', 'fear', 'surprise', 'disgust']
-             scores = np.random.dirichlet(np.ones(len(emotions)))
-             return dict(zip(emotions, scores))
-
-         try:
-             # Convert frame to RGB format
-             if isinstance(frame, np.ndarray):
-                 if len(frame.shape) == 3:
-                     if frame.shape[2] == 4:  # RGBA
-                         rgb_frame = frame[:, :, :3]
-                     elif frame.shape[2] == 3:  # BGR or RGB?
-                         if CV2_AVAILABLE:
-                             rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-                         else:
-                             rgb_frame = frame[:, :, ::-1]  # Simple BGR to RGB
-                     else:
-                         rgb_frame = frame
                  else:
-                     # Grayscale to RGB
-                     if CV2_AVAILABLE:
-                         rgb_frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)
-                     else:
-                         rgb_frame = np.stack((frame,)*3, axis=-1)
-             else:
-                 rgb_frame = frame
-
-             # Use face emotion model
-             results = self.face_emotion_pipeline(rgb_frame)
-
-             # Convert to standardized format
-             emotion_scores = {}
-             for result in results:
-                 emotion_scores[result['label'].lower()] = result['score']
-
-             return emotion_scores
-
-         except Exception as e:
-             print(f"Face emotion detection error: {e}")
-             return {'neutral': 1.0}

-     def detect_voice_emotion(self, audio_data):
-         """Detect emotions from voice tone with better audio handling"""
-         if not self.models_loaded or audio_data is None:
-             # Mock emotion detection
-             emotions = ['neutral', 'happy', 'sad', 'angry', 'fear']
-             scores = np.random.dirichlet(np.ones(len(emotions)))
-             return dict(zip(emotions, scores))

          try:
-             # Validate and standardize audio input
-             validated_audio = self.validate_audio_input(audio_data)
-             if validated_audio is None:
-                 return {'neutral': 1.0}
-
-             audio_array, sample_rate = validated_audio
-
-             # Process audio with the model
-             results = self.audio_emotion_pipeline({
-                 "array": audio_array,
-                 "sampling_rate": sample_rate
-             })
-
-             emotion_scores = {}
-             for result in results:
-                 emotion_scores[result['label'].lower()] = result['score']

-             return emotion_scores

          except Exception as e:
-             print(f"Voice emotion detection error: {e}")
-             return {'neutral': 1.0}

-     # [Rest of your existing methods...]
-
- def process_video_audio(video_frame, audio_data):
-     """Process video frame and audio data with better error handling"""
-     if video_frame is None:
-         return None, "No video input", "", ""
-
-     try:
-         # Process the frame
-         validated_audio = emotion_system.validate_audio_input(audio_data)
-
-         # Get emotion analysis
-         emotion_record = emotion_system.process_frame(
-             video_frame,
-             validated_audio[0] if validated_audio else None,
-             validated_audio[1] if validated_audio else 16000
-         )
-
-         # Create visualization
-         annotated_frame = create_emotion_overlay(video_frame, emotion_record)

-         # Format results
-         clinical_text = format_clinical_emotions(emotion_record['clinical_emotions'])
-         alerts_text = "\n".join(emotion_record['alerts']) if emotion_record['alerts'] else "No alerts"
-         suggestions_text = "\n".join(emotion_record['suggestions']) if emotion_record['suggestions'] else "No suggestions"

-         return annotated_frame, clinical_text, alerts_text, suggestions_text
-
-     except Exception as e:
-         print(f"Processing error: {e}")
-         traceback.print_exc()
-         return video_frame, "Processing error", "System error", "Please try again"
-
- # [Rest of your existing functions...]
-
- def create_interface():
-     with gr.Blocks(title="Patient Emotion Recognition System", theme=gr.themes.Soft()) as demo:
-         # [Your existing interface code...]

-         # Add audio format info
          gr.Markdown("""
-         ### 🔊 Audio Input Notes:
-         - System works best with clear microphone input
-         - If you get audio errors, try:
-           - Checking microphone permissions
-           - Reducing background noise
-           - Using a different microphone
          """)

          return demo

  if __name__ == "__main__":
-     emotion_system = EmotionRecognitionSystem()
-     demo = create_interface()
-     demo.launch(
-         share=True,
-         server_name="0.0.0.0",
-         server_port=7860,
-         show_error=True
-     )

  import gradio as gr
  import numpy as np
+ from datetime import datetime
  import traceback
+ import sounddevice as sd  # Alternative audio backend
+ import tempfile
+ import os

+ # Enhanced Audio Processor Class
+ class AudioProcessor:
      def __init__(self):
+         self.sample_rate = 16000
+         self.available_backends = self.detect_audio_backends()

+     def detect_audio_backends(self):
+         backends = []
+         # Test FFmpeg
          try:
+             import ffmpeg
+             backends.append('ffmpeg')
+         except:
+             pass

+         # Test SoundDevice
          try:
+             sd.check_input_settings()
+             backends.append('sounddevice')
+         except:
+             pass

+         # Test Librosa
+         try:
+             import librosa
+             backends.append('librosa')
+         except:
+             pass

+         return backends or ['numpy_fallback']

+     def process_audio(self, audio_input):
+         for backend in self.available_backends:
+             try:
+                 if backend == 'ffmpeg':
+                     return self._process_with_ffmpeg(audio_input)
+                 elif backend == 'sounddevice':
+                     return self._process_with_sounddevice(audio_input)
+                 elif backend == 'librosa':
+                     return self._process_with_librosa(audio_input)
                  else:
+                     return self._process_fallback(audio_input)
+             except Exception as e:
+                 print(f"Failed with {backend}: {str(e)}")
+                 continue
+
+         raise Exception("All audio backends failed")

+     def _process_with_ffmpeg(self, audio_input):
+         # Your existing FFmpeg processing
+         raise NotImplementedError("FFmpeg path not implemented; fall through to the next backend")

+     def _process_with_sounddevice(self, audio_input):
+         # Process using sounddevice
+         duration = 5  # seconds
+         print(f"Recording with sounddevice (rate={self.sample_rate})...")
+         audio_data = sd.rec(int(duration * self.sample_rate),
+                             samplerate=self.sample_rate,
+                             channels=1)
+         sd.wait()
+         return (audio_data.flatten(), self.sample_rate)
+
+     def _process_with_librosa(self, audio_input):
+         # Process using librosa
+         import librosa
+         if isinstance(audio_input, tuple):
+             return audio_input
+         elif isinstance(audio_input, str):
+             return librosa.load(audio_input, sr=self.sample_rate)
+         else:
+             # Handle other input types
+             with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp:
+                 tmp.write(audio_input)
+                 tmp.flush()
+                 data, sr = librosa.load(tmp.name, sr=self.sample_rate)
+                 os.unlink(tmp.name)
+                 return (data, sr)
+
+     def _process_fallback(self, audio_input):
+         # Simple numpy fallback
+         if isinstance(audio_input, tuple):
+             return audio_input
+         return (np.random.random(16000), 16000)  # Mock data
+
+ # Modified Interface with Audio Debugging
+ def create_debug_interface():
+     audio_processor = AudioProcessor()
+
+     def process_audio_debug(audio):
          try:
+             processed = audio_processor.process_audio(audio)
+             waveform = processed[0]
+             sr = processed[1]

+             # Create debug info
+             debug_info = [
+                 f"Audio Backends Available: {', '.join(audio_processor.available_backends)}",
+                 f"Sample Rate: {sr} Hz",
+                 f"Audio Length: {len(waveform)/sr:.2f} seconds",
+                 f"Max Amplitude: {np.max(np.abs(waveform)):.4f}",
+                 f"Processing Time: {datetime.now().strftime('%H:%M:%S')}"
+             ]

+             return (
+                 audio,
+                 "\n".join(debug_info),
+                 "✅ Successfully processed audio"
+             )
          except Exception as e:
+             return (
+                 None,
+                 traceback.format_exc(),
+                 f"❌ Error: {str(e)}"
+             )

+     with gr.Blocks() as demo:
+         gr.Markdown("## 🎤 Audio Debugging Interface")

+         with gr.Row():
+             with gr.Column():
+                 mic_input = gr.Audio(sources=["microphone"], type="filepath", label="Microphone Input")
+                 upload_input = gr.Audio(sources=["upload"], type="filepath", label="File Upload")
+                 test_button = gr.Button("Test Audio Processing")
+
+             with gr.Column():
+                 audio_output = gr.Audio(label="Processed Audio")
+                 debug_output = gr.Textbox(label="Debug Information", lines=8)
+                 status_output = gr.Textbox(label="Processing Status")

+         test_button.click(
+             fn=process_audio_debug,
+             inputs=[mic_input],
+             outputs=[audio_output, debug_output, status_output]
+         )

+         gr.Markdown("### Troubleshooting Tips")
          gr.Markdown("""
+         1. **Check Physical Connections**:
+            - Ensure headphones/mic are properly plugged in
+            - Try different USB ports if using USB headphones
+
+         2. **System Settings**:
+            - Make sure your headphones are set as default input device
+            - Check input volume levels
+
+         3. **Browser Permissions**:
+            - Refresh the page and allow microphone access when prompted
+            - Check browser settings if prompt doesn't appear
          """)

          return demo

  if __name__ == "__main__":
+     # First run the debug interface
+     debug_interface = create_debug_interface()
+     debug_interface.launch()
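
Note: the new `_process_with_ffmpeg` method is committed as a placeholder. A minimal sketch of what that path could do, assuming the ffmpeg-python package is installed and the input is a file path; the helper name and parameters below are illustrative only, not part of this commit:

import numpy as np
import ffmpeg  # ffmpeg-python, assumed available in the environment

def decode_with_ffmpeg(path, sample_rate=16000):
    # Decode any audio file to mono 16 kHz signed 16-bit PCM on stdout,
    # then convert to a float32 array in [-1, 1] to match the other backends.
    out, _ = (
        ffmpeg.input(path)
        .output("pipe:", format="s16le", acodec="pcm_s16le", ac=1, ar=sample_rate)
        .run(capture_stdout=True, capture_stderr=True)
    )
    audio = np.frombuffer(out, np.int16).astype(np.float32) / 32768.0
    return (audio, sample_rate)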
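For a quick local check of the new AudioProcessor backend chain without launching Gradio, something like the sketch below can be used. It assumes the file is saved as app.py and importable, and that the librosa or numpy-fallback path handles the input (the sounddevice path records from the microphone and ignores its argument); all names here are illustrative:

import numpy as np
from app import AudioProcessor  # assumed import path

processor = AudioProcessor()
print("Detected backends:", processor.available_backends)

# A one-second 440 Hz tone passed as an (array, sample_rate) tuple; the librosa
# and numpy-fallback paths return such tuples unchanged.
t = np.linspace(0, 1.0, 16000, endpoint=False)
tone = 0.1 * np.sin(2 * np.pi * 440.0 * t).astype(np.float32)
waveform, sr = processor.process_audio((tone, 16000))
print(f"Processed {len(waveform) / sr:.2f} s of audio at {sr} Hz")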