Spaces:

yunusajib
/

Real-Time-Emotional-Detection

Sleeping

App Files Files Community

yunusajib committed on Jun 11

Commit

cf17ab8

verified ·

1 Parent(s): d287980

app changes

Browse files

Files changed (1) hide show

app.py +135 -239

app.py CHANGED Viewed

@@ -1,268 +1,164 @@
 import gradio as gr
 import numpy as np
-import pandas as pd
-import plotly.graph_objects as go
-import plotly.express as px
-from datetime import datetime, timedelta
-import threading
-import queue
-import time
-from collections import deque
-import warnings
 import traceback
-warnings.filterwarnings("ignore")
-# Audio processing imports with fallbacks
-AUDIO_AVAILABLE = True
-try:
-    import soundfile as sf
-    import librosa
-    LIBROSA_AVAILABLE = True
-except ImportError:
-    LIBROSA_AVAILABLE = False
-    print("Librosa not available - using basic audio processing")
-# Image processing imports with fallbacks
-CV2_AVAILABLE = True
-try:
-    import cv2
-except ImportError:
-    CV2_AVAILABLE = False
-    print("OpenCV not available - using PIL for image processing")
-try:
-    from PIL import Image, ImageDraw, ImageFont
-    PIL_AVAILABLE = True
-except ImportError:
-    PIL_AVAILABLE = False
-    print("PIL not available - limited image processing")
-# AI model imports with fallbacks
-HF_AVAILABLE = True
-try:
-    from transformers import pipeline
-    import torch
-except ImportError:
-    HF_AVAILABLE = False
-    print("Transformers not available - using mock emotion detection")
-class EmotionRecognitionSystem:
     def __init__(self):
-        self.emotion_history = deque(maxlen=100)
-        self.audio_queue = queue.Queue()
-        self.video_queue = queue.Queue()
-        self.setup_models()
-        self.alert_thresholds = {
-            'stress': 0.7,
-            'anxiety': 0.6,
-            'pain': 0.8,
-            'confusion': 0.5
-        }
-    def setup_models(self):
-        """Initialize emotion recognition models with better error handling"""
-        self.models_loaded = False
-        if not HF_AVAILABLE:
-            print("Skipping model loading - transformers not available")
-            return
         try:
-            # Facial emotion recognition
-            self.face_emotion_pipeline = pipeline(
-                "image-classification",
-                model="j-hartmann/emotion-english-distilroberta-base",
-                device=0 if torch.cuda.is_available() else -1
-            )
-            # Audio emotion recognition
-            self.audio_emotion_pipeline = pipeline(
-                "audio-classification",
-                model="ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition",
-                device=0 if torch.cuda.is_available() else -1
-            )
-            self.models_loaded = True
-        except Exception as e:
-            print(f"Error loading models: {e}")
-            print(traceback.format_exc())
-            self.models_loaded = False
-    def validate_audio_input(self, audio_data):
-        """Validate and standardize audio input format"""
-        if audio_data is None:
-            return None
         try:
-            # Handle different audio input formats
-            if isinstance(audio_data, tuple):
-                audio_array, sample_rate = audio_data
-            else:
-                # Try to read audio file if not in tuple format
-                if isinstance(audio_data, str):
-                    if LIBROSA_AVAILABLE:
-                        audio_array, sample_rate = librosa.load(audio_data, sr=None)
-                    else:
-                        # Fallback for when librosa is not available
-                        import wave
-                        with wave.open(audio_data, 'rb') as wf:
-                            sample_rate = wf.getframerate()
-                            audio_array = np.frombuffer(wf.readframes(wf.getnframes()), dtype=np.int16)
-                            audio_array = audio_array.astype(np.float32) / 32768.0
-                else:
-                    return None
-            # Resample if needed
-            target_rate = 16000
-            if sample_rate != target_rate:
-                if LIBROSA_AVAILABLE:
-                    audio_array = librosa.resample(audio_array, orig_sr=sample_rate, target_sr=target_rate)
-                else:
-                    # Simple downsampling fallback
-                    step = int(sample_rate / target_rate)
-                    if step > 1:
-                        audio_array = audio_array[::step]
-                    sample_rate = target_rate
-            return (audio_array, sample_rate)
-        except Exception as e:
-            print(f"Audio validation error: {e}")
-            return None
-    def detect_face_emotion(self, frame):
-        """Detect emotions from facial expressions with better error handling"""
-        if not self.models_loaded:
-            # Mock emotion detection for demo
-            emotions = ['neutral', 'happy', 'sad', 'angry', 'fear', 'surprise', 'disgust']
-            scores = np.random.dirichlet(np.ones(len(emotions)))
-            return dict(zip(emotions, scores))
-        try:
-            # Convert frame to RGB format
-            if isinstance(frame, np.ndarray):
-                if len(frame.shape) == 3:
-                    if frame.shape[2] == 4:  # RGBA
-                        rgb_frame = frame[:, :, :3]
-                    elif frame.shape[2] == 3:  # BGR or RGB?
-                        if CV2_AVAILABLE:
-                            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-                        else:
-                            rgb_frame = frame[:, :, ::-1]  # Simple BGR to RGB
-                    else:
-                        rgb_frame = frame
                 else:
-                    # Grayscale to RGB
-                    if CV2_AVAILABLE:
-                        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)
-                    else:
-                        rgb_frame = np.stack((frame,)*3, axis=-1)
-            else:
-                rgb_frame = frame
-            # Use face emotion model
-            results = self.face_emotion_pipeline(rgb_frame)
-            # Convert to standardized format
-            emotion_scores = {}
-            for result in results:
-                emotion_scores[result['label'].lower()] = result['score']
-            return emotion_scores
-        except Exception as e:
-            print(f"Face emotion detection error: {e}")
-            return {'neutral': 1.0}
-    def detect_voice_emotion(self, audio_data):
-        """Detect emotions from voice tone with better audio handling"""
-        if not self.models_loaded or audio_data is None:
-            # Mock emotion detection
-            emotions = ['neutral', 'happy', 'sad', 'angry', 'fear']
-            scores = np.random.dirichlet(np.ones(len(emotions)))
-            return dict(zip(emotions, scores))
         try:
-            # Validate and standardize audio input
-            validated_audio = self.validate_audio_input(audio_data)
-            if validated_audio is None:
-                return {'neutral': 1.0}
-            audio_array, sample_rate = validated_audio
-            # Process audio with the model
-            results = self.audio_emotion_pipeline({
-                "array": audio_array,
-                "sampling_rate": sample_rate
-            })
-            emotion_scores = {}
-            for result in results:
-                emotion_scores[result['label'].lower()] = result['score']
-            return emotion_scores
         except Exception as e:
-            print(f"Voice emotion detection error: {e}")
-            return {'neutral': 1.0}
-    # [Rest of your existing methods...]
-def process_video_audio(video_frame, audio_data):
-    """Process video frame and audio data with better error handling"""
-    if video_frame is None:
-        return None, "No video input", "", ""
-    try:
-        # Process the frame
-        validated_audio = emotion_system.validate_audio_input(audio_data)
-        # Get emotion analysis
-        emotion_record = emotion_system.process_frame(
-            video_frame,
-            validated_audio[0] if validated_audio else None,
-            validated_audio[1] if validated_audio else 16000
-        )
-        # Create visualization
-        annotated_frame = create_emotion_overlay(video_frame, emotion_record)
-        # Format results
-        clinical_text = format_clinical_emotions(emotion_record['clinical_emotions'])
-        alerts_text = "\n".join(emotion_record['alerts']) if emotion_record['alerts'] else "No alerts"
-        suggestions_text = "\n".join(emotion_record['suggestions']) if emotion_record['suggestions'] else "No suggestions"
-        return annotated_frame, clinical_text, alerts_text, suggestions_text
-    except Exception as e:
-        print(f"Processing error: {e}")
-        traceback.print_exc()
-        return video_frame, "Processing error", "System error", "Please try again"
-# [Rest of your existing functions...]
-def create_interface():
-    with gr.Blocks(title="Patient Emotion Recognition System", theme=gr.themes.Soft()) as demo:
-        # [Your existing interface code...]
-        # Add audio format info
         gr.Markdown("""
-        ### 🔊 Audio Input Notes:
-        - System works best with clear microphone input
-        - If you get audio errors, try:
-          - Checking microphone permissions
-          - Reducing background noise
-          - Using a different microphone
         """)
     return demo
 if __name__ == "__main__":
-    emotion_system = EmotionRecognitionSystem()
-    demo = create_interface()
-    demo.launch(
-        share=True,
-        server_name="0.0.0.0",
-        server_port=7860,
-        show_error=True
-    )

 import gradio as gr
 import numpy as np
+from datetime import datetime
 import traceback
+import sounddevice as sd  # Alternative audio backend
+import tempfile
+import os
class AudioProcessor:
    """Turn incoming audio into a ``(samples, sample_rate)`` tuple.

    Several optional backends are probed once at construction time and then
    tried in order by :meth:`process_audio`; the first one that produces a
    result wins.

    Attributes:
        sample_rate: Target sample rate in Hz used when loading or recording.
        available_backends: Backend names in the order they are attempted.
            Falls back to ``['numpy_fallback']`` when no real backend exists.
    """

    def __init__(self):
        self.sample_rate = 16000
        self.available_backends = self.detect_audio_backends()

    def detect_audio_backends(self):
        """Return the names of the backends that appear usable.

        Each probe is best-effort: any ordinary failure simply means the
        backend is left out. ``except Exception`` is used instead of a bare
        ``except`` so Ctrl-C and ``SystemExit`` are never swallowed.
        """
        backends = []

        # Test FFmpeg (the ffmpeg-python bindings).
        try:
            import ffmpeg  # noqa: F401
            backends.append('ffmpeg')
        except Exception:
            pass

        # Test SoundDevice: raises when no usable input device is present,
        # and also when the module itself could not be imported.
        try:
            sd.check_input_settings()
            backends.append('sounddevice')
        except Exception:
            pass

        # Test Librosa.
        try:
            import librosa  # noqa: F401
            backends.append('librosa')
        except Exception:
            pass

        return backends or ['numpy_fallback']

    def process_audio(self, audio_input):
        """Run ``audio_input`` through the first backend that succeeds.

        Args:
            audio_input: A file path, raw encoded bytes, an already decoded
                tuple, or ``None``. Tuples are returned unchanged, so they
                are assumed to already be ``(samples, sample_rate)`` --
                NOTE(review): confirm against callers.

        Returns:
            A ``(samples, sample_rate)`` tuple.

        Raises:
            RuntimeError: If every available backend failed.
        """
        handlers = {
            'ffmpeg': self._process_with_ffmpeg,
            'sounddevice': self._process_with_sounddevice,
            'librosa': self._process_with_librosa,
        }
        for backend in self.available_backends:
            # Unknown names (e.g. 'numpy_fallback') use the fallback handler.
            handler = handlers.get(backend, self._process_fallback)
            try:
                result = handler(audio_input)
            except Exception as e:
                print(f"Failed with {backend}: {str(e)}")
                continue
            if result is not None:
                return result
            # A backend that yields nothing must not end the search; callers
            # index into the result and would crash on None.
            print(f"Failed with {backend}: backend returned no audio")
        raise RuntimeError("All audio backends failed")

    def _process_with_ffmpeg(self, audio_input):
        """Placeholder for FFmpeg decoding.

        Raises instead of silently returning ``None`` so that
        :meth:`process_audio` moves on to the next backend.
        """
        raise NotImplementedError("FFmpeg processing is not implemented")

    def _process_with_sounddevice(self, audio_input):
        """Record 5 seconds of mono audio from the default input device.

        Recording only happens when no audio was supplied. A decoded tuple is
        passed through like the other handlers do; any other supplied input
        is refused so that a file-capable backend handles it instead of the
        user's audio being replaced by a blocking live recording.
        """
        if isinstance(audio_input, tuple):
            return audio_input
        if audio_input is not None:
            raise ValueError(
                "sounddevice only records live input; "
                "supplied audio must be handled by another backend"
            )

        duration = 5  # seconds
        print(f"Recording with sounddevice (rate={self.sample_rate})...")
        audio_data = sd.rec(int(duration * self.sample_rate),
                            samplerate=self.sample_rate,
                            channels=1)
        sd.wait()  # block until the recording has finished
        return (audio_data.flatten(), self.sample_rate)

    def _process_with_librosa(self, audio_input):
        """Decode ``audio_input`` with librosa at ``self.sample_rate``.

        Tuples pass through unchanged, strings are treated as file paths, and
        anything else is written to a temporary ``.wav`` file and loaded.
        """
        import librosa

        if isinstance(audio_input, tuple):
            return audio_input
        if isinstance(audio_input, str):
            return librosa.load(audio_input, sr=self.sample_rate)

        # Handle other input types via a temp file. The file is closed before
        # loading and always removed, even if writing or decoding fails.
        tmp = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
        try:
            with tmp:
                tmp.write(audio_input)
            data, sr = librosa.load(tmp.name, sr=self.sample_rate)
            return (data, sr)
        finally:
            os.unlink(tmp.name)

    def _process_fallback(self, audio_input):
        """Last-resort handler: pass tuples through, otherwise mock data.

        The mock is one second of uniform random samples at
        ``self.sample_rate``; it is NOT derived from ``audio_input``.
        """
        if isinstance(audio_input, tuple):
            return audio_input
        return (np.random.random(self.sample_rate), self.sample_rate)  # Mock data
def create_debug_interface():
    """Build the Gradio Blocks app used to debug audio input.

    The page offers a microphone recorder and a file upload; pressing the
    button runs whichever clip is present through ``AudioProcessor`` and
    shows the clip, a few statistics, and a status line.

    Returns:
        The constructed ``gr.Blocks`` instance (not yet launched).
    """
    audio_processor = AudioProcessor()

    def process_audio_debug(mic_audio, upload_audio=None):
        """Process one clip and return ``(audio, debug_text, status_text)``.

        Values are returned positionally, in the same order as the
        ``outputs`` list of the click handler below.
        """
        # Prefer the microphone recording; fall back to the uploaded file.
        audio = mic_audio if mic_audio is not None else upload_audio
        if audio is None:
            return (
                None,
                "",
                "❌ Error: no audio provided - record or upload a clip first",
            )

        try:
            processed = audio_processor.process_audio(audio)
            waveform = processed[0]
            sr = processed[1]

            # np.max raises on an empty array, so report 0 for empty clips.
            peak = np.max(np.abs(waveform)) if len(waveform) else 0.0

            # Create debug info
            debug_info = [
                f"Audio Backends Available: {', '.join(audio_processor.available_backends)}",
                f"Sample Rate: {sr} Hz",
                f"Audio Length: {len(waveform)/sr:.2f} seconds",
                f"Max Amplitude: {peak:.4f}",
                f"Processing Time: {datetime.now().strftime('%H:%M:%S')}"
            ]
            return audio, "\n".join(debug_info), "✅ Successfully processed audio"
        except Exception as e:
            # Surface the full traceback in the UI; this is a debugging tool.
            return None, traceback.format_exc(), f"❌ Error: {str(e)}"

    with gr.Blocks() as demo:
        gr.Markdown("## 🎤 Audio Debugging Interface")
        with gr.Row():
            with gr.Column():
                mic_input = gr.Audio(sources=["microphone"], type="filepath", label="Microphone Input")
                upload_input = gr.Audio(sources=["upload"], type="filepath", label="File Upload")
                test_button = gr.Button("Test Audio Processing")
            with gr.Column():
                audio_output = gr.Audio(label="Processed Audio")
                debug_output = gr.Textbox(label="Debug Information", lines=8)
                status_output = gr.Textbox(label="Processing Status")
        # Both inputs are wired in so an uploaded file is actually tested too.
        test_button.click(
            fn=process_audio_debug,
            inputs=[mic_input, upload_input],
            outputs=[audio_output, debug_output, status_output]
        )
        gr.Markdown("### Troubleshooting Tips")
        gr.Markdown("""
        1. **Check Physical Connections**:
           - Ensure headphones/mic are properly plugged in
           - Try different USB ports if using USB headphones
        2. **System Settings**:
           - Make sure your headphones are set as default input device
           - Check input volume levels
        3. **Browser Permissions**:
           - Refresh the page and allow microphone access when prompted
           - Check browser settings if prompt doesn't appear
        """)
    return demo
 if __name__ == "__main__":
+    # First run the debug interface
+    debug_interface = create_debug_interface()
+    debug_interface.launch()