"""Gradio interface for debugging microphone / uploaded audio processing."""

import os
import tempfile
import traceback
from datetime import datetime

import numpy as np

# The UI toolkit and the PortAudio bindings are optional at import time so that
# AudioProcessor stays importable (and its backend detection can really fall
# back) on machines where they are missing or broken.
try:
    import gradio as gr
except ImportError:
    gr = None

try:
    import sounddevice as sd  # Alternative audio backend
except (ImportError, OSError):
    # sounddevice raises OSError when the PortAudio shared library is absent.
    sd = None


# Enhanced Audio Processor Class
class AudioProcessor:
    """Turn an audio input into a ``(waveform, sample_rate)`` tuple.

    The backends found by ``detect_audio_backends`` are tried in order until
    one succeeds. An input that is already a tuple is returned unchanged by
    every backend.
    """

    def __init__(self):
        self.sample_rate = 16000
        self.available_backends = self.detect_audio_backends()

    def detect_audio_backends(self):
        """Return the names of the usable backends, in priority order.

        Returns:
            A list drawn from ``'ffmpeg'``, ``'sounddevice'`` and
            ``'librosa'``, or ``['numpy_fallback']`` when none is usable.
        """
        backends = []

        # Test FFmpeg. Detection is best-effort, so any import-time failure
        # just means "not available"; `except Exception` (rather than a bare
        # except) lets KeyboardInterrupt / SystemExit propagate.
        try:
            import ffmpeg  # noqa: F401
        except Exception:
            pass
        else:
            backends.append('ffmpeg')

        # Test SoundDevice: only usable when the module imported and an input
        # device passes its settings check.
        if sd is not None:
            try:
                sd.check_input_settings()
            except Exception:
                pass
            else:
                backends.append('sounddevice')

        # Test Librosa
        try:
            import librosa  # noqa: F401
        except Exception:
            pass
        else:
            backends.append('librosa')

        return backends or ['numpy_fallback']

    def process_audio(self, audio_input):
        """Process ``audio_input`` with the first backend that succeeds.

        Args:
            audio_input: A ``(waveform, sample_rate)`` tuple, a file path, or
                raw audio bytes, depending on what the backend accepts.

        Returns:
            The backend's ``(waveform, sample_rate)`` result.

        Raises:
            RuntimeError: If every available backend fails.
        """
        handlers = {
            'ffmpeg': self._process_with_ffmpeg,
            'sounddevice': self._process_with_sounddevice,
            'librosa': self._process_with_librosa,
        }
        for backend in self.available_backends:
            # Any unknown backend name (i.e. 'numpy_fallback') uses the fallback.
            handler = handlers.get(backend, self._process_fallback)
            try:
                return handler(audio_input)
            except Exception as e:
                print(f"Failed with {backend}: {e}")
        raise RuntimeError("All audio backends failed")

    @staticmethod
    def _write_temp_wav(audio_bytes):
        """Write bytes-like audio to a closed temporary ``.wav``; return its path.

        The caller owns the file and must delete it.

        Raises:
            TypeError: If ``audio_bytes`` is not bytes-like. This is checked
                before the file is created so nothing is left behind.
        """
        if not isinstance(audio_bytes, (bytes, bytearray, memoryview)):
            raise TypeError(
                f"Unsupported audio input type: {type(audio_bytes).__name__}"
            )
        tmp = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
        try:
            with tmp:
                tmp.write(audio_bytes)
        except Exception:
            AudioProcessor._remove_quietly(tmp.name)
            raise
        return tmp.name

    @staticmethod
    def _remove_quietly(path):
        """Delete ``path``, ignoring the case where it cannot be removed."""
        try:
            os.unlink(path)
        except OSError:
            pass

    def _process_with_ffmpeg(self, audio_input):
        """Decode a file path or raw bytes to mono float32 via ffmpeg.

        Raises:
            RuntimeError: If decoding fails for any reason.
        """
        if isinstance(audio_input, tuple):
            return audio_input

        tmp_path = None
        try:
            import ffmpeg

            if isinstance(audio_input, (str, os.PathLike)):
                # Gradio's type="filepath" hands over a path: decode it directly.
                source = os.fspath(audio_input)
            else:
                tmp_path = self._write_temp_wav(audio_input)
                source = tmp_path

            # Process audio file with ffmpeg
            out, _ = (
                ffmpeg.input(source)
                .output('pipe:', format='f32le', ac=1, ar=self.sample_rate)
                .run(capture_stdout=True)
            )
            return (np.frombuffer(out, dtype=np.float32), self.sample_rate)
        except Exception as e:
            raise RuntimeError(f"FFmpeg processing failed: {e}") from e
        finally:
            # Clean up even when ffmpeg raised, so temp files do not pile up.
            if tmp_path is not None:
                self._remove_quietly(tmp_path)

    def _process_with_sounddevice(self, audio_input):
        """Record 5 seconds from the default input device.

        A non-tuple ``audio_input`` is not decoded: this backend captures
        fresh audio from the local device instead.

        Raises:
            RuntimeError: If sounddevice is unavailable or recording fails.
        """
        if isinstance(audio_input, tuple):
            return audio_input

        try:
            if sd is None:
                raise RuntimeError("sounddevice is not available")
            duration = 5  # seconds
            print(f"Recording with sounddevice (rate={self.sample_rate})...")
            audio_data = sd.rec(
                int(duration * self.sample_rate),
                samplerate=self.sample_rate,
                channels=1,
            )
            sd.wait()
            return (audio_data.flatten(), self.sample_rate)
        except Exception as e:
            raise RuntimeError(f"Sounddevice processing failed: {e}") from e

    def _process_with_librosa(self, audio_input):
        """Load a file path or raw bytes with librosa at ``self.sample_rate``.

        Raises:
            RuntimeError: If librosa is missing or loading fails.
        """
        if isinstance(audio_input, tuple):
            return audio_input

        tmp_path = None
        try:
            import librosa

            if isinstance(audio_input, str):
                return librosa.load(audio_input, sr=self.sample_rate)

            # Handle other input types by spilling them to a temporary file.
            tmp_path = self._write_temp_wav(audio_input)
            data, sr = librosa.load(tmp_path, sr=self.sample_rate)
            return (data, sr)
        except Exception as e:
            raise RuntimeError(f"Librosa processing failed: {e}") from e
        finally:
            if tmp_path is not None:
                self._remove_quietly(tmp_path)

    def _process_fallback(self, audio_input):
        """Pass tuples through; otherwise return one second of random noise.

        NOTE: the noise is placeholder data, not a decoding of ``audio_input``.
        """
        if isinstance(audio_input, tuple):
            return audio_input
        return (np.random.random(self.sample_rate), self.sample_rate)


# Modified Interface with Audio Debugging
def create_debug_interface():
    """Build the Gradio Blocks app used to debug audio input.

    Returns:
        The ``gr.Blocks`` instance, ready for ``launch()``.

    Raises:
        ImportError: If gradio is not installed.
    """
    if gr is None:
        raise ImportError("gradio is required to build the debug interface")

    audio_processor = AudioProcessor()

    def process_audio_debug(audio, uploaded=None):
        """Process the mic recording (or, failing that, the uploaded file).

        Returns a ``(audio, debug_text, status_text)`` tuple, matching the
        order of the ``outputs`` list on the click handler.
        """
        source = audio if audio is not None else uploaded
        try:
            waveform, sr = audio_processor.process_audio(source)
            # np.max raises on an empty array, so report 0 for empty audio.
            peak = float(np.max(np.abs(waveform))) if len(waveform) else 0.0

            # Create debug info
            debug_info = [
                f"Audio Backends Available: {', '.join(audio_processor.available_backends)}",
                f"Sample Rate: {sr} Hz",
                f"Audio Length: {len(waveform)/sr:.2f} seconds",
                f"Max Amplitude: {peak:.4f}",
                f"Processing Time: {datetime.now().strftime('%H:%M:%S')}",
            ]
            return source, "\n".join(debug_info), "✅ Successfully processed audio"
        except Exception as e:
            return None, traceback.format_exc(), f"❌ Error: {e}"

    with gr.Blocks() as demo:
        gr.Markdown("## 🎤 Audio Debugging Interface")

        with gr.Row():
            with gr.Column():
                mic_input = gr.Audio(
                    sources=["microphone"],
                    type="filepath",
                    label="Microphone Input",
                )
                upload_input = gr.Audio(
                    sources=["upload"],
                    type="filepath",
                    label="File Upload",
                )
                test_button = gr.Button("Test Audio Processing")

            with gr.Column():
                audio_output = gr.Audio(label="Processed Audio")
                debug_output = gr.Textbox(label="Debug Information", lines=8)
                status_output = gr.Textbox(label="Processing Status")

        test_button.click(
            fn=process_audio_debug,
            inputs=[mic_input, upload_input],
            outputs=[audio_output, debug_output, status_output],
        )

        gr.Markdown("### Troubleshooting Tips")
        gr.Markdown("""
        1. **Check Physical Connections**:
           - Ensure headphones/mic are properly plugged in
           - Try different USB ports if using USB headphones
        2. **System Settings**:
           - Make sure your headphones are set as default input device
           - Check input volume levels
        3. **Browser Permissions**:
           - Refresh the page and allow microphone access when prompted
           - Check browser settings if prompt doesn't appear
        """)

    return demo


if __name__ == "__main__":
    # First run the debug interface
    debug_interface = create_debug_interface()
    debug_interface.launch()