|
import gradio as gr |
|
import numpy as np |
|
from datetime import datetime |
|
import traceback |
|
import sounddevice as sd |
|
import tempfile |
|
import os |
|
|
|
|
|
class AudioProcessor:
    """Turn an audio input into a ``(waveform, sample_rate)`` pair.

    Several optional backends are probed once at construction time and then
    tried in order by :meth:`process_audio` until one of them succeeds.

    Attributes:
        sample_rate: Target sample rate in Hz requested from every backend.
        available_backends: Backend names in the order they will be tried;
            ``['numpy_fallback']`` when no real backend was detected.
    """

    def __init__(self):
        """Set the target sample rate and detect the usable backends."""
        self.sample_rate = 16000
        self.available_backends = self.detect_audio_backends()

    def detect_audio_backends(self):
        """Return the names of the backends that appear usable.

        Detection is deliberately best-effort: any ordinary error while
        probing a backend just means that backend is skipped.

        Returns:
            A list containing any of ``'ffmpeg'``, ``'sounddevice'`` and
            ``'librosa'`` (in that order), or ``['numpy_fallback']`` when
            none of them could be detected.
        """
        backends = []

        # ``except Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are not swallowed while probing.
        try:
            import ffmpeg  # noqa: F401 -- imported only to test availability
            backends.append('ffmpeg')
        except Exception:
            pass

        try:
            sd.check_input_settings()
            backends.append('sounddevice')
        except Exception:
            pass

        try:
            import librosa  # noqa: F401 -- imported only to test availability
            backends.append('librosa')
        except Exception:
            pass

        return backends or ['numpy_fallback']

    def process_audio(self, audio_input):
        """Process ``audio_input`` with the first backend that succeeds.

        Args:
            audio_input: A ``(waveform, sample_rate)`` tuple (returned
                unchanged by every backend), a path to an audio file, or raw
                audio file bytes.

        Returns:
            The ``(waveform, sample_rate)`` pair produced by the backend.

        Raises:
            Exception: If every available backend failed; the last backend
                error is attached as ``__cause__``.
        """
        handlers = {
            'ffmpeg': self._process_with_ffmpeg,
            'sounddevice': self._process_with_sounddevice,
            'librosa': self._process_with_librosa,
        }
        last_error = None

        for backend in self.available_backends:
            # Unknown names (e.g. 'numpy_fallback') use the fallback handler.
            handler = handlers.get(backend, self._process_fallback)
            try:
                return handler(audio_input)
            except Exception as e:
                print(f"Failed with {backend}: {str(e)}")
                last_error = e

        raise Exception("All audio backends failed") from last_error

    def _write_temp_wav(self, audio_bytes):
        """Write ``audio_bytes`` to a new ``.wav`` temp file and return its path.

        The file is closed before returning so another process or library
        can open it by name. The caller must delete it. If the write fails,
        the partially created file is removed before the error propagates.
        """
        tmp = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
        try:
            with tmp:
                tmp.write(audio_bytes)
        except BaseException:
            os.unlink(tmp.name)
            raise
        return tmp.name

    def _process_with_ffmpeg(self, audio_input):
        """Decode audio to mono float32 at ``self.sample_rate`` via ffmpeg.

        Args:
            audio_input: ``(waveform, sample_rate)`` tuple (returned as-is),
                a file path, or raw audio file bytes.

        Returns:
            ``(numpy float32 array, self.sample_rate)``.

        Raises:
            Exception: Wrapping any failure, prefixed with
                ``"FFmpeg processing failed"``.
        """
        if isinstance(audio_input, tuple):
            return audio_input

        try:
            import ffmpeg

            def decode(path):
                out, _ = (
                    ffmpeg.input(path)
                    .output('pipe:', format='f32le', ac=1, ar=self.sample_rate)
                    .run(capture_stdout=True)
                )
                return (np.frombuffer(out, dtype=np.float32), self.sample_rate)

            # A path can be handed to ffmpeg directly; writing a ``str`` into
            # a binary temp file would raise TypeError.
            if isinstance(audio_input, (str, os.PathLike)):
                return decode(os.fspath(audio_input))

            tmp_path = self._write_temp_wav(audio_input)
            try:
                return decode(tmp_path)
            finally:
                # Always remove the temp file, even when decoding fails.
                os.unlink(tmp_path)
        except Exception as e:
            raise Exception(f"FFmpeg processing failed: {str(e)}") from e

    def _process_with_sounddevice(self, audio_input):
        """Record five seconds of mono audio with sounddevice.

        NOTE: any non-tuple ``audio_input`` is ignored; this backend records
        fresh audio instead of decoding what it was given.

        Args:
            audio_input: ``(waveform, sample_rate)`` tuple (returned as-is);
                any other value triggers a recording.

        Returns:
            ``(flattened recording, self.sample_rate)``.

        Raises:
            Exception: Wrapping any failure, prefixed with
                ``"Sounddevice processing failed"``.
        """
        if isinstance(audio_input, tuple):
            return audio_input

        try:
            duration = 5  # seconds
            print(f"Recording with sounddevice (rate={self.sample_rate})...")
            audio_data = sd.rec(int(duration * self.sample_rate),
                                samplerate=self.sample_rate,
                                channels=1)
            sd.wait()  # block until the recording has finished
            return (audio_data.flatten(), self.sample_rate)
        except Exception as e:
            raise Exception(f"Sounddevice processing failed: {str(e)}") from e

    def _process_with_librosa(self, audio_input):
        """Load audio at ``self.sample_rate`` via librosa.

        Args:
            audio_input: ``(waveform, sample_rate)`` tuple (returned as-is),
                a file path string, or raw audio file bytes.

        Returns:
            The ``(data, sample_rate)`` pair returned by ``librosa.load``.

        Raises:
            Exception: Wrapping any failure, prefixed with
                ``"Librosa processing failed"``.
        """
        try:
            import librosa

            if isinstance(audio_input, tuple):
                return audio_input
            if isinstance(audio_input, str):
                return librosa.load(audio_input, sr=self.sample_rate)

            tmp_path = self._write_temp_wav(audio_input)
            try:
                data, sr = librosa.load(tmp_path, sr=self.sample_rate)
            finally:
                # Always remove the temp file, even when loading fails.
                os.unlink(tmp_path)
            return (data, sr)
        except Exception as e:
            raise Exception(f"Librosa processing failed: {str(e)}") from e

    def _process_fallback(self, audio_input):
        """Last-resort backend used when no real backend is available.

        Returns a tuple input unchanged; otherwise returns one second of
        uniform random noise at ``self.sample_rate`` as placeholder data.
        """
        if isinstance(audio_input, tuple):
            return audio_input
        return (np.random.random(self.sample_rate), self.sample_rate)
|
|
|
def create_debug_interface():
    """Build the Gradio Blocks app used to debug audio capture.

    The app offers a microphone input and a file-upload input, runs the
    chosen audio through an ``AudioProcessor`` and shows the input audio,
    some statistics about the processed waveform, and a status line.

    Returns:
        The constructed ``gr.Blocks`` app (not yet launched).
    """
    audio_processor = AudioProcessor()

    def process_audio_debug(mic_audio, upload_audio=None):
        """Process one audio input and report diagnostics.

        Args:
            mic_audio: Value of the microphone component (a file path, or
                ``None`` when nothing was recorded).
            upload_audio: Value of the upload component, used only when
                ``mic_audio`` is ``None``.

        Returns:
            ``(audio, debug_text, status_text)`` in the same order as the
            click handler's ``outputs`` list. Gradio maps positional return
            values onto outputs; a dict would have to be keyed by component
            objects, not by strings.
        """
        audio = mic_audio if mic_audio is not None else upload_audio

        try:
            processed = audio_processor.process_audio(audio)
            waveform = processed[0]
            sr = processed[1]

            # np.max raises on an empty array, so report a zero peak instead.
            peak = float(np.max(np.abs(waveform))) if len(waveform) else 0.0

            debug_info = [
                f"Audio Backends Available: {', '.join(audio_processor.available_backends)}",
                f"Sample Rate: {sr} Hz",
                f"Audio Length: {len(waveform)/sr:.2f} seconds",
                f"Max Amplitude: {peak:.4f}",
                f"Processing Time: {datetime.now().strftime('%H:%M:%S')}",
            ]

            # The audio output echoes the original input so the user can
            # hear exactly what was captured.
            return (
                audio,
                "\n".join(debug_info),
                "✅ Successfully processed audio",
            )
        except Exception as e:
            # Surface the full traceback in the UI instead of crashing the
            # handler; this is a debugging tool.
            return (
                None,
                traceback.format_exc(),
                f"❌ Error: {str(e)}",
            )

    with gr.Blocks() as demo:
        gr.Markdown("## 🎤 Audio Debugging Interface")

        with gr.Row():
            with gr.Column():
                mic_input = gr.Audio(sources=["microphone"], type="filepath", label="Microphone Input")
                upload_input = gr.Audio(sources=["upload"], type="filepath", label="File Upload")
                test_button = gr.Button("Test Audio Processing")

            with gr.Column():
                audio_output = gr.Audio(label="Processed Audio")
                debug_output = gr.Textbox(label="Debug Information", lines=8)
                status_output = gr.Textbox(label="Processing Status")

        # Both inputs are wired in so an uploaded file can be tested too.
        test_button.click(
            fn=process_audio_debug,
            inputs=[mic_input, upload_input],
            outputs=[audio_output, debug_output, status_output]
        )

        gr.Markdown("### Troubleshooting Tips")
        gr.Markdown("""
        1. **Check Physical Connections**:
           - Ensure headphones/mic are properly plugged in
           - Try different USB ports if using USB headphones

        2. **System Settings**:
           - Make sure your headphones are set as default input device
           - Check input volume levels

        3. **Browser Permissions**:
           - Refresh the page and allow microphone access when prompted
           - Check browser settings if prompt doesn't appear
        """)

    return demo
|
|
|
if __name__ == "__main__":
    # Script entry point: build the debugging UI and start serving it.
    create_debug_interface().launch()