# Gradio audio-debugging demo: probes the available audio backends and reports diagnostics.
import gradio as gr
import numpy as np
from datetime import datetime
import traceback
import sounddevice as sd # Alternative audio backend
import tempfile
import os
# Enhanced Audio Processor Class
class AudioProcessor:
    """Turn an audio input into a ``(samples, sample_rate)`` pair.

    The usable backends are probed once, when the instance is created, and
    stored in ``available_backends``. ``process_audio`` then tries them in
    that order and returns the first successful result.
    """

    def __init__(self):
        self.sample_rate = 16000
        self.available_backends = self.detect_audio_backends()

    def detect_audio_backends(self):
        """Return the names of the backends that appear usable.

        Probing is best-effort: any failure simply means the backend is left
        out. ``Exception`` is caught instead of using a bare ``except`` so
        that ``KeyboardInterrupt`` and ``SystemExit`` still propagate.

        Returns:
            A list drawn from ``'ffmpeg'``, ``'sounddevice'`` and
            ``'librosa'`` (in that order), or ``['numpy_fallback']`` when
            none of them is usable.
        """
        backends = []

        # Test FFmpeg
        try:
            import ffmpeg  # noqa: F401 -- imported only to test availability
            backends.append('ffmpeg')
        except Exception:
            pass  # backend unavailable; deliberately skipped

        # Test SoundDevice
        try:
            sd.check_input_settings()
            backends.append('sounddevice')
        except Exception:
            pass  # no usable input device (or module missing); skipped

        # Test Librosa
        try:
            import librosa  # noqa: F401 -- imported only to test availability
            backends.append('librosa')
        except Exception:
            pass  # backend unavailable; deliberately skipped

        return backends or ['numpy_fallback']

    def process_audio(self, audio_input):
        """Process ``audio_input`` with the first backend that succeeds.

        Args:
            audio_input: Value handed to each backend in turn. The backends
                in this class special-case tuples, strings and ``None``.

        Returns:
            Whatever the first successful backend returns; the backends in
            this class return a two-element tuple.

        Raises:
            RuntimeError: If every backend in ``available_backends`` raised.
        """
        handlers = {
            'ffmpeg': self._process_with_ffmpeg,
            'sounddevice': self._process_with_sounddevice,
            'librosa': self._process_with_librosa,
        }
        for backend in self.available_backends:
            # Any unknown name (e.g. 'numpy_fallback') maps to the fallback.
            handler = handlers.get(backend, self._process_fallback)
            try:
                return handler(audio_input)
            except Exception as e:
                print(f"Failed with {backend}: {str(e)}")
        raise RuntimeError("All audio backends failed")

    def _process_with_ffmpeg(self, audio_input):
        """Placeholder for FFmpeg-based processing.

        Raises:
            NotImplementedError: Always. Raising (rather than silently
                returning ``None``) lets ``process_audio`` move on to the
                next backend.
        """
        # TODO: plug the existing FFmpeg processing in here.
        raise NotImplementedError("FFmpeg processing is not implemented")

    def _process_with_sounddevice(self, audio_input, duration=5):
        """Record live audio with sounddevice when no input was supplied.

        Args:
            audio_input: A tuple is returned unchanged (as the other
                backends do); ``None`` triggers a live recording.
            duration: Length of the live recording in seconds.

        Returns:
            ``audio_input`` itself for tuples, otherwise
            ``(flattened_recording, self.sample_rate)``.

        Raises:
            TypeError: If ``audio_input`` is anything else. This backend
                cannot decode existing audio, and recording over it would
                silently discard what the caller provided.
        """
        if isinstance(audio_input, tuple):
            return audio_input
        if audio_input is not None:
            raise TypeError(
                "sounddevice backend can only record live audio; "
                "it cannot decode existing input"
            )

        print(f"Recording with sounddevice (rate={self.sample_rate})...")
        audio_data = sd.rec(int(duration * self.sample_rate),
                            samplerate=self.sample_rate,
                            channels=1)
        sd.wait()  # wait for the recording started by sd.rec to finish
        return (audio_data.flatten(), self.sample_rate)

    def _process_with_librosa(self, audio_input):
        """Load audio with librosa at ``self.sample_rate``.

        Args:
            audio_input: A tuple is returned unchanged; a string is passed
                to ``librosa.load`` as a path; anything else is written to
                a temporary ``.wav`` file first (presumably raw file bytes
                -- verify against callers).

        Returns:
            ``audio_input`` itself for tuples, otherwise the result of
            ``librosa.load``.
        """
        import librosa

        if isinstance(audio_input, tuple):
            return audio_input
        if isinstance(audio_input, str):
            return librosa.load(audio_input, sr=self.sample_rate)

        # Handle other input types via a temporary file. The file is closed
        # before it is loaded and removed in ``finally``, so it is neither
        # deleted while still open nor leaked when writing/loading fails.
        tmp = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
        try:
            with tmp:
                tmp.write(audio_input)
            data, sr = librosa.load(tmp.name, sr=self.sample_rate)
        finally:
            os.unlink(tmp.name)
        return (data, sr)

    def _process_fallback(self, audio_input):
        """Return mock data when no real backend is available.

        Args:
            audio_input: A tuple is returned unchanged; any other value is
                ignored.

        Returns:
            ``audio_input`` itself for tuples, otherwise
            ``(random_samples, self.sample_rate)`` with ``self.sample_rate``
            samples drawn from ``np.random.random``.
        """
        if isinstance(audio_input, tuple):
            return audio_input
        return (np.random.random(self.sample_rate), self.sample_rate)  # Mock data
# Modified Interface with Audio Debugging
def create_debug_interface():
    """Build the Gradio Blocks UI used to debug audio processing.

    The page has a microphone input, a file-upload input and a button that
    runs the chosen audio through an ``AudioProcessor`` and shows the
    resulting diagnostics.

    Returns:
        The ``gr.Blocks`` instance; the caller is expected to launch it.
    """
    audio_processor = AudioProcessor()

    def process_audio_debug(mic_audio, upload_audio=None):
        """Process one input and report diagnostics.

        The microphone value is used when present, otherwise the upload.

        Returns:
            A ``(audio, debug_text, status_text)`` tuple in the same order
            as the ``outputs`` list of the click handler. Gradio maps tuple
            items to outputs positionally; a dict keyed by plain strings is
            not a valid return value.
        """
        audio = mic_audio if mic_audio is not None else upload_audio
        try:
            waveform, sr = audio_processor.process_audio(audio)
            # Guard the empty case: np.max raises on a zero-length array.
            peak = np.max(np.abs(waveform)) if len(waveform) else 0.0
            # Create debug info
            debug_info = [
                f"Audio Backends Available: {', '.join(audio_processor.available_backends)}",
                f"Sample Rate: {sr} Hz",
                f"Audio Length: {len(waveform)/sr:.2f} seconds",
                f"Max Amplitude: {peak:.4f}",
                f"Processing Time: {datetime.now().strftime('%H:%M:%S')}",
            ]
            # NOTE(review): the emoji in this function and in the heading
            # below were restored from mis-encoded text -- confirm the
            # intended glyphs.
            return audio, "\n".join(debug_info), "✅ Successfully processed audio"
        except Exception as e:
            return None, traceback.format_exc(), f"❌ Error: {str(e)}"

    with gr.Blocks() as demo:
        gr.Markdown("## 🎤 Audio Debugging Interface")
        with gr.Row():
            with gr.Column():
                mic_input = gr.Audio(sources=["microphone"], type="filepath", label="Microphone Input")
                upload_input = gr.Audio(sources=["upload"], type="filepath", label="File Upload")
                test_button = gr.Button("Test Audio Processing")
            with gr.Column():
                audio_output = gr.Audio(label="Processed Audio")
                debug_output = gr.Textbox(label="Debug Information", lines=8)
                status_output = gr.Textbox(label="Processing Status")
        # Both inputs are wired in so an uploaded file is no longer ignored.
        test_button.click(
            fn=process_audio_debug,
            inputs=[mic_input, upload_input],
            outputs=[audio_output, debug_output, status_output]
        )
        gr.Markdown("### Troubleshooting Tips")
        gr.Markdown("""
1. **Check Physical Connections**:
- Ensure headphones/mic are properly plugged in
- Try different USB ports if using USB headphones
2. **System Settings**:
- Make sure your headphones are set as default input device
- Check input volume levels
3. **Browser Permissions**:
- Refresh the page and allow microphone access when prompted
- Check browser settings if prompt doesn't appear
""")
    return demo
if __name__ == "__main__":
    # Build the debug interface and launch it when run as a script.
    debug_interface = create_debug_interface()
    debug_interface.launch()