File size: 6,892 Bytes
d61cd9f ef2be41 cf17ab8 d287980 cf17ab8 2754fd7 cf17ab8 d61cd9f cf17ab8 d61cd9f cf17ab8 d287980 cf17ab8 d287980 cf17ab8 d287980 cf17ab8 d287980 cf17ab8 d287980 cf17ab8 d287980 cf17ab8 3253f05 cf17ab8 d61cd9f cf17ab8 a5ec76f cf17ab8 a5ec76f cf17ab8 a5ec76f cf17ab8 a5ec76f cf17ab8 a5ec76f cf17ab8 d61cd9f cf17ab8 d61cd9f cf17ab8 d61cd9f cf17ab8 d61cd9f cf17ab8 d61cd9f cf17ab8 d287980 cf17ab8 d287980 cf17ab8 d61cd9f cf17ab8 d61cd9f cf17ab8 d61cd9f 3baa918 cf17ab8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 |
import gradio as gr
import numpy as np
from datetime import datetime
import traceback
import sounddevice as sd # Alternative audio backend
import tempfile
import os
# Enhanced Audio Processor Class
class AudioProcessor:
    """Turn an audio input into ``(samples, sample_rate)`` using any usable backend.

    Backends are probed once at construction time and tried in that order by
    :meth:`process_audio`. Every backend returns a 2-tuple; a tuple passed in
    as input is handed back untouched (no resampling or validation).

    NOTE(review): Gradio's numpy audio format is ``(sample_rate, data)`` while
    the backends here produce ``(data, sample_rate)`` -- confirm which order
    callers that pass tuples actually use.
    """

    def __init__(self, sample_rate=16000):
        """Store the target sample rate (Hz) and probe the available backends."""
        self.sample_rate = sample_rate
        self.available_backends = self.detect_audio_backends()

    def detect_audio_backends(self):
        """Return the names of usable backends, or ``['numpy_fallback']`` if none.

        Probing is best-effort: any ordinary failure simply means the backend
        is skipped. ``except Exception`` (rather than a bare ``except``) keeps
        KeyboardInterrupt / SystemExit from being swallowed.
        """
        backends = []

        # Test FFmpeg (only checks that the Python binding imports)
        try:
            import ffmpeg  # noqa: F401
            backends.append('ffmpeg')
        except Exception:
            pass

        # Test SoundDevice (raises when no usable input device is configured)
        try:
            sd.check_input_settings()
            backends.append('sounddevice')
        except Exception:
            pass

        # Test Librosa
        try:
            import librosa  # noqa: F401
            backends.append('librosa')
        except Exception:
            pass

        return backends or ['numpy_fallback']

    def process_audio(self, audio_input):
        """Run ``audio_input`` through the first backend that succeeds.

        Args:
            audio_input: a tuple (returned as-is), a file path, or raw bytes.

        Returns:
            The 2-tuple produced by the first backend that does not raise.

        Raises:
            RuntimeError: if every backend in ``available_backends`` failed
                (or the list is empty). The last backend error is chained.
        """
        handlers = {
            'ffmpeg': self._process_with_ffmpeg,
            'sounddevice': self._process_with_sounddevice,
            'librosa': self._process_with_librosa,
        }
        last_error = None
        for backend in self.available_backends:
            # Unknown names (e.g. 'numpy_fallback') use the fallback handler.
            handler = handlers.get(backend, self._process_fallback)
            try:
                return handler(audio_input)
            except Exception as e:
                print(f"Failed with {backend}: {str(e)}")
                last_error = e
        raise RuntimeError("All audio backends failed") from last_error

    @staticmethod
    def _write_temp_wav(payload):
        """Write ``payload`` (bytes) to a closed temp ``.wav`` file; return its path.

        The file is closed before returning so another library or process can
        open it on every platform. The caller owns the file and must remove it.
        If the write fails, the file is removed here before re-raising.
        """
        tmp = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
        try:
            with tmp:
                tmp.write(payload)
        except Exception:
            AudioProcessor._discard_temp_file(tmp.name)
            raise
        return tmp.name

    @staticmethod
    def _discard_temp_file(path):
        """Remove a temp file, ignoring the case where it is already gone."""
        try:
            os.unlink(path)
        except OSError:
            # Cleanup is best-effort; it must not mask the real result/error.
            pass

    def _process_with_ffmpeg(self, audio_input):
        """Decode ``audio_input`` to mono float32 samples at ``self.sample_rate``.

        A file path is passed straight to ffmpeg; any other non-tuple input is
        treated as raw file bytes and staged through a temp file.

        Raises:
            RuntimeError: wrapping any failure (missing binding, bad input,
                ffmpeg error).
        """
        if isinstance(audio_input, tuple):
            return audio_input

        temp_path = None
        try:
            import ffmpeg

            if isinstance(audio_input, (str, os.PathLike)):
                source_path = os.fspath(audio_input)
            else:
                temp_path = self._write_temp_wav(audio_input)
                source_path = temp_path

            out, _ = (
                ffmpeg.input(source_path)
                .output('pipe:', format='f32le', ac=1, ar=self.sample_rate)
                .run(capture_stdout=True)
            )
            return (np.frombuffer(out, dtype=np.float32), self.sample_rate)
        except Exception as e:
            raise RuntimeError(f"FFmpeg processing failed: {str(e)}") from e
        finally:
            if temp_path is not None:
                self._discard_temp_file(temp_path)

    def _process_with_sounddevice(self, audio_input):
        """Record 5 seconds of mono audio from the default input device.

        NOTE(review): any non-tuple ``audio_input`` is ignored here -- a fresh
        recording is made on the machine running this code. Confirm that this
        is intended when a file path or bytes were supplied.

        Raises:
            RuntimeError: wrapping any sounddevice failure.
        """
        if isinstance(audio_input, tuple):
            return audio_input
        try:
            duration = 5  # seconds
            print(f"Recording with sounddevice (rate={self.sample_rate})...")
            audio_data = sd.rec(int(duration * self.sample_rate),
                                samplerate=self.sample_rate,
                                channels=1)
            sd.wait()  # block until the recording buffer is filled
            return (audio_data.flatten(), self.sample_rate)
        except Exception as e:
            raise RuntimeError(f"Sounddevice processing failed: {str(e)}") from e

    def _process_with_librosa(self, audio_input):
        """Load ``audio_input`` with librosa, resampled to ``self.sample_rate``.

        A file path is loaded directly; any other non-tuple input is treated as
        raw file bytes and staged through a temp file.

        Raises:
            RuntimeError: wrapping any failure (missing librosa, bad input).
        """
        if isinstance(audio_input, tuple):
            return audio_input

        temp_path = None
        try:
            import librosa

            if isinstance(audio_input, (str, os.PathLike)):
                source_path = audio_input
            else:
                # Handle other input types
                temp_path = self._write_temp_wav(audio_input)
                source_path = temp_path

            data, sr = librosa.load(source_path, sr=self.sample_rate)
            return (data, sr)
        except Exception as e:
            raise RuntimeError(f"Librosa processing failed: {str(e)}") from e
        finally:
            if temp_path is not None:
                self._discard_temp_file(temp_path)

    def _process_fallback(self, audio_input):
        """Return a tuple input as-is, else one second of uniform random noise.

        The noise is a placeholder so downstream code has something to inspect
        when no real backend is available; it does not reflect ``audio_input``.
        """
        if isinstance(audio_input, tuple):
            return audio_input
        return (np.random.random(int(self.sample_rate)), self.sample_rate)
# Modified Interface with Audio Debugging
def create_debug_interface():
    """Build the Gradio Blocks app used to debug audio capture and processing.

    The page offers a microphone recorder and a file upload; pressing the
    button runs the chosen audio through ``AudioProcessor`` and shows basic
    statistics (or the traceback on failure).

    Returns:
        The ``gr.Blocks`` instance; the caller is responsible for ``launch()``.
    """
    audio_processor = AudioProcessor()

    def process_audio_debug(mic_audio, upload_audio=None):
        """Process one input and return ``(audio, debug_text, status_text)``.

        The microphone recording wins when both inputs are present. Values are
        returned as a tuple in the same order as the ``outputs`` list of the
        click handler, because Gradio does not accept string-keyed dicts.
        """
        audio = mic_audio if mic_audio is not None else upload_audio
        try:
            processed = audio_processor.process_audio(audio)
            waveform = processed[0]
            sr = processed[1]

            # Create debug info
            debug_info = [
                f"Audio Backends Available: {', '.join(audio_processor.available_backends)}",
                f"Sample Rate: {sr} Hz",
                f"Audio Length: {len(waveform)/sr:.2f} seconds",
                f"Max Amplitude: {np.max(np.abs(waveform)):.4f}",
                f"Processing Time: {datetime.now().strftime('%H:%M:%S')}"
            ]
            # The original input is echoed back to the audio player unchanged.
            return (
                audio,
                "\n".join(debug_info),
                "✅ Successfully processed audio",
            )
        except Exception as e:
            # UI boundary: report the failure in the page instead of crashing.
            return (
                None,
                traceback.format_exc(),
                f"❌ Error: {str(e)}",
            )

    with gr.Blocks() as demo:
        gr.Markdown("## 🎤 Audio Debugging Interface")
        with gr.Row():
            with gr.Column():
                mic_input = gr.Audio(sources=["microphone"], type="filepath", label="Microphone Input")
                upload_input = gr.Audio(sources=["upload"], type="filepath", label="File Upload")
                test_button = gr.Button("Test Audio Processing")
            with gr.Column():
                audio_output = gr.Audio(label="Processed Audio")
                debug_output = gr.Textbox(label="Debug Information", lines=8)
                status_output = gr.Textbox(label="Processing Status")

        # Both inputs are wired in so an uploaded file is no longer ignored.
        test_button.click(
            fn=process_audio_debug,
            inputs=[mic_input, upload_input],
            outputs=[audio_output, debug_output, status_output]
        )

        gr.Markdown("### Troubleshooting Tips")
        # Content is kept flush-left so Markdown does not treat it as a code block.
        gr.Markdown("""
1. **Check Physical Connections**:
- Ensure headphones/mic are properly plugged in
- Try different USB ports if using USB headphones
2. **System Settings**:
- Make sure your headphones are set as default input device
- Check input volume levels
3. **Browser Permissions**:
- Refresh the page and allow microphone access when prompted
- Check browser settings if prompt doesn't appear
""")
    return demo
if __name__ == "__main__":
    # Build the debug interface and start the Gradio server.
    debug_interface = create_debug_interface()
    debug_interface.launch()