yunusajib's picture
update code
a5ec76f verified
raw
history blame
6.89 kB
import gradio as gr
import numpy as np
from datetime import datetime
import traceback
import sounddevice as sd # Alternative audio backend
import tempfile
import os
# Enhanced Audio Processor Class
class AudioProcessor:
    """Decode audio by trying each usable backend in turn.

    Attributes:
        sample_rate: Target sample rate in Hz that every backend is asked
            to produce (16000).
        available_backends: Backend names found by
            ``detect_audio_backends()``, in the order they are attempted.
    """

    def __init__(self):
        self.sample_rate = 16000
        self.available_backends = self.detect_audio_backends()

    def detect_audio_backends(self):
        """Return the names of the backends that appear usable.

        Each backend is probed independently; a probe that raises simply
        means the backend is skipped.

        Returns:
            A list drawn from ``'ffmpeg'``, ``'sounddevice'`` and
            ``'librosa'`` (in that order), or ``['numpy_fallback']`` when
            none of them could be used.
        """

        def _probe_ffmpeg():
            import ffmpeg  # noqa: F401

        def _probe_librosa():
            import librosa  # noqa: F401

        probes = (
            ('ffmpeg', _probe_ffmpeg),
            # ``sd`` is looked up lazily so a missing or broken sounddevice
            # install only disables this one backend.
            ('sounddevice', lambda: sd.check_input_settings()),
            ('librosa', _probe_librosa),
        )

        backends = []
        for name, probe in probes:
            try:
                probe()
            except Exception:
                # An unusable backend is an expected outcome of probing.
                # ``Exception`` (not a bare ``except``) keeps Ctrl-C and
                # SystemExit working during start-up.
                continue
            backends.append(name)
        return backends or ['numpy_fallback']

    def process_audio(self, audio_input):
        """Process ``audio_input`` with the first backend that succeeds.

        Args:
            audio_input: A tuple (returned unchanged by every backend), a
                file path, or raw file bytes.

        Returns:
            A ``(waveform, sample_rate)`` tuple, or ``audio_input`` itself
            when it is already a tuple.

        Raises:
            Exception: If every backend in ``available_backends`` fails.
                The last backend error is attached as ``__cause__``.
        """
        handlers = {
            'ffmpeg': self._process_with_ffmpeg,
            'sounddevice': self._process_with_sounddevice,
            'librosa': self._process_with_librosa,
        }
        last_error = None
        for backend in self.available_backends:
            # Any name without a dedicated handler uses the numpy fallback.
            handler = handlers.get(backend, self._process_fallback)
            try:
                return handler(audio_input)
            except Exception as e:
                print(f"Failed with {backend}: {str(e)}")
                last_error = e
        raise Exception("All audio backends failed") from last_error

    @staticmethod
    def _source_path(audio_input):
        """Return ``(path, is_temporary)`` for a path or raw-bytes input.

        Paths are passed through untouched. Anything else is written to a
        fresh ``.wav`` temp file which the caller must delete. If the write
        fails the temp file is removed before the error propagates.
        """
        if isinstance(audio_input, (str, os.PathLike)):
            return os.fspath(audio_input), False

        tmp = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
        try:
            # Leaving the ``with`` closes the file, so the data is flushed
            # and the path can be reopened (or unlinked) on every platform.
            with tmp:
                tmp.write(audio_input)
        except Exception:
            AudioProcessor._discard(tmp.name)
            raise
        return tmp.name, True

    @staticmethod
    def _discard(path):
        """Best-effort removal of a temp file created by ``_source_path``."""
        try:
            os.unlink(path)
        except OSError:
            # Cleanup must never mask the real decoding result or error.
            pass

    def _process_with_ffmpeg(self, audio_input):
        """Decode to mono float32 PCM at ``sample_rate`` via ffmpeg-python.

        Args:
            audio_input: Tuple (returned as-is), file path, or file bytes.

        Returns:
            ``(numpy float32 array, sample_rate)``.

        Raises:
            Exception: Wrapping any import, I/O or ffmpeg failure.
        """
        if isinstance(audio_input, tuple):
            return audio_input

        temp_path = None
        try:
            import ffmpeg

            source, is_temp = self._source_path(audio_input)
            if is_temp:
                temp_path = source
            out, _ = (
                ffmpeg.input(source)
                .output('pipe:', format='f32le', ac=1, ar=self.sample_rate)
                .run(capture_stdout=True)
            )
            return (np.frombuffer(out, dtype=np.float32), self.sample_rate)
        except Exception as e:
            raise Exception(f"FFmpeg processing failed: {str(e)}") from e
        finally:
            if temp_path is not None:
                self._discard(temp_path)

    def _process_with_sounddevice(self, audio_input):
        """Return a tuple input as-is, otherwise capture a new recording.

        Note that a non-tuple ``audio_input`` is not read: this backend
        records ``duration`` seconds of single-channel audio with
        ``sd.rec`` at ``sample_rate`` instead.

        Returns:
            ``(flattened recording, sample_rate)`` or the input tuple.

        Raises:
            Exception: Wrapping any sounddevice failure.
        """
        if isinstance(audio_input, tuple):
            return audio_input
        try:
            duration = 5  # seconds
            print(f"Recording with sounddevice (rate={self.sample_rate})...")
            audio_data = sd.rec(int(duration * self.sample_rate),
                                samplerate=self.sample_rate,
                                channels=1)
            sd.wait()
            return (audio_data.flatten(), self.sample_rate)
        except Exception as e:
            raise Exception(f"Sounddevice processing failed: {str(e)}") from e

    def _process_with_librosa(self, audio_input):
        """Load audio with ``librosa.load`` resampled to ``sample_rate``.

        Args:
            audio_input: Tuple (returned as-is), file path, or file bytes.

        Returns:
            The ``(data, sample_rate)`` pair produced by ``librosa.load``.

        Raises:
            Exception: Wrapping any import, I/O or decoding failure.
        """
        # Tuples need no decoding, so they pass through without requiring
        # librosa to be importable (matching the other backends).
        if isinstance(audio_input, tuple):
            return audio_input

        temp_path = None
        try:
            import librosa

            source, is_temp = self._source_path(audio_input)
            if is_temp:
                temp_path = source
            data, sr = librosa.load(source, sr=self.sample_rate)
            return (data, sr)
        except Exception as e:
            raise Exception(f"Librosa processing failed: {str(e)}") from e
        finally:
            if temp_path is not None:
                self._discard(temp_path)

    def _process_fallback(self, audio_input):
        """Last-resort backend used when no real decoder is available.

        Returns:
            The input tuple unchanged, or one second of uniform random
            samples at ``sample_rate`` as a placeholder signal.
        """
        if isinstance(audio_input, tuple):
            return audio_input
        # Derive both the length and the reported rate from the instance
        # setting so they cannot drift apart from the other backends.
        return (np.random.random(self.sample_rate), self.sample_rate)
# Modified Interface with Audio Debugging
def create_debug_interface():
    """Build the Gradio Blocks app used to debug audio input processing.

    The page offers a microphone recorder and a file upload (both delivered
    to the callback as file paths), a button that runs the selected audio
    through ``AudioProcessor.process_audio``, and three outputs: the audio,
    a debug report, and a one-line status.

    Returns:
        The assembled ``gr.Blocks`` instance; the caller launches it.
    """
    audio_processor = AudioProcessor()

    def process_audio_debug(mic_audio, upload_audio=None):
        """Process one audio input and report on the result.

        Args:
            mic_audio: Value of the microphone component (may be ``None``).
            upload_audio: Value of the upload component, used only when no
                microphone recording is present.

        Returns:
            A ``(audio, debug_text, status_text)`` tuple, positionally
            matching the three output components of the click handler.
        """
        audio = mic_audio if mic_audio is not None else upload_audio
        try:
            processed = audio_processor.process_audio(audio)
            waveform = processed[0]
            sr = processed[1]

            # np.max raises on an empty array, so report silence as 0.
            peak = np.max(np.abs(waveform)) if len(waveform) else 0.0

            debug_info = [
                f"Audio Backends Available: {', '.join(audio_processor.available_backends)}",
                f"Sample Rate: {sr} Hz",
                f"Audio Length: {len(waveform)/sr:.2f} seconds",
                f"Max Amplitude: {peak:.4f}",
                f"Processing Time: {datetime.now().strftime('%H:%M:%S')}"
            ]
            # Gradio maps a returned tuple onto ``outputs`` by position; a
            # dict is only accepted when its keys are the components
            # themselves, so string keys cannot be used here.
            return (
                audio,
                "\n".join(debug_info),
                "✅ Successfully processed audio",
            )
        except Exception as e:
            # UI boundary: show the failure in the page instead of raising.
            return (
                None,
                traceback.format_exc(),
                f"❌ Error: {str(e)}",
            )

    with gr.Blocks() as demo:
        # NOTE(review): the characters before "Audio" look like a
        # mis-encoded emoji; the intended glyph is ambiguous, so the
        # heading text is left exactly as found - confirm and correct.
        gr.Markdown("## 🎀 Audio Debugging Interface")
        with gr.Row():
            with gr.Column():
                mic_input = gr.Audio(sources=["microphone"], type="filepath", label="Microphone Input")
                upload_input = gr.Audio(sources=["upload"], type="filepath", label="File Upload")
                test_button = gr.Button("Test Audio Processing")
            with gr.Column():
                audio_output = gr.Audio(label="Processed Audio")
                debug_output = gr.Textbox(label="Debug Information", lines=8)
                status_output = gr.Textbox(label="Processing Status")
        test_button.click(
            fn=process_audio_debug,
            # Both sources are wired in so an uploaded file is processed
            # when nothing was recorded from the microphone.
            inputs=[mic_input, upload_input],
            outputs=[audio_output, debug_output, status_output]
        )
        gr.Markdown("### Troubleshooting Tips")
        gr.Markdown("""
1. **Check Physical Connections**:
- Ensure headphones/mic are properly plugged in
- Try different USB ports if using USB headphones
2. **System Settings**:
- Make sure your headphones are set as default input device
- Check input volume levels
3. **Browser Permissions**:
- Refresh the page and allow microphone access when prompted
- Check browser settings if prompt doesn't appear
""")
    return demo
if __name__ == "__main__":
    # Script entry point: assemble the debug UI and start serving it.
    create_debug_interface().launch()