File size: 6,892 Bytes
d61cd9f
ef2be41
cf17ab8
d287980
cf17ab8
 
 
2754fd7
cf17ab8
 
d61cd9f
cf17ab8
 
d61cd9f
cf17ab8
 
 
d287980
cf17ab8
 
 
 
d287980
cf17ab8
d287980
cf17ab8
 
 
 
d287980
cf17ab8
 
 
 
 
 
d287980
cf17ab8
d287980
cf17ab8
 
 
 
 
 
 
 
 
3253f05
cf17ab8
 
 
 
 
 
d61cd9f
cf17ab8
 
 
 
a5ec76f
 
 
cf17ab8
 
 
a5ec76f
 
 
 
 
cf17ab8
a5ec76f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cf17ab8
 
a5ec76f
cf17ab8
 
a5ec76f
cf17ab8
 
 
 
 
d61cd9f
cf17ab8
 
 
d61cd9f
cf17ab8
 
 
 
 
 
 
 
d61cd9f
cf17ab8
 
 
 
 
d61cd9f
cf17ab8
 
 
 
 
d61cd9f
cf17ab8
 
d287980
cf17ab8
 
 
 
 
 
 
 
 
 
d287980
cf17ab8
 
 
 
 
d61cd9f
cf17ab8
d61cd9f
cf17ab8
 
 
 
 
 
 
 
 
 
 
d61cd9f
 
 
3baa918
 
cf17ab8
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
import gradio as gr
import numpy as np
from datetime import datetime
import traceback
import sounddevice as sd  # Alternative audio backend
import tempfile
import os

# Enhanced Audio Processor Class
class AudioProcessor:
    """Turn an audio input into a ``(waveform, sample_rate)`` pair.

    Several backends are probed at construction time; ``process_audio``
    tries them in order and returns the first result that succeeds.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self):
        # Target sample rate (Hz) requested from every backend.
        self.sample_rate = 16000
        # Ordered list of backend names that passed the probe.
        self.available_backends = self.detect_audio_backends()

    def detect_audio_backends(self):
        """Return the names of the backends that look usable.

        Each probe is best-effort: any ordinary failure simply means the
        backend is left out. ``except Exception`` (rather than a bare
        ``except``) is used so that KeyboardInterrupt/SystemExit still
        propagate.

        Returns:
            A list drawn from ``'ffmpeg'``, ``'sounddevice'`` and
            ``'librosa'`` in that order, or ``['numpy_fallback']`` when
            none of them is available.
        """
        backends = []

        # Probe the ffmpeg Python bindings (import only).
        try:
            import ffmpeg  # noqa: F401
            backends.append('ffmpeg')
        except Exception:
            pass

        # Probe sounddevice: check_input_settings() raises when no usable
        # input device exists.
        try:
            sd.check_input_settings()
            backends.append('sounddevice')
        except Exception:
            pass

        # Probe librosa (import only).
        try:
            import librosa  # noqa: F401
            backends.append('librosa')
        except Exception:
            pass

        return backends or ['numpy_fallback']

    def process_audio(self, audio_input):
        """Process ``audio_input`` with the first backend that succeeds.

        Args:
            audio_input: A ``(waveform, sample_rate)`` tuple (returned
                unchanged by every backend), a file path, or raw encoded
                audio bytes.

        Returns:
            A ``(waveform, sample_rate)`` tuple.

        Raises:
            RuntimeError: If every backend in ``available_backends`` fails.
                The last backend error is attached as ``__cause__``.
        """
        handlers = {
            'ffmpeg': self._process_with_ffmpeg,
            'sounddevice': self._process_with_sounddevice,
            'librosa': self._process_with_librosa,
        }
        last_error = None
        for backend in self.available_backends:
            # Any unrecognised name (e.g. 'numpy_fallback') uses the fallback.
            handler = handlers.get(backend, self._process_fallback)
            try:
                return handler(audio_input)
            except Exception as e:
                print(f"Failed with {backend}: {str(e)}")
                last_error = e

        raise RuntimeError("All audio backends failed") from last_error

    @staticmethod
    def _resolve_source(audio_input):
        """Map ``audio_input`` to a readable file path.

        Args:
            audio_input: A path (``str`` / ``os.PathLike``) or raw encoded
                audio (``bytes`` / ``bytearray`` / ``memoryview``).

        Returns:
            ``(path, is_temporary)``. When ``is_temporary`` is true the
            caller owns the file and must delete it.

        Raises:
            TypeError: For any other input type (including ``None``).
        """
        if isinstance(audio_input, (str, os.PathLike)):
            return os.fspath(audio_input), False
        if isinstance(audio_input, (bytes, bytearray, memoryview)):
            tmp = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
            try:
                # Close before returning so another reader/process can open
                # the file (required on Windows).
                with tmp:
                    tmp.write(audio_input)
            except BaseException:
                os.unlink(tmp.name)
                raise
            return tmp.name, True
        raise TypeError(
            f"Unsupported audio input type: {type(audio_input).__name__}"
        )

    def _process_with_ffmpeg(self, audio_input):
        """Decode to mono float32 at ``self.sample_rate`` via ffmpeg.

        Raises:
            RuntimeError: On any failure; the original error is chained.
        """
        if isinstance(audio_input, tuple):
            return audio_input
        try:
            import ffmpeg
            path, is_temporary = self._resolve_source(audio_input)
            try:
                out, _ = (
                    ffmpeg.input(path)
                    .output('pipe:', format='f32le', ac=1, ar=self.sample_rate)
                    .run(capture_stdout=True)
                )
            finally:
                # Always remove our temp copy, even when decoding fails.
                if is_temporary:
                    os.unlink(path)
            return (np.frombuffer(out, dtype=np.float32), self.sample_rate)
        except Exception as e:
            raise RuntimeError(f"FFmpeg processing failed: {str(e)}") from e

    def _process_with_sounddevice(self, audio_input, duration=5):
        """Record ``duration`` seconds of mono audio from the input device.

        NOTE: a non-tuple ``audio_input`` is ignored; this backend captures
        live audio rather than decoding the given input.

        Args:
            audio_input: Returned unchanged when it is a tuple.
            duration: Recording length in seconds (default 5).

        Raises:
            RuntimeError: On any failure; the original error is chained.
        """
        if isinstance(audio_input, tuple):
            return audio_input
        try:
            print(f"Recording with sounddevice (rate={self.sample_rate})...")
            audio_data = sd.rec(int(duration * self.sample_rate),
                                samplerate=self.sample_rate,
                                channels=1)
            sd.wait()  # block until the recording has finished
            return (audio_data.flatten(), self.sample_rate)
        except Exception as e:
            raise RuntimeError(f"Sounddevice processing failed: {str(e)}") from e

    def _process_with_librosa(self, audio_input):
        """Load audio at ``self.sample_rate`` via ``librosa.load``.

        Raises:
            RuntimeError: On any failure; the original error is chained.
        """
        if isinstance(audio_input, tuple):
            return audio_input
        try:
            import librosa
            path, is_temporary = self._resolve_source(audio_input)
            try:
                data, sr = librosa.load(path, sr=self.sample_rate)
            finally:
                # Always remove our temp copy, even when loading fails.
                if is_temporary:
                    os.unlink(path)
            return (data, sr)
        except Exception as e:
            raise RuntimeError(f"Librosa processing failed: {str(e)}") from e

    def _process_fallback(self, audio_input):
        """Last-resort backend used when no real backend is available.

        Returns a tuple input unchanged; for anything else it returns
        ``self.sample_rate`` samples of uniform random noise as a
        placeholder signal — the input is NOT decoded.
        """
        if isinstance(audio_input, tuple):
            return audio_input
        return (np.random.random(self.sample_rate), self.sample_rate)
# Modified Interface with Audio Debugging
def create_debug_interface():
    """Build the Gradio audio-debugging UI.

    The UI offers a microphone input and a file-upload input, a button that
    runs the selected audio through ``AudioProcessor.process_audio``, and
    three outputs: an audio player, a debug-information box and a status
    box.

    Returns:
        The constructed ``gr.Blocks`` app (not launched).
    """
    audio_processor = AudioProcessor()

    def process_audio_debug(mic_audio, upload_audio=None):
        """Process one input and return ``(audio, debug_text, status)``.

        The values are returned as a tuple in the same order as the
        ``outputs`` list of the click handler; a dict with string keys is
        not a valid return value for a Gradio event handler.
        """
        # Prefer the microphone recording; otherwise use the uploaded file.
        audio = mic_audio if mic_audio is not None else upload_audio
        try:
            processed = audio_processor.process_audio(audio)
            waveform = processed[0]
            sr = processed[1]

            # np.max raises on an empty array, so report 0 for empty audio.
            peak = np.max(np.abs(waveform)) if len(waveform) else 0.0

            # Create debug info
            debug_info = [
                f"Audio Backends Available: {', '.join(audio_processor.available_backends)}",
                f"Sample Rate: {sr} Hz",
                f"Audio Length: {len(waveform)/sr:.2f} seconds",
                f"Max Amplitude: {peak:.4f}",
                f"Processing Time: {datetime.now().strftime('%H:%M:%S')}"
            ]

            # NOTE: the player is fed the original input, not the decoded
            # waveform.
            return (
                audio,
                "\n".join(debug_info),
                "✅ Successfully processed audio",
            )
        except Exception as e:
            # UI boundary: surface the failure to the user instead of raising.
            return (
                None,
                traceback.format_exc(),
                f"❌ Error: {str(e)}",
            )

    with gr.Blocks() as demo:
        gr.Markdown("## 🎤 Audio Debugging Interface")

        with gr.Row():
            with gr.Column():
                mic_input = gr.Audio(sources=["microphone"], type="filepath", label="Microphone Input")
                upload_input = gr.Audio(sources=["upload"], type="filepath", label="File Upload")
                test_button = gr.Button("Test Audio Processing")

            with gr.Column():
                audio_output = gr.Audio(label="Processed Audio")
                debug_output = gr.Textbox(label="Debug Information", lines=8)
                status_output = gr.Textbox(label="Processing Status")

        # Both inputs are wired in so the upload widget is actually usable.
        test_button.click(
            fn=process_audio_debug,
            inputs=[mic_input, upload_input],
            outputs=[audio_output, debug_output, status_output]
        )

        gr.Markdown("### Troubleshooting Tips")
        gr.Markdown("""
        1. **Check Physical Connections**:
           - Ensure headphones/mic are properly plugged in
           - Try different USB ports if using USB headphones
           
        2. **System Settings**:
           - Make sure your headphones are set as default input device
           - Check input volume levels
           
        3. **Browser Permissions**:
           - Refresh the page and allow microphone access when prompted
           - Check browser settings if prompt doesn't appear
        """)

    return demo

if __name__ == "__main__":
    # Script entry point: build the debugging UI and start serving it.
    create_debug_interface().launch()