yunusajib committed on
Commit
3d0e791
·
verified ·
1 Parent(s): a5ec76f
Files changed (1) hide show
  1. app.py +100 -113
app.py CHANGED
@@ -2,49 +2,65 @@ import gradio as gr
2
  import numpy as np
3
  from datetime import datetime
4
  import traceback
5
- import sounddevice as sd # Alternative audio backend
6
  import tempfile
7
  import os
8
 
9
- # Enhanced Audio Processor Class
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  class AudioProcessor:
11
  def __init__(self):
12
  self.sample_rate = 16000
13
  self.available_backends = self.detect_audio_backends()
 
14
 
15
  def detect_audio_backends(self):
16
  backends = []
17
- # Test FFmpeg
18
- try:
19
- import ffmpeg
20
  backends.append('ffmpeg')
21
- except:
22
- pass
23
 
24
- # Test SoundDevice
25
- try:
26
- sd.check_input_settings()
27
- backends.append('sounddevice')
28
- except:
29
- pass
30
-
31
- # Test Librosa
32
- try:
33
- import librosa
34
  backends.append('librosa')
35
- except:
36
- pass
37
 
38
  return backends or ['numpy_fallback']
39
 
40
  def process_audio(self, audio_input):
 
 
 
 
41
  for backend in self.available_backends:
42
  try:
43
- if backend == 'ffmpeg':
 
44
  return self._process_with_ffmpeg(audio_input)
45
- elif backend == 'sounddevice':
46
  return self._process_with_sounddevice(audio_input)
47
- elif backend == 'librosa':
48
  return self._process_with_librosa(audio_input)
49
  else:
50
  return self._process_fallback(audio_input)
@@ -52,136 +68,107 @@ class AudioProcessor:
52
  print(f"Failed with {backend}: {str(e)}")
53
  continue
54
 
55
- raise Exception("All audio backends failed")
56
 
57
  def _process_with_ffmpeg(self, audio_input):
58
- # Your existing FFmpeg processing
59
- if isinstance(audio_input, tuple):
60
- return audio_input
61
  try:
62
- import ffmpeg
63
- # Process audio file with ffmpeg
64
  with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp:
65
- tmp.write(audio_input)
 
 
 
 
66
  tmp.flush()
 
67
  out, _ = (
68
  ffmpeg.input(tmp.name)
69
  .output('pipe:', format='f32le', ac=1, ar=self.sample_rate)
70
- .run(capture_stdout=True)
71
  )
 
 
 
 
72
  os.unlink(tmp.name)
73
- return (np.frombuffer(out, dtype=np.float32), self.sample_rate)
74
- except Exception as e:
75
- raise Exception(f"FFmpeg processing failed: {str(e)}")
76
-
77
  def _process_with_sounddevice(self, audio_input):
78
- # Process using sounddevice
79
  if isinstance(audio_input, tuple):
80
  return audio_input
81
- try:
82
- duration = 5 # seconds
83
- print(f"Recording with sounddevice (rate={self.sample_rate})...")
84
- audio_data = sd.rec(int(duration * self.sample_rate),
85
- samplerate=self.sample_rate,
86
- channels=1)
87
- sd.wait()
88
- return (audio_data.flatten(), self.sample_rate)
89
- except Exception as e:
90
- raise Exception(f"Sounddevice processing failed: {str(e)}")
91
 
92
  def _process_with_librosa(self, audio_input):
93
- # Process using librosa
94
- try:
95
- import librosa
96
- if isinstance(audio_input, tuple):
97
- return audio_input
98
- elif isinstance(audio_input, str):
99
- return librosa.load(audio_input, sr=self.sample_rate)
100
- else:
101
- # Handle other input types
102
- with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp:
103
- tmp.write(audio_input)
104
- tmp.flush()
105
- data, sr = librosa.load(tmp.name, sr=self.sample_rate)
106
- os.unlink(tmp.name)
107
- return (data, sr)
108
- except Exception as e:
109
- raise Exception(f"Librosa processing failed: {str(e)}")
110
 
111
  def _process_fallback(self, audio_input):
112
- # Simple numpy fallback with proper error handling
113
  if isinstance(audio_input, tuple):
114
  return audio_input
115
- return (np.random.random(16000), 16000) # Now properly closed
116
- # Modified Interface with Audio Debugging
117
- def create_debug_interface():
118
  audio_processor = AudioProcessor()
119
 
120
- def process_audio_debug(audio):
121
  try:
122
  processed = audio_processor.process_audio(audio)
123
- waveform = processed[0]
124
- sr = processed[1]
125
-
126
- # Create debug info
127
- debug_info = [
128
- f"Audio Backends Available: {', '.join(audio_processor.available_backends)}",
129
- f"Sample Rate: {sr} Hz",
130
- f"Audio Length: {len(waveform)/sr:.2f} seconds",
131
- f"Max Amplitude: {np.max(np.abs(waveform)):.4f}",
132
- f"Processing Time: {datetime.now().strftime('%H:%M:%S')}"
133
- ]
134
-
135
  return {
136
- "audio": audio,
137
- "debug": "\n".join(debug_info),
138
- "status": "✅ Successfully processed audio"
139
  }
140
  except Exception as e:
141
  return {
142
  "audio": None,
143
- "debug": traceback.format_exc(),
144
- "status": f"❌ Error: {str(e)}"
145
  }
146
 
147
  with gr.Blocks() as demo:
148
- gr.Markdown("## 🎤 Audio Debugging Interface")
149
 
150
  with gr.Row():
151
- with gr.Column():
152
- mic_input = gr.Audio(sources=["microphone"], type="filepath", label="Microphone Input")
153
- upload_input = gr.Audio(sources=["upload"], type="filepath", label="File Upload")
154
- test_button = gr.Button("Test Audio Processing")
155
-
156
- with gr.Column():
157
- audio_output = gr.Audio(label="Processed Audio")
158
- debug_output = gr.Textbox(label="Debug Information", lines=8)
159
- status_output = gr.Textbox(label="Processing Status")
160
-
161
- test_button.click(
162
- fn=process_audio_debug,
163
  inputs=[mic_input],
164
- outputs=[audio_output, debug_output, status_output]
165
  )
166
 
167
- gr.Markdown("### Troubleshooting Tips")
168
  gr.Markdown("""
169
- 1. **Check Physical Connections**:
170
- - Ensure headphones/mic are properly plugged in
171
- - Try different USB ports if using USB headphones
172
-
173
- 2. **System Settings**:
174
- - Make sure your headphones are set as default input device
175
- - Check input volume levels
176
-
177
- 3. **Browser Permissions**:
178
- - Refresh the page and allow microphone access when prompted
179
- - Check browser settings if prompt doesn't appear
180
  """)
181
 
182
  return demo
183
 
184
  if __name__ == "__main__":
185
- # First run the debug interface
186
- debug_interface = create_debug_interface()
187
- debug_interface.launch()
 
2
  import numpy as np
3
  from datetime import datetime
4
  import traceback
 
5
  import tempfile
6
  import os
7
 
8
+ # Optional imports with fallbacks
9
+ try:
10
+ import sounddevice as sd
11
+ SD_AVAILABLE = True
12
+ except ImportError:
13
+ SD_AVAILABLE = False
14
+
15
+ try:
16
+ import librosa
17
+ LIBROSA_AVAILABLE = True
18
+ except ImportError:
19
+ LIBROSA_AVAILABLE = False
20
+
21
+ try:
22
+ import ffmpeg
23
+ FFMPEG_AVAILABLE = True
24
+ except ImportError:
25
+ FFMPEG_AVAILABLE = False
26
+
class AudioProcessor:
    """Turn audio input into a ``(samples, sample_rate)`` tuple.

    The processor tries each detected backend in order and falls back to
    synthetic audio when none of them succeeds. It relies on the module-level
    optional imports (``ffmpeg``, ``sd``, ``librosa``) and their
    ``*_AVAILABLE`` flags.
    """

    # Length, in seconds, of a microphone recording and of the mock audio.
    RECORD_SECONDS = 3

    def __init__(self):
        self.sample_rate = 16000
        self.available_backends = self.detect_audio_backends()
        print(f"Available audio backends: {self.available_backends}")

    def detect_audio_backends(self):
        """Return the names of the usable backends, in priority order.

        Returns:
            A list drawn from ``'ffmpeg'``, ``'sounddevice'`` and
            ``'librosa'``, or ``['numpy_fallback']`` when none is usable.
        """
        backends = []

        if FFMPEG_AVAILABLE:
            backends.append('ffmpeg')

        if SD_AVAILABLE:
            # The package can be importable while no input device works, so
            # probe it; report the reason instead of swallowing it silently.
            try:
                sd.check_input_settings()
            except Exception as e:
                print(f"sounddevice input unavailable: {e}")
            else:
                backends.append('sounddevice')

        if LIBROSA_AVAILABLE:
            backends.append('librosa')

        return backends or ['numpy_fallback']

    def process_audio(self, audio_input):
        """Convert ``audio_input`` to ``(samples, sample_rate)``.

        Args:
            audio_input: An already-processed 2-tuple (returned unchanged),
                a file path, raw encoded bytes, or ``None``.

        Returns:
            A ``(samples, sample_rate)`` tuple. If every backend fails, the
            result comes from ``_process_fallback``.
        """
        # Already in (samples, sample_rate) form: nothing to do.
        if isinstance(audio_input, tuple) and len(audio_input) == 2:
            return audio_input

        handlers = {
            'ffmpeg': self._process_with_ffmpeg,
            'sounddevice': self._process_with_sounddevice,
            'librosa': self._process_with_librosa,
        }

        for backend in self.available_backends:
            # Unknown names (e.g. 'numpy_fallback') go to the fallback.
            handler = handlers.get(backend, self._process_fallback)
            print(f"Trying backend: {backend}")
            try:
                return handler(audio_input)
            except Exception as e:
                # One failing backend must not prevent trying the next one.
                print(f"Failed with {backend}: {str(e)}")

        return self._process_fallback(audio_input)

    def _process_with_ffmpeg(self, audio_input):
        """Decode a file path or encoded bytes to mono float32 with ffmpeg.

        Raises:
            TypeError: If ``audio_input`` is neither ``str`` nor bytes-like.
        """
        if isinstance(audio_input, str):
            # ffmpeg can read the file in place; no temporary copy needed.
            return self._decode_with_ffmpeg(audio_input)

        if not isinstance(audio_input, (bytes, bytearray)):
            raise TypeError(
                f"ffmpeg backend needs a file path or bytes, "
                f"got {type(audio_input).__name__}"
            )

        tmp_path = self._write_temp_audio(audio_input)
        try:
            return self._decode_with_ffmpeg(tmp_path)
        finally:
            self._remove_quietly(tmp_path)

    def _decode_with_ffmpeg(self, path):
        """Run ffmpeg on ``path`` and return ``(float32 samples, sample_rate)``."""
        out, _ = (
            ffmpeg.input(path)
            .output('pipe:', format='f32le', ac=1, ar=self.sample_rate)
            .run(capture_stdout=True, quiet=True)
        )
        return (np.frombuffer(out, dtype=np.float32), self.sample_rate)

    def _process_with_sounddevice(self, audio_input):
        """Record ``RECORD_SECONDS`` of mono audio from the host input device.

        Recording only makes sense when the caller supplied no audio. If a
        path or bytes were given, recording would silently replace that clip
        with unrelated microphone input, so the call is refused and
        ``process_audio`` moves on to the next backend.

        Raises:
            ValueError: If ``audio_input`` is not ``None`` (and not a tuple).
        """
        if isinstance(audio_input, tuple):
            return audio_input

        if audio_input is not None:
            raise ValueError(
                "sounddevice backend records live audio and cannot decode "
                "supplied input"
            )

        print("Recording with sounddevice...")
        audio_data = sd.rec(int(self.RECORD_SECONDS * self.sample_rate),
                            samplerate=self.sample_rate,
                            channels=1)
        sd.wait()  # block until the recording has finished
        return (audio_data.flatten(), self.sample_rate)

    def _process_with_librosa(self, audio_input):
        """Load a file path or encoded bytes with librosa at ``sample_rate``."""
        if isinstance(audio_input, tuple):
            return audio_input

        if isinstance(audio_input, str):
            return librosa.load(audio_input, sr=self.sample_rate)

        tmp_path = self._write_temp_audio(audio_input)
        try:
            audio_data, _ = librosa.load(tmp_path, sr=self.sample_rate)
        finally:
            # The temp file is created with delete=False, so remove it here.
            self._remove_quietly(tmp_path)
        return (audio_data, self.sample_rate)

    def _process_fallback(self, audio_input):
        """Return tuple input unchanged, otherwise random mock audio."""
        print("Using numpy fallback")
        if isinstance(audio_input, tuple):
            return audio_input
        # Mock audio so the rest of the pipeline still has data to work with.
        mock = np.random.random(self.sample_rate * self.RECORD_SECONDS)
        return (mock, self.sample_rate)

    @staticmethod
    def _write_temp_audio(data):
        """Write ``data`` to a closed temporary ``.wav`` file; return its path."""
        tmp = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
        try:
            # Close before returning so other readers can open the file.
            with tmp:
                tmp.write(data)
        except Exception:
            AudioProcessor._remove_quietly(tmp.name)
            raise
        return tmp.name

    @staticmethod
    def _remove_quietly(path):
        """Delete ``path``, ignoring errors such as the file already being gone."""
        try:
            os.unlink(path)
        except OSError:
            # Best-effort cleanup: a leftover temp file must not mask the
            # real result or error.
            pass
126
+
def create_interface():
    """Build the Gradio Blocks app used to test audio processing.

    Returns:
        The ``gr.Blocks`` instance; the caller is expected to ``launch()`` it.
    """
    audio_processor = AudioProcessor()

    def process_audio(audio):
        """Process one recording and report the outcome.

        Returns:
            A ``(status, backend)`` pair, in the same order as the
            ``outputs`` list of the click handler below. A dict keyed by
            strings would not map onto those two components.
        """
        try:
            audio_processor.process_audio(audio)
        except Exception as e:
            return f"Error: {str(e)}", "None"
        # NOTE(review): this lists every detected backend, not the one that
        # actually handled the request; the processor does not expose that.
        return "Success", str(audio_processor.available_backends)

    with gr.Blocks() as demo:
        gr.Markdown("## Audio Processing Test")

        with gr.Row():
            mic_input = gr.Audio(sources=["microphone"], type="filepath")
            test_btn = gr.Button("Process Audio")

        with gr.Row():
            status_out = gr.Textbox(label="Status")
            backend_out = gr.Textbox(label="Backend Used")

        test_btn.click(
            fn=process_audio,
            inputs=[mic_input],
            outputs=[status_out, backend_out]
        )

        gr.Markdown("### Installation Instructions")
        # Kept flush-left so Markdown does not render the text as an
        # indented code block.
        gr.Markdown("""
If you're missing audio backends, install them with:
```bash
pip install sounddevice librosa ffmpeg-python
```
""")

    return demo
171
 
if __name__ == "__main__":
    # Script entry point: build the Blocks app, then start the Gradio server.
    demo = create_interface()
    demo.launch()