Athspi committed on
Commit
f028775
·
verified ·
1 Parent(s): dff986d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -0
app.py CHANGED
@@ -2,8 +2,10 @@ import gradio as gr
2
  import whisper
3
  import torch
4
  import os
 
5
  from pydub import AudioSegment, silence
6
  from faster_whisper import WhisperModel # Import faster-whisper
 
7
 
8
  # Mapping of model names to Whisper model sizes
9
  MODELS = {
@@ -185,6 +187,45 @@ def remove_silence(audio_file, silence_threshold=-40, min_silence_len=500):
185
 
186
  return output_path
187
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
  def transcribe_audio(audio_file, language="Auto Detect", model_size="Faster Whisper Large v3"):
189
  """Transcribe the audio file."""
190
  # Convert audio to 16kHz mono for better compatibility
@@ -276,6 +317,17 @@ with gr.Blocks() as demo:
276
  silence_output = gr.Audio(label="Processed Audio (Silence Removed)", type="filepath")
277
  silence_button = gr.Button("Remove Silence")
278
 
 
 
 
 
 
 
 
 
 
 
 
279
  # Link buttons to functions
280
  detect_button.click(detect_language, inputs=detect_audio_input, outputs=detect_language_output)
281
  transcribe_button.click(
@@ -288,6 +340,11 @@ with gr.Blocks() as demo:
288
  inputs=[silence_audio_input, silence_threshold_slider, min_silence_len_slider],
289
  outputs=silence_output
290
  )
 
 
 
 
 
291
 
292
  # Launch the Gradio interface
293
  demo.launch()
 
2
  import whisper
3
  import torch
4
  import os
5
+ import numpy as np
6
  from pydub import AudioSegment, silence
7
  from faster_whisper import WhisperModel # Import faster-whisper
8
+ import noisereduce as nr # Import noisereduce for background noise removal
9
 
10
  # Mapping of model names to Whisper model sizes
11
  MODELS = {
 
187
 
188
  return output_path
189
 
190
def remove_background_noise(audio_file, noise_reduce_level=0.5):
    """
    Remove background noise from an audio file using noisereduce.

    The audio is decoded with pydub, each channel is denoised separately,
    and the result is written out as a WAV file.

    Args:
        audio_file (str): Path to the input audio file.
        noise_reduce_level (float): Proportion of the detected noise to
            remove (0.0 to 1.0). Values outside that range are clamped.
            Default is 0.5.

    Returns:
        str: Path to the output WAV file with background noise reduced.

    Raises:
        ValueError: If no audio file was provided.
    """
    # Gradio passes None when the user clicks the button without uploading.
    if not audio_file:
        raise ValueError("No audio file provided.")

    # Keep the strength inside the range the slider and docstring promise.
    prop_decrease = min(max(float(noise_reduce_level), 0.0), 1.0)

    # Load the audio file
    audio = AudioSegment.from_file(audio_file)
    sample_rate = audio.frame_rate
    channels = audio.channels
    output_path = "noise_reduced_audio.wav"

    # Convert audio to a numpy array for noisereduce
    samples = np.array(audio.get_array_of_samples())
    pcm_dtype = samples.dtype

    # Nothing to denoise: export the (empty) audio unchanged instead of
    # handing a zero-length signal to the noise reducer.
    if samples.size == 0:
        audio.export(output_path, format="wav")
        return output_path

    # pydub interleaves multi-channel samples (L, R, L, R, ...). Treating
    # that stream as one mono signal corrupts the spectral analysis, so
    # split it into shape (channels, frames), which noisereduce accepts.
    if channels > 1:
        signal = samples.reshape((-1, channels)).T
    else:
        signal = samples

    # Perform noise reduction
    reduced_noise = nr.reduce_noise(
        y=signal,
        sr=sample_rate,
        prop_decrease=prop_decrease,
    )
    reduced_noise = np.asarray(reduced_noise)

    # The raw bytes must match audio.sample_width. If the library handed
    # back a different dtype (e.g. float), round, clip to the PCM range and
    # cast back so tobytes() yields correctly sized samples.
    if reduced_noise.dtype != pcm_dtype:
        pcm_range = np.iinfo(pcm_dtype)
        reduced_noise = np.clip(
            np.rint(reduced_noise), pcm_range.min, pcm_range.max
        ).astype(pcm_dtype)

    # Re-interleave the channels into the frame order pydub expects.
    if channels > 1:
        reduced_noise = np.ascontiguousarray(reduced_noise.T).reshape(-1)

    # Convert back to AudioSegment
    reduced_audio = AudioSegment(
        reduced_noise.tobytes(),
        frame_rate=sample_rate,
        sample_width=audio.sample_width,
        channels=channels,
    )

    # Export the processed audio
    reduced_audio.export(output_path, format="wav")

    return output_path
228
+
229
  def transcribe_audio(audio_file, language="Auto Detect", model_size="Faster Whisper Large v3"):
230
  """Transcribe the audio file."""
231
  # Convert audio to 16kHz mono for better compatibility
 
317
  silence_output = gr.Audio(label="Processed Audio (Silence Removed)", type="filepath")
318
  silence_button = gr.Button("Remove Silence")
319
 
320
+ with gr.Tab("Remove Background Noise"):
321
+ gr.Markdown("Upload an audio file to remove background noise.")
322
+ noise_audio_input = gr.Audio(type="filepath", label="Upload Audio File")
323
+ noise_reduce_slider = gr.Slider(
324
+ minimum=0.0, maximum=1.0, value=0.5, step=0.1,
325
+ label="Noise Reduction Level",
326
+ info="Higher values remove more noise."
327
+ )
328
+ noise_output = gr.Audio(label="Processed Audio (Noise Removed)", type="filepath")
329
+ noise_button = gr.Button("Remove Background Noise")
330
+
331
  # Link buttons to functions
332
  detect_button.click(detect_language, inputs=detect_audio_input, outputs=detect_language_output)
333
  transcribe_button.click(
 
340
  inputs=[silence_audio_input, silence_threshold_slider, min_silence_len_slider],
341
  outputs=silence_output
342
  )
343
+ noise_button.click(
344
+ remove_background_noise,
345
+ inputs=[noise_audio_input, noise_reduce_slider],
346
+ outputs=noise_output
347
+ )
348
 
349
  # Launch the Gradio interface
350
  demo.launch()