Athspi committed (verified)
Commit 06326e5 · Parent(s): c210b2c

Update app.py

Files changed (1)
  1. app.py +67 -13
app.py CHANGED

@@ -2,8 +2,10 @@ import gradio as gr
 import whisper
 import torch
 import os
+import numpy as np
 from pydub import AudioSegment, silence
 from faster_whisper import WhisperModel # Import faster-whisper
+import noisereduce as nr # Import noisereduce for background noise removal
 from spleeter.separator import Separator # Import Spleeter for music separation

 # Mapping of model names to Whisper model sizes
@@ -186,6 +188,45 @@ def remove_silence(audio_file, silence_threshold=-40, min_silence_len=500):

     return output_path

+def remove_background_noise(audio_file, noise_reduce_level=0.5):
+    """
+    Remove background noise from the audio file using AI-based noise reduction.
+
+    Args:
+        audio_file (str): Path to the input audio file.
+        noise_reduce_level (float): Noise reduction level (0.0 to 1.0). Default is 0.5.
+
+    Returns:
+        str: Path to the output audio file with background noise removed.
+    """
+    # Load the audio file
+    audio = AudioSegment.from_file(audio_file)
+
+    # Convert audio to numpy array for noisereduce
+    samples = np.array(audio.get_array_of_samples())
+    sample_rate = audio.frame_rate
+
+    # Perform noise reduction
+    reduced_noise = nr.reduce_noise(
+        y=samples,
+        sr=sample_rate,
+        prop_decrease=noise_reduce_level
+    )
+
+    # Convert back to AudioSegment
+    reduced_audio = AudioSegment(
+        reduced_noise.tobytes(),
+        frame_rate=sample_rate,
+        sample_width=audio.sample_width,
+        channels=audio.channels
+    )
+
+    # Export the processed audio
+    output_path = "noise_reduced_audio.wav"
+    reduced_audio.export(output_path, format="wav")
+
+    return output_path
+
 def remove_background_music(audio_file):
     """
     Remove background music from the audio file using Spleeter.
@@ -199,16 +240,13 @@ def remove_background_music(audio_file):
     # Initialize Spleeter separator (2 stems: vocals and accompaniment)
     separator = Separator('spleeter:2stems')

-    # Separate the audio into vocals and accompaniment
-    output_folder = "output"
-    separator.separate_to_file(audio_file, output_folder)
+    # Separate vocals from background music
+    separator.separate_to_file(audio_file, "output")

     # Load the separated vocals
-    base_name = os.path.splitext(os.path.basename(audio_file))[0]
-    vocals_path = os.path.join(output_folder, base_name, "vocals.wav")
+    output_path = os.path.join("output", os.path.basename(audio_file).replace(".wav", ""), "vocals.wav")

-    # Return the path to the vocals file
-    return vocals_path
+    return output_path

 def transcribe_audio(audio_file, language="Auto Detect", model_size="Faster Whisper Large v3"):
     """Transcribe the audio file."""
@@ -301,11 +339,22 @@ with gr.Blocks() as demo:
         silence_output = gr.Audio(label="Processed Audio (Silence Removed)", type="filepath")
         silence_button = gr.Button("Remove Silence")

+    with gr.Tab("Remove Background Noise"):
+        gr.Markdown("Upload an audio file to remove background noise.")
+        noise_audio_input = gr.Audio(type="filepath", label="Upload Audio File")
+        noise_reduce_slider = gr.Slider(
+            minimum=0.0, maximum=1.0, value=0.5, step=0.1,
+            label="Noise Reduction Level",
+            info="Higher values remove more noise."
+        )
+        noise_output = gr.Audio(label="Processed Audio (Noise Removed)", type="filepath")
+        noise_button = gr.Button("Remove Background Noise")
+
     with gr.Tab("Remove Background Music"):
         gr.Markdown("Upload an audio file to remove background music.")
-        bg_music_audio_input = gr.Audio(type="filepath", label="Upload Audio File")
-        bg_music_output = gr.Audio(label="Processed Audio (Background Music Removed)", type="filepath")
-        bg_music_button = gr.Button("Remove Background Music")
+        music_audio_input = gr.Audio(type="filepath", label="Upload Audio File")
+        music_output = gr.Audio(label="Processed Audio (Music Removed)", type="filepath")
+        music_button = gr.Button("Remove Background Music")

     # Link buttons to functions
     detect_button.click(detect_language, inputs=detect_audio_input, outputs=detect_language_output)
@@ -319,10 +368,15 @@ with gr.Blocks() as demo:
         inputs=[silence_audio_input, silence_threshold_slider, min_silence_len_slider],
         outputs=silence_output
     )
-    bg_music_button.click(
+    noise_button.click(
+        remove_background_noise,
+        inputs=[noise_audio_input, noise_reduce_slider],
+        outputs=noise_output
+    )
+    music_button.click(
         remove_background_music,
-        inputs=bg_music_audio_input,
-        outputs=bg_music_output
+        inputs=music_audio_input,
+        outputs=music_output
     )

     # Launch the Gradio interface
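For reference, the new remove_background_noise helper is a pydub -> numpy -> noisereduce -> pydub round-trip. Below is a minimal standalone sketch of the same pattern, assuming mono, 16-bit source audio; the function name, the mono/16-bit normalization, and the explicit int16 cast when rebuilding the AudioSegment are illustrative choices, not code from app.py.

```python
# Minimal sketch of the pydub -> numpy -> noisereduce -> pydub round-trip.
# Assumption: input is normalized to mono, 16-bit PCM before denoising.
import numpy as np
import noisereduce as nr
from pydub import AudioSegment

def denoise_sketch(path, prop_decrease=0.5):
    # Normalize to mono, 16-bit so the numpy conversion stays simple
    audio = AudioSegment.from_file(path).set_channels(1).set_sample_width(2)
    samples = np.array(audio.get_array_of_samples()).astype(np.float32)

    # prop_decrease (0.0 to 1.0) controls how strongly noise is attenuated
    reduced = nr.reduce_noise(y=samples, sr=audio.frame_rate, prop_decrease=prop_decrease)

    # Rebuild a 16-bit mono AudioSegment from the denoised samples
    out = AudioSegment(
        reduced.astype(np.int16).tobytes(),
        frame_rate=audio.frame_rate,
        sample_width=2,
        channels=1,
    )
    out_path = "denoised_sketch.wav"
    out.export(out_path, format="wav")
    return out_path
```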
 
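The Spleeter branch relies on separate_to_file writing its stems to <destination>/<track name>/vocals.wav and accompaniment.wav, which is the default layout for the 2stems model. Below is a minimal sketch of that lookup using the os.path.splitext approach from the previous revision, so non-.wav uploads also resolve; the helper name and the destination argument are illustrative.

```python
# Sketch of locating Spleeter's separated vocals, assuming the default
# separate_to_file layout: <destination>/<track name>/{vocals,accompaniment}.wav
import os
from spleeter.separator import Separator

def split_vocals_sketch(audio_file, destination="output"):
    separator = Separator('spleeter:2stems')  # 2 stems: vocals + accompaniment
    separator.separate_to_file(audio_file, destination)

    # Spleeter names the output folder after the input file (extension stripped)
    track_name = os.path.splitext(os.path.basename(audio_file))[0]
    return os.path.join(destination, track_name, "vocals.wav")
```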