Update app.py
Browse files
app.py
CHANGED
@@ -2,8 +2,10 @@ import gradio as gr
|
|
2 |
import whisper
|
3 |
import torch
|
4 |
import os
|
|
|
5 |
from pydub import AudioSegment, silence
|
6 |
from faster_whisper import WhisperModel # Import faster-whisper
|
|
|
7 |
from spleeter.separator import Separator # Import Spleeter for music separation
|
8 |
|
9 |
# Mapping of model names to Whisper model sizes
|
@@ -186,6 +188,45 @@ def remove_silence(audio_file, silence_threshold=-40, min_silence_len=500):
|
|
186 |
|
187 |
return output_path
|
188 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
189 |
def remove_background_music(audio_file):
|
190 |
"""
|
191 |
Remove background music from the audio file using Spleeter.
|
@@ -199,16 +240,13 @@ def remove_background_music(audio_file):
|
|
199 |
# Initialize Spleeter separator (2 stems: vocals and accompaniment)
|
200 |
separator = Separator('spleeter:2stems')
|
201 |
|
202 |
-
# Separate
|
203 |
-
|
204 |
-
separator.separate_to_file(audio_file, output_folder)
|
205 |
|
206 |
# Load the separated vocals
|
207 |
-
|
208 |
-
vocals_path = os.path.join(output_folder, base_name, "vocals.wav")
|
209 |
|
210 |
-
|
211 |
-
return vocals_path
|
212 |
|
213 |
def transcribe_audio(audio_file, language="Auto Detect", model_size="Faster Whisper Large v3"):
|
214 |
"""Transcribe the audio file."""
|
@@ -301,11 +339,22 @@ with gr.Blocks() as demo:
|
|
301 |
silence_output = gr.Audio(label="Processed Audio (Silence Removed)", type="filepath")
|
302 |
silence_button = gr.Button("Remove Silence")
|
303 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
304 |
with gr.Tab("Remove Background Music"):
|
305 |
gr.Markdown("Upload an audio file to remove background music.")
|
306 |
-
|
307 |
-
|
308 |
-
|
309 |
|
310 |
# Link buttons to functions
|
311 |
detect_button.click(detect_language, inputs=detect_audio_input, outputs=detect_language_output)
|
@@ -319,10 +368,15 @@ with gr.Blocks() as demo:
|
|
319 |
inputs=[silence_audio_input, silence_threshold_slider, min_silence_len_slider],
|
320 |
outputs=silence_output
|
321 |
)
|
322 |
-
|
|
|
|
|
|
|
|
|
|
|
323 |
remove_background_music,
|
324 |
-
inputs=
|
325 |
-
outputs=
|
326 |
)
|
327 |
|
328 |
# Launch the Gradio interface
|
|
|
2 |
import whisper
|
3 |
import torch
|
4 |
import os
|
5 |
+
import numpy as np
|
6 |
from pydub import AudioSegment, silence
|
7 |
from faster_whisper import WhisperModel # Import faster-whisper
|
8 |
+
import noisereduce as nr # Import noisereduce for background noise removal
|
9 |
from spleeter.separator import Separator # Import Spleeter for music separation
|
10 |
|
11 |
# Mapping of model names to Whisper model sizes
|
|
|
188 |
|
189 |
return output_path
|
190 |
|
191 |
+
def remove_background_noise(audio_file, noise_reduce_level=0.5):
    """
    Remove background noise from the audio file using noisereduce.

    The audio is decoded with pydub, passed through ``nr.reduce_noise`` and
    written back out as a WAV file with the same frame rate, sample width and
    channel count as the input.

    Args:
        audio_file (str): Path to the input audio file.
        noise_reduce_level (float): Noise reduction level (0.0 to 1.0),
            forwarded as ``prop_decrease``. Default is 0.5.

    Returns:
        str: Path to the output audio file with background noise removed.
    """
    # Load the audio file
    audio = AudioSegment.from_file(audio_file)
    sample_rate = audio.frame_rate
    channels = audio.channels

    # Convert audio to a numpy array for noisereduce
    samples = np.array(audio.get_array_of_samples())
    sample_dtype = samples.dtype

    if samples.size == 0:
        # Nothing to denoise; export the (empty) audio unchanged.
        reduced_audio = audio
    else:
        # pydub interleaves channels (L, R, L, R, ...). Feeding that flat
        # array to noisereduce would treat stereo as a single mono signal,
        # so split it into the (channels, frames) layout it expects.
        if channels > 1:
            signal = samples.reshape((-1, channels)).T
        else:
            signal = samples

        # Perform noise reduction
        reduced_noise = np.asarray(
            nr.reduce_noise(
                y=signal,
                sr=sample_rate,
                prop_decrease=noise_reduce_level,
            )
        )

        # Re-interleave the channels so the byte layout matches pydub's.
        if channels > 1:
            reduced_noise = reduced_noise.T.reshape(-1)

        # The raw bytes must match audio.sample_width, so make sure the
        # result is in the original sample dtype before serialising it.
        if reduced_noise.dtype != sample_dtype:
            if np.issubdtype(sample_dtype, np.integer):
                limits = np.iinfo(sample_dtype)
                reduced_noise = np.clip(
                    np.rint(reduced_noise), limits.min, limits.max
                )
            reduced_noise = reduced_noise.astype(sample_dtype)

        # Convert back to AudioSegment
        reduced_audio = AudioSegment(
            reduced_noise.tobytes(),
            frame_rate=sample_rate,
            sample_width=audio.sample_width,
            channels=channels,
        )

    # Export the processed audio
    output_path = "noise_reduced_audio.wav"
    reduced_audio.export(output_path, format="wav")

    return output_path
|
229 |
+
|
230 |
def remove_background_music(audio_file):
|
231 |
"""
|
232 |
Remove background music from the audio file using Spleeter.
|
|
|
240 |
# Initialize Spleeter separator (2 stems: vocals and accompaniment)
|
241 |
separator = Separator('spleeter:2stems')
|
242 |
|
243 |
+
# Separate vocals from background music
|
244 |
+
separator.separate_to_file(audio_file, "output")
|
|
|
245 |
|
246 |
# Load the separated vocals
|
247 |
+
output_path = os.path.join("output", os.path.basename(audio_file).replace(".wav", ""), "vocals.wav")
|
|
|
248 |
|
249 |
+
return output_path
|
|
|
250 |
|
251 |
def transcribe_audio(audio_file, language="Auto Detect", model_size="Faster Whisper Large v3"):
|
252 |
"""Transcribe the audio file."""
|
|
|
339 |
silence_output = gr.Audio(label="Processed Audio (Silence Removed)", type="filepath")
|
340 |
silence_button = gr.Button("Remove Silence")
|
341 |
|
342 |
+
with gr.Tab("Remove Background Noise"):
|
343 |
+
gr.Markdown("Upload an audio file to remove background noise.")
|
344 |
+
noise_audio_input = gr.Audio(type="filepath", label="Upload Audio File")
|
345 |
+
noise_reduce_slider = gr.Slider(
|
346 |
+
minimum=0.0, maximum=1.0, value=0.5, step=0.1,
|
347 |
+
label="Noise Reduction Level",
|
348 |
+
info="Higher values remove more noise."
|
349 |
+
)
|
350 |
+
noise_output = gr.Audio(label="Processed Audio (Noise Removed)", type="filepath")
|
351 |
+
noise_button = gr.Button("Remove Background Noise")
|
352 |
+
|
353 |
with gr.Tab("Remove Background Music"):
|
354 |
gr.Markdown("Upload an audio file to remove background music.")
|
355 |
+
music_audio_input = gr.Audio(type="filepath", label="Upload Audio File")
|
356 |
+
music_output = gr.Audio(label="Processed Audio (Music Removed)", type="filepath")
|
357 |
+
music_button = gr.Button("Remove Background Music")
|
358 |
|
359 |
# Link buttons to functions
|
360 |
detect_button.click(detect_language, inputs=detect_audio_input, outputs=detect_language_output)
|
|
|
368 |
inputs=[silence_audio_input, silence_threshold_slider, min_silence_len_slider],
|
369 |
outputs=silence_output
|
370 |
)
|
371 |
+
noise_button.click(
|
372 |
+
remove_background_noise,
|
373 |
+
inputs=[noise_audio_input, noise_reduce_slider],
|
374 |
+
outputs=noise_output
|
375 |
+
)
|
376 |
+
music_button.click(
|
377 |
remove_background_music,
|
378 |
+
inputs=music_audio_input,
|
379 |
+
outputs=music_output
|
380 |
)
|
381 |
|
382 |
# Launch the Gradio interface
|