Spaces:
Runtime error
Update app.py
app.py
CHANGED
@@ -2,10 +2,9 @@ import gradio as gr
 import whisper
 import torch
 import os
-import numpy as np
 from pydub import AudioSegment, silence
 from faster_whisper import WhisperModel  # Import faster-whisper
-
+from spleeter.separator import Separator  # Import Spleeter for music separation
 
 # Mapping of model names to Whisper model sizes
 MODELS = {
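
Note: the "Runtime error" badge at the top of the page is consistent with a missing dependency: `spleeter` has to be listed in the Space's requirements.txt (alongside gradio, whisper, torch, pydub, and faster-whisper) or this new import fails at startup. A minimal sketch of a guarded import; the error message text is illustrative, not part of the commit:

    # Sketch: make a missing spleeter install fail with an actionable message.
    try:
        from spleeter.separator import Separator
    except ImportError as exc:
        raise ImportError(
            "spleeter is required for background-music removal; "
            "add 'spleeter' to requirements.txt"
        ) from exc
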
@@ -187,44 +186,29 @@ def remove_silence(audio_file, silence_threshold=-40, min_silence_len=500):
 
     return output_path
 
-def remove_background_noise(audio_file, noise_reduce_level=0.5):
+def remove_background_music(audio_file):
     """
-    Remove background noise from the audio file.
+    Remove background music from the audio file using Spleeter.
 
     Args:
         audio_file (str): Path to the input audio file.
-        noise_reduce_level (float): Noise reduction level (0.0 to 1.0). Default is 0.5.
 
     Returns:
-        str: Path to the output audio file with background noise removed.
+        str: Path to the output audio file with background music removed.
     """
-    # Load the audio file
-    audio = AudioSegment.from_file(audio_file)
+    # Initialize Spleeter separator (2 stems: vocals and accompaniment)
+    separator = Separator('spleeter:2stems')
 
-    # Convert the audio to a numpy array
-    samples = np.array(audio.get_array_of_samples())
-    sample_rate = audio.frame_rate
+    # Separate the audio into vocals and accompaniment
+    output_folder = "output"
+    separator.separate_to_file(audio_file, output_folder)
 
-    # Reduce noise
-    reduced_noise = nr.reduce_noise(
-        y=samples,
-        sr=sample_rate,
-        prop_decrease=noise_reduce_level
-    )
-
-    # Convert back to AudioSegment
-    reduced_audio = AudioSegment(
-        reduced_noise.tobytes(),
-        frame_rate=sample_rate,
-        sample_width=audio.sample_width,
-        channels=audio.channels
-    )
+    # Load the separated vocals
+    base_name = os.path.splitext(os.path.basename(audio_file))[0]
+    vocals_path = os.path.join(output_folder, base_name, "vocals.wav")
 
-    # Export the processed audio
-    output_path = "noise_removed_audio.wav"
-    reduced_audio.export(output_path, format="wav")
-
-    return output_path
+    # Return the path to the vocals file
+    return vocals_path
 
 def transcribe_audio(audio_file, language="Auto Detect", model_size="Faster Whisper Large v3"):
     """Transcribe the audio file."""
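
Note: Spleeter's `separate_to_file` writes one folder per input file under the destination directory, so the vocals land at `output/<input-name>/vocals.wav` with the accompaniment beside them as `accompaniment.wav`; the function above returns only the vocals. A quick standalone check, assuming `spleeter` is installed, a local `sample.mp3` exists (hypothetical file), and the function can be imported without side effects:

    # Sketch: exercise the new function outside the Gradio UI.
    from app import remove_background_music  # assumes this module is app.py

    vocals_path = remove_background_music("sample.mp3")
    print(vocals_path)  # expected: output/sample/vocals.wav
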
@@ -317,16 +301,11 @@ with gr.Blocks() as demo:
         silence_output = gr.Audio(label="Processed Audio (Silence Removed)", type="filepath")
         silence_button = gr.Button("Remove Silence")
 
-    with gr.Tab("Remove Background Noise"):
-        gr.Markdown("Upload an audio file to remove background noise.")
-        noise_audio_input = gr.Audio(type="filepath", label="Upload Audio File")
-        noise_level_slider = gr.Slider(
-            minimum=0.0, maximum=1.0, value=0.5,
-            label="Noise Reduction Level",
-            info="Higher values remove more noise."
-        )
-        noise_output = gr.Audio(label="Processed Audio (Noise Removed)", type="filepath")
-        noise_button = gr.Button("Remove Background Noise")
+    with gr.Tab("Remove Background Music"):
+        gr.Markdown("Upload an audio file to remove background music.")
+        bg_music_audio_input = gr.Audio(type="filepath", label="Upload Audio File")
+        bg_music_output = gr.Audio(label="Processed Audio (Background Music Removed)", type="filepath")
+        bg_music_button = gr.Button("Remove Background Music")
 
     # Link buttons to functions
     detect_button.click(detect_language, inputs=detect_audio_input, outputs=detect_language_output)
@@ -340,10 +319,10 @@ with gr.Blocks() as demo:
         inputs=[silence_audio_input, silence_threshold_slider, min_silence_len_slider],
         outputs=silence_output
     )
-    noise_button.click(
-        remove_background_noise,
-        inputs=[noise_audio_input, noise_level_slider],
-        outputs=noise_output
+    bg_music_button.click(
+        remove_background_music,
+        inputs=bg_music_audio_input,
+        outputs=bg_music_output
     )
 
 # Launch the Gradio interface
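
Note: the new tab follows the same wiring pattern as the existing ones: one input component, one output component, and a button whose `.click` maps directly onto the processing function. A minimal self-contained sketch of just this tab, reusing the commit's component names (the standalone layout and launch call are illustrative):

    # Sketch: the "Remove Background Music" tab in isolation.
    import gradio as gr

    from app import remove_background_music  # assumes the function is importable

    with gr.Blocks() as demo:
        with gr.Tab("Remove Background Music"):
            gr.Markdown("Upload an audio file to remove background music.")
            bg_music_audio_input = gr.Audio(type="filepath", label="Upload Audio File")
            bg_music_output = gr.Audio(label="Processed Audio (Background Music Removed)", type="filepath")
            bg_music_button = gr.Button("Remove Background Music")

        bg_music_button.click(
            remove_background_music,
            inputs=bg_music_audio_input,
            outputs=bg_music_output
        )

    demo.launch()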