Update app.py
Browse files
app.py
CHANGED
@@ -2,8 +2,10 @@ import gradio as gr
|
|
2 |
import whisper
|
3 |
import torch
|
4 |
import os
|
|
|
5 |
from pydub import AudioSegment, silence
|
6 |
from faster_whisper import WhisperModel # Import faster-whisper
|
|
|
7 |
|
8 |
# Mapping of model names to Whisper model sizes
|
9 |
MODELS = {
|
@@ -185,6 +187,45 @@ def remove_silence(audio_file, silence_threshold=-40, min_silence_len=500):
|
|
185 |
|
186 |
return output_path
|
187 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
188 |
def transcribe_audio(audio_file, language="Auto Detect", model_size="Faster Whisper Large v3"):
|
189 |
"""Transcribe the audio file."""
|
190 |
# Convert audio to 16kHz mono for better compatibility
|
@@ -276,6 +317,17 @@ with gr.Blocks() as demo:
|
|
276 |
silence_output = gr.Audio(label="Processed Audio (Silence Removed)", type="filepath")
|
277 |
silence_button = gr.Button("Remove Silence")
|
278 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
279 |
# Link buttons to functions
|
280 |
detect_button.click(detect_language, inputs=detect_audio_input, outputs=detect_language_output)
|
281 |
transcribe_button.click(
|
@@ -288,6 +340,11 @@ with gr.Blocks() as demo:
|
|
288 |
inputs=[silence_audio_input, silence_threshold_slider, min_silence_len_slider],
|
289 |
outputs=silence_output
|
290 |
)
|
|
|
|
|
|
|
|
|
|
|
291 |
|
292 |
# Launch the Gradio interface
|
293 |
demo.launch()
|
|
|
2 |
import whisper
|
3 |
import torch
|
4 |
import os
|
5 |
+
import numpy as np
|
6 |
from pydub import AudioSegment, silence
|
7 |
from faster_whisper import WhisperModel # Import faster-whisper
|
8 |
+
import noisereduce as nr # Import noisereduce for background noise removal
|
9 |
|
10 |
# Mapping of model names to Whisper model sizes
|
11 |
MODELS = {
|
|
|
187 |
|
188 |
return output_path
|
189 |
|
190 |
+
def remove_background_noise(audio_file, noise_reduce_level=0.5):
    """
    Remove background noise from an audio file using noisereduce.

    The file is decoded with pydub, passed through ``nr.reduce_noise`` and
    written back out as a WAV file with the source frame rate, sample width
    and channel count.

    Args:
        audio_file (str): Path to the input audio file.
        noise_reduce_level (float): Noise reduction level (0.0 to 1.0),
            forwarded to noisereduce as ``prop_decrease``. Default is 0.5.

    Returns:
        str: Path to the output audio file with background noise removed.
    """
    # NOTE(review): the output path is a fixed file name, so every call
    # overwrites the result of the previous one.
    output_path = "noise_reduced_audio.wav"

    # Load the audio file
    audio = AudioSegment.from_file(audio_file)
    sample_rate = audio.frame_rate
    channels = audio.channels

    # Convert audio to numpy array for noisereduce
    samples = np.array(audio.get_array_of_samples())
    sample_dtype = samples.dtype

    # Nothing to denoise: export the (empty) clip unchanged instead of
    # sending a zero-length signal into the noise reducer.
    if samples.size == 0:
        audio.export(output_path, format="wav")
        return output_path

    # pydub hands back multi-channel audio interleaved (L R L R ...).
    # Split it into one row per channel so each channel is denoised as its
    # own signal rather than as a single scrambled mono stream.
    if channels > 1:
        samples = samples.reshape((-1, channels)).T

    # Perform noise reduction
    reduced_noise = np.asarray(
        nr.reduce_noise(
            y=samples,
            sr=sample_rate,
            prop_decrease=noise_reduce_level,
        )
    )

    # Back to (frames, channels) so a C-order dump is interleaved again.
    if channels > 1:
        reduced_noise = reduced_noise.T

    # The raw bytes must match audio.sample_width; if the reducer returned a
    # different dtype (e.g. float), clip to the PCM range and cast back.
    if reduced_noise.dtype != sample_dtype:
        if np.issubdtype(sample_dtype, np.integer):
            limits = np.iinfo(sample_dtype)
            reduced_noise = np.clip(np.rint(reduced_noise), limits.min, limits.max)
        reduced_noise = reduced_noise.astype(sample_dtype)

    # Convert back to AudioSegment
    reduced_audio = AudioSegment(
        reduced_noise.tobytes(),
        frame_rate=sample_rate,
        sample_width=audio.sample_width,
        channels=channels,
    )

    # Export the processed audio
    reduced_audio.export(output_path, format="wav")

    return output_path
|
228 |
+
|
229 |
def transcribe_audio(audio_file, language="Auto Detect", model_size="Faster Whisper Large v3"):
|
230 |
"""Transcribe the audio file."""
|
231 |
# Convert audio to 16kHz mono for better compatibility
|
|
|
317 |
silence_output = gr.Audio(label="Processed Audio (Silence Removed)", type="filepath")
|
318 |
silence_button = gr.Button("Remove Silence")
|
319 |
|
320 |
+
with gr.Tab("Remove Background Noise"):
|
321 |
+
gr.Markdown("Upload an audio file to remove background noise.")
|
322 |
+
noise_audio_input = gr.Audio(type="filepath", label="Upload Audio File")
|
323 |
+
noise_reduce_slider = gr.Slider(
|
324 |
+
minimum=0.0, maximum=1.0, value=0.5, step=0.1,
|
325 |
+
label="Noise Reduction Level",
|
326 |
+
info="Higher values remove more noise."
|
327 |
+
)
|
328 |
+
noise_output = gr.Audio(label="Processed Audio (Noise Removed)", type="filepath")
|
329 |
+
noise_button = gr.Button("Remove Background Noise")
|
330 |
+
|
331 |
# Link buttons to functions
|
332 |
detect_button.click(detect_language, inputs=detect_audio_input, outputs=detect_language_output)
|
333 |
transcribe_button.click(
|
|
|
340 |
inputs=[silence_audio_input, silence_threshold_slider, min_silence_len_slider],
|
341 |
outputs=silence_output
|
342 |
)
|
343 |
+
noise_button.click(
|
344 |
+
remove_background_noise,
|
345 |
+
inputs=[noise_audio_input, noise_reduce_slider],
|
346 |
+
outputs=noise_output
|
347 |
+
)
|
348 |
|
349 |
# Launch the Gradio interface
|
350 |
demo.launch()
|