|
|
|
import gradio as gr |
|
import numpy as np |
|
|
|
from utils.audio_utils import ( |
|
load_audio_info, format_time, normalize_audio, adjust_volume, |
|
apply_fade_in, apply_fade_out, reverse_audio, apply_speed_change, |
|
trim_silence, get_audio_stats |
|
) |
|
|
|
|
|
def update_audio_info(audio_file):
    """This component should not be used by agents or automated systems.

    Produce the two text lines displayed for the currently uploaded file.

    Args:
        audio_file: Path of the uploaded audio file, or None when nothing
            has been uploaded.

    Returns:
        tuple: ``(duration_text, stats_text)``. Placeholder strings are
        returned when no file is given or when ``load_audio_info`` yields
        no audio data.
    """
    stats_placeholder = "Audio stats: N/A"

    if audio_file is None:
        return "No file uploaded", stats_placeholder

    samples, rate, length = load_audio_info(audio_file)
    if samples is None:
        return "β Could not read audio file", stats_placeholder

    levels = get_audio_stats(samples, rate)

    # NOTE(review): the leading glyphs in these strings look like mis-decoded
    # emoji; they are kept exactly as found - confirm the file's encoding.
    return (
        f"π File duration: {format_time(length)} ({length:.1f} seconds)",
        f"π΅ Sample rate: {rate:,} Hz | Peak: {levels['peak_level_db']:.1f} dB | RMS: {levels['rms_level_db']:.1f} dB",
    )
|
|
|
|
|
def apply_normalization(audio_file: str, target_level: float) -> tuple[tuple[int, any] | None, str]: |
|
"""Apply audio normalization to adjust the peak level of an audio file. |
|
|
|
This function loads an audio file and applies normalization to adjust the peak |
|
audio level to a specified target level in decibels (dB). It provides before |
|
and after statistics to show the effect of the normalization process. |
|
|
|
Args: |
|
audio_file (str): Full url to the input audio file to be normalized |
|
(supports MP3, WAV, M4A, FLAC, OGG, and other common formats) |
|
target_level (float): Target peak level in decibels (dB) for normalization |
|
(typical values: -3dB to -12dB for optimal loudness, |
|
negative values reduce volume, positive values increase) |
|
|
|
Returns: |
|
tuple: A tuple containing: |
|
- First element: Either a tuple of (sample_rate: int, normalized_audio_data: array) |
|
for the normalized audio result, or None if an error occurred |
|
- Second element: A status message string showing before/after peak levels |
|
and success/error information |
|
|
|
Example: |
|
result, status = apply_normalization("url/to/audio.mp3", -6.0) |
|
if result is not None: |
|
sample_rate, audio_data = result |
|
print(f"Normalization successful: {status}") |
|
else: |
|
print(f"Error: {status}") |
|
|
|
Note: |
|
- Target level is specified in decibels (dB) |
|
- Common target levels: -6dB (moderate), -3dB (loud), -12dB (quiet) |
|
- Positive target levels will amplify audio and may cause clipping |
|
- Negative target levels will reduce audio volume |
|
- Function preserves original sample rate and audio format |
|
- Returns comparison statistics showing original vs normalized peak levels |
|
""" |
|
if audio_file is None: |
|
return None, "Please upload an audio file first." |
|
|
|
try: |
|
audio_data, sample_rate, _ = load_audio_info(audio_file) |
|
if audio_data is None: |
|
return None, "β Could not load audio file." |
|
|
|
|
|
normalized_audio = normalize_audio(audio_data, target_level) |
|
|
|
|
|
original_stats = get_audio_stats(audio_data, sample_rate) |
|
new_stats = get_audio_stats(normalized_audio, sample_rate) |
|
|
|
status = f"β
Normalization applied! Peak: {original_stats['peak_level_db']:.1f}dB β {new_stats['peak_level_db']:.1f}dB" |
|
|
|
return (sample_rate, normalized_audio), status |
|
|
|
except Exception as e: |
|
return None, f"β Error applying normalization: {str(e)}" |
|
|
|
|
|
def apply_volume_adjustment(audio_file: str, gain_db: float) -> tuple[tuple[int, any] | None, str]: |
|
"""Apply volume adjustment to an audio file using gain in decibels. |
|
|
|
This function loads an audio file and applies a volume adjustment by the specified |
|
gain amount in decibels. Positive values increase volume, negative values decrease |
|
volume. The function also detects potential audio clipping when volume is increased. |
|
|
|
Args: |
|
audio_file (str): Full URL to the input audio file to be processed |
|
(supports MP3, WAV, M4A, FLAC, OGG, and other common formats) |
|
gain_db (float): Volume adjustment in decibels (dB) |
|
(positive values increase volume, negative values decrease volume, |
|
typical range: -20dB to +20dB, values above +6dB may cause clipping) |
|
|
|
Returns: |
|
tuple: A tuple containing: |
|
- First element: Either a tuple of (sample_rate: int, adjusted_audio_data: array) |
|
for the volume-adjusted audio result, or None if an error occurred |
|
- Second element: A status message string indicating success with gain applied |
|
and clipping warning if detected, or error information |
|
|
|
Example: |
|
result, status = apply_volume_adjustment("url/to/audio.mp3", -3.0) |
|
if result is not None: |
|
sample_rate, audio_data = result |
|
print(f"Volume adjustment successful: {status}") |
|
else: |
|
print(f"Error: {status}") |
|
|
|
Note: |
|
- Gain is specified in decibels (dB): +6dB doubles volume, -6dB halves volume |
|
- Positive gain values may cause clipping (distortion) if audio becomes too loud |
|
- Function automatically detects and warns about clipping |
|
- Preserves original sample rate and audio format |
|
- Safe range is typically -20dB to +6dB to avoid quality issues |
|
""" |
|
if audio_file is None: |
|
return None, "Please upload an audio file first." |
|
|
|
try: |
|
audio_data, sample_rate, _ = load_audio_info(audio_file) |
|
if audio_data is None: |
|
return None, "β Could not load audio file." |
|
|
|
|
|
adjusted_audio = adjust_volume(audio_data, gain_db) |
|
|
|
|
|
if np.max(np.abs(adjusted_audio)) > 1.0: |
|
status = f"β οΈ Volume adjusted by {gain_db:+.1f}dB (WARNING: Clipping detected!)" |
|
else: |
|
status = f"β
Volume adjusted by {gain_db:+.1f}dB" |
|
|
|
return (sample_rate, adjusted_audio), status |
|
|
|
except Exception as e: |
|
return None, f"β Error adjusting volume: {str(e)}" |
|
|
|
|
|
def apply_fades(audio_file: str, fade_in_ms: int, fade_out_ms: int) -> tuple[tuple[int, any] | None, str]: |
|
"""Apply fade-in and fade-out effects to an audio file. |
|
|
|
This function loads an audio file and applies smooth fade-in and/or fade-out effects |
|
to eliminate abrupt starts/stops and create professional-sounding transitions. |
|
Fade effects gradually increase or decrease volume over the specified time periods. |
|
|
|
Args: |
|
audio_file (str): Full URL to the input audio file to be processed |
|
(supports MP3, WAV, M4A, FLAC, OGG, and other common formats) |
|
fade_in_ms (int): Duration of fade-in effect in milliseconds |
|
(0 = no fade-in, typical values: 100-3000ms) |
|
fade_out_ms (int): Duration of fade-out effect in milliseconds |
|
(0 = no fade-out, typical values: 100-3000ms) |
|
|
|
Returns: |
|
tuple: A tuple containing: |
|
- First element: Either a tuple of (sample_rate: int, faded_audio_data: array) |
|
for the fade-processed audio result, or None if an error occurred |
|
- Second element: A status message string showing applied fade durations |
|
or error information |
|
|
|
Example: |
|
result, status = apply_fades("url/to/audio.mp3", 1000, 2000) |
|
if result is not None: |
|
sample_rate, audio_data = result |
|
print(f"Fades applied: {status}") |
|
else: |
|
print(f"Error: {status}") |
|
|
|
Note: |
|
- Fade durations are specified in milliseconds (1000ms = 1 second) |
|
- Set either parameter to 0 to skip that fade effect |
|
- Fade-in gradually increases volume from silence at the beginning |
|
- Fade-out gradually decreases volume to silence at the end |
|
- Typical fade durations: 100-500ms (quick), 1000-3000ms (smooth) |
|
- Preserves original sample rate and audio format |
|
- Fades are applied as smooth linear or exponential curves |
|
""" |
|
if audio_file is None: |
|
return None, "Please upload an audio file first." |
|
|
|
try: |
|
audio_data, sample_rate, _ = load_audio_info(audio_file) |
|
if audio_data is None: |
|
return None, "β Could not load audio file." |
|
|
|
processed_audio = audio_data.copy() |
|
|
|
|
|
if fade_in_ms > 0: |
|
processed_audio = apply_fade_in(processed_audio, sample_rate, fade_in_ms) |
|
|
|
|
|
if fade_out_ms > 0: |
|
processed_audio = apply_fade_out(processed_audio, sample_rate, fade_out_ms) |
|
|
|
status = f"β
Fades applied! Fade in: {fade_in_ms}ms, Fade out: {fade_out_ms}ms" |
|
|
|
return (sample_rate, processed_audio), status |
|
|
|
except Exception as e: |
|
return None, f"β Error applying fades: {str(e)}" |
|
|
|
|
|
def apply_reverse(audio_file: str) -> tuple[tuple[int, any] | None, str]: |
|
"""Reverse the playback direction of an audio file. |
|
|
|
This function loads an audio file and reverses the audio data so that it plays |
|
backwards. This creates a reverse playback effect commonly used for artistic |
|
purposes, sound design, or audio analysis. |
|
|
|
Args: |
|
audio_file (str): Full URL to the input audio file to be reversed |
|
(supports MP3, WAV, M4A, FLAC, OGG, and other common formats) |
|
|
|
Returns: |
|
tuple: A tuple containing: |
|
- First element: Either a tuple of (sample_rate: int, reversed_audio_data: array) |
|
for the reversed audio result, or None if an error occurred |
|
- Second element: A status message string indicating successful reversal |
|
or error information |
|
|
|
Example: |
|
result, status = apply_reverse("url/to/audio.mp3") |
|
if result is not None: |
|
sample_rate, audio_data = result |
|
print(f"Audio reversed: {status}") |
|
else: |
|
print(f"Error: {status}") |
|
|
|
Note: |
|
- Reverses the entire audio file from end to beginning |
|
- Preserves original sample rate, duration, and audio quality |
|
- Commonly used for creative effects, sound design, or subliminal messaging detection |
|
- The reversed audio will have the same duration as the original |
|
- All audio characteristics (pitch, timbre) are preserved but played backwards |
|
- Works with both mono and stereo audio files |
|
""" |
|
if audio_file is None: |
|
return None, "Please upload an audio file first." |
|
|
|
try: |
|
audio_data, sample_rate, _ = load_audio_info(audio_file) |
|
if audio_data is None: |
|
return None, "β Could not load audio file." |
|
|
|
|
|
reversed_audio = reverse_audio(audio_data) |
|
|
|
status = "β
Audio reversed successfully!" |
|
|
|
return (sample_rate, reversed_audio), status |
|
|
|
except Exception as e: |
|
return None, f"β Error reversing audio: {str(e)}" |
|
|
|
|
|
def apply_speed_adjustment(audio_file: str, speed_factor: float) -> tuple[tuple[int, any] | None, str]: |
|
"""Apply speed adjustment to an audio file, changing playback speed and pitch. |
|
|
|
This function loads an audio file and adjusts its playback speed by the specified |
|
factor. Speed changes affect both duration and pitch - faster speeds increase pitch |
|
and reduce duration, while slower speeds decrease pitch and increase duration. |
|
|
|
Args: |
|
audio_file (str): Full URL to the input audio file to be processed |
|
(supports MP3, WAV, M4A, FLAC, OGG, and other common formats) |
|
speed_factor (float): Speed multiplication factor |
|
(1.0 = normal speed, 2.0 = double speed/half duration, |
|
0.5 = half speed/double duration, typical range: 0.25 to 4.0) |
|
|
|
Returns: |
|
tuple: A tuple containing: |
|
- First element: Either a tuple of (sample_rate: int, speed_adjusted_audio_data: array) |
|
for the speed-adjusted audio result, or None if an error occurred |
|
- Second element: A status message string showing speed factor and duration change |
|
or error information |
|
|
|
Example: |
|
result, status = apply_speed_adjustment("url/to/audio.mp3", 1.5) |
|
if result is not None: |
|
sample_rate, audio_data = result |
|
print(f"Speed adjusted: {status}") |
|
else: |
|
print(f"Error: {status}") |
|
|
|
Note: |
|
- Speed factor affects both playback speed and pitch (chipmunk/slow-motion effect) |
|
- Values > 1.0 increase speed and pitch, reduce duration |
|
- Values < 1.0 decrease speed and pitch, increase duration |
|
- Common values: 0.5 (half speed), 1.25 (25% faster), 2.0 (double speed) |
|
- Extreme values (< 0.25 or > 4.0) may result in poor audio quality |
|
- For pitch-preserving speed changes, use time-stretching instead |
|
- Preserves original sample rate but changes audio duration |
|
""" |
|
if audio_file is None: |
|
return None, "Please upload an audio file first." |
|
|
|
try: |
|
audio_data, sample_rate, duration = load_audio_info(audio_file) |
|
if audio_data is None: |
|
return None, "β Could not load audio file." |
|
|
|
|
|
speed_adjusted_audio = apply_speed_change(audio_data, speed_factor) |
|
|
|
new_duration = len(speed_adjusted_audio) / sample_rate |
|
status = f"β
Speed adjusted by {speed_factor}x! Duration: {format_time(duration)} β {format_time(new_duration)}" |
|
|
|
return (sample_rate, speed_adjusted_audio), status |
|
|
|
except Exception as e: |
|
return None, f"β Error adjusting speed: {str(e)}" |
|
|
|
|
|
def apply_silence_trimming(audio_file: str, threshold_db: float) -> tuple[tuple[int, any] | None, str]: |
|
"""Trim silence from the beginning and end of an audio file. |
|
|
|
This function loads an audio file and automatically removes silent or very quiet |
|
sections from the beginning and end based on a specified volume threshold. |
|
This is useful for cleaning up recordings and removing unwanted quiet sections. |
|
|
|
Args: |
|
audio_file (str): Full URL to the input audio file to be processed |
|
(supports MP3, WAV, M4A, FLAC, OGG, and other common formats) |
|
threshold_db (float): Volume threshold in decibels below which audio is considered silence |
|
(typical values: -30dB to -60dB, lower values = more aggressive trimming, |
|
-40dB is a good starting point for most recordings) |
|
|
|
Returns: |
|
tuple: A tuple containing: |
|
- First element: Either a tuple of (sample_rate: int, trimmed_audio_data: array) |
|
for the silence-trimmed audio result, or None if an error occurred |
|
- Second element: A status message string showing original and new duration |
|
or error information |
|
|
|
Example: |
|
result, status = apply_silence_trimming("url/to/audio.mp3", -40.0) |
|
if result is not None: |
|
sample_rate, audio_data = result |
|
print(f"Silence trimmed: {status}") |
|
else: |
|
print(f"Error: {status}") |
|
|
|
Note: |
|
- Threshold is specified in decibels (dB) - more negative values = quieter threshold |
|
- Common thresholds: -30dB (conservative), -40dB (moderate), -60dB (aggressive) |
|
- Only trims from beginning and end, preserves silence within the audio |
|
- Useful for removing recording artifacts, room tone, or equipment noise |
|
- May significantly reduce file duration depending on original content |
|
- Preserves original sample rate and audio quality |
|
- Be careful with very low thresholds as they may trim wanted quiet content |
|
""" |
|
if audio_file is None: |
|
return None, "Please upload an audio file first." |
|
|
|
try: |
|
audio_data, sample_rate, duration = load_audio_info(audio_file) |
|
if audio_data is None: |
|
return None, "β Could not load audio file." |
|
|
|
|
|
trimmed_audio = trim_silence(audio_data, threshold_db) |
|
|
|
new_duration = len(trimmed_audio) / sample_rate |
|
status = f"β
Silence trimmed! Duration: {format_time(duration)} β {format_time(new_duration)}" |
|
|
|
return (sample_rate, trimmed_audio), status |
|
|
|
except Exception as e: |
|
return None, f"β Error trimming silence: {str(e)}" |
|
|
|
|
|
def create_audio_effects_tab():
    """Create the audio effects tab interface.

    Builds the upload widget, the effect controls, the status line and the
    result player, then connects the upload widget to ``update_audio_info``
    and every effect button to its ``apply_*`` handler. Each handler writes to
    the same result player and status line.
    """
    # NOTE(review): the nesting of the layout below was reconstructed from a
    # listing without indentation - confirm against the rendered UI. Leading
    # glyphs in labels look like mis-decoded emoji and are kept as found.
    gr.Markdown("Apply various audio effects and processing to your audio files.")

    with gr.Row():
        with gr.Column(scale=2):
            source_audio = gr.Audio(label="π€ Upload Audio File", type="filepath")

            duration_md = gr.Markdown("No file uploaded")
            stats_md = gr.Markdown("Audio stats: N/A")

            with gr.Accordion("π Volume & Normalization", open=True):
                with gr.Row():
                    normalize_button = gr.Button("π Normalize Audio", variant="secondary")
                    target_slider = gr.Slider(
                        label="Target Level (dB)", minimum=-20, maximum=0, value=-3, step=0.1
                    )

                with gr.Row():
                    volume_button = gr.Button("π Adjust Volume", variant="secondary")
                    gain_slider = gr.Slider(
                        label="Volume Gain (dB)", minimum=-20, maximum=20, value=0, step=0.1
                    )

            with gr.Accordion("π Fade Effects", open=True):
                with gr.Row():
                    fade_button = gr.Button("π Apply Fades", variant="secondary")
                    fade_in_slider = gr.Slider(
                        label="Fade In (ms)", minimum=0, maximum=5000, value=100, step=10
                    )
                    fade_out_slider = gr.Slider(
                        label="Fade Out (ms)", minimum=0, maximum=5000, value=100, step=10
                    )

            with gr.Accordion("β‘ Time & Speed Effects", open=True):
                with gr.Row():
                    reverse_button = gr.Button("β©οΈ Reverse Audio", variant="secondary")
                    speed_button = gr.Button("β© Change Speed", variant="secondary")
                    speed_slider = gr.Slider(
                        label="Speed Factor", minimum=0.25, maximum=4.0, value=1.0, step=0.1
                    )

            with gr.Accordion("βοΈ Audio Cleanup", open=True):
                with gr.Row():
                    trim_button = gr.Button("π Trim Silence", variant="secondary")
                    threshold_slider = gr.Slider(
                        label="Silence Threshold (dB)", minimum=-60, maximum=-10, value=-40, step=1
                    )

            status_md = gr.Markdown("")

        with gr.Column(scale=1):
            result_audio = gr.Audio(label="π§ Processed Audio Result", type="numpy")

            gr.Markdown("πΎ **Download:** Right-click the audio player above and select 'Save audio as...'")

    # Refresh the duration/stats lines whenever the uploaded file changes.
    source_audio.change(
        fn=update_audio_info,
        inputs=[source_audio],
        outputs=[duration_md, stats_md],
    )

    # (button, handler, handler inputs) - registered in this order; every
    # handler reports into the shared result player and status line.
    effect_bindings = (
        (normalize_button, apply_normalization, [source_audio, target_slider]),
        (volume_button, apply_volume_adjustment, [source_audio, gain_slider]),
        (fade_button, apply_fades, [source_audio, fade_in_slider, fade_out_slider]),
        (reverse_button, apply_reverse, [source_audio]),
        (speed_button, apply_speed_adjustment, [source_audio, speed_slider]),
        (trim_button, apply_silence_trimming, [source_audio, threshold_slider]),
    )
    for button, handler, handler_inputs in effect_bindings:
        button.click(
            fn=handler,
            inputs=handler_inputs,
            outputs=[result_audio, status_md],
        )

    with gr.Accordion("π Effects Guide", open=False):
        gr.Markdown("""
        **π Volume & Normalization:**
        - **Normalize**: Adjusts peak level to target dB (recommended: -3dB)
        - **Volume Gain**: Increase/decrease volume by specified dB

        **π Fade Effects:**
        - **Fade In**: Gradually increase volume from silence
        - **Fade Out**: Gradually decrease volume to silence

        **β‘ Time & Speed:**
        - **Reverse**: Play audio backwards
        - **Speed**: Change playback speed (1.0 = normal, 2.0 = double, 0.5 = half)

        **βοΈ Cleanup:**
        - **Trim Silence**: Remove quiet sections from start/end

        **Tips:**
        - Always check audio stats before processing
        - Watch for clipping warnings when increasing volume
        - Use normalization for consistent levels across multiple files
        - Combine effects by processing sequentially
        """)
|
|