import os
import re
import librosa
import gradio as gr
# --- Helper Functions ---
def seconds_to_cue_time(t):
    """Converts a time in seconds to the CUE sheet format (MM:SS:FF)."""
    t = max(0, t)
    minutes = int(t // 60)
    seconds = int(t % 60)
    frames = int((t - minutes * 60 - seconds) * 75)
    return f'{minutes:02d}:{seconds:02d}:{frames:02d}'
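# Worked example (a CUE frame is 1/75 s): for t = 61.5 the function computes
# minutes=1, seconds=1, frames=int(0.5 * 75)=37, i.e. '01:01:37'.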
def parse_cue_time_to_seconds(time_str):
    """Parses MM:SS:FF into seconds. Returns None on failure."""
    if not time_str:
        return None
    match = re.match(r'(\d+):(\d{1,2}):(\d{1,2})', time_str)
    if match:
        m, s, f = map(int, match.groups())
        return m * 60 + s + f / 75.0
    return None
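# Inverse of seconds_to_cue_time up to frame truncation:
# parse_cue_time_to_seconds('01:01:37') == 61.4933... (60 + 1 + 37/75).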
def format_cue_text(times, audio_filename="CDImage.wav"):
    """Generates the final CUE sheet string from a list of times."""
    if not times:
        return ""
    filename_no_ext = os.path.splitext(audio_filename)[0]
    cue_text = 'PERFORMER "Unknown Artist"\n'
    cue_text += f'TITLE "{filename_no_ext}"\n'
    cue_text += f'FILE "{audio_filename}" WAVE\n'
    # Always sort times before formatting to handle out-of-order additions from splitting
    sorted_times = sorted(set(times))
    for idx, t in enumerate(sorted_times):
        cue_time_str = seconds_to_cue_time(t)
        cue_text += f'  TRACK {idx + 1:02d} AUDIO\n'
        cue_text += f'    TITLE "Track {idx + 1:02d}"\n'
        cue_text += f'    INDEX 01 {cue_time_str}\n'
    return cue_text
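# Example output for times=[0.0, 61.5] and audio_filename="album.wav":
#   PERFORMER "Unknown Artist"
#   TITLE "album"
#   FILE "album.wav" WAVE
#     TRACK 01 AUDIO
#       TITLE "Track 01"
#       INDEX 01 00:00:00
#     TRACK 02 AUDIO
#       TITLE "Track 02"
#       INDEX 01 01:01:37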
def generate_track_labels(times, audio_duration):
    """Creates descriptive labels for the checklist, including track length."""
    if not times:
        return []
    sorted_times = sorted(set(times))
    track_choices = []
    for i, t in enumerate(sorted_times):
        track_length = (sorted_times[i + 1] - t) if i < len(sorted_times) - 1 else (audio_duration - t)
        label = f"Track {i + 1:02d} (Starts: {seconds_to_cue_time(t)}) [Length: {seconds_to_cue_time(track_length)}]"
        track_choices.append(label)
    return track_choices
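# Example label for times=[0.0, 61.5] in a 61.5 s file:
# "Track 01 (Starts: 00:00:00) [Length: 01:01:37]".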
# --- Core Gradio Functions ---
def analyze_audio_to_cue(audio_file, top_db, min_segment_len, merge_threshold, merge_protection_len):
    """Workflow 1: Analyzes an uploaded audio file to generate the initial CUE text."""
    if not audio_file:
        raise gr.Error("Please upload an audio file first.")

    # --- 1. Load Audio File ---
    try:
        y, sr = librosa.load(audio_file, sr=None)
        audio_duration = librosa.get_duration(y=y, sr=sr)
    except Exception as e:
        raise gr.Error(f"Could not load audio file: {e}")

    # --- 2. Detect Segments using Silence Detection ---
    intervals = librosa.effects.split(y, top_db=top_db)
    # intervals is a NumPy array of [start, end] sample pairs; .size catches the empty case.
    times = [iv[0] / sr for iv in intervals if (iv[1] - iv[0]) / sr >= min_segment_len] if intervals.size > 0 else []

    # --- 3. Post-process Tracks (Add Start, Auto-Merge) ---
    if not times or times[0] > 0.5:
        times.insert(0, 0.0)

    # Auto-merging: drop a track start if the resulting track is shorter than the
    # merge threshold, unless it exceeds the protection length.
    if len(times) > 1:
        final_times = [times[0]]
        i = 0
        while i < len(times) - 1:
            track_length = times[i + 1] - times[i]
            if (track_length < merge_threshold) and (track_length <= merge_protection_len):
                # Condition to MERGE is met: skip adding the next timestamp.
                pass
            else:
                # Condition to KEEP is met.
                final_times.append(times[i + 1])
            i += 1
        # Drop a trailing stub shorter than the merge threshold.
        if len(final_times) > 1 and (audio_duration - final_times[-1]) < merge_threshold:
            final_times.pop()
        times = final_times

    # --- 4. Prepare Outputs for Gradio ---
    times = sorted(set(times))
    audio_filename = os.path.basename(audio_file)
    initial_cue_text = format_cue_text(times, audio_filename)
    track_labels = generate_track_labels(times, audio_duration)
    # This function returns everything needed to update the entire UI in one step.
    return (
        initial_cue_text, audio_filename, times, audio_duration,
        gr.update(choices=track_labels, value=[]), gr.update(visible=True)
    )
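# Sketch of the detection step above (sample values are illustrative only):
# with sr=44100, librosa.effects.split(y, top_db=40) might return
# array([[0, 441000], [485100, 1323000]]) -- [start, end] sample indices of
# non-silent spans -- so iv[0] / sr yields track starts of 0.0 s and 11.0 s.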
def parse_cue_and_update_ui(cue_text):
    """Workflow 2: Parses pasted CUE text and returns it to populate the output box."""
    if not cue_text or "INDEX 01" not in cue_text:
        return cue_text, "CDImage.wav", None, 0, gr.update(choices=[], value=[]), gr.update(visible=False)
    file_match = re.search(r'FILE\s+"([^"]+)"', cue_text, re.IGNORECASE)
    audio_filename = file_match.group(1) if file_match else "CDImage.wav"
    index_matches = re.findall(r'INDEX\s+\d+\s+([\d:]{7,8})', cue_text)
    parsed_times = (parse_cue_time_to_seconds(t) for t in index_matches)
    times = [t for t in parsed_times if t is not None]
    if not times:
        return cue_text, audio_filename, None, 0, gr.update(choices=[], value=[]), gr.update(visible=False)
    times = sorted(set(times))
    # Estimate the duration for UI labels using the last track's start time.
    # This is a limitation of text-only mode, but it keeps the tool usable.
    audio_duration = times[-1] if times else 0
    track_labels = generate_track_labels(times, audio_duration)
    return cue_text, audio_filename, times, audio_duration, gr.update(choices=track_labels, value=[]), gr.update(visible=True)
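# Example: re.findall(r'INDEX\s+\d+\s+([\d:]{7,8})', 'INDEX 01 03:25:40')
# yields ['03:25:40'], which parse_cue_time_to_seconds maps to 205.533... s.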
def update_editing_tools(selected_tracks, current_times, audio_duration):
    """Dynamically shows/hides the Merge or Split tools based on selection count."""
    num_selected = len(selected_tracks)
    if num_selected == 1:
        # Configure and show the Split UI.
        # --- 1. Get track boundaries ---
        track_idx = int(selected_tracks[0].split(' ')[1]) - 1
        start_time = current_times[track_idx]
        end_time = audio_duration if (track_idx + 1) >= len(current_times) else current_times[track_idx + 1]

        # --- 2. Pad both ends so the slider cannot split at the exact edges ---
        # A CUE sheet frame is 1/75 s (~0.013 s); we use slightly larger padding.
        padding = 0.02
        new_min_time = start_time + padding
        new_max_time = end_time - padding

        # --- 3. If the track is too short to be split, hide the tools ---
        if new_min_time >= new_max_time:
            return (
                gr.update(visible=False),  # Hide Merge button
                gr.update(visible=False),  # Hide Split group
                None,
                None
            )

        # --- 4. Configure and show the Split UI with the padded range ---
        mid_point = start_time + (end_time - start_time) / 2
        return (
            gr.update(visible=False),  # Hide Merge button
            gr.update(visible=True),   # Show Split group
            gr.update(minimum=new_min_time, maximum=new_max_time, value=mid_point),  # Configure slider
            gr.update(value=f"Split at: {seconds_to_cue_time(mid_point)}")  # Update slider label
        )
    elif num_selected > 1:
        # Show the Merge UI.
        return gr.update(visible=True), gr.update(visible=False), None, None
    else:
        # Hide everything.
        return gr.update(visible=False), gr.update(visible=False), None, None
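# Label parsing example: "Track 03 (Starts: 04:12:00) [Length: 02:30:15]"
# -> split(' ')[1] == '03' -> track_idx == 2.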
def perform_manual_merge(selected_tracks, original_times, audio_duration, audio_filename):
    """Merges the selected tracks; each run of consecutive selections collapses into one track."""
    # --- 1. Identify which track start times to remove ---
    indices_to_merge = {int(label.split(' ')[1]) - 1 for label in selected_tracks}

    # --- 2. Create the new list of times ---
    # Iterate through the original times and decide which ones to KEEP.
    new_times = []
    for i, time in enumerate(original_times):
        is_selected = i in indices_to_merge
        # Keep a track's start time if:
        # 1. it was NOT selected, OR
        # 2. it WAS selected but starts a merge block (it is the very first
        #    track, or the track before it was NOT selected).
        if not is_selected or (i == 0) or ((i - 1) not in indices_to_merge):
            new_times.append(time)

    # --- 3. Prepare all the outputs to update the UI ---
    final_cue_text = format_cue_text(new_times, audio_filename)
    new_track_labels = generate_track_labels(new_times, audio_duration)
    # Update the textbox, the state, and the checklist in one return.
    return final_cue_text, new_times, gr.update(choices=new_track_labels, value=[])
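# Worked example: times [0, 60, 120, 180] with Tracks 02 and 03 selected
# (indices_to_merge == {1, 2}): 0 is kept (not selected), 60 is kept (it
# starts the merge block), 120 is dropped (its predecessor was also selected),
# and 180 is kept -> [0, 60, 180].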
def perform_manual_split(split_time_sec, original_times, audio_duration, audio_filename):
    """Splits a track at the time specified by the slider."""
    if split_time_sec in original_times:
        raise gr.Error("This exact timestamp already exists.")
    new_times = sorted(original_times + [split_time_sec])
    final_cue_text = format_cue_text(new_times, audio_filename)
    new_track_labels = generate_track_labels(new_times, audio_duration)
    return final_cue_text, new_times, gr.update(choices=new_track_labels, value=[])
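# Example: splitting times [0, 120] at 60.0 yields [0, 60.0, 120] -- the
# original track becomes two tracks of 60 s each.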
# --- Gradio User Interface Definition ---
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎵 Advanced CUE Sheet Generator")

    # --- Hidden State Variables ---
    track_times_state = gr.State([])
    audio_duration_state = gr.State(0)
    audio_filename_state = gr.State("CDImage.wav")

    with gr.Tabs():
        with gr.TabItem("Start with Audio File"):
            gr.Markdown("Upload an audio file to automatically detect track points.")
            audio_input = gr.Audio(type="filepath", label="Upload Audio File")
            with gr.Accordion("Analysis Parameters", open=False):
                threshold_slider = gr.Slider(10, 80, 40, step=1, label="Silence Threshold (dB)")
                min_length_slider = gr.Slider(0.5, 30, 2, step=0.1, label="Min. Segment Length (s)")
                merge_threshold_slider = gr.Slider(1, 60, 15, step=1, label="Auto-Merge Threshold (s)")
                merge_protection_slider = gr.Slider(0.5, 60, 5, step=0.1, label="Merge Protection Length (s)")
            generate_button = gr.Button("Analyze Audio", variant="primary")
        with gr.TabItem("Start with CUE Text"):
            gr.Markdown("Or paste CUE text below and click outside the box. The editing tools will appear automatically.")
            cue_text_input_for_paste = gr.Textbox(label="Paste CUE Text Here", lines=8)

    # The main output textbox sits outside the tabs, serving as a central display.
    output_text = gr.Textbox(label="CUE Sheet Output", lines=15, show_copy_button=True, interactive=True)

    with gr.Group(visible=False) as manual_editing_group:
        gr.Markdown("### Manual Editing Tools")
        track_checkboxes = gr.CheckboxGroup(label="Select Tracks to Edit")
        with gr.Row(visible=False) as merge_tools:
            merge_button = gr.Button("Merge Selected Tracks", variant="secondary", size="lg")
        with gr.Group(visible=False) as split_tools:
            split_slider_label = gr.Textbox(label="Current Split Time", interactive=False)
            split_slider = gr.Slider(label="Drag to select split point")
            split_button = gr.Button("Split Track at Selected Time", variant="secondary")
    # --- Event Wiring ---
    # Workflow 1: the audio analysis button updates everything, including the editing tools.
    generate_button.click(
        fn=analyze_audio_to_cue,
        inputs=[audio_input, threshold_slider, min_length_slider, merge_threshold_slider, merge_protection_slider],
        outputs=[output_text, audio_filename_state, track_times_state, audio_duration_state, track_checkboxes, manual_editing_group]
    )
    # Workflow 2: pasting text in the dedicated input box populates the main output and enables the tools.
    # The `.change` event updates all necessary outputs in a single, direct step.
    cue_text_input_for_paste.change(
        fn=parse_cue_and_update_ui,
        inputs=[cue_text_input_for_paste],
        outputs=[output_text, audio_filename_state, track_times_state, audio_duration_state, track_checkboxes, manual_editing_group]
    )
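    # Note: depending on the Gradio version, `.change` may fire on every
    # keystroke or only once the textbox loses focus; either way, the
    # "INDEX 01" guard in parse_cue_and_update_ui simply keeps the editing
    # tools hidden until the pasted text parses as a CUE sheet.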
    # Dynamic UI controller for showing/hiding the Merge/Split tools
    track_checkboxes.change(
        fn=update_editing_tools,
        inputs=[track_checkboxes, track_times_state, audio_duration_state],
        outputs=[merge_tools, split_tools, split_slider, split_slider_label]
    )
    # Live update for the split slider's time display
    split_slider.input(
        fn=lambda t: f"Split at: {seconds_to_cue_time(t)}",
        inputs=[split_slider],
        outputs=[split_slider_label]
    )
    # Action buttons
    merge_button.click(
        fn=perform_manual_merge,
        inputs=[track_checkboxes, track_times_state, audio_duration_state, audio_filename_state],
        outputs=[output_text, track_times_state, track_checkboxes]
    )
    split_button.click(
        fn=perform_manual_split,
        inputs=[split_slider, track_times_state, audio_duration_state, audio_filename_state],
        outputs=[output_text, track_times_state, track_checkboxes]
    )
if __name__ == "__main__":
    demo.launch(inbrowser=True)