# NOTE: removed pasted viewer artifact (file-size banner, commit hash, and
# stray line-number residue) that preceded the actual source and made the
# file invalid Python.
import os
import re
import librosa
import gradio as gr

# --- Helper Functions ---

def seconds_to_cue_time(t):
    """Convert a time in seconds to the CUE sheet format MM:SS:FF (75 frames/s)."""
    t = max(0, t)  # clamp negatives to zero
    m, s = int(t // 60), int(t % 60)
    f = int((t - m * 60 - s) * 75)  # leftover fraction of a second, in frames
    return f'{m:02d}:{s:02d}:{f:02d}'

def parse_cue_time_to_seconds(time_str):
    """Parse a CUE-style MM:SS:FF timestamp into seconds. Returns None on failure."""
    if not time_str:
        return None
    parsed = re.match(r'(\d+):(\d{1,2}):(\d{1,2})', time_str)
    if parsed is None:
        return None
    minutes, seconds, frames = (int(g) for g in parsed.groups())
    # CUE frames are 1/75th of a second.
    return minutes * 60 + seconds + frames / 75.0

def format_cue_text(times, audio_filename="CDImage.wav"):
    """Generates the final CUE sheet string from a list of times."""
    if not times:
        return ""
    title = os.path.splitext(audio_filename)[0]
    lines = [
        'PERFORMER "Unknown Artist"',
        f'TITLE "{title}"',
        f'FILE "{audio_filename}" WAVE',
    ]
    # Deduplicate and sort so out-of-order additions (e.g. from splits) are handled.
    for n, start in enumerate(sorted(set(times)), start=1):
        lines.append(f'  TRACK {n:02d} AUDIO')
        lines.append(f'    TITLE "Track {n:02d}"')
        lines.append(f'    INDEX 01 {seconds_to_cue_time(start)}')
    # Original built the text with per-line '\n' appends, so keep a trailing newline.
    return "\n".join(lines) + "\n"

def generate_track_labels(times, audio_duration):
    """Creates descriptive labels for the checklist, including track length."""
    if not times:
        return []
    starts = sorted(set(times))
    # Each track ends where the next begins; the last one runs to end of audio.
    ends = starts[1:] + [audio_duration]
    return [
        f"Track {n:02d} (Starts: {seconds_to_cue_time(s)}) [Length: {seconds_to_cue_time(e - s)}]"
        for n, (s, e) in enumerate(zip(starts, ends), start=1)
    ]

# --- Core Gradio Functions ---
def analyze_audio_to_cue(audio_file, top_db, min_segment_len, merge_threshold, merge_protection_len):
    """Workflow 1: Analyzes an uploaded audio file to generate the initial CUE text.

    Args:
        audio_file: Filesystem path to the uploaded audio (gr.Audio with type="filepath").
        top_db: Silence threshold in dB, forwarded to librosa.effects.split.
        min_segment_len: Minimum non-silent segment length in seconds to keep as a track.
        merge_threshold: Tracks shorter than this (seconds) are auto-merge candidates.
        merge_protection_len: Tracks longer than this (seconds) are never auto-merged.

    Returns:
        Tuple for the UI outputs: (cue_text, audio_filename, times, audio_duration,
        gr.update for the track checklist, gr.update making the editing group visible).

    Raises:
        gr.Error: If no file was provided or the audio could not be loaded.
    """
    if not audio_file:
        raise gr.Error("Please upload an audio file first.")
    
    # --- 1. Load Audio File ---
    try:
        # sr=None preserves the file's native sample rate.
        y, sr = librosa.load(audio_file, sr=None)
        audio_duration = librosa.get_duration(y=y, sr=sr)
    except Exception as e:
        raise gr.Error(f"Could not load audio file: {e}")
    
    # --- 2. Detect Segments using Silence Detection ---
    # intervals is a NumPy array of [start_sample, end_sample] pairs.
    intervals = librosa.effects.split(y, top_db=top_db)

    # Corrected way to check if NumPy array is empty
    # Keep only segment starts (converted to seconds) whose segment is long enough.
    times = [iv[0] / sr for iv in intervals if (iv[1] - iv[0]) / sr >= min_segment_len] if intervals.size > 0 else []

    # --- 3. Post-process Tracks (Add Start, Auto-Merge) ---
    # Ensure track 1 starts at 0:00 unless a segment already begins near the start.
    if not times or times[0] > 0.5:
        times.insert(0, 0.0)
        
    # Auto-merging logic
    if len(times) > 1:
        final_times = [times[0]]
        i = 0
        while i < len(times) - 1:
            # NOTE(review): length is measured between ORIGINAL boundaries, so a
            # chain of short segments merges into one block even if the cumulative
            # merged length exceeds merge_threshold — confirm this is intended.
            track_length = times[i+1] - times[i]
            
            # Merge if track is shorter than threshold AND not longer than protection length
            if (track_length < merge_threshold) and (track_length <= merge_protection_len):
                # Condition to MERGE is met. Skip adding the next timestamp.
                pass
            else:
                # Condition to KEEP is met.
                final_times.append(times[i+1])
            
            i += 1

        # Merge a too-short final track into its predecessor.
        if len(final_times) > 1 and (audio_duration - final_times[-1]) < merge_threshold:
            final_times.pop()
        times = final_times

    # --- 4. Prepare Outputs for Gradio ---
    times = sorted(list(set(times)))
    audio_filename = os.path.basename(audio_file)
    initial_cue_text = format_cue_text(times, audio_filename)
    track_labels = generate_track_labels(times, audio_duration)
    
    # This function now returns everything needed to update the entire UI in one step.
    return (
        initial_cue_text, audio_filename, times, audio_duration,
        gr.update(choices=track_labels, value=[]), gr.update(visible=True)
    )

def parse_cue_and_update_ui(cue_text):
    """Workflow 2: Parses pasted CUE text and returns it to populate the output box.

    Args:
        cue_text: Raw CUE sheet text pasted by the user.

    Returns:
        Tuple for the UI outputs: (cue_text, audio_filename, times, audio_duration,
        gr.update for the track checklist, gr.update toggling the editing group).
        On unparseable input the checklist is cleared and the tools stay hidden.
    """
    if not cue_text or "INDEX 01" not in cue_text:
        return cue_text, "CDImage.wav", None, 0, gr.update(choices=[], value=[]), gr.update(visible=False)
    
    file_match = re.search(r'FILE\s+"([^"]+)"', cue_text, re.IGNORECASE)
    audio_filename = file_match.group(1) if file_match else "CDImage.wav"
    
    index_matches = re.findall(r'INDEX\s+\d+\s+([\d:]{7,8})', cue_text)
    # Parse each timestamp exactly once (the original parsed every entry twice)
    # and drop any that fail to parse.
    parsed = (parse_cue_time_to_seconds(t) for t in index_matches)
    times = [t for t in parsed if t is not None]
    
    if not times:
        return cue_text, audio_filename, None, 0, gr.update(choices=[], value=[]), gr.update(visible=False)
        
    times = sorted(list(set(times)))
    # Estimate duration for UI labels. It's the last track's start time.
    # This is a limitation of text-only mode, but makes the tool usable.
    audio_duration = times[-1] if times else 0
    track_labels = generate_track_labels(times, audio_duration)
    
    return cue_text, audio_filename, times, audio_duration, gr.update(choices=track_labels, value=[]), gr.update(visible=True)

def update_editing_tools(selected_tracks, current_times, audio_duration):
    """Dynamically shows/hides Merge or Split tools based on selection count."""
    count = len(selected_tracks)

    if count == 0:
        # Nothing selected: hide both tool groups.
        return gr.update(visible=False), gr.update(visible=False), None, None

    if count > 1:
        # Multiple tracks selected: only merging applies.
        return gr.update(visible=True), gr.update(visible=False), None, None

    # Exactly one selection: configure the Split tool for that track's boundaries.
    # Labels look like "Track 01 (...)", so the second word is the 1-based index.
    idx = int(selected_tracks[0].split(' ')[1]) - 1
    start = current_times[idx]
    end = current_times[idx + 1] if idx + 1 < len(current_times) else audio_duration

    # Pad the lower bound so a split cannot land on the track's exact start.
    # A CUE frame is 1/75 s (~0.013 s); use a slightly larger margin.
    padding = 0.02
    lo = start + padding
    hi = end

    if lo >= hi:
        # Track is too short to split; hide all editing tools.
        return (
            gr.update(visible=False),
            gr.update(visible=False),
            None,
            None
        )

    midpoint = start + (end - start) / 2
    return (
        gr.update(visible=False),                                    # Hide Merge button
        gr.update(visible=True),                                     # Show Split Group
        gr.update(minimum=lo, maximum=hi, value=midpoint),           # Configure Slider
        gr.update(value=f"Split at: {seconds_to_cue_time(midpoint)}")  # Update slider label
    )

def perform_manual_merge(selected_tracks, original_times, audio_duration, audio_filename):
    """Merges selected tracks. The internal logic is robust and unchanged."""

    # Labels look like "Track 01 (...)" -> set of 0-based selected indices.
    selected = {int(label.split(' ')[1]) - 1 for label in selected_tracks}

    # A start time is kept unless it is selected AND its predecessor is also
    # selected — i.e. it sits inside a merge block rather than starting one.
    # (The first track's start is always kept.)
    new_times = [
        t for i, t in enumerate(original_times)
        if i not in selected or i == 0 or (i - 1) not in selected
    ]

    # Regenerate the CUE text and the checklist labels for the updated tracks.
    final_cue_text = format_cue_text(new_times, audio_filename)
    new_track_labels = generate_track_labels(new_times, audio_duration)

    # Updates the textbox, the state, and the checklist in one shot.
    return final_cue_text, new_times, gr.update(choices=new_track_labels, value=[])


def perform_manual_split(split_time_sec, original_times, audio_duration, audio_filename):
    """Splits a track at the time specified by the slider."""
    # Refuse an exact duplicate; format_cue_text would silently drop it anyway.
    if split_time_sec in original_times:
        raise gr.Error("This exact timestamp already exists.")

    new_times = sorted([*original_times, split_time_sec])
    updated_cue = format_cue_text(new_times, audio_filename)
    updated_labels = generate_track_labels(new_times, audio_duration)
    return updated_cue, new_times, gr.update(choices=updated_labels, value=[])


# --- Gradio User Interface Definition ---
# Top-level script: builds the Gradio Blocks UI and wires events to the
# functions above, then launches the app when run as a script.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎵 Advanced CUE Sheet Generator")
    
    # --- Hidden State Variables ---
    # Shared between workflows: the sorted track start times (seconds), the
    # audio duration (seconds), and the audio filename used in the CUE FILE line.
    track_times_state = gr.State([])
    audio_duration_state = gr.State(0)
    audio_filename_state = gr.State("CDImage.wav")

    with gr.Tabs():
        with gr.TabItem("Start with Audio File"):
            gr.Markdown("Upload an audio file to automatically detect track points.")
            audio_input = gr.Audio(type="filepath", label="Upload Audio File")
            # Tunable parameters for the silence-detection / auto-merge pipeline.
            with gr.Accordion("Analysis Parameters", open=False):
                threshold_slider = gr.Slider(10, 80, 40, step=1, label="Silence Threshold (dB)")
                min_length_slider = gr.Slider(0.5, 30, 2, step=0.1, label="Min. Segment Length (s)")
                merge_length_slider = gr.Slider(1, 60, 15, step=1, label="Auto-Merge Threshold (s)")
                min_silence_length_slider = gr.Slider(0.5, 60, 5, step=0.1, label="Merge Protection Length (s)")
            generate_button = gr.Button("Analyze Audio", variant="primary")
        
        with gr.TabItem("Start with CUE Text"):
            gr.Markdown("Or paste CUE text below and click outside the box. The editing tools will appear automatically.")
            cue_text_input_for_paste = gr.Textbox(label="Paste CUE Text Here", lines=8)

    # The main output textbox is now outside the tabs, serving as a central display.
    output_text = gr.Textbox(label="CUE Sheet Output", lines=15, show_copy_button=True, interactive=True)

    # Editing tools start hidden; they are revealed once tracks exist.
    with gr.Group(visible=False) as manual_editing_group:
        gr.Markdown("### Manual Editing Tools")
        track_checkboxes = gr.CheckboxGroup(label="Select Tracks to Edit")

        # Merge tools: shown when 2+ tracks are selected.
        with gr.Row(visible=False) as merge_tools:
            merge_button = gr.Button("Merge Selected Tracks", variant="secondary", size="lg")

        # Split tools: shown when exactly 1 track is selected.
        with gr.Group(visible=False) as split_tools:
            split_slider_label = gr.Textbox(label="Current Split Time", interactive=False)
            split_slider = gr.Slider(label="Drag to select split point")
            split_button = gr.Button("Split Track at Selected Time", variant="secondary")

    # --- Event Wiring ---
    
    # Workflow 1: Audio analysis button now updates everything, including the editing tools.
    generate_button.click(
        fn=analyze_audio_to_cue,
        inputs=[audio_input, threshold_slider, min_length_slider, merge_length_slider, min_silence_length_slider],
        outputs=[output_text, audio_filename_state, track_times_state, audio_duration_state, track_checkboxes, manual_editing_group]
    )
    
    # Workflow 2: Pasting text in the dedicated input box populates the main output and enables tools.
    # The `.change` event now updates all necessary outputs in a single, direct step.
    cue_text_input_for_paste.change(
        fn=parse_cue_and_update_ui,
        inputs=[cue_text_input_for_paste],
        outputs=[output_text, audio_filename_state, track_times_state, audio_duration_state, track_checkboxes, manual_editing_group]
    )

    # Dynamic UI controller for showing/hiding Merge/Split tools
    track_checkboxes.change(
        fn=update_editing_tools,
        inputs=[track_checkboxes, track_times_state, audio_duration_state],
        outputs=[merge_tools, split_tools, split_slider, split_slider_label]
    )

    # Live update for the split slider's time display
    split_slider.input(
        fn=lambda t: f"Split at: {seconds_to_cue_time(t)}",
        inputs=[split_slider],
        outputs=[split_slider_label]
    )

    # Action buttons
    merge_button.click(
        fn=perform_manual_merge,
        inputs=[track_checkboxes, track_times_state, audio_duration_state, audio_filename_state],
        outputs=[output_text, track_times_state, track_checkboxes]
    )

    split_button.click(
        fn=perform_manual_split,
        inputs=[split_slider, track_times_state, audio_duration_state, audio_filename_state],
        outputs=[output_text, track_times_state, track_checkboxes]
    )

if __name__ == "__main__":
    # inbrowser=True opens the app in the default web browser on launch.
    demo.launch(inbrowser=True)