import os
import re
import librosa
import gradio as gr
# --- Helper Functions ---
def seconds_to_cue_time(t):
    """Converts a time in seconds to the CUE sheet format (MM:SS:FF)."""
    t = max(0, t)
    minutes = int(t // 60)
    seconds = int(t % 60)
    frames = int((t - minutes * 60 - seconds) * 75)
    return f'{minutes:02d}:{seconds:02d}:{frames:02d}'
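# Worked example (a CUE frame is 1/75 s): for t = 61.5 the function computes
# minutes=1, seconds=1, frames=int(0.5 * 75)=37, i.e. '01:01:37'.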
def parse_cue_time_to_seconds(time_str):
    """Parses MM:SS:FF into seconds. Returns None on failure."""
    if not time_str:
        return None
    match = re.match(r'(\d+):(\d{1,2}):(\d{1,2})', time_str)
    if match:
        m, s, f = map(int, match.groups())
        return m * 60 + s + f / 75.0
    return None
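# Inverse of seconds_to_cue_time up to frame truncation:
# parse_cue_time_to_seconds('01:01:37') == 61.4933... (60 + 1 + 37/75).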
def format_cue_text(times, audio_filename="CDImage.wav"):
    """Generates the final CUE sheet string from a list of times."""
    if not times:
        return ""
    filename_no_ext = os.path.splitext(audio_filename)[0]
    cue_text = 'PERFORMER "Unknown Artist"\n'
    cue_text += f'TITLE "{filename_no_ext}"\n'
    cue_text += f'FILE "{audio_filename}" WAVE\n'
    # Always sort times before formatting to handle out-of-order additions from splitting
    sorted_times = sorted(set(times))
    for idx, t in enumerate(sorted_times):
        cue_time_str = seconds_to_cue_time(t)
        cue_text += f'  TRACK {idx + 1:02d} AUDIO\n'
        cue_text += f'    TITLE "Track {idx + 1:02d}"\n'
        cue_text += f'    INDEX 01 {cue_time_str}\n'
    return cue_text
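# Example output for times=[0.0, 61.5] and audio_filename="album.wav":
#   PERFORMER "Unknown Artist"
#   TITLE "album"
#   FILE "album.wav" WAVE
#     TRACK 01 AUDIO
#       TITLE "Track 01"
#       INDEX 01 00:00:00
#     TRACK 02 AUDIO
#       TITLE "Track 02"
#       INDEX 01 01:01:37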
def generate_track_labels(times, audio_duration):
    """Creates descriptive labels for the checklist, including track length."""
    if not times:
        return []
    sorted_times = sorted(set(times))
    track_choices = []
    for i, t in enumerate(sorted_times):
        track_length = (sorted_times[i + 1] - t) if i < len(sorted_times) - 1 else (audio_duration - t)
        label = f"Track {i + 1:02d} (Starts: {seconds_to_cue_time(t)}) [Length: {seconds_to_cue_time(track_length)}]"
        track_choices.append(label)
    return track_choices
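# Example label for times=[0.0, 61.5] in a 61.5 s file:
# "Track 01 (Starts: 00:00:00) [Length: 01:01:37]".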
# --- Core Gradio Functions ---
def analyze_audio_to_cue(audio_file, top_db, min_segment_len, merge_threshold, merge_protection_len):
    """Workflow 1: Analyzes an uploaded audio file to generate the initial CUE text."""
    if not audio_file:
        raise gr.Error("Please upload an audio file first.")

    # --- 1. Load Audio File ---
    try:
        y, sr = librosa.load(audio_file, sr=None)
        audio_duration = librosa.get_duration(y=y, sr=sr)
    except Exception as e:
        raise gr.Error(f"Could not load audio file: {e}")

    # --- 2. Detect Segments using Silence Detection ---
    intervals = librosa.effects.split(y, top_db=top_db)
    # intervals is a NumPy array of [start, end] sample pairs; .size catches the empty case.
    times = [iv[0] / sr for iv in intervals if (iv[1] - iv[0]) / sr >= min_segment_len] if intervals.size > 0 else []

    # --- 3. Post-process Tracks (Add Start, Auto-Merge) ---
    if not times or times[0] > 0.5:
        times.insert(0, 0.0)

    # Auto-merging: drop a track start if the resulting track is shorter than the
    # merge threshold, unless it exceeds the protection length.
    if len(times) > 1:
        final_times = [times[0]]
        i = 0
        while i < len(times) - 1:
            track_length = times[i + 1] - times[i]
            if (track_length < merge_threshold) and (track_length <= merge_protection_len):
                # Condition to MERGE is met: skip adding the next timestamp.
                pass
            else:
                # Condition to KEEP is met.
                final_times.append(times[i + 1])
            i += 1
        # Drop a trailing stub shorter than the merge threshold.
        if len(final_times) > 1 and (audio_duration - final_times[-1]) < merge_threshold:
            final_times.pop()
        times = final_times

    # --- 4. Prepare Outputs for Gradio ---
    times = sorted(set(times))
    audio_filename = os.path.basename(audio_file)
    initial_cue_text = format_cue_text(times, audio_filename)
    track_labels = generate_track_labels(times, audio_duration)
    # This function returns everything needed to update the entire UI in one step.
    return (
        initial_cue_text, audio_filename, times, audio_duration,
        gr.update(choices=track_labels, value=[]), gr.update(visible=True)
    )
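# Sketch of the detection step above (sample values are illustrative only):
# with sr=44100, librosa.effects.split(y, top_db=40) might return
# array([[0, 441000], [485100, 1323000]]) -- [start, end] sample indices of
# non-silent spans -- so iv[0] / sr yields track starts of 0.0 s and 11.0 s.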
def parse_cue_and_update_ui(cue_text):
    """Workflow 2: Parses pasted CUE text and returns it to populate the output box."""
    if not cue_text or "INDEX 01" not in cue_text:
        return cue_text, "CDImage.wav", None, 0, gr.update(choices=[], value=[]), gr.update(visible=False)
    file_match = re.search(r'FILE\s+"([^"]+)"', cue_text, re.IGNORECASE)
    audio_filename = file_match.group(1) if file_match else "CDImage.wav"
    index_matches = re.findall(r'INDEX\s+\d+\s+([\d:]{7,8})', cue_text)
    parsed_times = (parse_cue_time_to_seconds(t) for t in index_matches)
    times = [t for t in parsed_times if t is not None]
    if not times:
        return cue_text, audio_filename, None, 0, gr.update(choices=[], value=[]), gr.update(visible=False)
    times = sorted(set(times))
    # Estimate the duration for UI labels using the last track's start time.
    # This is a limitation of text-only mode, but it keeps the tool usable.
    audio_duration = times[-1] if times else 0
    track_labels = generate_track_labels(times, audio_duration)
    return cue_text, audio_filename, times, audio_duration, gr.update(choices=track_labels, value=[]), gr.update(visible=True)
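# Example: re.findall(r'INDEX\s+\d+\s+([\d:]{7,8})', 'INDEX 01 03:25:40')
# yields ['03:25:40'], which parse_cue_time_to_seconds maps to 205.533... s.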
def update_editing_tools(selected_tracks, current_times, audio_duration):
    """Dynamically shows/hides the Merge or Split tools based on selection count."""
    num_selected = len(selected_tracks)
    if num_selected == 1:
        # Configure and show the Split UI.
        # --- 1. Get track boundaries ---
        track_idx = int(selected_tracks[0].split(' ')[1]) - 1
        start_time = current_times[track_idx]
        end_time = audio_duration if (track_idx + 1) >= len(current_times) else current_times[track_idx + 1]

        # --- 2. Pad both ends so the slider cannot split at the exact edges ---
        # A CUE sheet frame is 1/75 s (~0.013 s); we use slightly larger padding.
        padding = 0.02
        new_min_time = start_time + padding
        new_max_time = end_time - padding

        # --- 3. If the track is too short to be split, hide the tools ---
        if new_min_time >= new_max_time:
            return (
                gr.update(visible=False),  # Hide Merge button
                gr.update(visible=False),  # Hide Split group
                None,
                None
            )

        # --- 4. Configure and show the Split UI with the padded range ---
        mid_point = start_time + (end_time - start_time) / 2
        return (
            gr.update(visible=False),  # Hide Merge button
            gr.update(visible=True),   # Show Split group
            gr.update(minimum=new_min_time, maximum=new_max_time, value=mid_point),  # Configure slider
            gr.update(value=f"Split at: {seconds_to_cue_time(mid_point)}")  # Update slider label
        )
    elif num_selected > 1:
        # Show the Merge UI.
        return gr.update(visible=True), gr.update(visible=False), None, None
    else:
        # Hide everything.
        return gr.update(visible=False), gr.update(visible=False), None, None
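# Label parsing example: "Track 03 (Starts: 04:12:00) [Length: 02:30:15]"
# -> split(' ')[1] == '03' -> track_idx == 2.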
def perform_manual_merge(selected_tracks, original_times, audio_duration, audio_filename):
    """Merges the selected tracks; each run of consecutive selections collapses into one track."""
    # --- 1. Identify which track start times to remove ---
    indices_to_merge = {int(label.split(' ')[1]) - 1 for label in selected_tracks}

    # --- 2. Create the new list of times ---
    # Iterate through the original times and decide which ones to KEEP.
    new_times = []
    for i, time in enumerate(original_times):
        is_selected = i in indices_to_merge
        # Keep a track's start time if:
        # 1. it was NOT selected, OR
        # 2. it WAS selected but starts a merge block (it is the very first
        #    track, or the track before it was NOT selected).
        if not is_selected or (i == 0) or ((i - 1) not in indices_to_merge):
            new_times.append(time)

    # --- 3. Prepare all the outputs to update the UI ---
    final_cue_text = format_cue_text(new_times, audio_filename)
    new_track_labels = generate_track_labels(new_times, audio_duration)
    # Update the textbox, the state, and the checklist in one return.
    return final_cue_text, new_times, gr.update(choices=new_track_labels, value=[])
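# Worked example: times [0, 60, 120, 180] with Tracks 02 and 03 selected
# (indices_to_merge == {1, 2}): 0 is kept (not selected), 60 is kept (it
# starts the merge block), 120 is dropped (its predecessor was also selected),
# and 180 is kept -> [0, 60, 180].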
def perform_manual_split(split_time_sec, original_times, audio_duration, audio_filename):
    """Splits a track at the time specified by the slider."""
    if split_time_sec in original_times:
        raise gr.Error("This exact timestamp already exists.")
    new_times = sorted(original_times + [split_time_sec])
    final_cue_text = format_cue_text(new_times, audio_filename)
    new_track_labels = generate_track_labels(new_times, audio_duration)
    return final_cue_text, new_times, gr.update(choices=new_track_labels, value=[])
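# Example: splitting times [0, 120] at 60.0 yields [0, 60.0, 120] -- the
# original track becomes two tracks of 60 s each.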
# --- Gradio User Interface Definition ---
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎵 Advanced CUE Sheet Generator")

    # --- Hidden State Variables ---
    track_times_state = gr.State([])
    audio_duration_state = gr.State(0)
    audio_filename_state = gr.State("CDImage.wav")

    with gr.Tabs():
        with gr.TabItem("Start with Audio File"):
            gr.Markdown("Upload an audio file to automatically detect track points.")
            audio_input = gr.Audio(type="filepath", label="Upload Audio File")
            with gr.Accordion("Analysis Parameters", open=False):
                threshold_slider = gr.Slider(10, 80, 40, step=1, label="Silence Threshold (dB)")
                min_length_slider = gr.Slider(0.5, 30, 2, step=0.1, label="Min. Segment Length (s)")
                merge_threshold_slider = gr.Slider(1, 60, 15, step=1, label="Auto-Merge Threshold (s)")
                merge_protection_slider = gr.Slider(0.5, 60, 5, step=0.1, label="Merge Protection Length (s)")
            generate_button = gr.Button("Analyze Audio", variant="primary")
        with gr.TabItem("Start with CUE Text"):
            gr.Markdown("Or paste CUE text below and click outside the box. The editing tools will appear automatically.")
            cue_text_input_for_paste = gr.Textbox(label="Paste CUE Text Here", lines=8)

    # The main output textbox sits outside the tabs, serving as a central display.
    output_text = gr.Textbox(label="CUE Sheet Output", lines=15, show_copy_button=True, interactive=True)

    with gr.Group(visible=False) as manual_editing_group:
        gr.Markdown("### Manual Editing Tools")
        track_checkboxes = gr.CheckboxGroup(label="Select Tracks to Edit")
        with gr.Row(visible=False) as merge_tools:
            merge_button = gr.Button("Merge Selected Tracks", variant="secondary", size="lg")
        with gr.Group(visible=False) as split_tools:
            split_slider_label = gr.Textbox(label="Current Split Time", interactive=False)
            split_slider = gr.Slider(label="Drag to select split point")
            split_button = gr.Button("Split Track at Selected Time", variant="secondary")
    # --- Event Wiring ---
    # Workflow 1: the audio analysis button updates everything, including the editing tools.
    generate_button.click(
        fn=analyze_audio_to_cue,
        inputs=[audio_input, threshold_slider, min_length_slider, merge_threshold_slider, merge_protection_slider],
        outputs=[output_text, audio_filename_state, track_times_state, audio_duration_state, track_checkboxes, manual_editing_group]
    )
    # Workflow 2: pasting text in the dedicated input box populates the main output and enables the tools.
    # The `.change` event updates all necessary outputs in a single, direct step.
    cue_text_input_for_paste.change(
        fn=parse_cue_and_update_ui,
        inputs=[cue_text_input_for_paste],
        outputs=[output_text, audio_filename_state, track_times_state, audio_duration_state, track_checkboxes, manual_editing_group]
    )
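    # Note: depending on the Gradio version, `.change` may fire on every
    # keystroke or only once the textbox loses focus; either way, the
    # "INDEX 01" guard in parse_cue_and_update_ui simply keeps the editing
    # tools hidden until the pasted text parses as a CUE sheet.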
    # Dynamic UI controller for showing/hiding the Merge/Split tools
    track_checkboxes.change(
        fn=update_editing_tools,
        inputs=[track_checkboxes, track_times_state, audio_duration_state],
        outputs=[merge_tools, split_tools, split_slider, split_slider_label]
    )
    # Live update for the split slider's time display
    split_slider.input(
        fn=lambda t: f"Split at: {seconds_to_cue_time(t)}",
        inputs=[split_slider],
        outputs=[split_slider_label]
    )
    # Action buttons
    merge_button.click(
        fn=perform_manual_merge,
        inputs=[track_checkboxes, track_times_state, audio_duration_state, audio_filename_state],
        outputs=[output_text, track_times_state, track_checkboxes]
    )
    split_button.click(
        fn=perform_manual_split,
        inputs=[split_slider, track_times_state, audio_duration_state, audio_filename_state],
        outputs=[output_text, track_times_state, track_checkboxes]
    )
if __name__ == "__main__":
    demo.launch(inbrowser=True)