import os
import re

import librosa
import gradio as gr

# --- Helper Functions ---
def seconds_to_cue_time(t):
    """Converts a time in seconds to the CUE sheet format (MM:SS:FF)."""
    t = max(0, t)
    minutes = int(t // 60)
    seconds = int(t % 60)
    frames = int((t - minutes * 60 - seconds) * 75)
    return f'{minutes:02d}:{seconds:02d}:{frames:02d}'
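
# Illustrative example: seconds_to_cue_time(125.5) returns "02:05:37",
# since 0.5 s corresponds to 37 of the 75 CUE frames per second (truncated).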

def parse_cue_time_to_seconds(time_str):
    """Parses MM:SS:FF into seconds. Returns None on failure."""
    if not time_str:
        return None
    match = re.match(r'(\d+):(\d{1,2}):(\d{1,2})', time_str)
    if match:
        m, s, f = map(int, match.groups())
        return m * 60 + s + f / 75.0
    return None
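
# Illustrative example: parse_cue_time_to_seconds("02:05:37") returns
# 2 * 60 + 5 + 37 / 75 ≈ 125.493 seconds, the inverse of seconds_to_cue_time
# up to frame truncation.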

def format_cue_text(times, audio_filename="CDImage.wav"):
    """Generates the final CUE sheet string from a list of times."""
    if not times:
        return ""
    filename_no_ext = os.path.splitext(audio_filename)[0]
    cue_text = 'PERFORMER "Unknown Artist"\n'
    cue_text += f'TITLE "{filename_no_ext}"\n'
    cue_text += f'FILE "{audio_filename}" WAVE\n'
    # Always sort and de-duplicate times so out-of-order additions from splitting are handled.
    sorted_times = sorted(set(times))
    for idx, t in enumerate(sorted_times):
        cue_time_str = seconds_to_cue_time(t)
        cue_text += f'  TRACK {idx+1:02d} AUDIO\n'
        cue_text += f'    TITLE "Track {idx+1:02d}"\n'
        cue_text += f'    INDEX 01 {cue_time_str}\n'
    return cue_text
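
# Illustrative output for format_cue_text([0.0, 125.5], "album.wav"):
#   PERFORMER "Unknown Artist"
#   TITLE "album"
#   FILE "album.wav" WAVE
#     TRACK 01 AUDIO
#       TITLE "Track 01"
#       INDEX 01 00:00:00
#     TRACK 02 AUDIO
#       TITLE "Track 02"
#       INDEX 01 02:05:37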

def generate_track_labels(times, audio_duration):
    """Creates descriptive labels for the checklist, including track length."""
    if not times:
        return []
    sorted_times = sorted(set(times))
    track_choices = []
    for i, t in enumerate(sorted_times):
        # Every track ends where the next one starts; the last track runs to
        # the end of the audio.
        track_length = (sorted_times[i+1] - t) if i < len(sorted_times) - 1 else (audio_duration - t)
        label = f"Track {i+1:02d} (Starts: {seconds_to_cue_time(t)}) [Length: {seconds_to_cue_time(track_length)}]"
        track_choices.append(label)
    return track_choices
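
# Illustrative example: generate_track_labels([0.0, 125.5], 180.0) returns
# ['Track 01 (Starts: 00:00:00) [Length: 02:05:37]',
#  'Track 02 (Starts: 02:05:37) [Length: 00:54:37]'].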

# --- Core Gradio Functions ---
def analyze_audio_to_cue(audio_file, top_db, min_segment_len, merge_threshold, merge_protection_len):
    """Workflow 1: Analyzes an uploaded audio file to generate the initial CUE text."""
    if not audio_file:
        raise gr.Error("Please upload an audio file first.")

    # --- 1. Load Audio File ---
    try:
        y, sr = librosa.load(audio_file, sr=None)
        audio_duration = librosa.get_duration(y=y, sr=sr)
    except Exception as e:
        raise gr.Error(f"Could not load audio file: {e}")

    # --- 2. Detect Segments using Silence Detection ---
    intervals = librosa.effects.split(y, top_db=top_db)
    # `intervals` is a NumPy array of [start, end] sample pairs, so emptiness
    # must be tested with .size rather than plain truthiness.
    times = [iv[0] / sr for iv in intervals if (iv[1] - iv[0]) / sr >= min_segment_len] if intervals.size > 0 else []
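    # Note: librosa treats anything more than top_db decibels below the
    # signal's peak as silence; segments shorter than min_segment_len seconds
    # are discarded here.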

    # --- 3. Post-process Tracks (Add Start, Auto-Merge) ---
    if not times or times[0] > 0.5:
        times.insert(0, 0.0)
    # Auto-merging: walk the detected boundaries and drop any that would
    # create a track shorter than the merge threshold, unless the track is
    # long enough to be protected.
    if len(times) > 1:
        final_times = [times[0]]
        i = 0
        while i < len(times) - 1:
            track_length = times[i+1] - times[i]
            # Merge if the track is shorter than the threshold AND not longer
            # than the protection length.
            if (track_length < merge_threshold) and (track_length <= merge_protection_len):
                # Merge: skip the next boundary so this short track is
                # absorbed into its neighbour.
                pass
            else:
                # Keep the next boundary as a track start.
                final_times.append(times[i+1])
            i += 1
        # If the final track would be shorter than the merge threshold, fold
        # it into the previous one.
        if len(final_times) > 1 and (audio_duration - final_times[-1]) < merge_threshold:
            final_times.pop()
        times = final_times
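    # Illustrative trace (assumed values): times=[0, 3, 60] with
    # merge_threshold=15 and merge_protection_len=5 drops the boundary at 3 s,
    # merging the 3 s opening segment with the one after it: times=[0, 60].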

    # --- 4. Prepare Outputs for Gradio ---
    times = sorted(set(times))
    audio_filename = os.path.basename(audio_file)
    initial_cue_text = format_cue_text(times, audio_filename)
    track_labels = generate_track_labels(times, audio_duration)
    # Return everything needed to update the entire UI in one step.
    return (
        initial_cue_text, audio_filename, times, audio_duration,
        gr.update(choices=track_labels, value=[]), gr.update(visible=True)
    )

def parse_cue_and_update_ui(cue_text):
    """Workflow 2: Parses pasted CUE text and returns it to populate the output box."""
    if not cue_text or "INDEX 01" not in cue_text:
        return cue_text, "CDImage.wav", None, 0, gr.update(choices=[], value=[]), gr.update(visible=False)
    file_match = re.search(r'FILE\s+"([^"]+)"', cue_text, re.IGNORECASE)
    audio_filename = file_match.group(1) if file_match else "CDImage.wav"
    index_matches = re.findall(r'INDEX\s+\d+\s+([\d:]{7,8})', cue_text)
    parsed_times = (parse_cue_time_to_seconds(t) for t in index_matches)
    times = [t for t in parsed_times if t is not None]
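    # Illustrative match: a line such as 'INDEX 01 02:05:37' yields the
    # capture "02:05:37", which parses to roughly 125.49 seconds.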
    if not times:
        return cue_text, audio_filename, None, 0, gr.update(choices=[], value=[]), gr.update(visible=False)
    times = sorted(set(times))
    # Without the audio itself the true duration is unknown, so use the last
    # track's start time as a stand-in for the UI labels (the final track will
    # show zero length). A limitation of text-only mode, but it keeps the tool usable.
    audio_duration = times[-1] if times else 0
    track_labels = generate_track_labels(times, audio_duration)
    return cue_text, audio_filename, times, audio_duration, gr.update(choices=track_labels, value=[]), gr.update(visible=True)

def update_editing_tools(selected_tracks, current_times, audio_duration):
    """Dynamically shows/hides the Merge or Split tools based on selection count."""
    num_selected = len(selected_tracks)
    if num_selected == 1:
        # --- 1. Get the selected track's boundaries ---
        # Labels look like "Track 01 (Starts: ...)", so the second token is
        # the 1-based track number.
        track_idx = int(selected_tracks[0].split(' ')[1]) - 1
        start_time = current_times[track_idx]
        end_time = audio_duration if (track_idx + 1) >= len(current_times) else current_times[track_idx + 1]
        # --- 2. Pad both ends so the split point cannot land exactly on a track edge ---
        # A CUE sheet frame is 1/75 s (~0.013 s); use slightly more than one frame.
        padding = 0.02
        new_min_time = start_time + padding
        new_max_time = end_time - padding
        # --- 3. If the track cannot accommodate the padding, it is too short to split ---
        if new_min_time >= new_max_time:
            return (
                gr.update(visible=False),  # Hide Merge button
                gr.update(visible=False),  # Hide Split group
                None,
                None
            )
        # --- 4. Configure and show the Split UI with the padded range ---
        mid_point = start_time + (end_time - start_time) / 2
        return (
            gr.update(visible=False),  # Hide Merge button
            gr.update(visible=True),   # Show Split group
            gr.update(minimum=new_min_time, maximum=new_max_time, value=mid_point),  # Configure slider
            gr.update(value=f"Split at: {seconds_to_cue_time(mid_point)}")  # Update slider label
        )
    elif num_selected > 1:
        # Show the Merge UI
        return gr.update(visible=True), gr.update(visible=False), None, None
    else:
        # Nothing selected: hide both tools
        return gr.update(visible=False), gr.update(visible=False), None, None

def perform_manual_merge(selected_tracks, original_times, audio_duration, audio_filename):
    """Merges the selected tracks by dropping interior track start times."""
    # --- 1. Identify which track indices were selected ---
    indices_to_merge = {int(label.split(' ')[1]) - 1 for label in selected_tracks}
    # --- 2. Build the new list of start times ---
    new_times = []
    # Walk the original times and decide which ones to KEEP. A start time is kept if:
    #   1. its track was NOT selected, OR
    #   2. its track WAS selected but starts a merge block (it is the very
    #      first track, or the track before it was not selected).
    for i, time in enumerate(original_times):
        is_selected = i in indices_to_merge
        if not is_selected or (i == 0) or ((i - 1) not in indices_to_merge):
            new_times.append(time)
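    # Illustrative trace (assumed values): original_times=[0, 10, 20, 30] with
    # Tracks 02 and 03 selected keeps [0, 10, 30]; the boundary at 20 s is
    # dropped, so the old tracks 2 and 3 become one track starting at 10 s.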
    # --- 3. Prepare the outputs that update the UI ---
    final_cue_text = format_cue_text(new_times, audio_filename)
    new_track_labels = generate_track_labels(new_times, audio_duration)
    # Update the textbox, the state, and the checklist in one step.
    return final_cue_text, new_times, gr.update(choices=new_track_labels, value=[])

def perform_manual_split(split_time_sec, original_times, audio_duration, audio_filename):
    """Splits a track at the time specified by the slider."""
    if split_time_sec in original_times:
        raise gr.Error("This exact timestamp already exists.")
    new_times = sorted(original_times + [split_time_sec])
    final_cue_text = format_cue_text(new_times, audio_filename)
    new_track_labels = generate_track_labels(new_times, audio_duration)
    return final_cue_text, new_times, gr.update(choices=new_track_labels, value=[])
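
# Illustrative example: perform_manual_split(15.0, [0.0, 30.0], 60.0, "a.wav")
# yields times [0.0, 15.0, 30.0] and a three-track CUE sheet.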

# --- Gradio User Interface Definition ---
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎵 Advanced CUE Sheet Generator")
    # --- Hidden State Variables ---
    track_times_state = gr.State([])
    audio_duration_state = gr.State(0)
    audio_filename_state = gr.State("CDImage.wav")
    with gr.Tabs():
        with gr.TabItem("Start with Audio File"):
            gr.Markdown("Upload an audio file to automatically detect track points.")
            audio_input = gr.Audio(type="filepath", label="Upload Audio File")
            with gr.Accordion("Analysis Parameters", open=False):
                threshold_slider = gr.Slider(10, 80, 40, step=1, label="Silence Threshold (dB)")
                min_length_slider = gr.Slider(0.5, 30, 2, step=0.1, label="Min. Segment Length (s)")
                merge_length_slider = gr.Slider(1, 60, 15, step=1, label="Auto-Merge Threshold (s)")
                merge_protection_slider = gr.Slider(0.5, 60, 5, step=0.1, label="Merge Protection Length (s)")
            generate_button = gr.Button("Analyze Audio", variant="primary")
with gr.TabItem("Start with CUE Text"): | |
gr.Markdown("Or paste CUE text below and click outside the box. The editing tools will appear automatically.") | |
cue_text_input_for_paste = gr.Textbox(label="Paste CUE Text Here", lines=8) | |
# The main output textbox is now outside the tabs, serving as a central display. | |
output_text = gr.Textbox(label="CUE Sheet Output", lines=15, show_copy_button=True, interactive=True) | |
with gr.Group(visible=False) as manual_editing_group: | |
gr.Markdown("### Manual Editing Tools") | |
track_checkboxes = gr.CheckboxGroup(label="Select Tracks to Edit") | |
with gr.Row(visible=False) as merge_tools: | |
merge_button = gr.Button("Merge Selected Tracks", variant="secondary", size="lg") | |
with gr.Group(visible=False) as split_tools: | |
split_slider_label = gr.Textbox(label="Current Split Time", interactive=False) | |
split_slider = gr.Slider(label="Drag to select split point") | |
split_button = gr.Button("Split Track at Selected Time", variant="secondary") | |
    # --- Event Wiring ---
    # Workflow 1: the analysis button updates everything, including the editing tools.
    generate_button.click(
        fn=analyze_audio_to_cue,
        inputs=[audio_input, threshold_slider, min_length_slider, merge_length_slider, merge_protection_slider],
        outputs=[output_text, audio_filename_state, track_times_state, audio_duration_state, track_checkboxes, manual_editing_group]
    )
    # Workflow 2: pasting text into the dedicated input box populates the main
    # output and enables the tools; the `.change` event updates all outputs in
    # a single step.
    cue_text_input_for_paste.change(
        fn=parse_cue_and_update_ui,
        inputs=[cue_text_input_for_paste],
        outputs=[output_text, audio_filename_state, track_times_state, audio_duration_state, track_checkboxes, manual_editing_group]
    )
    # Dynamic UI controller for showing/hiding the Merge/Split tools
    track_checkboxes.change(
        fn=update_editing_tools,
        inputs=[track_checkboxes, track_times_state, audio_duration_state],
        outputs=[merge_tools, split_tools, split_slider, split_slider_label]
    )
    # Live update for the split slider's time display
    split_slider.input(
        fn=lambda t: f"Split at: {seconds_to_cue_time(t)}",
        inputs=[split_slider],
        outputs=[split_slider_label]
    )
    # Action buttons
    merge_button.click(
        fn=perform_manual_merge,
        inputs=[track_checkboxes, track_times_state, audio_duration_state, audio_filename_state],
        outputs=[output_text, track_times_state, track_checkboxes]
    )
    split_button.click(
        fn=perform_manual_split,
        inputs=[split_slider, track_times_state, audio_duration_state, audio_filename_state],
        outputs=[output_text, track_times_state, track_checkboxes]
    )

if __name__ == "__main__":
    demo.launch(inbrowser=True)
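
# To run locally (assuming this file is saved as app.py):
#   pip install gradio librosa
#   python app.py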