# =================================================================
#
# Merged and Integrated Script for Audio/MIDI Processing and Rendering (Stereo Enhanced)
#
# This script combines two functionalities:
# 1. Transcribing audio to MIDI using two methods:
#    a) A general-purpose model (basic-pitch by Spotify).
#    b) A model specialized for solo piano (ByteDance).
#    - Includes stereo processing by splitting channels, transcribing independently, and merging MIDI.
# 2. Applying advanced transformations and re-rendering MIDI files using:
#    a) Standard SoundFonts via FluidSynth (produces stereo audio).
#    b) A custom 8-bit style synthesizer for a chiptune sound (updated for stereo output).
#
# The user can upload an audio file (e.g., WAV, MP3) or a MIDI file.
# - If an audio file is uploaded, it is first transcribed to MIDI using the selected method.
# - The resulting MIDI (or an uploaded MIDI) can then be processed
#   with various effects and rendered into audio.
#
# =================================================================
# Original sources:
# https://huggingface.co/spaces/asigalov61/ByteDance-Solo-Piano-Audio-to-MIDI-Transcription
# https://huggingface.co/spaces/asigalov61/Advanced-MIDI-Renderer
# =================================================================
# Packages:
#
#   sudo apt install fluidsynth
#
# =================================================================
# Requirements:
#
#   pip install gradio torch pytz numpy scipy matplotlib networkx scikit-learn
#   pip install piano_transcription_inference huggingface_hub
#   pip install basic-pitch pretty_midi librosa soundfile
#
# =================================================================
# Core modules:
#
#   git clone --depth 1 https://github.com/asigalov61/tegridy-tools
#
# =================================================================

import io
import os
import hashlib
import time as reqtime
import copy

import librosa
import pyloudnorm as pyln
import soundfile as sf
import torch
import ffmpeg

import gradio as gr

# --- Imports for Vocal Separation ---
import torchaudio
from demucs.apply import apply_model
from demucs.pretrained import get_model
from demucs.audio import convert_audio

from src.piano_transcription.utils import initialize_app
from piano_transcription_inference import PianoTranscription, utilities, sample_rate as transcription_sample_rate

# --- Import core transcription and MIDI processing libraries ---
from src import TMIDIX, TPLOTS
from src import MIDI
from src.midi_to_colab_audio import midi_to_colab_audio

# --- Imports for General Purpose Transcription (basic-pitch) ---
import basic_pitch
from basic_pitch.inference import predict
from basic_pitch import ICASSP_2022_MODEL_PATH

# --- Imports for 8-bit Synthesizer & MIDI Merging ---
import pretty_midi
import numpy as np
from scipy import signal

# =================================================================================================
# === Hugging Face SoundFont Downloader ===
# =================================================================================================
from huggingface_hub import hf_hub_download
import glob

# --- Define a constant for the 8-bit synthesizer option ---
SYNTH_8_BIT_LABEL = "None (8-bit Synthesizer)"

def prepare_soundfonts():
    """
    Ensures a default set of SoundFonts is downloaded, then scans the 'src/sf2'
    directory recursively for all .sf2 files.
    Returns a dictionary mapping a user-friendly name to its full file path,
    with default soundfonts listed first in their specified order.
Downloads soundfont files from the specified Hugging Face Space repository to a local 'src/sf2' directory if they don't already exist. Returns a list of local paths to the soundfont files. """ SF2_REPO_ID = "asigalov61/Advanced-MIDI-Renderer" SF2_DIR = "src/sf2" # This list is now just for ensuring default files exist # {"Super GM": 0, "Orpheus GM": 1, "Live HQ GM": 2, "Nice Strings + Orchestra": 3, "Real Choir": 4, "Super Game Boy": 5, "Proto Square": 6} DEFAULT_SF2_FILENAMES = [ "SGM-v2.01-YamahaGrand-Guit-Bass-v2.7.sf2", "Orpheus_18.06.2020.sf2", "Live HQ Natural SoundFont GM.sf2", "Nice-Strings-PlusOrchestra-v1.6.sf2", "KBH-Real-Choir-V2.5.sf2", "SuperGameBoy.sf2", "ProtoSquare.sf2" ] # Create the target directory if it doesn't exist os.makedirs(SF2_DIR, exist_ok=True) # --- Step 1: Ensure default SoundFonts are available --- print("Checking for SoundFont files...") for filename in DEFAULT_SF2_FILENAMES: local_path = os.path.join(SF2_DIR, filename) # Check if the file already exists locally to avoid re-downloading if not os.path.exists(local_path): print(f"Downloading '{filename}' from Hugging Face Hub...") try: # Use hf_hub_download to get the file # It will be downloaded to the specified local directory hf_hub_download( repo_id=SF2_REPO_ID, repo_type='space', # Specify that the repository is a Space filename=f"{filename}", # The path to the file within the repository local_dir=SF2_DIR, # local_dir_use_symlinks=False # Copy file to the dir for a clean folder structure ) print(f"'{filename}' downloaded successfully.") except Exception as e: print(f"Error downloading {filename}: {e}") # If download fails, we might not be able to use this soundfont # --- Step 2: Scan the entire directory for all .sf2 files --- print(f"Scanning '{SF2_DIR}' for all .sf2 files...") all_sfs_map = {} # Use glob with recursive=True to find all .sf2 files in subdirectories search_pattern = os.path.join(SF2_DIR, '**', '*.sf2') for full_path in glob.glob(search_pattern, recursive=True): # Create a user-friendly display name, including subfolder if it exists relative_path = os.path.relpath(full_path, SF2_DIR) display_name = os.path.splitext(relative_path)[0].replace("\\", "/") # Use forward slashes for consistency all_sfs_map[display_name] = full_path # --- Step 3: Create the final ordered dictionary based on priority --- ordered_soundfont_map = {} # Create display names for default files (filename without extension) default_display_names = [os.path.splitext(f)[0] for f in DEFAULT_SF2_FILENAMES] # Separate other files from the default ones other_display_names = [name for name in all_sfs_map.keys() if name not in default_display_names] other_display_names.sort() # Sort the rest alphabetically # Add default soundfonts first, maintaining the order from DEFAULT_SF2_FILENAMES for name in default_display_names: if name in all_sfs_map: # Check if the file was actually found by the scanner ordered_soundfont_map[name] = all_sfs_map[name] # Add all other soundfonts after the default ones for name in other_display_names: ordered_soundfont_map[name] = all_sfs_map[name] return ordered_soundfont_map # ================================================================================================= # === 8-bit Style Synthesizer (Stereo Enabled) === # ================================================================================================= def synthesize_8bit_style(midi_data, waveform_type, envelope_type, decay_time_s, pulse_width, vibrato_rate, vibrato_depth, bass_boost_level, fs=44100, smooth_notes_level=0.0, 
continuous_vibrato_level=0.0, noise_level=0.0, distortion_level=0.0, fm_modulation_depth=0.0, fm_modulation_rate=0.0): """ Synthesizes an 8-bit style audio waveform from a PrettyMIDI object. This function generates waveforms manually instead of using a synthesizer like FluidSynth. Includes an optional sub-octave bass booster with adjustable level. Instruments are panned based on their order in the MIDI file. Instrument 1 -> Left, Instrument 2 -> Right. Now supports graded levels for smoothing and vibrato continuity. """ total_duration = midi_data.get_end_time() # Initialize a stereo waveform buffer (2 channels: Left, Right) waveform = np.zeros((2, int(total_duration * fs) + fs)) num_instruments = len(midi_data.instruments) # Phase tracking: main oscillator phase for each instrument osc_phase = {} # Vibrato phase tracking vibrato_phase = 0.0 for i, instrument in enumerate(midi_data.instruments): # --- Panning Logic --- # Default to center-panned mono pan_l, pan_r = 0.707, 0.707 if num_instruments == 2: if i == 0: # First instrument panned left pan_l, pan_r = 1.0, 0.0 elif i == 1: # Second instrument panned right pan_l, pan_r = 0.0, 1.0 elif num_instruments > 2: if i == 0: # Left pan_l, pan_r = 1.0, 0.0 elif i == 1: # Right pan_l, pan_r = 0.0, 1.0 # Other instruments remain centered osc_phase[i] = 0.0 # Independent phase tracking for each instrument for note in instrument.notes: freq = pretty_midi.note_number_to_hz(note.pitch) note_duration = note.end - note.start num_samples = int(note_duration * fs) if num_samples <= 0: continue t = np.arange(num_samples) / fs # --- Graded Continuous Vibrato --- # This now interpolates between a fully reset vibrato and a fully continuous one. # Use accumulated phase to avoid vibrato reset per note vib_phase_inc = 2 * np.pi * vibrato_rate / fs per_note_vib_phase = 2 * np.pi * vibrato_rate * t continuous_vib_phase = vibrato_phase + np.arange(num_samples) * vib_phase_inc # Weighted average of the two phase types final_vib_phase = ( per_note_vib_phase * (1 - continuous_vibrato_level) + continuous_vib_phase * continuous_vibrato_level ) vibrato_lfo = vibrato_depth * np.sin(final_vib_phase) # Update the global vibrato phase for the next note if num_samples > 0: vibrato_phase = (continuous_vib_phase[-1] + vib_phase_inc) % (2 * np.pi) # --- Waveform Generation with FM --- fm_lfo = fm_modulation_depth * np.sin(2 * np.pi * fm_modulation_rate * t) modulated_freq = freq * (1 + fm_lfo) # --- Waveform Generation (Main Oscillator with phase continuity) --- phase_inc = 2 * np.pi * (modulated_freq + vibrato_lfo) / fs phase = osc_phase[i] + np.cumsum(phase_inc) if num_samples > 0: osc_phase[i] = phase[-1] % (2 * np.pi) # Store last phase if waveform_type == 'Square': note_waveform = signal.square(phase, duty=pulse_width) elif waveform_type == 'Sawtooth': note_waveform = signal.sawtooth(phase) else: # Triangle note_waveform = signal.sawtooth(phase, width=0.5) # --- Bass Boost (Sub-Octave Oscillator) --- if bass_boost_level > 0: bass_freq = freq / 2.0 # Only add bass if the frequency is reasonably audible if bass_freq > 20: # Bass uses a simple square wave, no vibrato, for stability bass_phase_inc = 2 * np.pi * bass_freq / fs bass_phase = np.cumsum(np.full(num_samples, bass_phase_inc)) bass_sub_waveform = signal.square(bass_phase, duty=0.5) # Mix the main and bass waveforms. # As bass level increases, slightly decrease main waveform volume to prevent clipping. 
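# --- Illustrative aside (hypothetical helper, not called anywhere in this script) ---
# The phase-accumulation trick used by the main oscillator above can be isolated into
# a tiny standalone sketch: carrying the running phase across note boundaries is what
# prevents the audible click that a hard phase reset would cause.
#
#   def continuous_square(freqs_hz, fs=44100, start_phase=0.0, duty=0.5):
#       """Click-free square wave for an array of per-sample frequencies."""
#       phase = start_phase + np.cumsum(2 * np.pi * np.asarray(freqs_hz) / fs)
#       return signal.square(phase, duty=duty), phase[-1] % (2 * np.pi)
#
# Feeding the returned phase back in as start_phase for the next note keeps the
# oscillator continuous, exactly as osc_phase[i] does in the loop above.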
main_level = 1.0 - (0.5 * bass_boost_level) note_waveform = (note_waveform * main_level) + (bass_sub_waveform * bass_boost_level) # --- Noise & Distortion Simulation (White Noise) --- if noise_level > 0: note_waveform += np.random.uniform(-1, 1, num_samples) * noise_level # --- Distortion (Wave Shaping) --- if distortion_level > 0: # Using a tanh function for a smoother, "warmer" distortion note_waveform = np.tanh(note_waveform * (1 + distortion_level * 5)) # --- ADSR Envelope --- start_amp = note.velocity / 127.0 envelope = np.zeros(num_samples) if envelope_type == 'Plucky (AD Envelope)': attack_samples = min(int(0.005 * fs), num_samples) decay_samples = min(int(decay_time_s * fs), num_samples - attack_samples) envelope[:attack_samples] = np.linspace(0, start_amp, attack_samples) if decay_samples > 0: envelope[attack_samples:attack_samples+decay_samples] = np.linspace(start_amp, 0, decay_samples) else: # Sustained envelope = np.linspace(start_amp, 0, num_samples) # --- Graded Note Smoothing --- # The level controls the length of the fade in/out. Max fade is 10ms. if smooth_notes_level > 0 and num_samples > 10: fade_length = int(fs * 0.01 * smooth_notes_level) fade_samples = min(fade_length, num_samples // 2) if fade_samples > 0: envelope[:fade_samples] *= np.linspace(0.5, 1.0, fade_samples) envelope[-fade_samples:] *= np.linspace(1.0, 0.0, fade_samples) # Apply envelope to the (potentially combined) waveform note_waveform *= envelope start_sample = int(note.start * fs) end_sample = start_sample + num_samples if end_sample > waveform.shape[1]: end_sample = waveform.shape[1] note_waveform = note_waveform[:end_sample-start_sample] # Add the mono note waveform to the stereo buffer with panning waveform[0, start_sample:end_sample] += note_waveform * pan_l waveform[1, start_sample:end_sample] += note_waveform * pan_r return waveform # Returns a (2, N) numpy array def analyze_midi_velocity(midi_path): midi = pretty_midi.PrettyMIDI(midi_path) all_velocities = [] print(f"Analyzing velocity for MIDI: {midi_path}") for i, instrument in enumerate(midi.instruments): velocities = [note.velocity for note in instrument.notes] all_velocities.extend(velocities) if velocities: print(f"Instrument {i} ({instrument.name}):") print(f" Notes count: {len(velocities)}") print(f" Velocity min: {min(velocities)}") print(f" Velocity max: {max(velocities)}") print(f" Velocity mean: {np.mean(velocities):.2f}") else: print(f"Instrument {i} ({instrument.name}): no notes found.") if all_velocities: print("\nOverall MIDI velocity stats:") print(f" Total notes: {len(all_velocities)}") print(f" Velocity min: {min(all_velocities)}") print(f" Velocity max: {max(all_velocities)}") print(f" Velocity mean: {np.mean(all_velocities):.2f}") else: print("No notes found in this MIDI.") def scale_instrument_velocity(instrument, scale=0.8): for note in instrument.notes: note.velocity = max(1, min(127, int(note.velocity * scale))) def normalize_loudness(audio_data, sample_rate, target_lufs=-23.0): """ Normalizes the audio data to a target integrated loudness (LUFS). This provides more consistent perceived volume than peak normalization. Args: audio_data (np.ndarray): The audio signal. sample_rate (int): The sample rate of the audio. target_lufs (float): The target loudness in LUFS. Defaults to -23.0, a common standard for broadcast. Returns: np.ndarray: The loudness-normalized audio data. """ try: # 1. 
Measure the integrated loudness of the input audio meter = pyln.Meter(sample_rate) # create meter loudness = meter.integrated_loudness(audio_data) # 2. Calculate the gain needed to reach the target loudness # The gain is applied in the linear domain, so we convert from dB loudness_gain_db = target_lufs - loudness loudness_gain_linear = 10.0 ** (loudness_gain_db / 20.0) # 3. Apply the gain normalized_audio = audio_data * loudness_gain_linear # 4. Final safety check: peak normalize to prevent clipping, just in case # the loudness normalization results in peaks > 1.0 peak_val = np.max(np.abs(normalized_audio)) if peak_val > 1.0: normalized_audio /= peak_val print(f"Warning: Loudness normalization resulted in clipping. Audio was peak-normalized as a safeguard.") print(f"Audio normalized from {loudness:.2f} LUFS to target {target_lufs} LUFS.") return normalized_audio except Exception as e: print(f"Loudness normalization failed: {e}. Falling back to original audio.") return audio_data # ================================================================================================= # === MIDI Merging Function === # ================================================================================================= def merge_midis(midi_path_left, midi_path_right, output_path): """ Merges two MIDI files into a single MIDI file. This robust version iterates through ALL instruments in both MIDI files, ensuring no data is lost if the source files are multi-instrumental. It applies hard-left panning (Pan=0) to every instrument from the left MIDI and hard-right panning (Pan=127) to every instrument from the right MIDI. """ try: analyze_midi_velocity(midi_path_left) analyze_midi_velocity(midi_path_right) midi_left = pretty_midi.PrettyMIDI(midi_path_left) midi_right = pretty_midi.PrettyMIDI(midi_path_right) merged_midi = pretty_midi.PrettyMIDI() # --- Process ALL instruments from the left channel MIDI --- if midi_left.instruments: print(f"Found {len(midi_left.instruments)} instrument(s) in the left channel MIDI.") # Use a loop to iterate through every instrument for instrument in midi_left.instruments: scale_instrument_velocity(instrument, scale=0.8) # To avoid confusion, we can prefix the instrument name instrument.name = f"Left - {instrument.name if instrument.name else 'Instrument'}" # Create and add the Pan Left control change # Create a Control Change event for Pan (controller number 10). # Set its value to 0 for hard left panning. # Add it at the very beginning of the track (time=0.0). pan_left = pretty_midi.ControlChange(number=10, value=0, time=0.0) # Use insert() to ensure the pan event is the very first one instrument.control_changes.insert(0, pan_left) # Append the fully processed instrument to the merged MIDI merged_midi.instruments.append(instrument) # --- Process ALL instruments from the right channel MIDI --- if midi_right.instruments: print(f"Found {len(midi_right.instruments)} instrument(s) in the right channel MIDI.") # Use a loop here as well for instrument in midi_right.instruments: scale_instrument_velocity(instrument, scale=0.8) instrument.name = f"Right - {instrument.name if instrument.name else 'Instrument'}" # Create and add the Pan Right control change # Create a Control Change event for Pan (controller number 10). # Set its value to 127 for hard right panning. # Add it at the very beginning of the track (time=0.0). 
pan_right = pretty_midi.ControlChange(number=10, value=127, time=0.0) instrument.control_changes.insert(0, pan_right) merged_midi.instruments.append(instrument) merged_midi.write(output_path) print(f"Successfully merged all instruments and panned into '{os.path.basename(output_path)}'") analyze_midi_velocity(output_path) return output_path except Exception as e: print(f"Error merging MIDI files: {e}") # Fallback logic remains the same if os.path.exists(midi_path_left): print("Fallback: Using only the left channel MIDI.") return midi_path_left return None # ================================================================================================= # === Stage 1: Audio to MIDI Transcription Functions === # ================================================================================================= def TranscribePianoAudio(input_file): """ Transcribes a WAV or MP3 audio file of a SOLO PIANO performance into a MIDI file. This uses the ByteDance model. Args: input_file_path (str): The path to the input audio file. Returns: str: The file path of the generated MIDI file. """ print('=' * 70) print('STAGE 1: Starting Piano-Specific Transcription') print('=' * 70) # Generate a unique output filename for the MIDI fn = os.path.basename(input_file) fn1 = fn.split('.')[0] # Use os.path.join to create a platform-independent directory path output_dir = os.path.join("output", "transcribed_piano_") out_mid_path = os.path.join(output_dir, fn1 + '.mid') # Check for the directory's existence and create it if necessary if not os.path.exists(output_dir): os.makedirs(output_dir) print('-' * 70) print(f'Input file name: {fn}') print(f'Output MIDI path: {out_mid_path}') print('-' * 70) # Load audio using the utility function print('Loading audio...') (audio, _) = utilities.load_audio(input_file, sr=transcription_sample_rate, mono=True) print('Audio loaded successfully.') print('-' * 70) # Initialize the transcription model # Use 'cuda' if a GPU is available and configured, otherwise 'cpu' device = 'cuda' if torch.cuda.is_available() else 'cpu' print(f'Loading transcriptor model... device= {device}') transcriptor = PianoTranscription(device=device, checkpoint_path="src/models/CRNN_note_F1=0.9677_pedal_F1=0.9186.pth") print('Transcriptor loaded.') print('-' * 70) # Perform transcription print('Transcribing audio to MIDI (Piano-Specific)...') # This function call saves the MIDI file to the specified path transcriptor.transcribe(audio, out_mid_path) print('Piano transcription complete.') print('=' * 70) # Return the path to the newly created MIDI file return out_mid_path def TranscribeGeneralAudio(input_file, onset_thresh, frame_thresh, min_note_len, min_freq, max_freq, infer_onsets_bool, melodia_trick_bool, multiple_bends_bool): """ Transcribes a general audio file into a MIDI file using basic-pitch. This is suitable for various instruments and vocals. 
""" print('=' * 70) print('STAGE 1: Starting General Purpose Transcription') print('=' * 70) fn = os.path.basename(input_file) fn1 = fn.split('.')[0] output_dir = os.path.join("output", "transcribed_general_") out_mid_path = os.path.join(output_dir, fn1 + '.mid') os.makedirs(output_dir, exist_ok=True) print(f'Input file: {fn}\nOutput MIDI: {out_mid_path}') # --- Perform transcription using basic-pitch --- print('Transcribing audio to MIDI (General Purpose)...') # The predict function handles audio loading internally model_output, midi_data, note_events = basic_pitch.inference.predict( audio_path=input_file, model_or_model_path=ICASSP_2022_MODEL_PATH, onset_threshold=onset_thresh, frame_threshold=frame_thresh, minimum_note_length=min_note_len, minimum_frequency=min_freq, maximum_frequency=max_freq, infer_onsets=infer_onsets_bool, melodia_trick=melodia_trick_bool, multiple_pitch_bends=multiple_bends_bool ) # --- Save the MIDI file --- midi_data.write(out_mid_path) print('General transcription complete.') print('=' * 70) return out_mid_path # ================================================================================================= # === Stage 2: MIDI Transformation and Rendering Function === # ================================================================================================= def Render_MIDI(input_midi_path, render_type, soundfont_bank, render_sample_rate, render_with_sustains, merge_misaligned_notes, custom_render_patch, render_align, render_transpose_value, render_transpose_to_C4, render_output_as_solo_piano, render_remove_drums, # --- 8-bit synth params --- s8bit_waveform_type, s8bit_envelope_type, s8bit_decay_time_s, s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth, s8bit_bass_boost_level, s8bit_smooth_notes_level, s8bit_continuous_vibrato_level, s8bit_noise_level, s8bit_distortion_level, s8bit_fm_modulation_depth, s8bit_fm_modulation_rate ): """ Processes and renders a MIDI file according to user-defined settings. Can render using SoundFonts or a custom 8-bit synthesizer. Args: input_midi_path (str): The path to the input MIDI file. All other arguments are rendering options from the Gradio UI. Returns: A tuple containing all the output elements for the Gradio UI. """ print('*' * 70) print('STAGE 2: Starting MIDI Rendering') print('*' * 70) # --- File and Settings Setup --- fn = os.path.basename(input_midi_path) fn1 = fn.split('.')[0] # Use os.path.join to create a platform-independent directory path output_dir = os.path.join("output", "rendered_midi") if not os.path.exists(output_dir): os.makedirs(output_dir) # Now, join the clean directory path with the filename new_fn_path = os.path.join(output_dir, fn1 + '_rendered.mid') try: with open(input_midi_path, 'rb') as f: fdata = f.read() input_midi_md5hash = hashlib.md5(fdata).hexdigest() except FileNotFoundError: # Handle cases where the input file might not exist print(f"Error: Input MIDI file not found at {input_midi_path}") return [None] * 7 # Return empty values for all outputs print('=' * 70) print('Requested settings:') print(f'Input MIDI file name: {fn}') print(f'Input MIDI md5 hash: {input_midi_md5hash}') print('-' * 70) print(f'Render type: {render_type}') print(f'Soundfont bank: {soundfont_bank}') print(f'Audio render sample rate: {render_sample_rate}') # ... (add other print statements for settings if needed) print('=' * 70) # --- MIDI Processing using TMIDIX --- print('Processing MIDI... 
Please wait...') raw_score = MIDI.midi2single_track_ms_score(fdata) # call the function and store the returned list in a variable. processed_scores = TMIDIX.advanced_score_processor(raw_score, return_enhanced_score_notes=True, apply_sustain=render_with_sustains) # check if the returned list is empty. This happens when transcription finds no notes. # This check prevents the 'IndexError: list index out of range'. if not processed_scores: # If it is empty, print a warning and return a user-friendly error message. print("Warning: MIDI file contains no processable notes.") # The number of returned values must match the function's expected output. # We return a tuple with empty or placeholder values for all 7 output components. return ("N/A", fn1, "MIDI file contains no notes.", None, None, None, "No notes found.") # If the list is not empty, it is now safe to get the first element. escore = processed_scores[0] # Handle cases where the MIDI might not contain any notes if not escore: print("Warning: MIDI file contains no processable notes.") return ("N/A", fn1, "MIDI file contains no notes.",None, None, None, "No notes found.") # This line will now work correctly because merge_misaligned_notes is guaranteed to be an integer. if merge_misaligned_notes > 0: escore = TMIDIX.merge_escore_notes(escore, merge_threshold=merge_misaligned_notes) escore = TMIDIX.augment_enhanced_score_notes(escore, timings_divider=1) first_note_index = [e[0] for e in raw_score[1]].index('note') cscore = TMIDIX.chordify_score([1000, escore]) meta_data = raw_score[1][:first_note_index] + [escore[0]] + [escore[-1]] + [raw_score[1][-1]] aux_escore_notes = TMIDIX.augment_enhanced_score_notes(escore, sort_drums_last=True) song_description = TMIDIX.escore_notes_to_text_description(aux_escore_notes) print('Done!') print('=' * 70) print('Input MIDI metadata:', meta_data[:5]) print('=' * 70) print('Input MIDI song description:', song_description) print('=' * 70) print('Processing...Please wait...') # A deep copy of the score to be modified output_score = copy.deepcopy(escore) # Apply transformations based on render_type if render_type == "Extract melody": output_score = TMIDIX.add_melody_to_enhanced_score_notes(escore, return_melody=True) output_score = TMIDIX.recalculate_score_timings(output_score) elif render_type == "Flip": output_score = TMIDIX.flip_enhanced_score_notes(escore) elif render_type == "Reverse": output_score = TMIDIX.reverse_enhanced_score_notes(escore) elif render_type == 'Repair Durations': output_score = TMIDIX.fix_escore_notes_durations(escore, min_notes_gap=0) elif render_type == 'Repair Chords': fixed_cscore = TMIDIX.advanced_check_and_fix_chords_in_chordified_score(cscore)[0] output_score = TMIDIX.flatten(fixed_cscore) elif render_type == 'Remove Duplicate Pitches': output_score = TMIDIX.remove_duplicate_pitches_from_escore_notes(escore) elif render_type == "Add Drum Track": nd_escore = [e for e in escore if e[3] != 9] nd_escore = TMIDIX.augment_enhanced_score_notes(nd_escore) output_score = TMIDIX.advanced_add_drums_to_escore_notes(nd_escore) for e in output_score: e[1] *= 16 e[2] *= 16 print('MIDI processing complete.') print('=' * 70) # --- Final Processing and Patching --- if render_type != "Render as-is": print('Applying final adjustments (transpose, align, patch)...') if custom_render_patch != -1: # -1 indicates no change for e in output_score: if e[3] != 9: # not a drum channel e[6] = custom_render_patch if render_transpose_value != 0: output_score = TMIDIX.transpose_escore_notes(output_score, 
render_transpose_value) if render_transpose_to_C4: output_score = TMIDIX.transpose_escore_notes_to_pitch(output_score, 60) # C4 is MIDI pitch 60 if render_align == "Start Times": output_score = TMIDIX.recalculate_score_timings(output_score) output_score = TMIDIX.align_escore_notes_to_bars(output_score) elif render_align == "Start Times and Durations": output_score = TMIDIX.recalculate_score_timings(output_score) output_score = TMIDIX.align_escore_notes_to_bars(output_score, trim_durations=True) elif render_align == "Start Times and Split Durations": output_score = TMIDIX.recalculate_score_timings(output_score) output_score = TMIDIX.align_escore_notes_to_bars(output_score, split_durations=True) if render_type == "Longest Repeating Phrase": zscore = TMIDIX.recalculate_score_timings(output_score) lrno_score = TMIDIX.escore_notes_lrno_pattern_fast(zscore) if lrno_score is not None: output_score = lrno_score else: output_score = TMIDIX.recalculate_score_timings(TMIDIX.escore_notes_middle(output_score, 50)) if render_type == "Multi-Instrumental Summary": zscore = TMIDIX.recalculate_score_timings(output_score) c_escore_notes = TMIDIX.compress_patches_in_escore_notes_chords(zscore) if len(c_escore_notes) > 128: cmatrix = TMIDIX.escore_notes_to_image_matrix(c_escore_notes, filter_out_zero_rows=True, filter_out_duplicate_rows=True) smatrix = TPLOTS.square_image_matrix(cmatrix, num_pca_components=max(1, min(5, len(c_escore_notes) // 128))) output_score = TMIDIX.image_matrix_to_original_escore_notes(smatrix) for o in output_score: o[1] *= 250 o[2] *= 250 if render_output_as_solo_piano: output_score = TMIDIX.solo_piano_escore_notes(output_score, keep_drums=(not render_remove_drums)) if render_remove_drums and not render_output_as_solo_piano: output_score = TMIDIX.strip_drums_from_escore_notes(output_score) if render_type == "Solo Piano Summary": sp_escore_notes = TMIDIX.solo_piano_escore_notes(output_score, keep_drums=False) zscore = TMIDIX.recalculate_score_timings(sp_escore_notes) if len(zscore) > 128: bmatrix = TMIDIX.escore_notes_to_binary_matrix(zscore) cmatrix = TMIDIX.compress_binary_matrix(bmatrix, only_compress_zeros=True) smatrix = TPLOTS.square_binary_matrix(cmatrix, interpolation_order=max(1, min(5, len(zscore) // 128))) output_score = TMIDIX.binary_matrix_to_original_escore_notes(smatrix) for o in output_score: o[1] *= 200 o[2] *= 200 print('Final adjustments complete.') print('=' * 70) # --- Saving Processed MIDI File --- # Save the transformed MIDI data SONG, patches, _ = TMIDIX.patch_enhanced_score_notes(output_score) # The underlying function mistakenly adds a '.mid' extension. # We must pass the path without the extension to compensate. 
path_without_ext = new_fn_path.rsplit('.mid', 1)[0] TMIDIX.Tegridy_ms_SONG_to_MIDI_Converter(SONG, output_signature = 'Integrated-MIDI-Processor', output_file_name = path_without_ext, track_name='Processed Track', list_of_MIDI_patches=patches ) midi_to_render_path = new_fn_path else: # If "Render as-is", use the original MIDI data with open(new_fn_path, 'wb') as f: f.write(fdata) midi_to_render_path = new_fn_path # --- Audio Rendering --- print('Rendering final audio...') # Select sample rate srate = int(render_sample_rate) # --- Conditional Rendering Logic --- if soundfont_bank == SYNTH_8_BIT_LABEL: print("Using 8-bit style synthesizer...") try: # Load the MIDI file with pretty_midi for manual synthesis midi_data_for_synth = pretty_midi.PrettyMIDI(midi_to_render_path) # Synthesize the waveform # --- Passing new FX parameters to the synthesis function --- audio = synthesize_8bit_style( midi_data_for_synth, s8bit_waveform_type, s8bit_envelope_type, s8bit_decay_time_s, s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth, s8bit_bass_boost_level, fs=srate, smooth_notes_level=s8bit_smooth_notes_level, continuous_vibrato_level=s8bit_continuous_vibrato_level, noise_level=s8bit_noise_level, distortion_level=s8bit_distortion_level, fm_modulation_depth=s8bit_fm_modulation_depth, fm_modulation_rate=s8bit_fm_modulation_rate ) # Normalize and prepare for Gradio peak_val = np.max(np.abs(audio)) if peak_val > 0: audio /= peak_val # Transpose from (2, N) to (N, 2) and convert to int16 for Gradio audio_out = (audio.T * 32767).astype(np.int16) except Exception as e: print(f"Error during 8-bit synthesis: {e}") return [None] * 7 else: print(f"Using SoundFont: {soundfont_bank}") # Get the full path from the global dictionary soundfont_path = soundfonts_dict.get(soundfont_bank) # Select soundfont if not soundfont_path or not os.path.exists(soundfont_path): # Error handling in case the selected file is not found error_msg = f"SoundFont '{soundfont_bank}' not found!" print(f"ERROR: {error_msg}") # Fallback to the first available soundfont if possible if soundfonts_dict: fallback_key = list(soundfonts_dict.keys())[0] soundfont_path = soundfonts_dict[fallback_key] print(f"Falling back to '{fallback_key}'.") else: # If no soundfonts are available at all, raise an error raise gr.Error("No SoundFonts are available for rendering!") with open(midi_to_render_path, 'rb') as f: midi_file_content = f.read() audio_out = midi_to_colab_audio(midi_file_content, soundfont_path=soundfont_path, # Use the dynamically found path sample_rate=srate, output_for_gradio=True ) print('Audio rendering complete.') print('=' * 70) # --- Preparing Outputs for Gradio --- with open(midi_to_render_path, 'rb') as f: new_md5_hash = hashlib.md5(f.read()).hexdigest() output_plot = TPLOTS.plot_ms_SONG(output_score, plot_title=f"Score of {fn1}", return_plt=True) output_midi_summary = str(meta_data) return new_md5_hash, fn1, output_midi_summary, midi_to_render_path, (srate, audio_out), output_plot, song_description def analyze_midi_features(midi_data): """ Analyzes a PrettyMIDI object to extract musical features for parameter recommendation. Args: midi_data (pretty_midi.PrettyMIDI): The MIDI data to analyze. Returns: dict or None: A dictionary containing features, or None if the MIDI is empty. Features: 'note_count', 'instruments_count', 'duration', 'note_density', 'avg_velocity', 'pitch_range'. 
""" all_notes = [note for instrument in midi_data.instruments for note in instrument.notes] note_count = len(all_notes) # Return None if the MIDI file has no notes to analyze. if note_count == 0: return None duration = midi_data.get_end_time() # Avoid division by zero for empty-duration MIDI files. if duration == 0: note_density = 0 else: note_density = note_count / duration # --- Calculate new required features --- avg_velocity = sum(note.velocity for note in all_notes) / note_count avg_pitch = sum(note.pitch for note in all_notes) / note_count avg_note_length = sum(note.end - note.start for note in all_notes) / note_count # Calculate pitch range if note_count > 1: min_pitch = min(note.pitch for note in all_notes) max_pitch = max(note.pitch for note in all_notes) pitch_range = max_pitch - min_pitch else: pitch_range = 0 return { 'note_count': note_count, 'instruments_count': len(midi_data.instruments), 'duration': duration, 'note_density': note_density, # Notes per second 'avg_velocity': avg_velocity, 'pitch_range': pitch_range, # In semitones 'avg_pitch': avg_pitch, 'avg_note_length': avg_note_length, } def determine_waveform_type(features): """ Determines the best waveform type based on analyzed MIDI features. - Square: Best for most general-purpose, bright melodies. - Sawtooth: Best for intense, heavy, or powerful leads and basses. - Triangle: Best for soft, gentle basses or flute-like sounds. Args: features (dict): The dictionary of features from analyze_midi_features. Returns: str: The recommended waveform type ('Square', 'Sawtooth', or 'Triangle'). """ # 1. Check for conditions that strongly suggest a Triangle wave (soft bassline) # MIDI Pitch 52 is ~G#3. If the average pitch is below this, it's likely a bass part. # If notes are long and the pitch range is narrow, it confirms a simple, melodic bassline. if features['avg_pitch'] <= 52 and features['avg_note_length'] >= 0.3 and features['pitch_range'] < 12: return "Triangle" # 2. Check for conditions that suggest a Sawtooth wave (intense/complex part) # High note density or a very wide pitch range often indicates an aggressive lead or a complex solo. # The sawtooth's rich harmonics are perfect for this. if features['note_density'] >= 6 or features['pitch_range'] >= 18: return "Sawtooth" # 3. Default to the most versatile waveform: Square return "Square" def recommend_8bit_params(midi_data, default_preset): """ Recommends 8-bit synthesizer parameters using a unified, factor-based model. This "AI" generates a sound profile based on normalized musical features. Args: midi_data (pretty_midi.PrettyMIDI): The MIDI data to analyze. default_preset (dict): A fallback preset if analysis fails. Returns: dict: A dictionary of recommended synthesizer parameters. """ features = analyze_midi_features(midi_data) if features is None: # Return a default preset if MIDI is empty or cannot be analyzed return default_preset # --- Rule-based Parameter Recommendation --- params = {} # --- 1. Core Timbre Selection --- # Intelligent Waveform Selection params['waveform_type'] = determine_waveform_type(features) # Determine pulse width *after* knowing the waveform. # This only applies if the waveform is Square. if params['waveform_type'] == 'Square': # For Square waves, use pitch complexity to decide pulse width. # Complex melodies get a thinner sound (0.3) for clarity. # Simpler melodies get a fuller sound (0.5). params['pulse_width'] = 0.3 if features['pitch_range'] > 30 else 0.5 else: # For Sawtooth or Triangle, pulse width is not applicable. Set a default. 
params['pulse_width'] = 0.5 # --- 2. Envelope and Rhythm --- # Determine envelope type based on note density is_plucky = features['note_density'] > 10 params['envelope_type'] = 'Plucky (AD Envelope)' if is_plucky else 'Sustained (Full Decay)' params['decay_time_s'] = 0.15 if is_plucky else 0.4 # --- 3. Modulation (Vibrato) --- # Vibrato depth and rate based on velocity and density params['vibrato_depth'] = min(max((features['avg_velocity'] - 60) / 20, 0), 10) # More velocity = more depth if features['note_density'] > 12: params['vibrato_rate'] = 7.0 # Very fast music -> frantic vibrato elif features['note_density'] > 6: params['vibrato_rate'] = 5.0 # Moderately fast music -> standard vibrato else: params['vibrato_rate'] = 3.0 # Slow music -> gentle vibrato # --- 4. Progressive/Graded Parameters using Normalization --- # Smooth notes level (0.0 to 1.0): More smoothing for denser passages. # Effective range: 3 to 8 notes/sec. params['smooth_notes_level'] = min(max((features['note_density'] - 3) / 5.0, 0.0), 1.0) # Smoothen notes in denser passages # Continuous vibrato level (0.0 to 1.0): Less dense passages get more lyrical, continuous vibrato. # Effective range: 5 to 10 notes/sec. (Inverted) params['continuous_vibrato_level'] = 1.0 - min(max((features['note_density'] - 5) / 5.0, 0.0), 1.0) # Lyrical (less dense) music gets connected vibrato # Noise level (0.0 to 0.1): Higher velocity passages get more "air" or "grit". # Effective range: velocity 50 to 90. params['noise_level'] = min(max((features['avg_velocity'] - 50) / 40.0, 0.0), 1.0) * 0.1 # Distortion level (0.0 to 0.1): Shorter notes get more distortion for punch. # Effective range: note length 0.5s down to 0.25s. (Inverted) if features['avg_note_length'] < 0.25: # Short, staccato notes params['distortion_level'] = 0.1 elif features['avg_note_length'] < 0.5: # Medium length notes params['distortion_level'] = 0.05 else: # Long, sustained notes params['distortion_level'] = 0.0 # Progressive FM modulation based on a combined complexity factor. # Normalizes note density and pitch range to a 0-1 scale. density_factor = min(max((features['note_density'] - 5) / 15, 0), 1) # Effective range 5-20 notes/sec range_factor = min(max((features['pitch_range'] - 15) / 30, 0), 1) # Effective range 15-45 semitones # The overall complexity is the average of these two factors. complexity_factor = (density_factor + range_factor) / 2 params['fm_modulation_depth'] = round(0.3 * complexity_factor, 3) params['fm_modulation_rate'] = round(200 * complexity_factor, 1) # Non-linear bass boost # REFINED LOGIC: Non-linear bass boost based on instrument count. # More instruments lead to less bass boost to avoid a muddy mix, # while solo or duo arrangements get a significant boost to sound fuller. # The boost level has a floor of 0.2 and a ceiling of 1.0. 
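# Worked example of the graded mappings above (hypothetical stem: 7 notes/s,
# average velocity 80, average note length 0.4 s, 3 instruments):
#   smooth_notes_level       = clamp((7 - 3) / 5.0)           = 0.8
#   continuous_vibrato_level = 1 - clamp((7 - 5) / 5.0)       = 0.6
#   noise_level              = clamp((80 - 50) / 40.0) * 0.1  = 0.075
#   distortion_level (0.25 s <= 0.4 s < 0.5 s)                 = 0.05
#   bass_boost_level         = max(0.2, 1 - (3 - 1) * 0.15)   = 0.7
# where clamp() limits a value to the 0..1 range.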
params['bass_boost_level'] = max(0.2, 1.0 - (features['instruments_count'] - 1) * 0.15) # Round all float values for cleaner output for key, value in params.items(): if isinstance(value, float): params[key] = round(value, 3) return params # ================================================================================================= # === Main Application Logic === # ================================================================================================= # --- Helper function to encapsulate the transcription pipeline for a single audio file --- def _transcribe_stem(audio_path, base_name, temp_dir, # Pass all transcription-related parameters enable_stereo, transcription_method, onset_thresh, frame_thresh, min_note_len, min_freq, max_freq, infer_onsets_bool, melodia_trick_bool, multiple_bends_bool): """ Takes a single audio file path and runs the full transcription pipeline on it. This includes stereo/mono handling and normalization. Returns the file path of the resulting transcribed MIDI. """ print(f"\n--- Transcribing Stem: {os.path.basename(audio_path)} ---") # Load the audio stem to process it audio_data, native_sample_rate = librosa.load(audio_path, sr=None, mono=False) if enable_stereo and audio_data.ndim == 2 and audio_data.shape[0] == 2: print("Stereo processing enabled for stem.") left_channel_np = audio_data[0] right_channel_np = audio_data[1] normalized_left = normalize_loudness(left_channel_np, native_sample_rate) normalized_right = normalize_loudness(right_channel_np, native_sample_rate) temp_left_path = os.path.join(temp_dir, f"{base_name}_left.flac") temp_right_path = os.path.join(temp_dir, f"{base_name}_right.flac") sf.write(temp_left_path, normalized_left, native_sample_rate) sf.write(temp_right_path, normalized_right, native_sample_rate) print(f"Saved left channel to: {temp_left_path}") print(f"Saved right channel to: {temp_right_path}") print("Transcribing left and right channel...") if transcription_method == "General Purpose": midi_path_left = TranscribeGeneralAudio(temp_left_path, onset_thresh, frame_thresh, min_note_len, min_freq, max_freq, infer_onsets_bool, melodia_trick_bool, multiple_bends_bool) midi_path_right = TranscribeGeneralAudio(temp_right_path, onset_thresh, frame_thresh, min_note_len, min_freq, max_freq, infer_onsets_bool, melodia_trick_bool, multiple_bends_bool) else: # Piano-Specific midi_path_left = TranscribePianoAudio(temp_left_path) midi_path_right = TranscribePianoAudio(temp_right_path) if midi_path_left and midi_path_right: merged_midi_path = os.path.join(temp_dir, f"{base_name}_merged.mid") return merge_midis(midi_path_left, midi_path_right, merged_midi_path) elif midi_path_left: print("Warning: Right channel transcription failed. Using left channel only.") return midi_path_left elif midi_path_right: print("Warning: Left channel transcription failed. 
Using right channel only.") return midi_path_right else: print(f"Warning: Stereo transcription failed for stem {base_name}.") return None else: print("Mono processing for stem.") mono_signal_np = np.mean(audio_data, axis=0) if audio_data.ndim > 1 else audio_data normalized_mono = normalize_loudness(mono_signal_np, native_sample_rate) temp_mono_path = os.path.join(temp_dir, f"{base_name}_mono.flac") sf.write(temp_mono_path, normalized_mono, native_sample_rate) if transcription_method == "General Purpose": return TranscribeGeneralAudio(temp_mono_path, onset_thresh, frame_thresh, min_note_len, min_freq, max_freq, infer_onsets_bool, melodia_trick_bool, multiple_bends_bool) else: return TranscribePianoAudio(temp_mono_path) # --- The main processing function is now significantly refactored --- def process_and_render_file(input_file, # --- Pass the preset selector value --- s8bit_preset_selector, separate_vocals, remerge_vocals, transcription_target, # --- ADDED: New parameter from UI --- transcribe_both_stems, # --- Transcription params --- enable_stereo_processing, transcription_method, onset_thresh, frame_thresh, min_note_len, min_freq, max_freq, infer_onsets_bool, melodia_trick_bool, multiple_bends_bool, # --- MIDI rendering params --- render_type, soundfont_bank, render_sample_rate, render_with_sustains, merge_misaligned_notes, custom_render_patch, render_align, render_transpose_value, render_transpose_to_C4, render_output_as_solo_piano, render_remove_drums, # --- 8-bit synth params --- s8bit_waveform_type, s8bit_envelope_type, s8bit_decay_time_s, s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth, s8bit_bass_boost_level, s8bit_smooth_notes_level, s8bit_continuous_vibrato_level, s8bit_noise_level, s8bit_distortion_level, s8bit_fm_modulation_depth, s8bit_fm_modulation_rate ): """ Main function to handle file processing. It determines the file type and calls the appropriate functions for transcription and/or rendering based on user selections. """ start_time = reqtime.time() if input_file is None: # Return a list of updates to clear all output fields and UI controls return [gr.update(value=None)] * (7 + 13) # 7 results + 13 synth controls # The input_file from gr.Audio(type="filepath") is now the direct path (a string), # not a temporary file object. We no longer need to access the .name attribute. input_file_path = input_file filename = os.path.basename(input_file_path) print(f"Processing new file: {filename}") # This will store the other part if separation is performed other_part_tensor = None other_part_sr = None # --- Step 1: Check file type and transcribe if necessary --- if filename.lower().endswith(('.mid', '.midi', '.kar')): print("MIDI file detected. Cannot perform vocal separation. Proceeding directly to rendering.") midi_path_for_rendering = input_file_path else: print("Audio file detected. Starting pre-processing...") # --- Robust audio loading with ffmpeg fallback --- try: # Try loading directly with torchaudio (efficient for supported formats). # This works for formats like WAV, MP3, FLAC, OGG, etc. print("Attempting to load audio with torchaudio...") audio_tensor, native_sample_rate = torchaudio.load(input_file_path) print("Torchaudio loading successful.") except Exception as e: print(f"Torchaudio failed: {e}. Attempting fallback with ffmpeg...") try: # Use ffmpeg to convert the audio to WAV in-memory, then load the bytes. 
out, err = ( ffmpeg .input(input_file_path) .output('pipe:', format='flac') .run(capture_stdout=True, capture_stderr=True) ) # Load the WAV data from the in-memory buffer audio_tensor, native_sample_rate = torchaudio.load(io.BytesIO(out)) print("FFmpeg fallback successful.") except Exception as ffmpeg_err: # If both direct loading and ffmpeg fallback fail, raise an error. raise gr.Error(f"Failed to load audio file with both torchaudio and ffmpeg.\n" f"Torchaudio error: {e}\n" f"FFmpeg error: {ffmpeg_err.decode() if isinstance(ffmpeg_err, bytes) else ffmpeg_err}") base_name = os.path.splitext(filename)[0] temp_dir = "output/temp_transcribe" os.makedirs(temp_dir, exist_ok=True) # --- Demucs Vocal Separation Logic, now decides which stem to process --- if not separate_vocals: # --- Standard Workflow: Transcribe the original full audio --- print("Standard workflow: No vocal separation.") audio_to_transcribe_path = os.path.join(temp_dir, f"{base_name}_original.flac") torchaudio.save(audio_to_transcribe_path, audio_tensor, native_sample_rate) midi_path_for_rendering = _transcribe_stem( audio_to_transcribe_path, f"{base_name}_original", temp_dir, enable_stereo_processing, transcription_method, onset_thresh, frame_thresh, min_note_len, min_freq, max_freq, infer_onsets_bool, melodia_trick_bool, multiple_bends_bool ) else: # --- Vocal Separation Workflow --- if demucs_model is None: raise gr.Error("Demucs model is not loaded. Cannot separate vocals.") # Convert to a common format (stereo, float32) that demucs expects audio_tensor = convert_audio(audio_tensor, native_sample_rate, demucs_model.samplerate, demucs_model.audio_channels) if torch.cuda.is_available(): audio_tensor = audio_tensor.cuda() print("Separating audio with Demucs... This may take some time.") # --- Wrap the model call in a no_grad() context --- with torch.no_grad(): all_stems = apply_model( demucs_model, audio_tensor[None], # The input shape is [batch, channels, samples] device='cuda' if torch.cuda.is_available() else 'cpu', progress=True, )[0] # Remove the batch dimension from the output # --- Clear CUDA cache immediately after use --- if torch.cuda.is_available(): torch.cuda.empty_cache() print("CUDA cache cleared.") # --- Robust stem handling to prevent CUDA errors --- # Instead of complex GPU indexing, we create a dictionary of stems on the CPU. # This is safer and more robust across different hardware. sources = {} for i, source_name in enumerate(demucs_model.sources): sources[source_name] = all_stems[i] vocals_tensor = sources['vocals'] # Sum the other stems to create the accompaniment. # This loop is safer than a single complex indexing operation. 
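# An equivalent vectorised form would be (illustrative only; assumes every stem tensor
# shares the same shape, which holds for stems separated from a single input):
#   accompaniment_tensor = torch.stack(
#       [stem for name, stem in sources.items() if name != 'vocals']
#   ).sum(dim=0)
# The explicit accumulation loop below avoids materialising the stacked tensor.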
accompaniment_tensor = torch.zeros_like(vocals_tensor) for source_name, stem_tensor in sources.items(): if source_name != 'vocals': accompaniment_tensor += stem_tensor # --- Save both stems to temporary files --- vocals_path = os.path.join(temp_dir, f"{base_name}_vocals.flac") accompaniment_path = os.path.join(temp_dir, f"{base_name}_accompaniment.flac") torchaudio.save(vocals_path, vocals_tensor.cpu(), demucs_model.samplerate) torchaudio.save(accompaniment_path, accompaniment_tensor.cpu(), demucs_model.samplerate) # --- Determine which stem is the primary target and which is the "other part" --- primary_target_path = vocals_path if transcription_target == "Transcribe Vocals" else accompaniment_path other_part_path = accompaniment_path if transcription_target == "Transcribe Vocals" else vocals_path # Store the audio tensor of the "other part" for potential audio re-merging other_part_tensor = accompaniment_tensor if transcription_target == "Transcribe Vocals" else vocals_tensor other_part_sr = demucs_model.samplerate print("Separation complete.") # --- Main Branching Logic: Transcribe one or both stems --- if not transcribe_both_stems: print(f"Transcribing primary target only: {os.path.basename(primary_target_path)}") midi_path_for_rendering = _transcribe_stem( primary_target_path, os.path.splitext(os.path.basename(primary_target_path))[0], temp_dir, enable_stereo_processing, transcription_method, onset_thresh, frame_thresh, min_note_len, min_freq, max_freq, infer_onsets_bool, melodia_trick_bool, multiple_bends_bool ) else: print("Transcribing BOTH stems and merging the MIDI results.") # Transcribe the primary target midi_path_primary = _transcribe_stem( primary_target_path, os.path.splitext(os.path.basename(primary_target_path))[0], temp_dir, enable_stereo_processing, transcription_method, onset_thresh, frame_thresh, min_note_len, min_freq, max_freq, infer_onsets_bool, melodia_trick_bool, multiple_bends_bool ) # Transcribe the other part midi_path_other = _transcribe_stem( other_part_path, os.path.splitext(os.path.basename(other_part_path))[0], temp_dir, enable_stereo_processing, transcription_method, onset_thresh, frame_thresh, min_note_len, min_freq, max_freq, infer_onsets_bool, melodia_trick_bool, multiple_bends_bool ) # Merge the two resulting MIDI files if midi_path_primary and midi_path_other: final_merged_midi_path = os.path.join(temp_dir, f"{base_name}_full_transcription.mid") print(f"Merging transcribed MIDI files into {os.path.basename(final_merged_midi_path)}") # A more robust MIDI merge is needed here primary_midi = pretty_midi.PrettyMIDI(midi_path_primary) other_midi = pretty_midi.PrettyMIDI(midi_path_other) # Add all instruments from the other midi to the primary one for instrument in other_midi.instruments: instrument.name = f"Other - {instrument.name}" # Rename to avoid confusion primary_midi.instruments.append(instrument) primary_midi.write(final_merged_midi_path) midi_path_for_rendering = final_merged_midi_path elif midi_path_primary: print("Warning: Transcription of the 'other' part failed. Using primary transcription only.") midi_path_for_rendering = midi_path_primary else: raise gr.Error("Transcription of the primary target failed. Aborting.") # --- Step 2: Render the FINAL MIDI file with selected options --- # --- Auto-Recommendation Logic --- # Store the original parameters from the UI sliders into a dictionary. # The keys in this dictionary match the keys returned by recommend_8bit_params. 
synth_params = { 'waveform_type': s8bit_waveform_type, 'pulse_width': s8bit_pulse_width, 'envelope_type': s8bit_envelope_type, 'decay_time_s': s8bit_decay_time_s, 'vibrato_rate': s8bit_vibrato_rate, 'vibrato_depth': s8bit_vibrato_depth, 'bass_boost_level': s8bit_bass_boost_level, 'smooth_notes_level': s8bit_smooth_notes_level, 'continuous_vibrato_level': s8bit_continuous_vibrato_level, 'noise_level': s8bit_noise_level, 'distortion_level': s8bit_distortion_level, 'fm_modulation_depth': s8bit_fm_modulation_depth, 'fm_modulation_rate': s8bit_fm_modulation_rate, } # This variable will hold the values to update the UI sliders ui_updates = {} # If the user selected the auto-recommend option, override the parameters if s8bit_preset_selector == "Auto-Recommend (Analyze MIDI)": print("Auto-Recommendation is enabled. Analyzing MIDI features...") try: midi_to_analyze = pretty_midi.PrettyMIDI(midi_path_for_rendering) default_params = S8BIT_PRESETS[FALLBACK_PRESET_NAME] recommended_params = recommend_8bit_params(midi_to_analyze, default_params) print("Recommended parameters:", recommended_params) # Both the synthesis parameters and the UI update values are set to the recommendations synth_params.update(recommended_params) ui_updates = recommended_params.copy() # Use a copy for UI updates except Exception as e: print(f"Could not auto-recommend parameters: {e}. Using default values from UI.") print(f"Proceeding to render MIDI file: {os.path.basename(midi_path_for_rendering)}") # --- Correctly pass parameters to Render_MIDI --- # The Render_MIDI function expects positional arguments, not keyword arguments. # We must unpack the values from our synth_params dictionary in the correct order. results = Render_MIDI(midi_path_for_rendering, render_type, soundfont_bank, render_sample_rate, render_with_sustains, merge_misaligned_notes, custom_render_patch, render_align, render_transpose_value, render_transpose_to_C4, render_output_as_solo_piano, render_remove_drums, # Unpack the values from the dictionary as positional arguments synth_params['waveform_type'], synth_params['envelope_type'], synth_params['decay_time_s'], synth_params['pulse_width'], synth_params['vibrato_rate'], synth_params['vibrato_depth'], synth_params['bass_boost_level'], synth_params['smooth_notes_level'], synth_params['continuous_vibrato_level'], synth_params['noise_level'], synth_params['distortion_level'], synth_params['fm_modulation_depth'], synth_params['fm_modulation_rate'] ) # --- Vocal Re-merging Logic now uses the generic "other_part" --- # IMPORTANT: This only runs if we did NOT transcribe both stems. 
if separate_vocals and remerge_vocals and not transcribe_both_stems and other_part_tensor is not None: print(f"Re-merging the non-transcribed part with newly rendered music...") rendered_srate, rendered_music_int16 = results[4] rendered_music_float = rendered_music_int16.astype(np.float32) / 32767.0 rendered_music_tensor = torch.from_numpy(rendered_music_float).T if rendered_srate != other_part_sr: resampler = torchaudio.transforms.Resample(rendered_srate, other_part_sr) rendered_music_tensor = resampler(rendered_music_tensor) len_music = rendered_music_tensor.shape[1] len_other = other_part_tensor.shape[1] if len_music > len_other: padding = len_music - len_other other_part_tensor = torch.nn.functional.pad(other_part_tensor, (0, padding)) elif len_other > len_music: padding = len_other - len_music rendered_music_tensor = torch.nn.functional.pad(rendered_music_tensor, (0, padding)) merged_audio_tensor = rendered_music_tensor + other_part_tensor.cpu() max_abs = torch.max(torch.abs(merged_audio_tensor)) if max_abs > 1.0: merged_audio_tensor /= max_abs merged_audio_int16 = (merged_audio_tensor.T.numpy() * 32767).astype(np.int16) new_results = list(results) new_results[4] = (other_part_sr, merged_audio_int16) results = tuple(new_results) print("Re-merging complete.") print(f'Total processing time: {(reqtime.time() - start_time):.2f} sec') print('*' * 70) # --- Prepare the final return value for Gradio --- # This list defines the order of UI components to be updated. # IT MUST MATCH THE ORDER IN `s8bit_updater_outputs` IN THE MAIN BLOCK. param_order = [ 'waveform_type', 'pulse_width', 'envelope_type', 'decay_time_s', 'vibrato_rate', 'vibrato_depth', 'bass_boost_level', 'smooth_notes_level', 'continuous_vibrato_level', 'noise_level', 'distortion_level', 'fm_modulation_depth', 'fm_modulation_rate' ] final_ui_updates = [] if ui_updates: # If auto-recommendation was successful # We have new values, so we create a list of these values in the correct order. for param in param_order: final_ui_updates.append(ui_updates.get(param)) else: # No auto-recommendation, so we tell Gradio not to change the UI. # We send a gr.update() for each UI component. for _ in param_order: final_ui_updates.append(gr.update()) # The final return is a combination of the result values and the UI update values. return list(results) + final_ui_updates # ================================================================================================= # === Gradio UI Setup === # ================================================================================================= if __name__ == "__main__": # Initialize the app: download model (if needed) and apply patches # Set to False if you don't have 'requests' or 'tqdm' installed initialize_app() # --- Prepare soundfonts and make the map globally accessible --- global soundfonts_dict # On application start, download SoundFonts from Hugging Face Hub if they don't exist. soundfonts_dict = prepare_soundfonts() print(f"Found {len(soundfonts_dict)} local SoundFonts.") if not soundfonts_dict: print("\nWARNING: No SoundFonts were found or could be downloaded.") print("Rendering with SoundFonts will fail. 
Only the 8-bit synthesizer will be available.") # --- Pre-load the Demucs model on startup for efficiency --- print("Loading Demucs model (htdemucs_ft), this may take a moment on first run...") try: demucs_model = get_model(name='htdemucs_ft') if torch.cuda.is_available(): demucs_model = demucs_model.cuda() print("Demucs model loaded successfully.") except Exception as e: print(f"Warning: Could not load Demucs model. Vocal separation will not be available. Error: {e}") demucs_model = None # --- Define a constant for the fallback preset name --- # This prevents errors if the preset name is changed in the dictionary. FALLBACK_PRESET_NAME = "Generic Chiptune Loop" # --- Data structure for 8-bit synthesizer presets --- # Comprehensive preset dictionary with new FX parameters for all presets # Comprehensive preset dictionary including new JRPG and Handheld classics # Note: Vibrato depth is mapped to a representative value on the 0-50 Hz slider. S8BIT_PRESETS = { # --- Classic Chiptune --- "Mario (Super Mario Bros / スーパーマリオブラザーズ)": { # Description: A bright square wave with a per-note vibrato, producing the classic bouncy platformer sound. 'waveform_type': 'Square', 'pulse_width': 0.3, 'envelope_type': 'Plucky (AD Envelope)', 'decay_time_s': 0.25, 'vibrato_rate': 5.0, 'vibrato_depth': 5, 'smooth_notes_level': 0.8, 'continuous_vibrato_level': 0.25, 'bass_boost_level': 0.2, 'noise_level': 0.0, 'distortion_level': 0.0, 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0 }, "Mega Man (Rockman / ロックマン)": { # Description: A thin, sharp square wave lead with fast vibrato, iconic for its driving, heroic melodies. 'waveform_type': 'Square', 'pulse_width': 0.2, 'envelope_type': 'Plucky (AD Envelope)', 'decay_time_s': 0.15, 'vibrato_rate': 6.0, 'vibrato_depth': 8, 'smooth_notes_level': 0.9, 'continuous_vibrato_level': 0.85, 'bass_boost_level': 0.3, 'noise_level': 0.0, 'distortion_level': 0.05, 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0 }, "Zelda (The Legend of Zelda / ゼルダの伝説)": { # Description: The classic pure triangle wave lead, perfect for heroic and adventurous overworld themes. 'waveform_type': 'Triangle', 'pulse_width': 0.5, 'envelope_type': 'Sustained (Full Decay)', 'decay_time_s': 0.3, 'vibrato_rate': 4.5, 'vibrato_depth': 4, 'smooth_notes_level': 0.9, 'continuous_vibrato_level': 0.9, 'bass_boost_level': 0.15, 'noise_level': 0.0, 'distortion_level': 0.0, 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0 }, "Kirby's Bubbly Melody (Hoshi no Kirby / 星のカービィ)": { # Description: A soft, round square wave with a bouncy vibrato, creating a cheerful and adorable sound. 'waveform_type': 'Square', 'pulse_width': 0.4, 'envelope_type': 'Plucky (AD Envelope)', 'decay_time_s': 0.2, 'vibrato_rate': 6.0, 'vibrato_depth': 4, 'smooth_notes_level': 0.85, 'continuous_vibrato_level': 0.3, # Formerly False (0.0); adds a hint of continuity for more liveliness. 'bass_boost_level': 0.1, 'noise_level': 0.0, 'distortion_level': 0.0, 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0 }, "Pokémon (Game Boy Classics / ポケットモンスター)": { # Description: A full, friendly square wave sound, capturing the cheerful and adventurous spirit of early handheld RPGs. 
'waveform_type': 'Square', 'pulse_width': 0.5, 'envelope_type': 'Plucky (AD Envelope)', 'decay_time_s': 0.22, 'vibrato_rate': 5.0, 'vibrato_depth': 5, 'smooth_notes_level': 0.9, 'continuous_vibrato_level': 0.9, 'bass_boost_level': 0.25, 'noise_level': 0.0, 'distortion_level': 0.0, 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0 }, "Castlevania (Akumajō Dracula / 悪魔城ドラキュラ)": { # Description: A sharp square wave with dramatic vibrato, ideal for fast, gothic, and baroque-inspired melodies. 'waveform_type': 'Square', 'pulse_width': 0.25, 'envelope_type': 'Plucky (AD Envelope)', 'decay_time_s': 0.18, 'vibrato_rate': 6.5, 'vibrato_depth': 6, 'smooth_notes_level': 0.85, 'continuous_vibrato_level': 0.85, 'bass_boost_level': 0.35, 'noise_level': 0.0, 'distortion_level': 0.0, 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0 }, "Final Fantasy (Arpeggio / ファイナルファンタジー)": { # Description: A perfect, clean square wave with zero vibrato, creating the iconic, crystal-clear arpeggio sound. 'waveform_type': 'Square', 'pulse_width': 0.5, 'envelope_type': 'Plucky (AD Envelope)', 'decay_time_s': 0.22, 'vibrato_rate': 5.0, 'vibrato_depth': 0, 'smooth_notes_level': 0.9, 'continuous_vibrato_level': 0.2, 'bass_boost_level': 0.2, 'noise_level': 0.0, 'distortion_level': 0.0, 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0 }, "ONI V (Wafu Mystic / ONI V 隠忍を継ぐ者)": { # Description: A solemn triangle wave with a slow, expressive vibrato, evoking the mysterious atmosphere of Japanese folklore. 'waveform_type': 'Triangle', 'pulse_width': 0.5, 'envelope_type': 'Sustained (Full Decay)', 'decay_time_s': 0.4, 'vibrato_rate': 3.5, 'vibrato_depth': 3, 'smooth_notes_level': 0.9, 'continuous_vibrato_level': 0.85, 'bass_boost_level': 0.4, 'noise_level': 0.0, 'distortion_level': 0.0, 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0 }, # --- Advanced System Impressions --- "Commodore 64 (SID Feel)": { # Description: (Impression) Uses high-speed, shallow vibrato to mimic the characteristic "buzzy" texture of the SID chip's PWM. 'waveform_type': 'Square', 'pulse_width': 0.25, 'envelope_type': 'Plucky (AD Envelope)', 'decay_time_s': 0.25, 'vibrato_rate': 8.0, 'vibrato_depth': 4, 'smooth_notes_level': 0.9, 'continuous_vibrato_level': 0.3, 'bass_boost_level': 0.2, 'noise_level': 0.05, 'distortion_level': 0.1, 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0 }, "Megadrive/Genesis (FM Grit)": { # Description: (Impression) Uses FM, distortion, and noise to capture the gritty, metallic, and aggressive tone of the YM2612 chip. 'waveform_type': 'Sawtooth', 'pulse_width': 0.5, 'envelope_type': 'Plucky (AD Envelope)', 'decay_time_s': 0.18, 'vibrato_rate': 0.0, 'vibrato_depth': 0, 'smooth_notes_level': 0.0, 'continuous_vibrato_level': 0.9, 'bass_boost_level': 0.4, 'noise_level': 0.1, 'distortion_level': 0.2, 'fm_modulation_depth': 0.2, 'fm_modulation_rate': 150 }, "PC-98 (Touhou Feel / 東方Project)": { # Description: (Impression) A very sharp square wave with fast FM, emulating the bright, high-energy leads of Japanese PC games. 
'waveform_type': 'Square', 'pulse_width': 0.15, 'envelope_type': 'Plucky (AD Envelope)', 'decay_time_s': 0.12, 'vibrato_rate': 7.5, 'vibrato_depth': 7, 'smooth_notes_level': 0.95, 'continuous_vibrato_level': 0.85, 'bass_boost_level': 0.3, 'noise_level': 0.0, 'distortion_level': 0.0, 'fm_modulation_depth': 0.1, 'fm_modulation_rate': 200 }, "Roland SC-88 (GM Vibe)": { # Description: (Impression) A clean, stable triangle wave with no effects, mimicking the polished, sample-based sounds of General MIDI. 'waveform_type': 'Triangle', 'pulse_width': 0.5, 'envelope_type': 'Sustained (Full Decay)', 'decay_time_s': 0.35, 'vibrato_rate': 0, 'vibrato_depth': 0, 'smooth_notes_level': 1.0, 'continuous_vibrato_level': 0.0, 'bass_boost_level': 0.1, 'noise_level': 0.0, 'distortion_level': 0.0, 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0 }, # --- Action & Rock Leads --- "Falcom Ys (Rock Lead / イース)": { # Description: A powerful sawtooth with slight distortion, emulating the driving rock organ and guitar leads of action JRPGs. 'waveform_type': 'Sawtooth', 'pulse_width': 0.5, 'envelope_type': 'Plucky (AD Envelope)', 'decay_time_s': 0.15, 'vibrato_rate': 5.5, 'vibrato_depth': 6, 'smooth_notes_level': 0.85, 'continuous_vibrato_level': 0.8, 'bass_boost_level': 0.4, 'noise_level': 0.05, 'distortion_level': 0.15, 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0 }, "Arcade Brawler Lead (Street Fighter / ストリートファイター)": { # Description: A gritty sawtooth lead with a hard attack, capturing the high-energy feel of classic fighting games. 'waveform_type': 'Sawtooth', 'pulse_width': 0.5, 'envelope_type': 'Plucky (AD Envelope)', 'decay_time_s': 0.15, 'vibrato_rate': 5.0, 'vibrato_depth': 6, 'smooth_notes_level': 0.8, 'continuous_vibrato_level': 0.7, 'bass_boost_level': 0.4, 'noise_level': 0.05, 'distortion_level': 0.1, 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0 }, "Rhythm Pop Lead (Rhythm Tengoku / リズム天国)": { # Description: A clean, round square wave perfect for the snappy, catchy feel of rhythm games. 'waveform_type': 'Square', 'pulse_width': 0.5, 'envelope_type': 'Plucky (AD Envelope)', 'decay_time_s': 0.18, 'vibrato_rate': 4.5, 'vibrato_depth': 4, 'smooth_notes_level': 0.9, # Formerly True -> 1.0; slightly reduced for a bit more attack. 'continuous_vibrato_level': 0.8, # Formerly True -> 1.0; slightly weakened for more defined note transitions. 'bass_boost_level': 0.3, 'noise_level': 0.0, 'distortion_level': 0.0, 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0 }, # --- Epic & Orchestral Pads --- "Dragon Quest (Orchestral Feel / ドラゴンクエスト)": { # Description: A pure triangle wave with a long decay, mimicking the grand, orchestral feel of a classical flute or string section. 'waveform_type': 'Triangle', 'pulse_width': 0.5, 'envelope_type': 'Sustained (Full Decay)', 'decay_time_s': 0.6, 'vibrato_rate': 3.0, 'vibrato_depth': 4, 'smooth_notes_level': 0.9, 'continuous_vibrato_level': 0.9, 'bass_boost_level': 0.3, 'noise_level': 0.0, 'distortion_level': 0.0, 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0 }, "Mystic Mana Pad (Secret of Mana / 聖剣伝説2)": { # Description: A warm, ethereal square wave pad with slow vibrato, capturing a feeling of fantasy and wonder. 
'waveform_type': 'Square', 'pulse_width': 0.5, 'envelope_type': 'Sustained (Full Decay)', 'decay_time_s': 0.5, 'vibrato_rate': 2.5, 'vibrato_depth': 4, 'smooth_notes_level': 1.0, 'continuous_vibrato_level': 0.95, 'bass_boost_level': 0.3, 'noise_level': 0.0, 'distortion_level': 0.0, 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0 }, "Modern JRPG Pad (Persona / ペルソナ)": { # Description: A warm, stylish square wave pad, capturing the modern, pop/jazz-infused feel of the Persona series. 'waveform_type': 'Square', 'pulse_width': 0.5, 'envelope_type': 'Sustained (Full Decay)', 'decay_time_s': 0.5, 'vibrato_rate': 2.5, 'vibrato_depth': 4, 'smooth_notes_level': 1.0, 'continuous_vibrato_level': 0.95, 'bass_boost_level': 0.3, 'noise_level': 0.0, 'distortion_level': 0.0, 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0 }, "Tactical Brass (Fire Emblem / ファイアーエムブレム)": { # Description: A powerful, sustained sawtooth emulating the bold, heroic synth-brass of Fire Emblem's tactical themes. 'waveform_type': 'Sawtooth', 'pulse_width': 0.5, 'envelope_type': 'Sustained (Full Decay)', 'decay_time_s': 0.4, 'vibrato_rate': 3.5, 'vibrato_depth': 5, 'smooth_notes_level': 0.95, 'continuous_vibrato_level': 0.9, 'bass_boost_level': 0.5, 'noise_level': 0.1, 'distortion_level': 0.15, 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0 }, "Mecha & Tactics Brass (Super Robot Wars / スーパーロボット大戦)": { # Description: A powerful, sustained sawtooth emulating the bold, heroic synth-brass of strategy and mecha anime themes. 'waveform_type': 'Sawtooth', 'pulse_width': 0.5, 'envelope_type': 'Sustained (Full Decay)', 'decay_time_s': 0.4, 'vibrato_rate': 3.5, 'vibrato_depth': 5, 'smooth_notes_level': 0.95, 'continuous_vibrato_level': 0.9, 'bass_boost_level': 0.5, 'noise_level': 0.1, 'distortion_level': 0.15, 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0 }, "Dark/Boss Atmosphere (Shin Megami Tensei / 真・女神転生)": { # Description: An aggressive sawtooth, inspired by the dark, rock-infused themes of SMT. 'waveform_type': 'Sawtooth', 'pulse_width': 0.5, 'envelope_type': 'Sustained (Full Decay)', 'decay_time_s': 0.35, 'vibrato_rate': 7.0, 'vibrato_depth': 12, 'smooth_notes_level': 0.1, 'continuous_vibrato_level': 0.0, 'bass_boost_level': 0.4, 'noise_level': 0.15, 'distortion_level': 0.25, 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0 }, # --- Vocal Synthesis --- "8-Bit Vocal Lead": { # Description: A soft, sustained triangle wave with gentle vibrato to mimic a singing voice. 'waveform_type': 'Triangle', 'pulse_width': 0.5, 'envelope_type': 'Sustained (Full Decay)', 'decay_time_s': 0.8, 'vibrato_rate': 5.5, 'vibrato_depth': 4, # Mapped from the suggested 0.15 range 'bass_boost_level': 0.1, 'smooth_notes_level': 0.85, 'continuous_vibrato_level': 0.9, 'noise_level': 0.02, 'distortion_level': 0.0, 'fm_modulation_depth': 0.05, 'fm_modulation_rate': 20 }, "8-Bit Male Vocal": { # Description: A deeper, fuller triangle wave with more bass and slower vibrato for a masculine feel. 'waveform_type': 'Triangle', 'pulse_width': 0.5, 'envelope_type': 'Sustained (Full Decay)', 'decay_time_s': 1.0, 'vibrato_rate': 5.0, 'vibrato_depth': 3, # Mapped from the suggested 0.12 range 'bass_boost_level': 0.3, 'smooth_notes_level': 0.9, 'continuous_vibrato_level': 0.85, 'noise_level': 0.015, 'distortion_level': 0.0, 'fm_modulation_depth': 0.08, 'fm_modulation_rate': 25 }, "8-Bit Female Vocal": { # Description: A brighter, lighter triangle wave with faster vibrato and less bass for a feminine feel. 
'waveform_type': 'Triangle', 'pulse_width': 0.5, 'envelope_type': 'Sustained (Full Decay)', 'decay_time_s': 0.7, 'vibrato_rate': 6.0, 'vibrato_depth': 5, # Mapped from the suggested 0.18 range 'bass_boost_level': 0.05, 'smooth_notes_level': 0.85, 'continuous_vibrato_level': 0.92, 'noise_level': 0.025, 'distortion_level': 0.0, 'fm_modulation_depth': 0.04, 'fm_modulation_rate': 30 }, "Lo-Fi Vocal": { # Description: A gritty, noisy square wave with a short decay to simulate a low-resolution vocal sample. 'waveform_type': 'Square', 'pulse_width': 0.48, 'envelope_type': 'Plucky (AD Envelope)', # "Short" implies a plucky, not sustained, envelope 'decay_time_s': 0.4, 'vibrato_rate': 4.8, 'vibrato_depth': 2, # Mapped from the suggested 0.10 range 'bass_boost_level': 0.1, 'smooth_notes_level': 0.65, 'continuous_vibrato_level': 0.6, 'noise_level': 0.05, 'distortion_level': 0.05, 'fm_modulation_depth': 0.02, 'fm_modulation_rate': 20 }, # --- Sound FX & Experimental --- "Sci-Fi Energy Field": { # Description: (SFX) High-speed vibrato and noise create a constant, shimmering hum suitable for energy shields or force fields. 'waveform_type': 'Triangle', 'pulse_width': 0.5, 'envelope_type': 'Sustained (Full Decay)', 'decay_time_s': 0.4, 'vibrato_rate': 10.0, 'vibrato_depth': 3, 'smooth_notes_level': 0.85, 'continuous_vibrato_level': 0.9, 'bass_boost_level': 0.1, 'noise_level': 0.1, 'distortion_level': 0.0, 'fm_modulation_depth': 0.05, 'fm_modulation_rate': 50 }, "Industrial Alarm": { # Description: (SFX) Extreme vibrato rate on a sawtooth wave produces a harsh, metallic, dissonant alarm sound. 'waveform_type': 'Sawtooth', 'pulse_width': 0.5, 'envelope_type': 'Plucky (AD Envelope)', 'decay_time_s': 0.2, 'vibrato_rate': 15.0, 'vibrato_depth': 8, 'smooth_notes_level': 0.0, 'continuous_vibrato_level': 0.0, 'bass_boost_level': 0.3, 'noise_level': 0.2, 'distortion_level': 0.3, 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0 }, "Laser Charge-Up": { # Description: (SFX) Extreme vibrato depth creates a dramatic, rising pitch effect, perfect for sci-fi weapon sounds. 'waveform_type': 'Sawtooth', 'pulse_width': 0.5, 'envelope_type': 'Sustained (Full Decay)', 'decay_time_s': 0.3, 'vibrato_rate': 4.0, 'vibrato_depth': 25, 'smooth_notes_level': 0.9, 'continuous_vibrato_level': 0.95, 'bass_boost_level': 0.2, 'noise_level': 0.0, 'distortion_level': 0.0, 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0 }, "Unstable Machine Core": { # Description: (SFX) Maximum depth and distortion create a chaotic, atonal noise, simulating a machine on the verge of exploding. 'waveform_type': 'Sawtooth', 'pulse_width': 0.5, 'envelope_type': 'Sustained (Full Decay)', 'decay_time_s': 0.5, 'vibrato_rate': 1.0, 'vibrato_depth': 50, 'smooth_notes_level': 0.0, 'continuous_vibrato_level': 0.9, 'bass_boost_level': 0.5, 'noise_level': 0.3, 'distortion_level': 0.4, 'fm_modulation_depth': 0.5, 'fm_modulation_rate': 10 }, "Hardcore Gabber Kick": { # Description: (Experimental) Maximum bass boost and distortion create an overwhelmingly powerful, clipped kick drum sound. 
'waveform_type': 'Sawtooth', 'pulse_width': 0.5, 'envelope_type': 'Plucky (AD Envelope)', 'decay_time_s': 0.1, 'vibrato_rate': 0, 'vibrato_depth': 0, 'smooth_notes_level': 0.0, 'continuous_vibrato_level': 0.0, 'bass_boost_level': 0.8, 'noise_level': 0.2, 'distortion_level': 0.5, 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0 }, # --- Utility & Starting Points --- "Generic Chiptune Loop": { # Description: A well-balanced, pleasant square wave lead that serves as a great starting point for custom sounds. 'waveform_type': 'Square', 'pulse_width': 0.25, 'envelope_type': 'Plucky (AD Envelope)', 'decay_time_s': 0.2, 'vibrato_rate': 5.5, 'vibrato_depth': 4, 'smooth_notes_level': 0.9, 'continuous_vibrato_level': 0.85, 'bass_boost_level': 0.25, 'noise_level': 0.0, 'distortion_level': 0.0, 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0 }, } # --- Data structure for basic_pitch transcription presets --- BASIC_PITCH_PRESETS = { # --- General & All-Purpose --- "Default (Balanced)": { 'description': "A good all-around starting point for most music types.", 'onset_thresh': 0.5, 'frame_thresh': 0.3, 'min_note_len': 128, 'min_freq': 60, 'max_freq': 4000, 'infer_onsets_bool': True, 'melodia_trick_bool': True, 'multiple_bends_bool': False }, "Anime / J-Pop": { 'description': "For tracks with clear melodies and pop/rock arrangements.", 'onset_thresh': 0.5, 'frame_thresh': 0.3, 'min_note_len': 150, 'min_freq': 40, 'max_freq': 2500, 'infer_onsets_bool': True, 'melodia_trick_bool': True, 'multiple_bends_bool': True }, # --- Specific Instruments --- "Solo Vocals": { 'description': "Optimized for a single singing voice. Sensitive to nuances.", 'onset_thresh': 0.4, 'frame_thresh': 0.3, 'min_note_len': 100, 'min_freq': 80, 'max_freq': 1200, 'infer_onsets_bool': True, 'melodia_trick_bool': True, 'multiple_bends_bool': True }, "Solo Piano": { 'description': "For solo piano with a wide dynamic and frequency range.", 'onset_thresh': 0.4, 'frame_thresh': 0.3, 'min_note_len': 120, 'min_freq': 27, 'max_freq': 4200, 'infer_onsets_bool': True, 'melodia_trick_bool': True, 'multiple_bends_bool': True }, "Acoustic Guitar": { 'description': "Balanced for picked or strummed acoustic guitar.", 'onset_thresh': 0.5, 'frame_thresh': 0.3, 'min_note_len': 90, 'min_freq': 80, 'max_freq': 2500, 'infer_onsets_bool': True, 'melodia_trick_bool': True, 'multiple_bends_bool': False }, "Bass Guitar": { 'description': "Isolates and transcribes only the low frequencies of a bassline.", 'onset_thresh': 0.4, 'frame_thresh': 0.3, 'min_note_len': 100, 'min_freq': 30, 'max_freq': 400, 'infer_onsets_bool': True, 'melodia_trick_bool': True, 'multiple_bends_bool': False }, "Percussion / Drums": { 'description': "For drums and rhythmic elements. 
Catches fast, sharp hits.", 'onset_thresh': 0.7, 'frame_thresh': 0.6, 'min_note_len': 30, 'min_freq': 40, 'max_freq': 10000, 'infer_onsets_bool': True, 'melodia_trick_bool': False, 'multiple_bends_bool': False }, # --- Complex Genres --- "Rock / Metal": { 'description': "Higher thresholds for distorted guitars, bass, and drums in a dense mix.", 'onset_thresh': 0.6, 'frame_thresh': 0.4, 'min_note_len': 100, 'min_freq': 50, 'max_freq': 3000, 'infer_onsets_bool': True, 'melodia_trick_bool': True, 'multiple_bends_bool': True }, "Jazz (Multi-instrument)": { 'description': "High thresholds to separate notes in complex, improvisational passages.", 'onset_thresh': 0.7, 'frame_thresh': 0.5, 'min_note_len': 150, 'min_freq': 55, 'max_freq': 2000, 'infer_onsets_bool': True, 'melodia_trick_bool': False, 'multiple_bends_bool': True }, "Classical (Orchestral)": { 'description': "Longer note length to focus on sustained notes and filter out performance noise.", 'onset_thresh': 0.5, 'frame_thresh': 0.4, 'min_note_len': 200, 'min_freq': 32, 'max_freq': 4200, 'infer_onsets_bool': True, 'melodia_trick_bool': True, 'multiple_bends_bool': True }, "Electronic / Synth": { 'description': "Low thresholds and short note length for sharp, synthetic sounds.", 'onset_thresh': 0.3, 'frame_thresh': 0.2, 'min_note_len': 50, 'min_freq': 20, 'max_freq': 8000, 'infer_onsets_bool': True, 'melodia_trick_bool': False, 'multiple_bends_bool': False } } # --- UI visibility logic now controls three components --- def update_vocal_ui_visibility(separate_vocals, remerge_audio): """Shows or hides the separation-related UI controls based on selections.""" is_visible = gr.update(visible=separate_vocals) # The "Transcribe Both" checkbox is only visible if separation AND re-merging are active transcribe_both_visible = gr.update(visible=(separate_vocals and remerge_audio)) return is_visible, is_visible, transcribe_both_visible def update_ui_visibility(transcription_method, soundfont_choice): """ Dynamically updates the visibility of UI components based on user selections. """ is_general = (transcription_method == "General Purpose") is_8bit = (soundfont_choice == SYNTH_8_BIT_LABEL) return { general_transcription_settings: gr.update(visible=is_general), synth_8bit_settings: gr.update(visible=is_8bit), } # --- Controller function to apply basic_pitch presets to the UI --- def apply_basic_pitch_preset(preset_name): if preset_name not in BASIC_PITCH_PRESETS: # If "Custom" is selected or name is invalid, don't change anything return {comp: gr.update() for comp in basic_pitch_ui_components} settings = BASIC_PITCH_PRESETS[preset_name] # Return a dictionary that maps each UI component to its new value return { onset_threshold: gr.update(value=settings['onset_thresh']), frame_threshold: gr.update(value=settings['frame_thresh']), minimum_note_length: gr.update(value=settings['min_note_len']), minimum_frequency: gr.update(value=settings['min_freq']), maximum_frequency: gr.update(value=settings['max_freq']), infer_onsets: gr.update(value=settings['infer_onsets_bool']), melodia_trick: gr.update(value=settings['melodia_trick_bool']), multiple_pitch_bends: gr.update(value=settings['multiple_bends_bool']) } # --- Function to apply 8-bit synthesizer presets --- # --- This function must be defined before the UI components that use it --- def apply_8bit_preset(preset_name): """ Takes the name of a preset and returns a dictionary of gr.update objects to set the values of all 13 of the 8-bit synthesizer's UI components. 
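    A typical return value (the shown names are illustrative; the real keys are the
    Gradio component objects collected in `s8bit_ui_components`):

        apply_8bit_preset("Generic Chiptune Loop")
        # -> {s8bit_waveform_type: gr.update(value='Square'),
        #     s8bit_pulse_width:   gr.update(value=0.25),
        #     ... one gr.update per synthesizer control ...}

    Returning a dict keyed by component lets Gradio route each update to the matching
    widget without depending on positional output order.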
""" # --- Use a list of keys for consistent updates --- param_keys = [ 'waveform_type', 'pulse_width', 'envelope_type', 'decay_time_s', 'vibrato_rate', 'vibrato_depth', 'bass_boost_level', 'smooth_notes_level', 'continuous_vibrato_level', 'noise_level', 'distortion_level', 'fm_modulation_depth', 'fm_modulation_rate' ] # If the user selects "Custom" or the preset is not found, do not change the values. if preset_name == "Custom" or preset_name not in S8BIT_PRESETS: # When switching to custom, don't change any values, just return empty updates. return {comp: gr.update() for comp in s8bit_ui_components} # Get the settings dictionary for the chosen preset. settings = S8BIT_PRESETS[preset_name] # Create a dictionary mapping UI components to their new values from the preset. update_dict = {} for i, key in enumerate(param_keys): component = s8bit_ui_components[i] value = settings.get(key) if value is not None: update_dict[component] = gr.update(value=value) else: update_dict[component] = gr.update() return update_dict app = gr.Blocks(theme=gr.themes.Base()) with app: gr.Markdown("