"""Gradio app that upmixes a stereo recording to a 5.1 OGG/Vorbis file.

The pipeline decodes the input with ffmpeg, splits a phantom centre out of
the stereo pair with an STFT, derives surround channels from a wet-only SoX
reverb, band-splits everything around the LFE crossover with SoX filters and
finally encodes the six channels with ffmpeg/libvorbis.

Requires the ``ffmpeg`` and ``sox`` executables on ``PATH``.
"""

import os
import subprocess
import tempfile

import gradio as gr
import numpy as np
import soundfile as sf
from scipy import signal

# ========== Processing Functions ==========

# Fraction of the phantom centre that is subtracted from the front channels.
_DEFAULT_RDF = 0.99999

# SoX reverb arguments, in order:
# reverberance (50%) HF-damping (50%) room-scale (100%)
# stereo-depth (100%) pre-delay (0ms) wet-gain (0dB)
_PRESETS = {
    "music": {
        "hp_cutoff": 120,
        "lfe_cutoff": 120,
        "reverb_args": ['70', '40', '100', '95', '10', '-2'],
    },
    "speech": {
        "hp_cutoff": 120,
        "lfe_cutoff": 120,
        "reverb_args": ['50', '99', '50', '70', '0', '0'],
    },
    "open": {
        "hp_cutoff": 120,
        "lfe_cutoff": 120,
        "reverb_args": ['20', '50', '100', '100', '100', '0'],
    },
}


def _run_silently(command):
    """Run an external command with its output discarded.

    Raises:
        subprocess.CalledProcessError: if the command exits non-zero.
    """
    subprocess.run(
        command,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        check=True,
    )


def _new_temp_path(suffix):
    """Create an empty, closed temporary file and return its path."""
    handle = tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
    handle.close()
    return handle.name


def _remove_quietly(path):
    """Delete *path*, ignoring the case where it cannot be removed."""
    try:
        os.unlink(path)
    except OSError:
        # Best-effort cleanup: a failure here must not mask the real error.
        pass


def convert_to_wav_float(input_file):
    """Convert any input audio to a 32-bit float WAV via ffmpeg.

    Float samples keep the full dynamic range without clipping.

    Args:
        input_file: Path of the audio file to decode.

    Returns:
        Path of a new temporary WAV file. The caller owns it and must
        delete it.

    Raises:
        subprocess.CalledProcessError: if ffmpeg fails.
    """
    wav_path = _new_temp_path(".wav")
    try:
        _run_silently([
            "ffmpeg", "-y", "-i", input_file,
            "-c:a", "pcm_f32le", "-f", "wav", wav_path
        ])
    except BaseException:
        # Do not leave the placeholder file behind when decoding fails.
        _remove_quietly(wav_path)
        raise
    return wav_path


def _process_with_sox(audio, samplerate, effect_args):
    """Round-trip *audio* through ``sox <in> <out> <effect_args...>``.

    The scratch files live in a temporary directory that is removed even
    when SoX or soundfile raises.
    """
    with tempfile.TemporaryDirectory() as workdir:
        in_path = os.path.join(workdir, "in.wav")
        out_path = os.path.join(workdir, "out.wav")
        sf.write(in_path, audio, samplerate, subtype='FLOAT')
        _run_silently(
            ["sox", in_path, out_path] + [str(arg) for arg in effect_args]
        )
        processed, _ = sf.read(out_path, dtype='float32')
    return processed


def apply_reverb_wet_only(audio, samplerate, reverb_args):
    """Apply wet-only SoX reverb to a single channel.

    Args:
        audio: Samples of one channel.
        samplerate: Sample rate in Hz.
        reverb_args: Positional arguments for the SoX ``reverb`` effect.

    Returns:
        The reverberated (wet) signal as float32 samples.

    Raises:
        subprocess.CalledProcessError: if SoX fails.
    """
    return _process_with_sox(
        audio, samplerate, ["reverb", "-w"] + list(reverb_args)
    )


def sox_filter(audio, samplerate, filter_type, cutoff):
    """Apply a highpass or lowpass filter via SoX.

    Args:
        audio: Samples to filter.
        samplerate: Sample rate in Hz.
        filter_type: 'highpass' or 'lowpass'.
        cutoff: Cutoff frequency in Hz.

    Returns:
        The filtered signal as float32 samples.

    Raises:
        subprocess.CalledProcessError: if SoX fails.
    """
    return _process_with_sox(audio, samplerate, [filter_type, cutoff])


def _load_stereo(input_file):
    """Decode *input_file* and return ``(fs, left, right)``.

    Raises:
        ValueError: if no file is given, the audio is not 2-channel, or it
            contains no samples.
        subprocess.CalledProcessError: if ffmpeg fails.
    """
    if not input_file:
        raise ValueError("No input audio file was provided")
    wav_path = convert_to_wav_float(input_file)
    try:
        data, fs = sf.read(wav_path, dtype='float32')
    finally:
        _remove_quietly(wav_path)
    if data.ndim != 2 or data.shape[1] != 2:
        raise ValueError("Input must be stereo 2-channel")
    if data.shape[0] == 0:
        raise ValueError("Input audio contains no samples")
    return fs, data[:, 0], data[:, 1]


def _split_phantom_center(left, right, fs, rdf):
    """Split a stereo pair into (front left, front right, phantom centre).

    Per STFT bin the centre takes the smaller of the two channel magnitudes
    and the phase of the mid signal; ``rdf`` times that centre is then
    subtracted from each side.
    """
    n_samples = len(left)
    mid = (left + right) / 2
    # One-second windows, clamped to the clip length so that stft and istft
    # agree on the window size for clips shorter than one second.
    nperseg = min(int(fs), n_samples)
    stft_args = {"fs": fs, "nperseg": nperseg, "noverlap": nperseg // 2}

    _, _, z_left = signal.stft(left, **stft_args)
    _, _, z_right = signal.stft(right, **stft_args)
    _, _, z_mid = signal.stft(mid, **stft_args)

    magnitude = np.minimum(np.abs(z_left), np.abs(z_right))
    z_centre = magnitude * np.exp(1j * np.angle(z_mid))

    _, front_left = signal.istft(z_left - z_centre * rdf, **stft_args)
    _, front_right = signal.istft(z_right - z_centre * rdf, **stft_args)
    _, centre = signal.istft(z_centre, **stft_args)
    # istft output is padded to whole frames; trim back to the input length.
    return front_left[:n_samples], front_right[:n_samples], centre[:n_samples]


def extract_phantom_center(input_file, rdf=_DEFAULT_RDF):
    """Return FL (front left without centre), FR, and FC (phantom centre).

    Args:
        input_file: Path of a stereo audio file.
        rdf: Fraction of the centre removed from the left/right channels.

    Returns:
        Tuple ``(fs, FL, FR, FC)`` with the sample rate and three arrays of
        the same length as the input.

    Raises:
        ValueError: if the input is missing, empty or not 2-channel stereo.
        subprocess.CalledProcessError: if ffmpeg fails.
    """
    fs, left, right = _load_stereo(input_file)
    front_left, front_right, centre = _split_phantom_center(
        left, right, fs, rdf
    )
    return fs, front_left, front_right, centre


def _encode_ogg(multich, fs):
    """Encode a (samples, 6) array to a temporary 5.1 OGG and return its path."""
    ogg_path = _new_temp_path('.ogg')
    try:
        with tempfile.TemporaryDirectory() as workdir:
            wav_path = os.path.join(workdir, "surround.wav")
            sf.write(wav_path, multich, fs, subtype='FLOAT')
            _run_silently([
                "ffmpeg", "-y", "-i", wav_path,
                "-c:a", "libvorbis", "-ac", "6", "-channel_layout", "5.1",
                ogg_path
            ])
    except BaseException:
        # Only hand a file back to the caller when encoding succeeded.
        _remove_quietly(ogg_path)
        raise
    return ogg_path


def create_5_1_surround(input_file, preset="music"):
    """Upmix a stereo file to a 5.1 OGG/Vorbis file.

    Args:
        input_file: Path of a stereo audio file.
        preset: One of "music", "speech" or "open"; selects the filter
            cutoffs and the SoX reverb settings.

    Returns:
        Path of the encoded OGG file. The file is not deleted here because
        the caller (the Gradio download widget) still needs it.

    Raises:
        ValueError: for an unknown preset or unusable input audio.
        subprocess.CalledProcessError: if ffmpeg or SoX fails.
    """
    try:
        settings = _PRESETS[preset]
    except (KeyError, TypeError):
        raise ValueError(f"Unknown preset: {preset}") from None
    hp_cutoff = settings["hp_cutoff"]
    lfe_cutoff = settings["lfe_cutoff"]
    reverb_args = settings["reverb_args"]

    # 1. Decode once, then extract FL/FR/phantom centre
    fs, left, right = _load_stereo(input_file)
    front_left, front_right, centre = _split_phantom_center(
        left, right, fs, _DEFAULT_RDF
    )

    # 2. Wet-only reverb of the original channels feeds the surrounds
    surround_left = apply_reverb_wet_only(left, fs, reverb_args)
    surround_right = apply_reverb_wet_only(right, fs, reverb_args)

    # 3. Highpass filter everything except LFE
    full_range = [
        sox_filter(channel, fs, 'highpass', hp_cutoff)
        for channel in (
            front_left, front_right, centre, surround_left, surround_right
        )
    ]
    fl_hp, fr_hp, fc_hp, sl_hp, sr_hp = full_range

    # 4. Lowpass of the mono sum for LFE
    bass_sum = .5 * (left + right)
    lfe = sox_filter(bass_sum, fs, 'lowpass', lfe_cutoff)

    # 5. Stack in FL, FR, FC, LFE, SL, SR order, zero-padding to equal length
    channels = [fl_hp, fr_hp, fc_hp, lfe, sl_hp, sr_hp]
    length = max(len(ch) for ch in channels)
    multich = np.column_stack(
        [np.pad(ch, (0, length - len(ch))) for ch in channels]
    )

    # 6. Write WAV and encode to OGG
    return _encode_ogg(multich, fs)


# ========== Gradio UI ==========

with gr.Blocks(title="Stereo to 5.1 Surround") as demo:
    gr.Markdown("# 🎧 Stereo to 5.1 OGG Converter")
    gr.Markdown("Choose music or speech preset for surround processing")

    inp = gr.Audio(label="Upload stereo audio", type="filepath")
    preset = gr.Dropdown(
        label="Select Preset",
        choices=["music", "speech", "open"],
        value="music"  # preset selected when the page loads
    )
    btn = gr.Button("Convert to 5.1 OGG")
    out = gr.File(label="Download 5.1 OGG")

    btn.click(fn=create_5_1_surround, inputs=[inp, preset], outputs=[out])

if __name__ == "__main__":
    demo.launch()