# Spaces: Sleeping  (page-status text captured along with the file; not part of the program)
import numpy as np | |
import soundfile as sf | |
import subprocess | |
import tempfile | |
import os | |
import gradio as gr | |
from scipy import signal | |
# ========== Processing Functions ========== | |
def convert_to_wav_float(input_file):
    """
    Convert any input audio to 32-bit float WAV to preserve full dynamic range.

    Runs ``ffmpeg`` to decode ``input_file`` into a newly created temporary
    ``.wav`` file encoded as ``pcm_f32le``.

    Args:
        input_file: Path of the audio file passed to ffmpeg's ``-i`` option.

    Returns:
        Path of the temporary WAV file. The file is not deleted
        automatically; the caller is responsible for removing it.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
        OSError: If the ffmpeg executable cannot be started.
    """
    temp_wav = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    # Only the reserved path is needed; close our handle so ffmpeg can
    # overwrite the file by name (hence "-y").
    temp_wav.close()
    try:
        # 32-bit float little-endian PCM keeps float samples without clipping.
        subprocess.run(
            [
                "ffmpeg", "-y", "-i", input_file,
                "-c:a", "pcm_f32le", "-f", "wav", temp_wav.name,
            ],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=True,
        )
    except BaseException:
        # The file was created with delete=False, so nothing else will remove
        # it if the conversion fails; clean up before propagating the error.
        if os.path.exists(temp_wav.name):
            os.unlink(temp_wav.name)
        raise
    return temp_wav.name
def apply_reverb_wet_only(audio, samplerate, reverb_args):
    """
    Apply wet-only reverb using SoX to a single channel with custom reverb args.

    The samples are written to a temporary float WAV, processed with
    ``sox <in> <out> reverb -w <reverb_args...>`` and read back.

    Args:
        audio: Samples to process (written with ``soundfile.write``).
        samplerate: Sample rate used when writing the temporary input file.
        reverb_args: List of strings appended after ``reverb -w`` on the SoX
            command line.

    Returns:
        The SoX output read back as ``float32`` samples.

    Raises:
        subprocess.CalledProcessError: If SoX exits with a non-zero status.
        OSError: If the sox executable cannot be started.
    """
    # Reserve two temp paths and close the handles right away so soundfile
    # and SoX can open the files by name.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tin, \
            tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tout:
        in_path, out_path = tin.name, tout.name
    try:
        sf.write(in_path, audio, samplerate, subtype='FLOAT')
        subprocess.run(
            ["sox", in_path, out_path, "reverb", "-w"] + reverb_args,
            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True
        )
        wet, _ = sf.read(out_path, dtype='float32')
    finally:
        # The files were created with delete=False; remove them whether or
        # not processing succeeded so failures do not leak temp files.
        for path in (in_path, out_path):
            try:
                os.unlink(path)
            except OSError:
                pass
    return wet
def sox_filter(audio, samplerate, filter_type, cutoff):
    """
    Apply highpass or lowpass filter via SoX.

    The samples are written to a temporary float WAV, processed with
    ``sox <in> <out> <filter_type> <cutoff>`` and read back.

    Args:
        audio: Samples to filter (written with ``soundfile.write``).
        samplerate: Sample rate used when writing the temporary input file.
        filter_type: 'highpass' or 'lowpass'; passed to SoX as the effect name.
        cutoff: Cutoff in Hz; converted with ``str()`` for the command line.

    Returns:
        The SoX output read back as ``float32`` samples.

    Raises:
        subprocess.CalledProcessError: If SoX exits with a non-zero status.
        OSError: If the sox executable cannot be started.
    """
    # Reserve two temp paths and close the handles right away so soundfile
    # and SoX can open the files by name.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tin, \
            tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tout:
        in_path, out_path = tin.name, tout.name
    try:
        sf.write(in_path, audio, samplerate, subtype='FLOAT')
        subprocess.run(
            ["sox", in_path, out_path, filter_type, str(cutoff)],
            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True
        )
        out, _ = sf.read(out_path, dtype='float32')
    finally:
        # The files were created with delete=False; remove them whether or
        # not processing succeeded so failures do not leak temp files.
        for path in (in_path, out_path):
            try:
                os.unlink(path)
            except OSError:
                pass
    return out
def extract_phantom_center(input_file, rdf=0.99999):
    """
    Returns FL (front left without centre), FR, and FC (phantom centre).

    The input is decoded to a float WAV, split into left/right, and analysed
    with an STFT. For every time-frequency bin the centre estimate takes the
    smaller of the two channel magnitudes and the phase of the mid signal
    ``(L + R) / 2``. That estimate, scaled by ``rdf``, is subtracted from each
    side channel before converting back to the time domain.

    Args:
        input_file: Path of the audio file to decode.
        rdf: Factor applied to the centre estimate before it is subtracted
            from the left and right spectra.

    Returns:
        Tuple ``(fs, FL, FR, FC)``: the sample rate followed by the left
        residual, right residual and centre signals, each truncated to at
        most the input length.

    Raises:
        ValueError: If the decoded audio is not 2-channel or has no samples.
    """
    wav = convert_to_wav_float(input_file)
    try:
        data, fs = sf.read(wav, dtype='float32')
    finally:
        # Remove the decoded temp file even when reading it fails.
        os.unlink(wav)
    if data.ndim != 2 or data.shape[1] != 2:
        raise ValueError("Input must be stereo 2-channel")
    L, R = data[:, 0], data[:, 1]
    if len(L) == 0:
        raise ValueError("Input audio contains no samples")
    M = (L + R) / 2
    # Analysis window of one second (fs samples), clamped to the signal
    # length: for shorter clips stft would shrink the window on its own and
    # the istft calls below, given the unshrunk size, would then fail.
    nperseg = min(fs, len(L))
    noverlap = nperseg // 2
    _, _, ZL = signal.stft(L, fs=fs, nperseg=nperseg, noverlap=noverlap)
    _, _, ZR = signal.stft(R, fs=fs, nperseg=nperseg, noverlap=noverlap)
    _, _, ZM = signal.stft(M, fs=fs, nperseg=nperseg, noverlap=noverlap)
    # Centre estimate: per-bin minimum magnitude with the mid signal's phase.
    Zc = np.minimum(np.abs(ZL), np.abs(ZR)) * np.exp(1j * np.angle(ZM))
    Zl_res = ZL - Zc * rdf
    Zr_res = ZR - Zc * rdf
    _, FL = signal.istft(Zl_res, fs=fs, nperseg=nperseg, noverlap=noverlap)
    _, FR = signal.istft(Zr_res, fs=fs, nperseg=nperseg, noverlap=noverlap)
    _, FC = signal.istft(Zc, fs=fs, nperseg=nperseg, noverlap=noverlap)
    # The inverse transform can return extra trailing samples; trim them.
    return fs, FL[:len(L)], FR[:len(R)], FC[:len(M)]
def create_5_1_surround(input_file, preset="music"):
    """
    Build a 6-channel (5.1) OGG Vorbis file from a stereo input.

    Channels are produced in the order FL, FR, FC, LFE, SL, SR:
    front left/right and centre come from ``extract_phantom_center``; the
    surrounds are wet-only reverb of the original left/right channels; the
    LFE is a low-passed average of left and right. All channels except the
    LFE are high-pass filtered.

    Args:
        input_file: Path of the stereo audio file to convert.
        preset: One of ``"music"``, ``"speech"`` or ``"open"``; selects the
            filter cutoffs and reverb settings.

    Returns:
        Path of a temporary ``.ogg`` file. The file is not deleted
        automatically; the caller is responsible for removing it.

    Raises:
        ValueError: If ``preset`` is unknown or no input file was given.
        subprocess.CalledProcessError: If ffmpeg or SoX fails.
    """
    # Per preset: (high-pass cutoff Hz, LFE low-pass cutoff Hz, reverb args).
    # Reverb args, in order: reverberance (%), HF-damping (%), room-scale (%),
    # stereo-depth (%), pre-delay (ms), wet-gain (dB).
    presets = {
        "music": (120, 120, ['70', '40', '100', '95', '10', '-2']),
        "speech": (120, 120, ['50', '99', '50', '70', '0', '0']),
        "open": (120, 120, ['20', '50', '100', '100', '100', '0']),
    }
    try:
        hp_cutoff, lfe_cutoff, reverb_args = presets[preset]
    except (KeyError, TypeError):
        raise ValueError(f"Unknown preset: {preset}") from None

    # The UI passes None when nothing was uploaded; fail with a clear message
    # instead of an obscure error from inside subprocess.
    if not input_file:
        raise ValueError("No input audio file was provided")

    # 1. Extract FL/FR/phantom centre
    fs, FL, FR, FC = extract_phantom_center(input_file)

    # 2. Get stereo original for reverb
    wav = convert_to_wav_float(input_file)
    try:
        stereo, _ = sf.read(wav, dtype='float32')
    finally:
        os.unlink(wav)
    L_orig, R_orig = stereo[:, 0], stereo[:, 1]

    # 3. Wet-only reverb with chosen settings
    SL = apply_reverb_wet_only(L_orig, fs, reverb_args)
    SR = apply_reverb_wet_only(R_orig, fs, reverb_args)

    # 4. Highpass filter everything except LFE
    FL_hp = sox_filter(FL, fs, 'highpass', hp_cutoff)
    FR_hp = sox_filter(FR, fs, 'highpass', hp_cutoff)
    FC_hp = sox_filter(FC, fs, 'highpass', hp_cutoff)
    SL_hp = sox_filter(SL, fs, 'highpass', hp_cutoff)
    SR_hp = sox_filter(SR, fs, 'highpass', hp_cutoff)

    # 5. Lowpass for LFE
    bass_sum = .5 * (L_orig + R_orig)
    LFE = sox_filter(bass_sum, fs, 'lowpass', lfe_cutoff)

    # 6. Stack and pad: zero-pad every channel to the longest one so they can
    #    be combined into a single (samples, 6) array.
    channels = [FL_hp, FR_hp, FC_hp, LFE, SL_hp, SR_hp]
    length = max(len(ch) for ch in channels)
    multich = np.column_stack(
        [np.pad(ch, (0, length - len(ch))) for ch in channels]
    )

    # 7. Write WAV and encode to OGG
    out_wav = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
    out_wav.close()
    out_ogg = tempfile.NamedTemporaryFile(suffix='.ogg', delete=False)
    out_ogg.close()
    try:
        sf.write(out_wav.name, multich, fs, subtype='FLOAT')
        subprocess.run([
            "ffmpeg", "-y", "-i", out_wav.name,
            "-c:a", "libvorbis", "-ac", "6", "-channel_layout", "5.1",
            out_ogg.name
        ], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True)
    except BaseException:
        # Encoding failed: the OGG will never be returned, so remove it.
        if os.path.exists(out_ogg.name):
            os.unlink(out_ogg.name)
        raise
    finally:
        # The intermediate WAV is never needed after this point.
        if os.path.exists(out_wav.name):
            os.unlink(out_wav.name)
    return out_ogg.name
# ========== Gradio UI ==========
# Single-page interface: an audio upload, a preset selector, a convert button
# and a file output. Clicking the button calls create_5_1_surround with the
# uploaded file path and the selected preset, and offers the result to download.
with gr.Blocks(title="Stereo to 5.1 Surround") as demo:
    gr.Markdown("# 🎧 Stereo to 5.1 OGG Converter")
    gr.Markdown("Choose the music, speech, or open preset for surround processing")
    # type="filepath" hands the callback a path on disk rather than samples.
    inp = gr.Audio(label="Upload stereo audio", type="filepath")
    preset = gr.Dropdown(
        label="Select Preset",
        choices=["music", "speech", "open"],
        value="music",  # preset selected by default
    )
    btn = gr.Button("Convert to 5.1 OGG")
    out = gr.File(label="Download 5.1 OGG")
    btn.click(fn=create_5_1_surround, inputs=[inp, preset], outputs=[out])

if __name__ == "__main__":
    demo.launch()