surroundify / app.py
ziqiangao
fix incorrectly feeding process data
05d36ea
raw
history blame
6.12 kB
import numpy as np
import soundfile as sf
import subprocess
import tempfile
import os
import gradio as gr
from scipy import signal
# ========== Processing Functions ==========
def convert_to_wav_float(input_file):
    """
    Convert any input audio to 32-bit float WAV to preserve full dynamic range.

    Args:
        input_file: Path of the audio file passed to ffmpeg as ``-i``.

    Returns:
        Path of a newly created temporary ``.wav`` file. The file is not
        deleted automatically; the caller must remove it when done.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
        OSError: If the ffmpeg executable cannot be started.
    """
    # Only a unique path is needed here: the handle is closed right away and
    # ffmpeg overwrites the empty placeholder (``-y``).
    temp_wav = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    temp_wav.close()
    try:
        # pcm_f32le = 32-bit float little-endian samples, so nothing is clipped
        # or quantised to integers during the conversion.
        subprocess.run([
            "ffmpeg", "-y", "-i", input_file,
            "-c:a", "pcm_f32le", "-f", "wav", temp_wav.name
        ], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True)
    except Exception:
        # The conversion failed: do not leave the placeholder file behind.
        try:
            os.unlink(temp_wav.name)
        except OSError:
            pass
        raise
    return temp_wav.name
def apply_reverb_wet_only(audio, samplerate, reverb_args):
    """
    Apply wet-only reverb using SoX to a single channel with custom reverb args.

    The audio is written to a temporary float WAV, processed by
    ``sox <in> <out> reverb -w <reverb_args...>`` and read back.

    Args:
        audio: Sample array accepted by ``soundfile.write``.
        samplerate: Sample rate in Hz used when writing the temporary file.
        reverb_args: Sequence of strings appended after ``reverb -w`` on the
            SoX command line.

    Returns:
        The processed samples as read by ``soundfile.read`` with
        ``dtype='float32'``.

    Raises:
        subprocess.CalledProcessError: If SoX exits with a non-zero status.
        OSError: If the SoX executable cannot be started.
    """
    # Reserve two unique paths and close the handles immediately so that
    # soundfile and SoX can open the files by name.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tin, \
            tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tout:
        in_path, out_path = tin.name, tout.name
    try:
        sf.write(in_path, audio, samplerate, subtype='FLOAT')
        subprocess.run(
            ["sox", in_path, out_path, "reverb", "-w"] + list(reverb_args),
            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True
        )
        wet, _ = sf.read(out_path, dtype='float32')
    finally:
        # Always remove both temporary files, including when SoX fails.
        for path in (in_path, out_path):
            try:
                os.unlink(path)
            except OSError:
                pass
    return wet
def sox_filter(audio, samplerate, filter_type, cutoff):
    """
    Apply highpass or lowpass filter via SoX.

    The audio is written to a temporary float WAV, processed by
    ``sox <in> <out> <filter_type> <cutoff>`` and read back.

    Args:
        audio: Sample array accepted by ``soundfile.write``.
        samplerate: Sample rate in Hz used when writing the temporary file.
        filter_type: 'highpass' or 'lowpass'.
        cutoff: Cutoff frequency in Hz; converted with ``str()`` for SoX.

    Returns:
        The filtered samples as read by ``soundfile.read`` with
        ``dtype='float32'``.

    Raises:
        subprocess.CalledProcessError: If SoX exits with a non-zero status.
        OSError: If the SoX executable cannot be started.
    """
    # Reserve two unique paths and close the handles immediately so that
    # soundfile and SoX can open the files by name.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tin, \
            tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tout:
        in_path, out_path = tin.name, tout.name
    try:
        sf.write(in_path, audio, samplerate, subtype='FLOAT')
        subprocess.run(
            ["sox", in_path, out_path, filter_type, str(cutoff)],
            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True
        )
        out, _ = sf.read(out_path, dtype='float32')
    finally:
        # Always remove both temporary files, including when SoX fails.
        for path in (in_path, out_path):
            try:
                os.unlink(path)
            except OSError:
                pass
    return out
def extract_phantom_center(input_file, rdf=0.99999):
    """
    Split a stereo file into front-left, front-right and phantom-centre signals.

    The centre is estimated per STFT bin: its magnitude is the smaller of the
    left and right magnitudes and its phase is taken from the mid signal
    ``(L + R) / 2``. The left/right residuals are the original spectra minus
    ``rdf`` times that centre estimate.

    Args:
        input_file: Path of the audio file; it is first converted to a float
            WAV with ``convert_to_wav_float``.
        rdf: Factor applied to the centre spectrum before it is subtracted
            from the left and right spectra.

    Returns:
        Tuple ``(fs, FL, FR, FC)``: the sample rate, front left without
        centre, front right without centre, and the phantom centre. Each
        signal is truncated to the input length.

    Raises:
        ValueError: If the decoded audio is not 2-channel stereo or contains
            no samples.
    """
    wav = convert_to_wav_float(input_file)
    try:
        data, fs = sf.read(wav, dtype='float32')
    finally:
        # The intermediate WAV is no longer needed, even if reading failed.
        os.unlink(wav)
    if data.ndim != 2 or data.shape[1] != 2:
        raise ValueError("Input must be stereo 2-channel")
    n_samples = data.shape[0]
    if n_samples == 0:
        raise ValueError("Input audio contains no samples")
    left, right = data[:, 0], data[:, 1]
    mid = (left + right) / 2
    # Use one-second analysis segments, but never longer than the signal:
    # SciPy shrinks an oversized segment on its own, which would leave the
    # overlap and the inverse transform below using inconsistent sizes.
    nperseg = min(int(fs), n_samples)
    noverlap = nperseg // 2
    stft_args = dict(fs=fs, nperseg=nperseg, noverlap=noverlap)
    _, _, z_left = signal.stft(left, **stft_args)
    _, _, z_right = signal.stft(right, **stft_args)
    _, _, z_mid = signal.stft(mid, **stft_args)
    # Centre estimate: common magnitude of both channels, phase of the mid.
    z_centre = (np.minimum(np.abs(z_left), np.abs(z_right))
                * np.exp(1j * np.angle(z_mid)))
    _, FL = signal.istft(z_left - z_centre * rdf, **stft_args)
    _, FR = signal.istft(z_right - z_centre * rdf, **stft_args)
    _, FC = signal.istft(z_centre, **stft_args)
    # Trim each reconstructed signal back to the original sample count.
    return fs, FL[:n_samples], FR[:n_samples], FC[:n_samples]
# Preset-based parameters: (high-pass cutoff Hz, LFE low-pass cutoff Hz,
# SoX reverb arguments).
# SoX reverb argument order: reverberance (%), HF-damping (%), room-scale (%),
# stereo-depth (%), pre-delay (ms), wet-gain (dB).
_SURROUND_PRESETS = {
    "music": (120, 120, ['70', '40', '100', '95', '10', '-2']),
    "speech": (120, 120, ['50', '99', '50', '70', '0', '0']),
    "open": (120, 120, ['20', '50', '100', '100', '100', '0']),
}


def _remove_quietly(path):
    """Delete *path*, ignoring the case where it cannot be removed."""
    try:
        os.unlink(path)
    except OSError:
        pass


def create_5_1_surround(input_file, preset="music"):
    """
    Upmix a stereo audio file to a 6-channel OGG (Vorbis) file.

    Channels are written in the order front left, front right, centre, LFE,
    surround left, surround right. Fronts and centre come from
    ``extract_phantom_center``; the surrounds are wet-only reverb of the
    original left/right channels; the LFE is the low-passed average of the
    original channels. Every channel except the LFE is high-pass filtered.

    Args:
        input_file: Path of the stereo audio file to convert.
        preset: One of "music", "speech" or "open"; selects the filter
            cutoffs and reverb settings.

    Returns:
        Path of a temporary ``.ogg`` file holding the result. The file is
        not deleted by this function.

    Raises:
        ValueError: If ``preset`` is unknown or ``input_file`` is empty/None.
        subprocess.CalledProcessError: If ffmpeg or SoX fails.
    """
    try:
        hp_cutoff, lfe_cutoff, reverb_args = _SURROUND_PRESETS[preset]
    except (KeyError, TypeError):
        raise ValueError(f"Unknown preset: {preset}") from None
    # The UI passes None when nothing was uploaded; fail with a clear message
    # instead of handing None to ffmpeg.
    if not input_file:
        raise ValueError("No input audio file provided")

    # 1. Extract FL/FR/phantom centre
    fs, FL, FR, FC = extract_phantom_center(input_file)

    # 2. Get stereo original for reverb
    wav = convert_to_wav_float(input_file)
    try:
        stereo, _ = sf.read(wav, dtype='float32')
    finally:
        _remove_quietly(wav)
    L_orig, R_orig = stereo[:, 0], stereo[:, 1]

    # 3. Wet-only reverb with chosen settings
    SL = apply_reverb_wet_only(L_orig, fs, reverb_args)
    SR = apply_reverb_wet_only(R_orig, fs, reverb_args)

    # 4. Highpass filter everything except LFE
    FL_hp, FR_hp, FC_hp, SL_hp, SR_hp = (
        sox_filter(ch, fs, 'highpass', hp_cutoff)
        for ch in (FL, FR, FC, SL, SR)
    )

    # 5. Lowpass for LFE
    bass_sum = .5 * (L_orig + R_orig)
    LFE = sox_filter(bass_sum, fs, 'lowpass', lfe_cutoff)

    # 6. Stack and pad: zero-pad every channel at the end to the longest one
    channels = [FL_hp, FR_hp, FC_hp, LFE, SL_hp, SR_hp]
    length = max(len(ch) for ch in channels)
    multich = np.column_stack(
        [np.pad(ch, (0, length - len(ch))) for ch in channels]
    )

    # 7. Write WAV and encode to OGG
    out_wav = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
    out_wav.close()
    try:
        sf.write(out_wav.name, multich, fs, subtype='FLOAT')
        out_ogg = tempfile.NamedTemporaryFile(suffix='.ogg', delete=False)
        out_ogg.close()
        try:
            subprocess.run([
                "ffmpeg", "-y", "-i", out_wav.name,
                "-c:a", "libvorbis", "-ac", "6", "-channel_layout", "5.1",
                out_ogg.name
            ], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True)
        except Exception:
            # Encoding failed: do not leave an empty/partial OGG behind.
            _remove_quietly(out_ogg.name)
            raise
    finally:
        # The intermediate 6-channel WAV is never needed after this point.
        _remove_quietly(out_wav.name)
    return out_ogg.name
# ========== Gradio UI ==========
# Single-page interface: an audio upload, a preset selector, a convert button
# and a download slot for the resulting file.
with gr.Blocks(title="Stereo to 5.1 Surround") as demo:
    gr.Markdown("# 🎧 Stereo to 5.1 OGG Converter")
    # NOTE(review): this text names only music/speech, while the dropdown
    # below also offers "open".
    gr.Markdown("Choose music or speech preset for surround processing")

    # type="filepath" hands the upload to the callback as a path string.
    audio_input = gr.Audio(type="filepath", label="Upload stereo audio")
    preset_choice = gr.Dropdown(
        choices=["music", "speech", "open"],
        value="music",  # preset selected when the page loads
        label="Select Preset",
    )
    convert_button = gr.Button("Convert to 5.1 OGG")
    result_file = gr.File(label="Download 5.1 OGG")

    # Run the conversion on click and expose the returned path for download.
    convert_button.click(
        fn=create_5_1_surround,
        inputs=[audio_input, preset_choice],
        outputs=[result_file],
    )

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()