Spaces:
Sleeping
Sleeping
File size: 6,118 Bytes
62ee3db 26c1442 62ee3db a310a15 26c1442 13586a0 26c1442 13586a0 26c1442 13586a0 26c1442 13586a0 26c1442 62ee3db 26c1442 13586a0 3826453 13586a0 26c1442 13586a0 26c1442 13586a0 26c1442 13586a0 26c1442 13586a0 26c1442 13586a0 05d36ea 13586a0 26c1442 13586a0 26c1442 13586a0 26c1442 13586a0 26c1442 13586a0 26c1442 13586a0 26c1442 13586a0 62ee3db 13586a0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 |
import numpy as np
import soundfile as sf
import subprocess
import tempfile
import os
import gradio as gr
from scipy import signal
# ========== Processing Functions ==========
def convert_to_wav_float(input_file):
    """
    Convert any input audio to 32-bit float WAV to preserve full dynamic range.

    Runs ``ffmpeg`` to transcode ``input_file`` into a freshly created
    temporary ``.wav`` file encoded as ``pcm_f32le``.

    Returns:
        Path of the temporary WAV file. The caller owns the file and is
        responsible for deleting it.

    Raises:
        subprocess.CalledProcessError: ffmpeg exited with a non-zero status.
        OSError: ffmpeg could not be started (e.g. it is not installed).
        In both cases the temporary file is removed before the exception
        propagates, so failed conversions do not accumulate in the temp dir.
    """
    temp_wav = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    # Only the reserved path is needed; ffmpeg opens the file itself.
    temp_wav.close()
    try:
        # pcm_f32le stores 32-bit little-endian floating-point samples.
        subprocess.run([
            "ffmpeg", "-y", "-i", input_file,
            "-c:a", "pcm_f32le", "-f", "wav", temp_wav.name
        ], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True)
    except BaseException:
        # Nobody will ever receive this path, so remove the file here
        # (best effort) and let the original error propagate unchanged.
        try:
            os.unlink(temp_wav.name)
        except OSError:
            pass
        raise
    return temp_wav.name
def apply_reverb_wet_only(audio, samplerate, reverb_args):
    """
    Apply wet-only reverb using SoX to a single channel with custom reverb args.

    The samples are written to a float WAV file, processed with
    ``sox <in> <out> reverb -w <reverb_args...>`` and read back.

    Args:
        audio: Samples to process (anything ``soundfile.write`` accepts).
        samplerate: Sample rate in Hz used for the intermediate file.
        reverb_args: Sequence of positional arguments appended after
            ``reverb -w``. Each item is converted with ``str()``, so both
            strings and numbers are accepted.

    Returns:
        The processed samples as read back with dtype ``float32``.

    Raises:
        subprocess.CalledProcessError: sox exited with a non-zero status.
    """
    # A private scratch directory guarantees both intermediate files are
    # removed even when writing, sox, or reading fails, and avoids writing
    # to a path that is still held open by a NamedTemporaryFile handle.
    with tempfile.TemporaryDirectory() as workdir:
        dry_path = os.path.join(workdir, "dry.wav")
        wet_path = os.path.join(workdir, "wet.wav")
        sf.write(dry_path, audio, samplerate, subtype='FLOAT')
        subprocess.run(
            ["sox", dry_path, wet_path, "reverb", "-w",
             *(str(arg) for arg in reverb_args)],
            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True
        )
        wet, _ = sf.read(wet_path, dtype='float32')
    return wet
def sox_filter(audio, samplerate, filter_type, cutoff):
    """
    Apply highpass or lowpass filter via SoX.

    The samples are written to a float WAV file, processed with
    ``sox <in> <out> <filter_type> <cutoff>`` and read back.

    Args:
        audio: Samples to process (anything ``soundfile.write`` accepts).
        samplerate: Sample rate in Hz used for the intermediate file.
        filter_type: 'highpass' or 'lowpass' (passed to sox as the effect name).
        cutoff: Cutoff frequency in Hz; converted with ``str()``.

    Returns:
        The filtered samples as read back with dtype ``float32``.

    Raises:
        subprocess.CalledProcessError: sox exited with a non-zero status.
    """
    # A private scratch directory guarantees both intermediate files are
    # removed even when writing, sox, or reading fails, and avoids writing
    # to a path that is still held open by a NamedTemporaryFile handle.
    with tempfile.TemporaryDirectory() as workdir:
        src_path = os.path.join(workdir, "in.wav")
        dst_path = os.path.join(workdir, "out.wav")
        sf.write(src_path, audio, samplerate, subtype='FLOAT')
        subprocess.run(
            ["sox", src_path, dst_path, filter_type, str(cutoff)],
            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True
        )
        out, _ = sf.read(dst_path, dtype='float32')
    return out
def extract_phantom_center(input_file, rdf=0.99999):
    """
    Returns FL (front left without centre), FR, and FC (phantom centre).

    The input is converted to a float WAV, split into left/right, and
    analysed with an STFT. For every time/frequency bin the centre estimate
    takes the smaller of the two channel magnitudes and the phase of the
    mid signal ``(L + R) / 2``. The centre estimate, scaled by ``rdf``, is
    subtracted from each channel's spectrum to form the residual left and
    right signals.

    Args:
        input_file: Path of the audio file to analyse.
        rdf: Factor applied to the centre spectrum before it is subtracted
            from the left and right spectra.

    Returns:
        Tuple ``(fs, FL, FR, FC)``: the sample rate followed by the three
        time-domain signals, each trimmed to the input length.

    Raises:
        ValueError: the decoded audio is not 2-channel, or has no samples.
    """
    wav = convert_to_wav_float(input_file)
    try:
        data, fs = sf.read(wav, dtype='float32')
    finally:
        # Remove the intermediate file even when decoding fails.
        os.unlink(wav)
    if data.ndim != 2 or data.shape[1] != 2:
        raise ValueError("Input must be stereo 2-channel")
    if data.shape[0] == 0:
        raise ValueError("Input contains no audio samples")
    L, R = data[:, 0], data[:, 1]
    M = (L + R) / 2
    # One-second analysis window, clamped to the clip length. Without the
    # clamp, stft() silently shrinks the window for clips shorter than one
    # second while istft() is still told nperseg=fs, so the inverse
    # transform fails (or stft rejects noverlap >= nperseg outright).
    nperseg = min(int(fs), len(L))
    noverlap = nperseg // 2
    _, _, ZL = signal.stft(L, fs=fs, nperseg=nperseg, noverlap=noverlap)
    _, _, ZR = signal.stft(R, fs=fs, nperseg=nperseg, noverlap=noverlap)
    _, _, ZM = signal.stft(M, fs=fs, nperseg=nperseg, noverlap=noverlap)
    # Centre estimate: per-bin minimum magnitude with the mid-signal phase.
    Zc = np.minimum(np.abs(ZL), np.abs(ZR)) * np.exp(1j * np.angle(ZM))
    Zl_res = ZL - Zc * rdf
    Zr_res = ZR - Zc * rdf
    _, FL = signal.istft(Zl_res, fs=fs, nperseg=nperseg, noverlap=noverlap)
    _, FR = signal.istft(Zr_res, fs=fs, nperseg=nperseg, noverlap=noverlap)
    _, FC = signal.istft(Zc, fs=fs, nperseg=nperseg, noverlap=noverlap)
    # istft output can be longer than the input; trim back.
    return fs, FL[:len(L)], FR[:len(R)], FC[:len(M)]
def create_5_1_surround(input_file, preset="music"):
    """
    Upmix a stereo audio file to a 6-channel OGG/Vorbis file.

    Pipeline:
      1. Split the input into front left, front right and phantom centre.
      2. Re-read the original stereo signal.
      3. Build the two surround channels as wet-only reverb of the
         original left and right channels.
      4. High-pass every channel except the LFE.
      5. Build the LFE as a low-passed average of left and right.
      6. Zero-pad all channels to the same length and stack them in the
         order FL, FR, FC, LFE, SL, SR.
      7. Write a float WAV and encode it with ffmpeg/libvorbis.

    Args:
        input_file: Path of the stereo audio file to convert.
        preset: One of "music", "speech" or "open"; selects the filter
            cutoffs and the SoX reverb arguments.

    Returns:
        Path of the generated temporary ``.ogg`` file.

    Raises:
        ValueError: no input file was given, or ``preset`` is unknown.
        subprocess.CalledProcessError: ffmpeg or sox failed.
    """
    # Gradio passes None when the user clicks the button without uploading.
    if not input_file:
        raise ValueError("No input audio file provided")

    # Preset-based parameters: (hp_cutoff, lfe_cutoff, reverb_args).
    # SoX reverb argument order, with SoX defaults in parentheses:
    # reverberance (50%) HF-damping (50%) room-scale (100%)
    # stereo-depth (100%) pre-delay (0ms) wet-gain (0dB)
    presets = {
        "music": (120, 120, ['70', '40', '100', '95', '10', '-2']),
        "speech": (120, 120, ['50', '99', '50', '70', '0', '0']),
        "open": (120, 120, ['20', '50', '100', '100', '100', '0']),
    }
    settings = presets.get(preset) if isinstance(preset, str) else None
    if settings is None:
        raise ValueError(f"Unknown preset: {preset}")
    hp_cutoff, lfe_cutoff, reverb_args = settings

    # 1. Extract FL/FR/phantom centre
    fs, FL, FR, FC = extract_phantom_center(input_file)

    # 2. Get stereo original for reverb
    wav = convert_to_wav_float(input_file)
    try:
        stereo, _ = sf.read(wav, dtype='float32')
    finally:
        os.unlink(wav)
    L_orig, R_orig = stereo[:, 0], stereo[:, 1]

    # 3. Wet-only reverb with chosen settings
    SL = apply_reverb_wet_only(L_orig, fs, reverb_args)
    SR = apply_reverb_wet_only(R_orig, fs, reverb_args)

    # 4. Highpass filter everything except LFE
    FL_hp, FR_hp, FC_hp, SL_hp, SR_hp = (
        sox_filter(ch, fs, 'highpass', hp_cutoff)
        for ch in (FL, FR, FC, SL, SR)
    )

    # 5. Lowpass for LFE
    bass_sum = .5 * (L_orig + R_orig)
    LFE = sox_filter(bass_sum, fs, 'lowpass', lfe_cutoff)

    # 6. Stack and pad (channels may differ in length after processing)
    channels = [FL_hp, FR_hp, FC_hp, LFE, SL_hp, SR_hp]
    length = max(len(ch) for ch in channels)
    multich = np.column_stack(
        [np.pad(ch, (0, length - len(ch))) for ch in channels]
    )

    # 7. Write WAV and encode to OGG
    out_wav = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
    out_wav.close()
    try:
        sf.write(out_wav.name, multich, fs, subtype='FLOAT')
        out_ogg = tempfile.NamedTemporaryFile(suffix='.ogg', delete=False)
        out_ogg.close()
        try:
            subprocess.run([
                "ffmpeg", "-y", "-i", out_wav.name,
                "-c:a", "libvorbis", "-ac", "6", "-channel_layout", "5.1",
                out_ogg.name
            ], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True)
        except BaseException:
            # Encoding failed: the OGG path is never returned, so drop it.
            try:
                os.unlink(out_ogg.name)
            except OSError:
                pass
            raise
    finally:
        # The intermediate WAV is never needed after this point.
        try:
            os.unlink(out_wav.name)
        except OSError:
            pass
    return out_ogg.name
# ========== Gradio UI ==========
# Single-page interface: upload a stereo file, pick a preset, download the
# resulting 5.1 OGG produced by create_5_1_surround.
with gr.Blocks(title="Stereo to 5.1 Surround") as demo:
    gr.Markdown("# 🎧 Stereo to 5.1 OGG Converter")
    gr.Markdown("Choose the music, speech or open preset for surround processing")
    # type="filepath" hands the callback a path on disk rather than samples.
    inp = gr.Audio(label="Upload stereo audio", type="filepath")
    preset = gr.Dropdown(
        label="Select Preset",
        choices=["music", "speech", "open"],
        value="music"  # preset selected when the page loads
    )
    btn = gr.Button("Convert to 5.1 OGG")
    out = gr.File(label="Download 5.1 OGG")
    btn.click(fn=create_5_1_surround, inputs=[inp, preset], outputs=[out])

if __name__ == "__main__":
    demo.launch()