File size: 6,118 Bytes
62ee3db
 
26c1442
 
 
 
62ee3db
a310a15
26c1442
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13586a0
26c1442
13586a0
26c1442
 
 
 
 
13586a0
26c1442
 
 
 
 
 
 
 
13586a0
26c1442
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62ee3db
26c1442
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13586a0
 
 
 
 
 
3826453
13586a0
 
 
 
 
 
 
 
 
 
 
 
26c1442
 
13586a0
26c1442
 
 
13586a0
26c1442
13586a0
 
 
26c1442
13586a0
 
 
 
 
 
26c1442
13586a0
05d36ea
13586a0
26c1442
13586a0
26c1442
 
 
 
 
13586a0
26c1442
 
 
 
 
 
 
 
 
 
 
 
13586a0
26c1442
 
 
13586a0
 
26c1442
13586a0
 
 
 
 
26c1442
 
13586a0
 
62ee3db
 
13586a0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
import numpy as np
import soundfile as sf
import subprocess
import tempfile
import os
import gradio as gr
from scipy import signal

# ========== Processing Functions ==========

def convert_to_wav_float(input_file):
    """
    Convert any input audio to 32-bit float WAV to preserve full dynamic range.

    The conversion is delegated to the ``ffmpeg`` executable, which must be on
    PATH.

    Args:
        input_file: Path of the audio file to convert.

    Returns:
        Path of a newly created temporary ``.wav`` file. The caller owns the
        file and is responsible for deleting it.

    Raises:
        FileNotFoundError: If the ``ffmpeg`` executable cannot be started.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    temp_wav = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    # Close our handle right away: ffmpeg reopens the path by name.
    temp_wav.close()
    out_path = temp_wav.name
    try:
        # pcm_f32le = 32-bit little-endian float samples, so values outside
        # [-1, 1] survive the conversion without clipping.
        subprocess.run([
            "ffmpeg", "-y", "-i", input_file,
            "-c:a", "pcm_f32le", "-f", "wav", out_path
        ], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True)
    except BaseException:
        # The file was created with delete=False; do not leave it behind when
        # the conversion did not succeed.
        try:
            os.unlink(out_path)
        except OSError:
            pass  # best-effort cleanup; the original error is what matters
        raise
    return out_path


def apply_reverb_wet_only(audio, samplerate, reverb_args):
    """
    Apply wet-only reverb using SoX to a single channel with custom reverb args.

    The audio is written to a temporary float WAV, processed with
    ``sox <in> <out> reverb -w <reverb_args...>`` and read back.

    Args:
        audio: Sample data accepted by ``soundfile.write``.
        samplerate: Sample rate in Hz used for the temporary WAV.
        reverb_args: Iterable of positional arguments appended after
            ``reverb -w``; each item is converted with ``str``.

    Returns:
        The processed (wet) signal as a float32 array.

    Raises:
        FileNotFoundError: If the ``sox`` executable cannot be started.
        subprocess.CalledProcessError: If SoX exits with a non-zero status.
    """
    paths = []
    try:
        for _ in range(2):
            handle = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
            paths.append(handle.name)
            # Close immediately: soundfile and SoX reopen the paths by name,
            # which fails on some platforms while a handle is still open.
            handle.close()
        src_path, dst_path = paths

        sf.write(src_path, audio, samplerate, subtype='FLOAT')
        subprocess.run(
            ["sox", src_path, dst_path, "reverb", "-w",
             *(str(arg) for arg in reverb_args)],
            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True
        )
        wet, _ = sf.read(dst_path, dtype='float32')
        return wet
    finally:
        # Remove the scratch files on success and on failure alike.
        for path in paths:
            try:
                os.unlink(path)
            except OSError:
                pass  # best-effort cleanup



def sox_filter(audio, samplerate, filter_type, cutoff):
    """
    Apply highpass or lowpass filter via SoX.

    The audio is written to a temporary float WAV, processed with
    ``sox <in> <out> <filter_type> <cutoff>`` and read back.

    Args:
        audio: Sample data accepted by ``soundfile.write``.
        samplerate: Sample rate in Hz used for the temporary WAV.
        filter_type: 'highpass' or 'lowpass'; passed to SoX as the effect name.
        cutoff: Cutoff frequency in Hz; converted with ``str``.

    Returns:
        The filtered signal as a float32 array.

    Raises:
        FileNotFoundError: If the ``sox`` executable cannot be started.
        subprocess.CalledProcessError: If SoX exits with a non-zero status.
    """
    paths = []
    try:
        for _ in range(2):
            handle = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
            paths.append(handle.name)
            # Close immediately: soundfile and SoX reopen the paths by name,
            # which fails on some platforms while a handle is still open.
            handle.close()
        src_path, dst_path = paths

        sf.write(src_path, audio, samplerate, subtype='FLOAT')
        subprocess.run(
            ["sox", src_path, dst_path, filter_type, str(cutoff)],
            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True
        )
        out, _ = sf.read(dst_path, dtype='float32')
        return out
    finally:
        # Remove the scratch files on success and on failure alike.
        for path in paths:
            try:
                os.unlink(path)
            except OSError:
                pass  # best-effort cleanup


def extract_phantom_center(input_file, rdf=0.99999):
    """
    Returns FL (front left without centre), FR, and FC (phantom centre).

    The file is converted to float WAV, and each of left, right and mid
    ((L + R) / 2) is transformed with an STFT. Per time/frequency bin the
    centre estimate takes the smaller of the left/right magnitudes and the
    phase of the mid signal; that estimate, scaled by ``rdf``, is subtracted
    from the left and right spectra before everything is transformed back.

    Args:
        input_file: Path of a stereo audio file.
        rdf: Factor applied to the centre estimate before it is subtracted
            from the left and right spectra.

    Returns:
        Tuple ``(fs, FL, FR, FC)``: the sample rate followed by the three
        signals, each truncated to the length of the input.

    Raises:
        ValueError: If the input is not 2-channel stereo or has no samples.
    """
    wav = convert_to_wav_float(input_file)
    try:
        data, fs = sf.read(wav, dtype='float32')
    finally:
        # The converted copy is only needed for this read.
        os.unlink(wav)
    if data.ndim != 2 or data.shape[1] != 2:
        raise ValueError("Input must be stereo 2-channel")
    if len(data) == 0:
        raise ValueError("Input audio contains no samples")
    L, R = data[:, 0], data[:, 1]
    M = (L + R) / 2

    # One-second analysis windows with 50% overlap. Clamp to the signal length
    # so clips shorter than one second use the same segment size for the
    # forward and inverse transforms.
    nperseg = min(fs, len(L))
    noverlap = nperseg // 2
    stft_kwargs = dict(fs=fs, nperseg=nperseg, noverlap=noverlap)

    _, _, ZL = signal.stft(L, **stft_kwargs)
    _, _, ZR = signal.stft(R, **stft_kwargs)
    _, _, ZM = signal.stft(M, **stft_kwargs)

    # Centre estimate: content common to both sides (minimum magnitude),
    # carried on the phase of the mid signal.
    Zc = np.minimum(np.abs(ZL), np.abs(ZR)) * np.exp(1j * np.angle(ZM))
    Zl_res = ZL - Zc * rdf
    Zr_res = ZR - Zc * rdf

    _, FL = signal.istft(Zl_res, **stft_kwargs)
    _, FR = signal.istft(Zr_res, **stft_kwargs)
    _, FC = signal.istft(Zc, **stft_kwargs)
    # Slice back to the original sample count; the inverse transform can
    # return more samples than were put in.
    return fs, FL[:len(L)], FR[:len(R)], FC[:len(M)]


def create_5_1_surround(input_file, preset="music"):
    """
    Upmix a stereo file to a 6-channel (5.1) Ogg Vorbis file.

    Pipeline: phantom-centre extraction for FL/FR/FC, wet-only SoX reverb of
    the original left/right channels for the surrounds, a high-pass on every
    channel except the LFE, a low-pass of the L/R average for the LFE, then
    encoding with ffmpeg (libvorbis). Channels are stacked in the order
    FL, FR, FC, LFE, SL, SR.

    Args:
        input_file: Path of the stereo source file.
        preset: One of "music", "speech" or "open"; selects the filter
            cutoffs and the reverb arguments.

    Returns:
        Path of a newly created temporary ``.ogg`` file. The caller owns it.

    Raises:
        ValueError: If ``preset`` is unknown or no input file was given.
        subprocess.CalledProcessError: If the final ffmpeg encode fails.
        Errors raised by the helper functions propagate unchanged.
    """
    # preset -> (high-pass cutoff Hz, LFE low-pass cutoff Hz, SoX reverb args)
    # Reverb args order: reverberance (50%) HF-damping (50%) room-scale (100%)
    # stereo-depth (100%) pre-delay (0ms) wet-gain (0dB)
    presets = {
        "music": (120, 120, ['70', '40', '100', '95', '10', '-2']),
        "speech": (120, 120, ['50', '99', '50', '70', '0', '0']),
        "open": (120, 120, ['20', '50', '100', '100', '100', '0']),
    }
    try:
        hp_cutoff, lfe_cutoff, reverb_args = presets[preset]
    except (KeyError, TypeError):
        raise ValueError(f"Unknown preset: {preset}") from None

    # The UI passes None when nothing was uploaded; fail with a clear message
    # instead of letting the value reach the ffmpeg command line.
    if not input_file:
        raise ValueError("No input audio file provided")

    # 1. Extract FL/FR/phantom centre
    fs, FL, FR, FC = extract_phantom_center(input_file)

    # 2. Get stereo original for reverb
    wav = convert_to_wav_float(input_file)
    try:
        stereo, _ = sf.read(wav, dtype='float32')
    finally:
        os.unlink(wav)
    L_orig, R_orig = stereo[:, 0], stereo[:, 1]

    # 3. Wet-only reverb with chosen settings
    SL = apply_reverb_wet_only(L_orig, fs, reverb_args)
    SR = apply_reverb_wet_only(R_orig, fs, reverb_args)

    # 4. Highpass filter everything except LFE
    FL_hp, FR_hp, FC_hp, SL_hp, SR_hp = [
        sox_filter(ch, fs, 'highpass', hp_cutoff)
        for ch in (FL, FR, FC, SL, SR)
    ]

    # 5. Lowpass for LFE
    bass_sum = .5 * (L_orig + R_orig)
    LFE = sox_filter(bass_sum, fs, 'lowpass', lfe_cutoff)

    # 6. Stack and pad: zero-pad every channel at the end to the longest one
    channels = [FL_hp, FR_hp, FC_hp, LFE, SL_hp, SR_hp]
    length = max(len(ch) for ch in channels)
    multich = np.column_stack(
        [np.pad(ch, (0, length - len(ch))) for ch in channels]
    )

    # 7. Write WAV and encode to OGG
    out_wav = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
    out_wav.close()
    ogg_path = None
    try:
        sf.write(out_wav.name, multich, fs, subtype='FLOAT')
        out_ogg = tempfile.NamedTemporaryFile(suffix='.ogg', delete=False)
        out_ogg.close()
        ogg_path = out_ogg.name
        subprocess.run([
            "ffmpeg", "-y", "-i", out_wav.name,
            "-c:a", "libvorbis", "-ac", "6", "-channel_layout", "5.1", ogg_path
        ], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True)
    except BaseException:
        # Do not leave a partial or empty OGG behind when encoding failed.
        if ogg_path is not None:
            try:
                os.unlink(ogg_path)
            except OSError:
                pass  # best-effort cleanup
        raise
    finally:
        # The intermediate multichannel WAV is never needed afterwards.
        os.unlink(out_wav.name)
    return ogg_path


# ========== Gradio UI ==========
# Single-page interface: upload a file, pick a preset, convert, download.
with gr.Blocks(title="Stereo to 5.1 Surround") as demo:
    gr.Markdown("# 🎧 Stereo to 5.1 OGG Converter")
    # List every preset the dropdown offers, not just two of them.
    gr.Markdown("Choose the music, speech or open preset for surround processing")

    # type="filepath" hands the handler a path on disk rather than sample data.
    inp = gr.Audio(label="Upload stereo audio", type="filepath")
    preset = gr.Dropdown(
        label="Select Preset",
        choices=["music", "speech", "open"],
        value="music",
    )
    btn = gr.Button("Convert to 5.1 OGG")
    out = gr.File(label="Download 5.1 OGG")

    # The handler returns the path of the encoded OGG, offered as a download.
    btn.click(fn=create_5_1_surround, inputs=[inp, preset], outputs=[out])

# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()