Spaces:

ziqiangao
/

surroundify

Sleeping

surroundify / app.py

ziqiangao

fix incorrectly feeding process data

05d36ea about 1 month ago

6.12 kB

	import numpy as np
	import soundfile as sf
	import subprocess
	import tempfile
	import os
	import gradio as gr
	from scipy import signal

	# ========== Processing Functions ==========

	def convert_to_wav_float(input_file):
	    """
	    Convert any input audio to 32-bit float WAV to preserve full dynamic range.

	    The decoding is delegated to the ``ffmpeg`` command-line tool.

	    Args:
	        input_file: Path of the audio file handed to ffmpeg as its input.

	    Returns:
	        Path of a newly created temporary ``.wav`` file. The file is not
	        deleted automatically; the caller is responsible for removing it.

	    Raises:
	        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
	    """
	    # Only the unique path is needed: close our own handle immediately and
	    # let ffmpeg overwrite the placeholder file (``-y``).
	    temp_wav = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
	    temp_wav.close()
	    try:
	        # pcm_f32le stores 32-bit little-endian float samples, which keeps
	        # the float dynamic range without clipping.
	        subprocess.run(
	            [
	                "ffmpeg", "-y", "-i", input_file,
	                "-c:a", "pcm_f32le", "-f", "wav", temp_wav.name,
	            ],
	            stdout=subprocess.DEVNULL,
	            stderr=subprocess.DEVNULL,
	            check=True,
	        )
	    except Exception:
	        # Do not leave the placeholder file behind when the conversion fails.
	        if os.path.exists(temp_wav.name):
	            os.unlink(temp_wav.name)
	        raise
	    return temp_wav.name


	def apply_reverb_wet_only(audio, samplerate, reverb_args):
	    """
	    Apply wet-only reverb using SoX to a single channel with custom reverb args.

	    The samples are written to a temporary float WAV, processed by the
	    ``sox`` command-line tool and read back.

	    Args:
	        audio: Samples to process.
	        samplerate: Sample rate used when writing ``audio`` to disk.
	        reverb_args: Iterable of string arguments appended after
	            ``reverb -w`` on the SoX command line.

	    Returns:
	        The SoX output, read back with dtype ``float32``.

	    Raises:
	        subprocess.CalledProcessError: If SoX exits with a non-zero status.
	    """
	    paths = []
	    try:
	        # Reserve two temp paths and close our handles right away, so that
	        # soundfile and SoX open the files themselves.
	        for _ in range(2):
	            handle = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
	            handle.close()
	            paths.append(handle.name)
	        src_path, dst_path = paths

	        sf.write(src_path, audio, samplerate, subtype='FLOAT')
	        subprocess.run(
	            ["sox", src_path, dst_path, "reverb", "-w", *reverb_args],
	            stdout=subprocess.DEVNULL,
	            stderr=subprocess.DEVNULL,
	            check=True,
	        )
	        wet, _ = sf.read(dst_path, dtype='float32')
	    finally:
	        # Always remove the temp files, including when SoX or soundfile fails.
	        for path in paths:
	            if os.path.exists(path):
	                os.unlink(path)
	    return wet



	def sox_filter(audio, samplerate, filter_type, cutoff):
	    """
	    Apply highpass or lowpass filter via SoX.

	    The samples are written to a temporary float WAV, processed by the
	    ``sox`` command-line tool and read back.

	    Args:
	        audio: Samples to filter.
	        samplerate: Sample rate used when writing ``audio`` to disk.
	        filter_type: SoX effect name, 'highpass' or 'lowpass'.
	        cutoff: Cutoff frequency in Hz; passed to SoX as ``str(cutoff)``.

	    Returns:
	        The filtered signal, read back with dtype ``float32``.

	    Raises:
	        subprocess.CalledProcessError: If SoX exits with a non-zero status.
	    """
	    paths = []
	    try:
	        # Reserve two temp paths and close our handles right away, so that
	        # soundfile and SoX open the files themselves.
	        for _ in range(2):
	            handle = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
	            handle.close()
	            paths.append(handle.name)
	        src_path, dst_path = paths

	        sf.write(src_path, audio, samplerate, subtype='FLOAT')
	        subprocess.run(
	            ["sox", src_path, dst_path, filter_type, str(cutoff)],
	            stdout=subprocess.DEVNULL,
	            stderr=subprocess.DEVNULL,
	            check=True,
	        )
	        out, _ = sf.read(dst_path, dtype='float32')
	    finally:
	        # Always remove the temp files, including when SoX or soundfile fails.
	        for path in paths:
	            if os.path.exists(path):
	                os.unlink(path)
	    return out


	def extract_phantom_center(input_file, rdf=0.99999):
	    """
	    Returns FL (front left without centre), FR, and FC (phantom centre).

	    The file is decoded to float WAV, then each channel and the mid signal
	    ``(L + R) / 2`` are transformed with an STFT. The centre estimate takes,
	    per bin, the smaller of the left/right magnitudes and the phase of the
	    mid signal. ``rdf`` times that estimate is subtracted from each side.

	    Args:
	        input_file: Path of the audio file to decode; must decode to two
	            channels.
	        rdf: Factor applied to the centre estimate before it is subtracted
	            from the left and right spectra.

	    Returns:
	        Tuple ``(fs, FL, FR, FC)``: the sample rate followed by the left
	        residual, right residual and centre signals, each cut to at most
	        the input length.

	    Raises:
	        ValueError: If the decoded audio is not 2-channel or has no samples.
	    """
	    wav = convert_to_wav_float(input_file)
	    try:
	        data, fs = sf.read(wav, dtype='float32')
	    finally:
	        # The decoded copy is only needed for this read; remove it even
	        # when reading fails.
	        os.unlink(wav)
	    if data.ndim != 2 or data.shape[1] != 2:
	        raise ValueError("Input must be stereo 2-channel")
	    if data.shape[0] == 0:
	        raise ValueError("Input audio contains no samples")
	    L, R = data[:, 0], data[:, 1]
	    M = (L + R) / 2

	    # One-second analysis window with 50% overlap. For clips shorter than
	    # one second the window is shrunk to the clip length, so that stft and
	    # istft are called with the same, valid segment length.
	    nperseg = min(fs, len(L))
	    noverlap = nperseg // 2
	    stft_args = dict(fs=fs, nperseg=nperseg, noverlap=noverlap)

	    _, _, ZL = signal.stft(L, **stft_args)
	    _, _, ZR = signal.stft(R, **stft_args)
	    _, _, ZM = signal.stft(M, **stft_args)

	    # Centre estimate: per-bin minimum of the two magnitudes, mid phase.
	    Zc = np.minimum(np.abs(ZL), np.abs(ZR)) * np.exp(1j * np.angle(ZM))
	    Zl_res = ZL - Zc * rdf
	    Zr_res = ZR - Zc * rdf

	    _, FL = signal.istft(Zl_res, **stft_args)
	    _, FR = signal.istft(Zr_res, **stft_args)
	    _, FC = signal.istft(Zc, **stft_args)
	    # Cut the reconstructions back to the original length.
	    return fs, FL[:len(L)], FR[:len(R)], FC[:len(M)]


	def create_5_1_surround(input_file, preset="music"):
	    """
	    Upmix a stereo audio file to a 6-channel (5.1) Ogg Vorbis file.

	    Steps: extract front left/right and a phantom centre, derive the two
	    surround channels from wet-only reverb of the original left/right
	    channels, high-pass those five channels, low-pass the mono sum for the
	    LFE channel, then encode everything with ffmpeg. The channels are
	    written in the order FL, FR, FC, LFE, SL, SR.

	    Args:
	        input_file: Path of the stereo audio file to process.
	        preset: One of "music", "speech" or "open"; selects the filter
	            cutoffs and the SoX reverb arguments.

	    Returns:
	        Path of a newly created temporary ``.ogg`` file. The file is not
	        deleted automatically.

	    Raises:
	        ValueError: If ``preset`` is unknown or no input file is given.
	        subprocess.CalledProcessError: If the final ffmpeg encode exits
	            with a non-zero status.
	    """
	    # Preset-based parameters: (high-pass cutoff, LFE low-pass cutoff, reverb args)
	    # Reverberance (50%) HF-damping (50%) room-scale (100%) stereo-depth (100%) pre-delay (0ms) wet-gain (0dB)
	    presets = {
	        "music": (120, 120, ['70', '40', '100', '95', '10', '-2']),
	        "speech": (120, 120, ['50', '99', '50', '70', '0', '0']),
	        "open": (120, 120, ['20', '50', '100', '100', '100', '0']),
	    }
	    try:
	        hp_cutoff, lfe_cutoff, reverb_args = presets[preset]
	    except (KeyError, TypeError):
	        raise ValueError(f"Unknown preset: {preset}") from None

	    # Fail with a clear message instead of an obscure subprocess error when
	    # no file path is given (e.g. the UI button was clicked without an upload).
	    if not input_file:
	        raise ValueError("No input audio file provided")

	    # 1. Extract FL/FR/phantom centre
	    fs, FL, FR, FC = extract_phantom_center(input_file)

	    # 2. Get stereo original for reverb
	    wav = convert_to_wav_float(input_file)
	    try:
	        stereo, _ = sf.read(wav, dtype='float32')
	    finally:
	        # Remove the decoded copy even when reading fails.
	        os.unlink(wav)
	    L_orig, R_orig = stereo[:, 0], stereo[:, 1]

	    # 3. Wet-only reverb with chosen settings
	    SL = apply_reverb_wet_only(L_orig, fs, reverb_args)
	    SR = apply_reverb_wet_only(R_orig, fs, reverb_args)

	    # 4. Highpass filter everything except LFE
	    FL_hp, FR_hp, FC_hp, SL_hp, SR_hp = [
	        sox_filter(ch, fs, 'highpass', hp_cutoff)
	        for ch in (FL, FR, FC, SL, SR)
	    ]

	    # 5. Lowpass for LFE
	    bass_sum = .5 * (L_orig + R_orig)
	    LFE = sox_filter(bass_sum, fs, 'lowpass', lfe_cutoff)

	    # 6. Stack and pad: zero-pad every channel at the end to the longest one
	    channels = [FL_hp, FR_hp, FC_hp, LFE, SL_hp, SR_hp]
	    length = max(len(ch) for ch in channels)
	    multich = np.column_stack(
	        [np.pad(ch, (0, length - len(ch))) for ch in channels]
	    )

	    # 7. Write WAV and encode to OGG
	    out_wav = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
	    out_wav.close()
	    try:
	        sf.write(out_wav.name, multich, fs, subtype='FLOAT')
	        out_ogg = tempfile.NamedTemporaryFile(suffix='.ogg', delete=False)
	        out_ogg.close()
	        try:
	            subprocess.run(
	                [
	                    "ffmpeg", "-y", "-i", out_wav.name,
	                    "-c:a", "libvorbis", "-ac", "6",
	                    "-channel_layout", "5.1", out_ogg.name,
	                ],
	                stdout=subprocess.DEVNULL,
	                stderr=subprocess.DEVNULL,
	                check=True,
	            )
	        except Exception:
	            # A failed encode must not leave an empty or partial output file.
	            if os.path.exists(out_ogg.name):
	                os.unlink(out_ogg.name)
	            raise
	    finally:
	        # The intermediate WAV is never needed after this point.
	        if os.path.exists(out_wav.name):
	            os.unlink(out_wav.name)
	    return out_ogg.name


	# ========== Gradio UI ==========
	# Build the upload form: one audio input, a preset selector, a convert
	# button and a file output for the encoded result.
	with gr.Blocks(title="Stereo to 5.1 Surround") as demo:
	    gr.Markdown("# 🎧 Stereo to 5.1 OGG Converter")
	    # The description names every preset offered by the dropdown below.
	    gr.Markdown("Choose the music, speech or open preset for surround processing")

	    # type="filepath" makes Gradio pass the upload as a path string.
	    inp = gr.Audio(label="Upload stereo audio", type="filepath")
	    preset = gr.Dropdown(
	        label="Select Preset",
	        choices=["music", "speech", "open"],
	        value="music",  # default selection
	    )
	    btn = gr.Button("Convert to 5.1 OGG")
	    out = gr.File(label="Download 5.1 OGG")

	    # The conversion returns the path of the OGG file, shown as a download.
	    btn.click(fn=create_5_1_surround, inputs=[inp, preset], outputs=[out])

	# Start the web app only when the file is run as a script.
	if __name__ == "__main__":
	    demo.launch()