# Hugging Face Space by vericudebuget — app.py (commit 1bb2233, verified)
import os
import tempfile

import gradio as gr
import numpy as np
import soundfile as sf

from audiosr import super_resolution  # Corrected import
# Hide every CUDA device so all downstream libraries fall back to the CPU.
os.environ.update({"CUDA_VISIBLE_DEVICES": "-1"})
def audio_super_resolution(audio_file, guidance_scale, ddim_steps):
    """
    Perform audio super-resolution on the input audio file.

    Args:
        audio_file: Path to the uploaded audio file (Gradio passes a temp
            filepath, or None if the user submitted without uploading).
        guidance_scale: Classifier-free guidance scale for the diffusion model.
        ddim_steps: Number of DDIM sampling steps.

    Returns:
        Filepath of the upscaled WAV file, for Gradio to serve back.

    Raises:
        gr.Error: If no audio file was provided.
    """
    if audio_file is None:
        # Without this guard, sf.read(None) fails with an opaque TypeError.
        raise gr.Error("Please upload an audio file first.")

    # The library expects a file path, so we read the temp path from Gradio.
    waveform, sr = sf.read(audio_file)

    # The model works best with mono audio; average multi-channel input down.
    if waveform.ndim > 1:
        waveform = np.mean(waveform, axis=1)

    # Use unique paths instead of fixed filenames so concurrent requests
    # (multiple users on the Space) cannot clobber each other's files.
    fd, temp_input_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    output_path = os.path.join(tempfile.mkdtemp(prefix="audiosr_"), "output.wav")

    try:
        # Save the processed mono audio to a temporary file.
        sf.write(temp_input_path, waveform, sr)
        # Perform super-resolution using the main function from the library.
        # The function handles model loading and processing.
        # We specify the device as 'cpu' for Hugging Face Spaces.
        super_resolution(
            temp_input_path,
            output_path,
            guidance_scale=float(guidance_scale),
            ddim_steps=int(ddim_steps),
            device="cpu",
        )
    finally:
        # Clean up the intermediate mono file; the output must remain on
        # disk so Gradio can stream it to the client.
        if os.path.exists(temp_input_path):
            os.remove(temp_input_path)

    return output_path
# Create the Gradio interface
# Assemble the Gradio UI: name each widget first, then wire them together.
_input_audio = gr.Audio(type="filepath", label="Input Audio")
_guidance = gr.Slider(
    minimum=1.0, maximum=10.0, value=3.5, step=0.1, label="Guidance Scale"
)
_steps = gr.Slider(minimum=10, maximum=200, value=50, step=1, label="DDIM Steps")
_output_audio = gr.Audio(type="filepath", label="Output Audio")

iface = gr.Interface(
    fn=audio_super_resolution,
    inputs=[_input_audio, _guidance, _steps],
    outputs=_output_audio,
    title="Versatile Audio Super Resolution",
    description="Upload an audio file to perform super-resolution. This model upscales any audio to 48kHz.",
    examples=[["example.wav", 3.5, 50]]
)
# Launch the Gradio server only when executed as a script (not on import).
if __name__ == "__main__":
    iface.launch()