import os

# Hide all CUDA devices before anything imports torch so AudioSR runs on the CPU.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

import gradio as gr
import numpy as np
import soundfile as sf
from audiosr import build_model, super_resolution

# Build the AudioSR latent-diffusion model once at start-up so each request only runs inference.
# This assumes the audiosr pipeline API (build_model(...) plus super_resolution(model, path, ...));
# "basic" is the general-purpose checkpoint.
audiosr_model = build_model(model_name="basic", device="cpu")


def audio_super_resolution(audio_file, guidance_scale, ddim_steps):
    """Run super-resolution on the uploaded audio and return the path of the 48 kHz result."""
    # Gradio passes a file path because the input component uses type="filepath".
    waveform, sr = sf.read(audio_file)

    # AudioSR expects mono input, so average any extra channels down to one.
    if waveform.ndim > 1:
        waveform = np.mean(waveform, axis=1)

    # super_resolution reads audio from a path, so stage the mono waveform in a temporary file.
    temp_input_path = "temp_mono_input.wav"
    sf.write(temp_input_path, waveform, sr)

    # The audiosr pipeline returns the upsampled 48 kHz waveform instead of writing a file;
    # the returned array is assumed to carry leading batch/channel axes, hence the squeeze below.
    upsampled = super_resolution(
        audiosr_model,
        temp_input_path,
        guidance_scale=float(guidance_scale),
        ddim_steps=int(ddim_steps),
    )

    # Save the result where Gradio can pick it up and return the path to the output component.
    output_path = "output.wav"
    sf.write(output_path, np.squeeze(upsampled), 48000)
    return output_path
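
# A hypothetical direct call, bypassing the Gradio UI (the input path is a placeholder):
#     enhanced_path = audio_super_resolution("my_16khz_recording.wav", guidance_scale=3.5, ddim_steps=50)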


iface = gr.Interface(
    fn=audio_super_resolution,
    inputs=[
        gr.Audio(type="filepath", label="Input Audio"),
        gr.Slider(minimum=1.0, maximum=10.0, value=3.5, step=0.1, label="Guidance Scale"),
        gr.Slider(minimum=10, maximum=200, value=50, step=1, label="DDIM Steps"),
    ],
    outputs=gr.Audio(type="filepath", label="Output Audio"),
    title="Versatile Audio Super Resolution",
    description="Upload an audio file to perform super-resolution. The model upscales any audio to 48 kHz.",
    # The example row assumes an example.wav file sits next to this script.
    examples=[["example.wav", 3.5, 50]],
)

if __name__ == "__main__":
    # launch() starts a local Gradio server (http://127.0.0.1:7860 by default).
    iface.launch()
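
# A minimal way to try the demo locally, assuming the dependencies are installed
# (package names as published on PyPI; the script filename is a placeholder):
#     pip install audiosr gradio soundfile
#     python app.py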