Spaces:

vericudebuget
/

audio-super-resolution

Paused

File size: 1,563 Bytes

4ed5fe0

import os
import tempfile
import zipfile

import gradio as gr
import numpy as np
import soundfile as sf
from audiosr.inference import super_resolution
from huggingface_hub import hf_hub_download

# Download the OpenVINO model archive into the working directory and unpack
# it there. hf_hub_download returns the local path of the fetched file, so
# that path is reused instead of repeating the archive name.
models_archive = hf_hub_download(
    repo_id="Intel/versatile_audio_super_resolution_openvino",
    filename="versatile_audio_sr_base_openvino_models.zip",
    local_dir=".",
)
with zipfile.ZipFile(models_archive, 'r') as zip_ref:
    zip_ref.extractall(".")

# Map every model component name to its "<name>.xml" file. The paths are
# relative, and the mapping is handed to super_resolution() as `model_paths`.
model_paths = {
    component: f"{component}.xml"
    for component in (
        'audio_sr_decoder',
        'audio_sr_encoder',
        'vae_feature_extract',
        'vocoder',
    )
}

def audio_super_resolution(audio_file):
    """
    Performs audio super-resolution on the input audio file.

    The input is read, down-mixed to a single channel when it has more than
    one, written to a scratch WAV file and handed to ``super_resolution``
    together with the module-level ``model_paths``.

    Args:
        audio_file: Path of the audio file to process (the Gradio input
            component is configured with ``type="filepath"``).

    Returns:
        Path of the WAV file that ``super_resolution`` was asked to write.

    Raises:
        gr.Error: If no audio file was supplied.
    """
    if not audio_file:
        # Surface a readable message in the UI instead of an opaque
        # soundfile failure on a missing path.
        raise gr.Error("Please provide an audio file.")

    waveform, sr = sf.read(audio_file)
    if waveform.ndim > 1:
        # Average the channels: the pipeline expects mono audio.
        waveform = np.mean(waveform, axis=1)

    # Use a scratch directory per call so overlapping requests never
    # overwrite each other's input/output files.
    work_dir = tempfile.mkdtemp(prefix="audio_sr_")
    input_path = os.path.join(work_dir, "input.wav")
    output_path = os.path.join(work_dir, "output.wav")

    try:
        sf.write(input_path, waveform, sr)

        # Perform super-resolution
        super_resolution(
            input_path,
            output_path,
            model_paths=model_paths
        )
    finally:
        # The intermediate mono file is only needed during inference. The
        # output file must stay on disk because its path is returned.
        if os.path.exists(input_path):
            os.remove(input_path)

    return output_path

# Build the Gradio UI: one audio input and one audio output, both exchanged
# with the callback as file paths.
audio_input = gr.Audio(type="filepath", label="Input Audio")
audio_output = gr.Audio(type="filepath", label="Output Audio")

iface = gr.Interface(
    fn=audio_super_resolution,
    inputs=audio_input,
    outputs=audio_output,
    title="Versatile Audio Super Resolution (OpenVINO)",
    description="Upload an audio file to perform super-resolution.",
    examples=[["example.wav"]],
)

# Launch the interface only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()