import os
import tempfile

import gradio as gr
import numpy as np
import soundfile as sf
from audiosr.inference import super_resolution
from huggingface_hub import hf_hub_download
import zipfile

# File name of the model bundle: used both as the name requested from the
# Hugging Face Hub repository and as the local path opened for extraction.
_MODEL_ARCHIVE = "versatile_audio_sr_base_openvino_models.zip"

# Download the bundle into the current working directory. This runs at
# import time, so the models are fetched before the UI is built.
hf_hub_download(
    repo_id="Intel/versatile_audio_super_resolution_openvino",
    filename=_MODEL_ARCHIVE,
    local_dir=".",
)

# Unpack everything next to the archive.
# NOTE(review): presumably the archive holds the .xml files listed in
# model_paths at its top level -- confirm against the bundle contents.
with zipfile.ZipFile(_MODEL_ARCHIVE, mode="r") as bundle:
    bundle.extractall(path=".")
# Maps each model component name to the .xml file that is handed to
# super_resolution() through its ``model_paths`` argument. Every file is
# named after its component, so the mapping is derived from the names.
model_paths = {
    component: component + '.xml'
    for component in (
        'audio_sr_decoder',
        'audio_sr_encoder',
        'vae_feature_extract',
        'vocoder',
    )
}
def audio_super_resolution(audio_file):
    """Run audio super-resolution on a file and return the result's path.

    The input is read with soundfile, averaged down to a single channel when
    it has more than one, written to a per-call temporary WAV file and passed
    to ``super_resolution`` together with the module-level ``model_paths``.

    Args:
        audio_file: Path of the audio file to process.

    Returns:
        Path of the WAV file that was given to ``super_resolution`` as its
        output location.

    Raises:
        gr.Error: If no audio file was supplied.
    """
    if not audio_file:
        # Without this guard a missing upload reaches sf.read() and fails
        # with an opaque error; gr.Error is shown to the user in the UI.
        raise gr.Error("Please provide an audio file before submitting.")

    waveform, sr = sf.read(audio_file)
    if waveform.ndim > 1:
        # Multi-channel data: average across axis 1 to get a mono signal.
        waveform = np.mean(waveform, axis=1)

    # Use a fresh directory per call. Fixed "input.wav"/"output.wav" names in
    # the working directory would be shared by concurrent requests, letting
    # one request overwrite another's files. The directory is deliberately
    # not removed here because the returned file must outlive this call.
    work_dir = tempfile.mkdtemp(prefix="audio_sr_")
    input_path = os.path.join(work_dir, "input.wav")
    output_path = os.path.join(work_dir, "output.wav")
    sf.write(input_path, waveform, sr)

    # NOTE(review): assumes super_resolution writes its result to the path
    # given as the second positional argument -- confirm against
    # audiosr.inference. Its return value is not used.
    super_resolution(input_path, output_path, model_paths=model_paths)
    return output_path
# Both audio widgets use type="filepath", so the callback receives a path
# string and is expected to return one.
_input_audio = gr.Audio(type="filepath", label="Input Audio")
_output_audio = gr.Audio(type="filepath", label="Output Audio")

# Web UI: a single audio input wired to audio_super_resolution, with the
# processed audio shown as the output.
iface = gr.Interface(
    fn=audio_super_resolution,
    inputs=_input_audio,
    outputs=_output_audio,
    title="Versatile Audio Super Resolution (OpenVINO)",
    description="Upload an audio file to perform super-resolution.",
    # NOTE(review): expects an "example.wav" to be available relative to the
    # working directory -- confirm it ships with the app.
    examples=[["example.wav"]],
)


# Start the app only when executed as a script, not when imported.
if __name__ == "__main__":
    iface.launch()