# Source: Hugging Face Space "vericudebuget/audio-super-resolution" (status: Paused).
# File size: 1,806 bytes. Revisions shown in the blame gutter: 4ed5fe0, 1bb2233.

import gradio as gr
import soundfile as sf
import numpy as np
import os
from audiosr import super_resolution # Corrected import

# Force CPU execution: "-1" is not a valid GPU index, so CUDA-aware libraries
# that honour CUDA_VISIBLE_DEVICES should see no devices in this process.
# NOTE(review): this assignment runs after `audiosr` has already been imported
# above; presumably that is fine as long as CUDA is only initialised lazily --
# confirm, or move the assignment ahead of the imports.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

# AudioSR always produces 48 kHz audio (see the interface description below).
_OUTPUT_SAMPLE_RATE = 48000

# Built lazily on the first request and then shared, because loading the
# checkpoint is far too slow to repeat for every call.
_AUDIOSR_MODEL = None


def _get_audiosr_model():
    """Return the shared AudioSR model, building it on the CPU on first use."""
    global _AUDIOSR_MODEL
    if _AUDIOSR_MODEL is None:
        # Function-scope import keeps the file's top-level import block as is.
        from audiosr import build_model

        _AUDIOSR_MODEL = build_model(model_name="basic", device="cpu")
    return _AUDIOSR_MODEL


def audio_super_resolution(audio_file, guidance_scale, ddim_steps):
    """Upscale an audio file with AudioSR and return the path of the result.

    The input is down-mixed to mono, written to a private temporary file,
    run through the AudioSR diffusion model on the CPU, and the generated
    waveform is saved as a new 48 kHz WAV file.

    Args:
        audio_file: Path of the uploaded audio file, as supplied by a
            ``gr.Audio(type="filepath")`` component.
        guidance_scale: Classifier-free guidance strength; coerced to float.
        ddim_steps: Number of DDIM sampling steps; coerced to int.

    Returns:
        Path of a freshly created WAV file holding the upscaled audio.

    Raises:
        gr.Error: If no audio file was provided.
    """
    import tempfile

    if not audio_file:
        # Gradio passes None when the user submits without uploading anything.
        raise gr.Error("Please upload an audio file first.")

    waveform, sr = sf.read(audio_file)

    # The model works best with mono audio: average the channel axis.
    if waveform.ndim > 1:
        waveform = np.mean(waveform, axis=1)

    # A per-call scratch directory keeps concurrent requests from overwriting
    # each other's input and guarantees the mono copy is removed afterwards.
    with tempfile.TemporaryDirectory(prefix="audiosr_") as workdir:
        mono_path = os.path.join(workdir, "mono_input.wav")
        sf.write(mono_path, waveform, sr)

        # audiosr.super_resolution takes the model first and the input path
        # second; it returns the generated waveform instead of writing a file.
        upscaled = super_resolution(
            _get_audiosr_model(),
            mono_path,
            guidance_scale=float(guidance_scale),
            ddim_steps=int(ddim_steps),
        )

    # The result is expected to carry singleton batch/channel axes; squeezing
    # them leaves a plain 1-D sample array that soundfile can write as mono.
    samples = np.squeeze(np.asarray(upscaled))

    # The output must outlive this call so Gradio can serve it, hence a
    # uniquely named file that is not deleted on close.
    with tempfile.NamedTemporaryFile(
        prefix="audiosr_out_", suffix=".wav", delete=False
    ) as handle:
        output_path = handle.name
    sf.write(output_path, samples, _OUTPUT_SAMPLE_RATE)

    return output_path

# Assemble the web UI piece by piece: one audio upload, two diffusion
# controls, and a single audio player for the result.
_input_audio = gr.Audio(type="filepath", label="Input Audio")
_guidance_slider = gr.Slider(
    label="Guidance Scale",
    minimum=1.0,
    maximum=10.0,
    step=0.1,
    value=3.5,
)
_steps_slider = gr.Slider(
    label="DDIM Steps",
    minimum=10,
    maximum=200,
    step=1,
    value=50,
)
_output_audio = gr.Audio(type="filepath", label="Output Audio")

# Keyword table for the interface; the example row follows the same order as
# the input components (audio file, guidance scale, DDIM steps).
_interface_options = {
    "fn": audio_super_resolution,
    "inputs": [_input_audio, _guidance_slider, _steps_slider],
    "outputs": _output_audio,
    "title": "Versatile Audio Super Resolution",
    "description": "Upload an audio file to perform super-resolution. This model upscales any audio to 48kHz.",
    "examples": [["example.wav", 3.5, 50]],
}

iface = gr.Interface(**_interface_options)

# Start the server only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()