# Hugging Face Space by vericudebuget — app.py (commit 1bb2233, verified)
import os
import tempfile

import gradio as gr
import numpy as np
import soundfile as sf

from audiosr import super_resolution  # Corrected import
# Hide every CUDA device so all downstream libraries fall back to the CPU.
os.environ.update({"CUDA_VISIBLE_DEVICES": "-1"})
def audio_super_resolution(audio_file, guidance_scale, ddim_steps):
    """
    Perform audio super-resolution on the input audio file.

    Args:
        audio_file: Path to the uploaded audio file (Gradio passes a temp
            filepath, or None if the user submitted without uploading).
        guidance_scale: Classifier-free guidance scale for the diffusion model.
        ddim_steps: Number of DDIM sampling steps.

    Returns:
        Filepath of the upscaled WAV file, for Gradio to serve back.

    Raises:
        gr.Error: If no audio file was provided.
    """
    if audio_file is None:
        # Without this guard, sf.read(None) fails with an opaque TypeError.
        raise gr.Error("Please upload an audio file first.")

    # The library expects a file path, so we read the temp path from Gradio.
    waveform, sr = sf.read(audio_file)

    # The model works best with mono audio; average multi-channel input down.
    if waveform.ndim > 1:
        waveform = np.mean(waveform, axis=1)

    # Use unique paths instead of fixed filenames so concurrent requests
    # (multiple users on the Space) cannot clobber each other's files.
    fd, temp_input_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    output_path = os.path.join(tempfile.mkdtemp(prefix="audiosr_"), "output.wav")

    try:
        # Save the processed mono audio to a temporary file.
        sf.write(temp_input_path, waveform, sr)
        # Perform super-resolution using the main function from the library.
        # The function handles model loading and processing.
        # We specify the device as 'cpu' for Hugging Face Spaces.
        super_resolution(
            temp_input_path,
            output_path,
            guidance_scale=float(guidance_scale),
            ddim_steps=int(ddim_steps),
            device="cpu",
        )
    finally:
        # Clean up the intermediate mono file; the output must remain on
        # disk so Gradio can stream it to the client.
        if os.path.exists(temp_input_path):
            os.remove(temp_input_path)

    return output_path
# Create the Gradio interface
# Assemble the Gradio UI: name each widget first, then wire them together.
_input_audio = gr.Audio(type="filepath", label="Input Audio")
_guidance = gr.Slider(
    minimum=1.0, maximum=10.0, value=3.5, step=0.1, label="Guidance Scale"
)
_steps = gr.Slider(minimum=10, maximum=200, value=50, step=1, label="DDIM Steps")
_output_audio = gr.Audio(type="filepath", label="Output Audio")

iface = gr.Interface(
    fn=audio_super_resolution,
    inputs=[_input_audio, _guidance, _steps],
    outputs=_output_audio,
    title="Versatile Audio Super Resolution",
    description="Upload an audio file to perform super-resolution. This model upscales any audio to 48kHz.",
    examples=[["example.wav", 3.5, 50]]
)
# Launch the Gradio server only when executed as a script (not on import).
if __name__ == "__main__":
    iface.launch()