import os
import tempfile

import gradio as gr
import numpy as np
import soundfile as sf
from audiosr import super_resolution

# Force CPU execution (e.g. Hugging Face Spaces without a GPU).
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"


def audio_super_resolution(audio_file, guidance_scale, ddim_steps):
    """Run audio super-resolution on an uploaded audio file.

    Args:
        audio_file: Path to the input audio file (Gradio supplies a temp
            filepath, or None if nothing was uploaded).
        guidance_scale: Classifier-free guidance scale for the diffusion model.
        ddim_steps: Number of DDIM sampling steps.

    Returns:
        Filepath of the upscaled WAV file, for the gr.Audio output component.

    Raises:
        gr.Error: If no audio file was provided.
    """
    if audio_file is None:
        raise gr.Error("Please upload an audio file first.")

    waveform, sr = sf.read(audio_file)

    # The model works best with mono audio; downmix multi-channel input.
    if waveform.ndim > 1:
        waveform = np.mean(waveform, axis=1)

    # Use unique per-request temp paths: the previous fixed names
    # ("temp_mono_input.wav"/"output.wav") raced between concurrent requests.
    tmp_in = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    tmp_in.close()
    tmp_out = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    tmp_out.close()
    try:
        sf.write(tmp_in.name, waveform, sr)

        # The library expects file paths; it handles model loading internally.
        # NOTE(review): verify this positional (input, output) signature
        # against the installed audiosr version — some releases instead take
        # a model object from build_model() as the first argument.
        super_resolution(
            tmp_in.name,
            tmp_out.name,
            guidance_scale=float(guidance_scale),
            ddim_steps=int(ddim_steps),
            device="cpu",
        )
    finally:
        # The mono input is no longer needed once processing finishes/fails.
        if os.path.exists(tmp_in.name):
            os.remove(tmp_in.name)

    return tmp_out.name


# Only offer the bundled example if it actually exists; a missing example
# file would otherwise make gr.Interface fail at startup.
_EXAMPLES = [["example.wav", 3.5, 50]] if os.path.exists("example.wav") else None

# Create the Gradio interface.
iface = gr.Interface(
    fn=audio_super_resolution,
    inputs=[
        gr.Audio(type="filepath", label="Input Audio"),
        gr.Slider(minimum=1.0, maximum=10.0, value=3.5, step=0.1, label="Guidance Scale"),
        gr.Slider(minimum=10, maximum=200, value=50, step=1, label="DDIM Steps"),
    ],
    outputs=gr.Audio(type="filepath", label="Output Audio"),
    title="Versatile Audio Super Resolution",
    description="Upload an audio file to perform super-resolution. This model upscales any audio to 48kHz.",
    examples=_EXAMPLES,
)

if __name__ == "__main__":
    iface.launch()