File size: 1,806 Bytes
4ed5fe0
 
 
 
1bb2233
4ed5fe0
1bb2233
 
4ed5fe0
1bb2233
4ed5fe0
 
 
1bb2233
4ed5fe0
1bb2233
 
4ed5fe0
1bb2233
 
 
 
 
 
 
4ed5fe0
1bb2233
 
 
 
 
 
 
 
 
4ed5fe0
1bb2233
 
4ed5fe0
 
 
 
1bb2233
 
 
 
 
4ed5fe0
1bb2233
 
 
4ed5fe0
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import gradio as gr
import soundfile as sf
import numpy as np
import os
from audiosr import super_resolution # Corrected import

# Hide every CUDA device from this process so the model runs on the CPU.
os.environ.update({"CUDA_VISIBLE_DEVICES": "-1"})

def audio_super_resolution(audio_file, guidance_scale, ddim_steps):
    """
    Performs audio super-resolution on the input audio file.

    The input is read with soundfile, averaged down to mono when it has more
    than one channel, written to a temporary WAV file and passed to
    ``super_resolution`` together with an output path. Both paths live in a
    fresh per-call temporary directory so that simultaneous requests cannot
    overwrite each other's files.

    Args:
        audio_file: Path of the audio file to process (Gradio supplies a
            temporary file path for ``gr.Audio(type="filepath")``).
        guidance_scale: Guidance scale; converted with ``float`` before use.
        ddim_steps: Number of DDIM steps; converted with ``int`` before use.

    Returns:
        The path handed to ``super_resolution`` as its output location.

    Raises:
        ValueError: If no audio file was provided.
    """
    # Local imports: only this function needs them.
    import shutil
    import tempfile

    # Gradio passes None when the user submits without uploading anything.
    if not audio_file:
        raise ValueError("No audio file provided.")

    waveform, sr = sf.read(audio_file)

    # The model works best with mono audio
    if waveform.ndim > 1:
        waveform = np.mean(waveform, axis=1)

    # One private directory per call instead of fixed names in the working
    # directory, which every request would otherwise share.
    work_dir = tempfile.mkdtemp(prefix="audiosr_")
    temp_input_path = os.path.join(work_dir, "temp_mono_input.wav")
    output_path = os.path.join(work_dir, "output.wav")

    try:
        sf.write(temp_input_path, waveform, sr)

        # NOTE(review): confirm this call shape against the installed audiosr
        # release -- the upstream README appears to take a model created by
        # build_model() as the first argument and to return the waveform
        # rather than write a file. TODO confirm.
        super_resolution(
            temp_input_path,
            output_path,
            guidance_scale=float(guidance_scale),
            ddim_steps=int(ddim_steps),
            device="cpu",
        )
    except BaseException:
        # Nothing usable was produced; do not leave the directory behind.
        shutil.rmtree(work_dir, ignore_errors=True)
        raise

    # The mono copy is only an intermediate; the output must stay on disk so
    # the caller can read it after this function returns.
    try:
        os.remove(temp_input_path)
    except OSError:
        pass

    return output_path

# Web UI: one audio upload plus two sliders feed audio_super_resolution,
# and the returned file path is played back in an audio widget.
iface = gr.Interface(
    title="Versatile Audio Super Resolution",
    description="Upload an audio file to perform super-resolution. This model upscales any audio to 48kHz.",
    fn=audio_super_resolution,
    # Order matches the function's positional parameters:
    # audio_file, guidance_scale, ddim_steps.
    inputs=[
        gr.Audio(type="filepath", label="Input Audio"),
        gr.Slider(
            label="Guidance Scale",
            minimum=1.0,
            maximum=10.0,
            step=0.1,
            value=3.5,
        ),
        gr.Slider(
            label="DDIM Steps",
            minimum=10,
            maximum=200,
            step=1,
            value=50,
        ),
    ],
    outputs=gr.Audio(type="filepath", label="Output Audio"),
    # A single example row: file path, guidance scale, DDIM steps.
    examples=[["example.wav", 3.5, 50]],
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()