Update app.py
Browse files
app.py
CHANGED
@@ -1,49 +1,54 @@
|
|
1 |
import gradio as gr
|
2 |
import soundfile as sf
|
3 |
-
from huggingface_hub import hf_hub_download
|
4 |
-
from audiosr.inference import super_resolution
|
5 |
import numpy as np
|
6 |
import os
|
|
|
7 |
|
8 |
-
#
|
9 |
-
|
10 |
-
import zipfile
|
11 |
-
with zipfile.ZipFile("versatile_audio_sr_base_openvino_models.zip", 'r') as zip_ref:
|
12 |
-
zip_ref.extractall(".")
|
13 |
|
14 |
-
|
15 |
-
model_paths = {
|
16 |
-
'audio_sr_decoder': 'audio_sr_decoder.xml',
|
17 |
-
'audio_sr_encoder': 'audio_sr_encoder.xml',
|
18 |
-
'vae_feature_extract': 'vae_feature_extract.xml',
|
19 |
-
'vocoder': 'vocoder.xml'
|
20 |
-
}
|
21 |
-
|
22 |
-
def audio_super_resolution(audio_file):
|
23 |
"""
|
24 |
Performs audio super-resolution on the input audio file.
|
25 |
"""
|
|
|
26 |
waveform, sr = sf.read(audio_file)
|
|
|
|
|
27 |
if len(waveform.shape) > 1:
|
28 |
-
waveform = np.mean(waveform, axis=1)
|
29 |
-
|
|
|
|
|
|
|
|
|
|
|
30 |
|
31 |
-
# Perform super-resolution
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
|
|
|
|
|
|
|
|
36 |
)
|
37 |
-
|
|
|
38 |
|
39 |
# Create the Gradio interface
|
40 |
iface = gr.Interface(
|
41 |
fn=audio_super_resolution,
|
42 |
-
inputs=
|
|
|
|
|
|
|
|
|
43 |
outputs=gr.Audio(type="filepath", label="Output Audio"),
|
44 |
-
title="Versatile Audio Super Resolution
|
45 |
-
description="Upload an audio file to perform super-resolution.",
|
46 |
-
examples=[["example.wav"]]
|
47 |
)
|
48 |
|
49 |
if __name__ == "__main__":
|
|
|
# app.py — Gradio Space for Versatile Audio Super Resolution.
#
# Force CPU-only execution *before* importing the model library: torch reads
# CUDA_VISIBLE_DEVICES when the CUDA context is initialized, so setting it
# after `audiosr` (which imports torch) is loaded can be too late.
import os

os.environ["CUDA_VISIBLE_DEVICES"] = "-1"  # hide all GPUs -> CPU-only

import gradio as gr
import numpy as np
import soundfile as sf
from audiosr import super_resolution  # Corrected import
|
def audio_super_resolution(audio_file, guidance_scale, ddim_steps):
    """
    Performs audio super-resolution on the input audio file.

    Args:
        audio_file: Path to the input audio file (Gradio supplies a temp path).
        guidance_scale: Classifier-free guidance scale for the diffusion sampler.
        ddim_steps: Number of DDIM sampling steps.

    Returns:
        Path to the super-resolved WAV file.

    Raises:
        ValueError: If no input audio file was provided.
    """
    import tempfile  # local import keeps this fix self-contained

    if audio_file is None:
        raise ValueError("No input audio file was provided.")

    # The library expects a file path, so we use the temp path provided by Gradio
    waveform, sr = sf.read(audio_file)

    # The model works best with mono audio
    if len(waveform.shape) > 1:
        waveform = np.mean(waveform, axis=1)

    # Use unique temp paths instead of the fixed names "temp_mono_input.wav" /
    # "output.wav": fixed names are clobbered when Gradio serves concurrent
    # requests from the same working directory.
    in_fd, temp_input_path = tempfile.mkstemp(suffix=".wav")
    os.close(in_fd)
    out_fd, output_path = tempfile.mkstemp(suffix=".wav")
    os.close(out_fd)

    try:
        # Save the processed mono audio to a temporary file
        sf.write(temp_input_path, waveform, sr)

        # Perform super-resolution using the main function from the library.
        # NOTE(review): assumes super_resolution(input_path, output_path, ...)
        # writes its result to the second positional path — confirm against the
        # installed audiosr version's API.
        # We specify the device as 'cpu' for Hugging Face Spaces.
        super_resolution(
            temp_input_path,
            output_path,
            guidance_scale=float(guidance_scale),
            ddim_steps=int(ddim_steps),
            device="cpu"
        )
    finally:
        # Always remove the intermediate mono file; the output file is handed
        # back to Gradio, which manages its lifetime.
        if os.path.exists(temp_input_path):
            os.remove(temp_input_path)

    return output_path
39 |
|
40 |
# Create the Gradio interface
|
41 |
iface = gr.Interface(
|
42 |
fn=audio_super_resolution,
|
43 |
+
inputs=[
|
44 |
+
gr.Audio(type="filepath", label="Input Audio"),
|
45 |
+
gr.Slider(minimum=1.0, maximum=10.0, value=3.5, step=0.1, label="Guidance Scale"),
|
46 |
+
gr.Slider(minimum=10, maximum=200, value=50, step=1, label="DDIM Steps")
|
47 |
+
],
|
48 |
outputs=gr.Audio(type="filepath", label="Output Audio"),
|
49 |
+
title="Versatile Audio Super Resolution",
|
50 |
+
description="Upload an audio file to perform super-resolution. This model upscales any audio to 48kHz.",
|
51 |
+
examples=[["example.wav", 3.5, 50]]
|
52 |
)
|
53 |
|
54 |
if __name__ == "__main__":
|