Spaces:

vericudebuget
/

audio-super-resolution

Paused

App Files Files Community

vericudebuget committed on 13 days ago

Commit

1bb2233

verified ·

1 Parent(s): 223908c

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -28

app.py CHANGED Viewed

@@ -1,49 +1,54 @@
 import gradio as gr
 import soundfile as sf
-from huggingface_hub import hf_hub_download
-from audiosr.inference import super_resolution
 import numpy as np
 import os
-# Download the OpenVINO models
-hf_hub_download(repo_id="Intel/versatile_audio_super_resolution_openvino", filename="versatile_audio_sr_base_openvino_models.zip", local_dir=".")
-import zipfile
-with zipfile.ZipFile("versatile_audio_sr_base_openvino_models.zip", 'r') as zip_ref:
-    zip_ref.extractall(".")
-# Define the model paths
-model_paths = {
-    'audio_sr_decoder': 'audio_sr_decoder.xml',
-    'audio_sr_encoder': 'audio_sr_encoder.xml',
-    'vae_feature_extract': 'vae_feature_extract.xml',
-    'vocoder': 'vocoder.xml'
-}
-def audio_super_resolution(audio_file):
     """
     Performs audio super-resolution on the input audio file.
     """
     waveform, sr = sf.read(audio_file)
     if len(waveform.shape) > 1:
-        waveform = np.mean(waveform, axis=1) # expects mono audio
-    sf.write("input.wav", waveform, sr)
-    # Perform super-resolution
-    output = super_resolution(
-        "input.wav",
-        "output.wav",
-        model_paths=model_paths
     )
-    return "output.wav"
 # Create the Gradio interface
 iface = gr.Interface(
     fn=audio_super_resolution,
-    inputs=gr.Audio(type="filepath", label="Input Audio"),
     outputs=gr.Audio(type="filepath", label="Output Audio"),
-    title="Versatile Audio Super Resolution (OpenVINO)",
-    description="Upload an audio file to perform super-resolution.",
-    examples=[["example.wav"]]
 )
 if __name__ == "__main__":

 import gradio as gr
 import soundfile as sf
 import numpy as np
 import os
+from audiosr import super_resolution # Corrected import
+# Set device to CPU
+os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
+def audio_super_resolution(audio_file, guidance_scale, ddim_steps):
     """
     Performs audio super-resolution on the input audio file.
     """
+    # The library expects a file path, so we use the temp path provided by Gradio
     waveform, sr = sf.read(audio_file)
+    # The model works best with mono audio
     if len(waveform.shape) > 1:
+        waveform = np.mean(waveform, axis=1)
+    # Save the processed mono audio to a temporary file
+    temp_input_path = "temp_mono_input.wav"
+    sf.write(temp_input_path, waveform, sr)
+    output_path = "output.wav"
+    # Perform super-resolution using the main function from the library
+    # The function handles model loading and processing.
+    # We specify the device as 'cpu' for Hugging Face Spaces.
+    super_resolution(
+        temp_input_path,
+        output_path,
+        guidance_scale=float(guidance_scale),
+        ddim_steps=int(ddim_steps),
+        device="cpu"
     )
+    return output_path
 # Create the Gradio interface
 iface = gr.Interface(
     fn=audio_super_resolution,
+    inputs=[
+        gr.Audio(type="filepath", label="Input Audio"),
+        gr.Slider(minimum=1.0, maximum=10.0, value=3.5, step=0.1, label="Guidance Scale"),
+        gr.Slider(minimum=10, maximum=200, value=50, step=1, label="DDIM Steps")
+    ],
     outputs=gr.Audio(type="filepath", label="Output Audio"),
+    title="Versatile Audio Super Resolution",
+    description="Upload an audio file to perform super-resolution. This model upscales any audio to 48kHz.",
+    examples=[["example.wav", 3.5, 50]]
 )
 if __name__ == "__main__":