vericudebuget committed on
Commit
1bb2233
·
verified ·
1 Parent(s): 223908c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -28
app.py CHANGED
@@ -1,49 +1,54 @@
1
  import gradio as gr
2
  import soundfile as sf
3
- from huggingface_hub import hf_hub_download
4
- from audiosr.inference import super_resolution
5
  import numpy as np
6
  import os
 
7
 
8
- # Download the OpenVINO models
9
- hf_hub_download(repo_id="Intel/versatile_audio_super_resolution_openvino", filename="versatile_audio_sr_base_openvino_models.zip", local_dir=".")
10
- import zipfile
11
- with zipfile.ZipFile("versatile_audio_sr_base_openvino_models.zip", 'r') as zip_ref:
12
- zip_ref.extractall(".")
13
 
14
- # Define the model paths
15
- model_paths = {
16
- 'audio_sr_decoder': 'audio_sr_decoder.xml',
17
- 'audio_sr_encoder': 'audio_sr_encoder.xml',
18
- 'vae_feature_extract': 'vae_feature_extract.xml',
19
- 'vocoder': 'vocoder.xml'
20
- }
21
-
22
- def audio_super_resolution(audio_file):
23
  """
24
  Performs audio super-resolution on the input audio file.
25
  """
 
26
  waveform, sr = sf.read(audio_file)
 
 
27
  if len(waveform.shape) > 1:
28
- waveform = np.mean(waveform, axis=1) # apects mono audio
29
- sf.write("input.wav", waveform, sr)
 
 
 
 
 
30
 
31
- # Perform super-resolution
32
- output = super_resolution(
33
- "input.wav",
34
- "output.wav",
35
- model_paths=model_paths
 
 
 
 
36
  )
37
- return "output.wav"
 
38
 
39
  # Create the Gradio interface
40
  iface = gr.Interface(
41
  fn=audio_super_resolution,
42
- inputs=gr.Audio(type="filepath", label="Input Audio"),
 
 
 
 
43
  outputs=gr.Audio(type="filepath", label="Output Audio"),
44
- title="Versatile Audio Super Resolution (OpenVINO)",
45
- description="Upload an audio file to perform super-resolution.",
46
- examples=[["example.wav"]]
47
  )
48
 
49
  if __name__ == "__main__":
 
1
  import gradio as gr
2
  import soundfile as sf
 
 
3
  import numpy as np
4
  import os
5
+ from audiosr import super_resolution # Corrected import
6
 
7
# Force CPU execution: "-1" hides every CUDA device from libraries that
# honor the CUDA_VISIBLE_DEVICES environment variable.
os.environ.update({"CUDA_VISIBLE_DEVICES": "-1"})
 
 
 
9
 
10
def audio_super_resolution(audio_file, guidance_scale, ddim_steps):
    """Run audio super-resolution on a file and return the output path.

    The input is read with ``soundfile``, averaged down to a single channel
    when it has more than one, written to a scratch WAV file, and handed to
    ``super_resolution`` together with the sampling settings.

    Args:
        audio_file: Path of the audio file to process. The Gradio input
            component is configured with ``type="filepath"``, so this is
            expected to be a path string.
        guidance_scale: Guidance scale forwarded to the model; coerced
            with ``float``.
        ddim_steps: Number of DDIM steps forwarded to the model; coerced
            with ``int``.

    Returns:
        The path given to ``super_resolution`` as its output destination.

    Raises:
        ValueError: If ``audio_file`` is ``None`` or empty.
    """
    import tempfile

    # Fail with a clear message instead of an opaque error from sf.read
    # when the user submits the form without selecting any audio.
    if not audio_file:
        raise ValueError("No input audio was provided.")

    waveform, sr = sf.read(audio_file)

    # The model works best with mono audio, so collapse the channel axis.
    if waveform.ndim > 1:
        waveform = np.mean(waveform, axis=1)

    # Use a per-call scratch directory: fixed file names in the working
    # directory would let concurrent requests overwrite each other's
    # input and output files.
    work_dir = tempfile.mkdtemp(prefix="audiosr_")
    temp_input_path = os.path.join(work_dir, "temp_mono_input.wav")
    output_path = os.path.join(work_dir, "output.wav")

    sf.write(temp_input_path, waveform, sr)
    try:
        # NOTE(review): upstream AudioSR appears to document
        # super_resolution(model, input_file, ...) returning a waveform,
        # with the model created by build_model(). Confirm that the
        # installed `audiosr` accepts (input_path, output_path, device=...)
        # as called here and actually writes `output_path`.
        super_resolution(
            temp_input_path,
            output_path,
            guidance_scale=float(guidance_scale),
            ddim_steps=int(ddim_steps),
            device="cpu",
        )
    finally:
        # The mono scratch copy is no longer needed once inference has
        # finished (or failed); the output file is left for the caller.
        try:
            os.remove(temp_input_path)
        except OSError:
            pass

    return output_path
39
 
40
  # Create the Gradio interface
41
  iface = gr.Interface(
42
  fn=audio_super_resolution,
43
+ inputs=[
44
+ gr.Audio(type="filepath", label="Input Audio"),
45
+ gr.Slider(minimum=1.0, maximum=10.0, value=3.5, step=0.1, label="Guidance Scale"),
46
+ gr.Slider(minimum=10, maximum=200, value=50, step=1, label="DDIM Steps")
47
+ ],
48
  outputs=gr.Audio(type="filepath", label="Output Audio"),
49
+ title="Versatile Audio Super Resolution",
50
+ description="Upload an audio file to perform super-resolution. This model upscales any audio to 48kHz.",
51
+ examples=[["example.wav", 3.5, 50]]
52
  )
53
 
54
  if __name__ == "__main__":