junseok committed on
Commit
f96e2ca
·
1 Parent(s): f8273e8

new commit

Browse files
Files changed (2) hide show
  1. app.py +2 -2
  2. predict.py +5 -2
app.py CHANGED
@@ -7,7 +7,7 @@ import gradio as gr
7
  model = load_model("wavlm_ecapa.model")
8
  model.eval()
9
 
10
- def calc_spksim(inp_path, ref_path):
11
  inp_wavs, inp_wav = loadWav(inp_path)
12
  ref_wavs, ref_wav = loadWav(ref_path)
13
 
@@ -35,7 +35,7 @@ Paper is available [here](https://arxiv.org/abs/2407.18505)
35
  """
36
 
37
  iface = gr.Interface(
38
- fn=calc_spksim,
39
  inputs=(
40
  gr.Audio(label="Input Audio"),
41
  gr.Audio(label="Reference Audio")
 
7
  model = load_model("wavlm_ecapa.model")
8
  model.eval()
9
 
10
+ def calc_voxsim(inp_path, ref_path):
11
  inp_wavs, inp_wav = loadWav(inp_path)
12
  ref_wavs, ref_wav = loadWav(ref_path)
13
 
 
35
  """
36
 
37
  iface = gr.Interface(
38
+ fn=calc_voxsim,
39
  inputs=(
40
  gr.Audio(label="Input Audio"),
41
  gr.Audio(label="Reference Audio")
predict.py CHANGED
@@ -31,8 +31,11 @@ def loadWav(filename, max_frames: int = 400):
31
  max_audio = max_frames * 160 + 240
32
 
33
  # Read wav file and convert to torch tensor
34
- print(type(filename))
35
- audio, sr = librosa.load(filename, sr=16000)
 
 
 
36
  audio_org = audio.copy()
37
 
38
  audiosize = audio.shape[0]
 
31
  max_audio = max_frames * 160 + 240
32
 
33
  # Read wav file and convert to torch tensor
34
+ if type(filename) == tuple:
35
+ sr, audio = filename
36
+ audio = librosa.util.normalize(audio)
37
+ else:
38
+ audio, sr = librosa.load(filename, sr=16000)
39
  audio_org = audio.copy()
40
 
41
  audiosize = audio.shape[0]