Spaces:
Running
Running
junseok
committed on
Commit
·
ce904ba
1
Parent(s):
f96e2ca
new commit
Browse files- app.py +9 -0
- predict.py +1 -0
- score.py +5 -0
app.py
CHANGED
@@ -3,28 +3,37 @@ from predict import loadWav
|
|
3 |
import torch
|
4 |
import torch.nn.functional as F
|
5 |
import gradio as gr
|
|
|
6 |
|
7 |
model = load_model("wavlm_ecapa.model")
|
8 |
model.eval()
|
9 |
|
10 |
def calc_voxsim(inp_path, ref_path):
    """Return a similarity score between an input and a reference recording.

    ``loadWav`` yields two waveform arrays per recording. Every array is
    converted to a float tensor, embedded with the module-level ``model`` and
    L2-normalised along dim 1. The mean of all pairwise dot products is taken
    once for the first arrays and once for the second arrays; the final score
    is the average of those two means.

    Args:
        inp_path: Input recording, forwarded unchanged to ``loadWav``.
        ref_path: Reference recording, forwarded unchanged to ``loadWav``.

    Returns:
        The score as a zero-dimensional NumPy array.
    """
    inp_first, inp_second = loadWav(inp_path)
    ref_first, ref_second = loadWav(ref_path)

    def embed(wav):
        # Unit-length rows turn the matmul below into a cosine similarity.
        return F.normalize(model.forward(torch.FloatTensor(wav)), p=2, dim=1)

    with torch.no_grad():
        # Same embedding order as before: both input arrays, then both
        # reference arrays.
        inp_emb_first = embed(inp_first)
        inp_emb_second = embed(inp_second)
        ref_emb_first = embed(ref_first)
        ref_emb_second = embed(ref_second)

        sim_first = torch.mean(torch.matmul(inp_emb_first, ref_emb_first.T))
        sim_second = torch.mean(torch.matmul(inp_emb_second, ref_emb_second.T))

    combined = (sim_first + sim_second) / 2
    return combined.detach().cpu().numpy()
|
29 |
|
30 |
description = """
|
|
|
3 |
import torch
|
4 |
import torch.nn.functional as F
|
5 |
import gradio as gr
|
6 |
+
import time
|
7 |
|
8 |
model = load_model("wavlm_ecapa.model")
|
9 |
model.eval()
|
10 |
|
11 |
def calc_voxsim(inp_path, ref_path):
    """Return a similarity score between an input and a reference recording.

    ``loadWav`` yields two waveform arrays per recording. Every array is
    converted to a float tensor, embedded with the module-level ``model`` and
    L2-normalised along dim 1. The mean of all pairwise dot products is taken
    once for the first arrays and once for the second arrays; the final score
    is the average of those two means.

    After each stage the elapsed time since the start of the call is printed.
    The printed values are cumulative, not per-stage durations.

    Args:
        inp_path: Input recording, forwarded unchanged to ``loadWav``.
        ref_path: Reference recording, forwarded unchanged to ``loadWav``.

    Returns:
        The score as a zero-dimensional NumPy array.
    """
    # perf_counter() is monotonic, so the reported intervals cannot go
    # negative or jump when the system clock is adjusted.
    start = time.perf_counter()

    def report(label):
        # Prints cumulative time since ``start`` with the given label.
        print(label, time.perf_counter() - start)

    inp_wavs, inp_wav = loadWav(inp_path)
    ref_wavs, ref_wav = loadWav(ref_path)
    report("loadWav time: ")

    inp_wavs = torch.FloatTensor(inp_wavs)
    inp_wav = torch.FloatTensor(inp_wav)
    ref_wavs = torch.FloatTensor(ref_wavs)
    ref_wav = torch.FloatTensor(ref_wav)
    report("torch.FloatTensor time: ")

    with torch.no_grad():
        # Call the module itself rather than .forward() so that any
        # registered hooks run (assumes ``model`` is a torch.nn.Module).
        input_emb_1 = F.normalize(model(inp_wavs), p=2, dim=1)
        report("input_emb_1 time: ")
        input_emb_2 = F.normalize(model(inp_wav), p=2, dim=1)
        report("input_emb_2 time: ")
        ref_emb_1 = F.normalize(model(ref_wavs), p=2, dim=1)
        report("ref_emb_1 time: ")
        ref_emb_2 = F.normalize(model(ref_wav), p=2, dim=1)
        report("ref_emb_2 time: ")

        # Rows are unit length, so each matmul entry is a cosine similarity.
        score_1 = torch.mean(torch.matmul(input_emb_1, ref_emb_1.T))
        score_2 = torch.mean(torch.matmul(input_emb_2, ref_emb_2.T))
        score = (score_1 + score_2) / 2
    report("score time: ")
    return score.detach().cpu().numpy()
|
38 |
|
39 |
description = """
|
predict.py
CHANGED
@@ -34,6 +34,7 @@ def loadWav(filename, max_frames: int = 400):
|
|
34 |
if type(filename) == tuple:
|
35 |
sr, audio = filename
|
36 |
audio = librosa.util.normalize(audio)
|
|
|
37 |
else:
|
38 |
audio, sr = librosa.load(filename, sr=16000)
|
39 |
audio_org = audio.copy()
|
|
|
34 |
if type(filename) == tuple:
|
35 |
sr, audio = filename
|
36 |
audio = librosa.util.normalize(audio)
|
37 |
+
print(numpy.linalg.norm(audio))
|
38 |
else:
|
39 |
audio, sr = librosa.load(filename, sr=16000)
|
40 |
audio_org = audio.copy()
|
score.py
CHANGED
@@ -25,6 +25,11 @@ def load_parameters(model, ckpt_path):
|
|
25 |
model_state[name[6:]].copy_(param)
|
26 |
else:
|
27 |
print("{} is not in the model.".format(name[6:]))
|
|
|
|
|
|
|
|
|
|
|
28 |
|
29 |
|
30 |
class Score:
|
|
|
25 |
model_state[name[6:]].copy_(param)
|
26 |
else:
|
27 |
print("{} is not in the model.".format(name[6:]))
|
28 |
+
else:
|
29 |
+
if name in model_state:
|
30 |
+
model_state[name].copy_(param)
|
31 |
+
else:
|
32 |
+
print("{} is not in the model.".format(name))
|
33 |
|
34 |
|
35 |
class Score:
|