Spaces:

junseok520
/

VoxSIM

Running

junseok committed on Mar 4

Commit

ce904ba

1 Parent(s): f96e2ca

new commit

Files changed (3) hide show

app.py CHANGED Viewed

@@ -3,28 +3,37 @@ from predict import loadWav
 import torch
 import torch.nn.functional as F
 import gradio as gr
 model = load_model("wavlm_ecapa.model")
 model.eval()
 def calc_voxsim(inp_path, ref_path):
     """Score the similarity between an input recording and a reference recording.

     Each recording is loaded with ``loadWav``, which is unpacked here into two
     arrays per recording (presumably a stack of fixed-length segments and the
     full-length waveform -- TODO confirm against ``predict.loadWav``). All four
     arrays are embedded with the module-level ``model``; each embedding is
     L2-normalized along dim 1. The score is the average of two means: one over
     the products of the ``*_wavs`` embeddings and one over the products of the
     ``*_wav`` embeddings.

     Args:
         inp_path: Input recording, passed unchanged to ``loadWav``.
         ref_path: Reference recording, passed unchanged to ``loadWav``.

     Returns:
         The averaged score, converted to a NumPy value on the CPU.
     """
     inp_wavs, inp_wav = loadWav(inp_path)
     ref_wavs, ref_wav = loadWav(ref_path)
     # Convert whatever loadWav returned into float32 tensors for the model.
     inp_wavs = torch.FloatTensor(inp_wavs)
     inp_wav = torch.FloatTensor(inp_wav)
     ref_wavs = torch.FloatTensor(ref_wavs)
     ref_wav = torch.FloatTensor(ref_wav)
     # Inference only: gradient tracking is disabled for everything below.
     with torch.no_grad():
         # Unit-normalize along dim 1 so the matmul below yields dot products
         # of unit vectors (cosine similarities, assuming the embeddings are
         # 2-D with one row per item -- TODO confirm model output shape).
         input_emb_1 = F.normalize(model.forward(inp_wavs), p=2, dim=1)
         input_emb_2 = F.normalize(model.forward(inp_wav), p=2, dim=1)
         ref_emb_1 = F.normalize(model.forward(ref_wavs), p=2, dim=1)
         ref_emb_2 = F.normalize(model.forward(ref_wav), p=2, dim=1)
         # Mean over every input-row / reference-row pairing.
         score_1 = torch.mean(torch.matmul(input_emb_1, ref_emb_1.T))
         score_2 = torch.mean(torch.matmul(input_emb_2, ref_emb_2.T))
         # Both views are weighted equally in the final score.
         score = (score_1 + score_2) / 2
         return score.detach().cpu().numpy()
 description = """

 import torch
 import torch.nn.functional as F
 import gradio as gr
+import time
 model = load_model("wavlm_ecapa.model")
 model.eval()
 def calc_voxsim(inp_path, ref_path):
+    start = time.time()
     inp_wavs, inp_wav = loadWav(inp_path)
     ref_wavs, ref_wav = loadWav(ref_path)
+    print("loadWav time: ", time.time() - start)
     inp_wavs = torch.FloatTensor(inp_wavs)
     inp_wav = torch.FloatTensor(inp_wav)
     ref_wavs = torch.FloatTensor(ref_wavs)
     ref_wav = torch.FloatTensor(ref_wav)
+    print("torch.FloatTensor time: ", time.time() - start)
     with torch.no_grad():
         input_emb_1 = F.normalize(model.forward(inp_wavs), p=2, dim=1)
+        print("input_emb_1 time: ", time.time() - start)
         input_emb_2 = F.normalize(model.forward(inp_wav), p=2, dim=1)
+        print("input_emb_2 time: ", time.time() - start)
         ref_emb_1 = F.normalize(model.forward(ref_wavs), p=2, dim=1)
+        print("ref_emb_1 time: ", time.time() - start)
         ref_emb_2 = F.normalize(model.forward(ref_wav), p=2, dim=1)
+        print("ref_emb_2 time: ", time.time() - start)
         score_1 = torch.mean(torch.matmul(input_emb_1, ref_emb_1.T))
         score_2 = torch.mean(torch.matmul(input_emb_2, ref_emb_2.T))
         score = (score_1 + score_2) / 2
+        print("score time: ", time.time() - start)
         return score.detach().cpu().numpy()
 description = """

predict.py CHANGED Viewed

@@ -34,6 +34,7 @@ def loadWav(filename, max_frames: int = 400):
     if type(filename) == tuple:
         sr, audio = filename
         audio = librosa.util.normalize(audio)
     else:
         audio, sr = librosa.load(filename, sr=16000)
     audio_org = audio.copy()

     if type(filename) == tuple:
         sr, audio = filename
         audio = librosa.util.normalize(audio)
+        print(numpy.linalg.norm(audio))
     else:
         audio, sr = librosa.load(filename, sr=16000)
     audio_org = audio.copy()

score.py CHANGED Viewed

@@ -25,6 +25,11 @@ def load_parameters(model, ckpt_path):
                 model_state[name[6:]].copy_(param)
             else:
                 print("{} is not in the model.".format(name[6:]))
 class Score:

                 model_state[name[6:]].copy_(param)
             else:
                 print("{} is not in the model.".format(name[6:]))
+        else:
+            if name in model_state:
+                model_state[name].copy_(param)
+            else:
+                print("{} is not in the model.".format(name))
 class Score: