File size: 1,593 Bytes
25b92d1
877be96
b150df1
 
ed47f0e
 
 
 
 
9719ddd
7432f05
 
 
9719ddd
ed47f0e
 
 
b150df1
ed47f0e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import pandas as pd
import gradio as gr
import torch
import torchaudio
import warnings
from cryptography.utils import CryptographyDeprecationWarning
with warnings.catch_warnings():
    warnings.filterwarnings('ignore', category=CryptographyDeprecationWarning)
    import paramiko


# Reference table of words; must provide the 'replica' (word) and
# 'transcription' (expected phoneme string) columns read below and in compare().
df= pd.read_csv('native_words_subset.csv')

# Disable every TorchScript JIT fuser before running the scripted models.
# NOTE(review): presumably works around a fuser incompatibility with these
# exported models — confirm before removing any of these flags.
torch._C._jit_override_can_fuse_on_cpu(False)
torch._C._jit_override_can_fuse_on_gpu(False)
torch._C._jit_set_texpr_fuser_enabled(False)
torch._C._jit_set_nvfuser_enabled(False)

# TorchScript modules: `loader` maps an audio file path to an input tensor,
# `model` is the acoustic model producing per-frame class probabilities.
loader = torch.jit.load("audio_loader.pt")
model = torch.jit.load('QuartzNet_thunderspeech_3.pt')

# Index-to-symbol table exported with the model. The last entry is remapped
# to '' (presumably the CTC blank) so decoding in convert_probs() drops it.
vocab = model.text_transform.vocab.itos
vocab[-1] = ''

def convert_probs(probs, itos=None):
  """Greedy CTC-style decode of frame-wise class probabilities.

  Takes the per-frame argmax of batch item 0, collapses consecutive
  repeats, and drops symbols that map to the empty string (the blank).

  Args:
    probs: tensor, expected shape (batch, classes, time); only item 0 of
      the batch dimension is decoded.
    itos: optional index-to-symbol table; defaults to the module-level
      `vocab` loaded from the model (backward compatible with the old
      zero-argument call).

  Returns:
    List of decoded symbol strings.
  """
  if itos is None:
    itos = vocab  # module-level table set up at load time
  ids = probs.argmax(1)[0]
  tokens = []
  prev = None
  for idx in ids:
    idx = int(idx)
    # Keep a symbol only when it differs from the previous frame's symbol
    # and is not the blank ('').
    if idx != prev:
      sym = itos[idx]
      if sym:
        tokens.append(sym)
    prev = idx
  return tokens
 
  
def predict(path):
  """Transcribe the audio file at `path` into a list of decoded symbols.

  Loads the waveform via the scripted `loader`, runs the acoustic model
  with a per-item length tensor, and greedily decodes the resulting
  frame-wise probabilities.
  """
  waveform = loader(path)
  n_frames = waveform.shape[-1]
  # Every batch item is reported at the full frame count.
  lengths = torch.tensor([n_frames] * waveform.shape[0], device=waveform.device)
  frame_probs = model(waveform, lengths)[0]
  return convert_probs(frame_probs)
 
 
from difflib import SequenceMatcher

def similar(a, b):
    """Return the difflib similarity ratio of `a` and `b`, in [0, 1]."""
    matcher = SequenceMatcher(a=a, b=b)
    return matcher.ratio()
    
def compare(word_choice, path):
  """Score the user's pronunciation of `word_choice`.

  Looks up the reference transcription for the chosen word in `df` and
  returns its similarity ratio (0..1) to the model's transcription of the
  recorded audio.
  """
  # The microphone input is optional, so no recording may be present.
  if path is None:
    return 'Please record some audio first.'
  etalon = df.loc[df['replica'] == word_choice, 'transcription'].values[0]
  user = predict(path)
  # Bug fix: the score was previously computed but never returned, so the
  # interface always displayed an empty result.
  # NOTE(review): predict() returns a list of tokens while `etalon` is a
  # string; SequenceMatcher compares them element-wise — confirm the
  # 'transcription' column format matches what predict() produces.
  return similar(user, etalon)


word_choice = gr.inputs.Dropdown(list(df['replica'].unique()), label="Choose a word")

audio_input = gr.inputs.Audio(source='microphone', type='filepath', optional=True)

# Bug fix: Gradio maps the `inputs` list positionally onto the function's
# parameters. compare(word_choice, path) therefore needs the dropdown FIRST
# and the audio recording SECOND; the old order fed the audio path into
# `word_choice` and the dropdown value into `path`.
gr.Interface(fn=compare, inputs=[word_choice, audio_input], outputs='text').launch(debug=True)