Spaces:

ahnafsamin
/

GroTTS-FastSpeech2

Build error

File size: 1,270 Bytes

04c250d
 
f6fdf7a
 
 
04c250d
 
 
 
 
f6fdf7a
39d41fb
f6fdf7a
 
 
 
 
 
 
 
04c250d
 
 
 
293d0d2
f6fdf7a
 
 
d3fe331
04c250d
f6fdf7a
 
 
 
04c250d
 
f6fdf7a
cc8e5e7
f6fdf7a
04c250d
 
f6fdf7a

import gradio as gr
import time
import urllib.request
from pathlib import Path
import os
import torch
import scipy.io.wavfile
from espnet2.bin.tts_inference import Text2Speech
from espnet2.utils.types import str_or_none

# Identifiers of the pretrained acoustic model and vocoder handed to ESPnet.
_GOS_MODEL_TAG = "https://huggingface.co/ahnafsamin/FastSpeech2-gronings/blob/main/train.loss.ave_5best.pth"
_GOS_VOCODER_TAG = "parallel_wavegan/ljspeech_parallel_wavegan.v3"

# Decoding options forwarded unchanged to Text2Speech.from_pretrained.
_GOS_DECODE_OPTIONS = dict(
    threshold=0.5,
    minlenratio=0.0,
    maxlenratio=10.0,
    use_att_constraint=True,
    backward_window=1,
    forward_window=4,
)

# Built once at import time on the CPU; inference() calls this object for
# every request instead of reloading the model.
gos_text2speech = Text2Speech.from_pretrained(
    model_tag=_GOS_MODEL_TAG,
    vocoder_tag=_GOS_VOCODER_TAG,
    device="cpu",
    **_GOS_DECODE_OPTIONS,
)

def inference(text, lang):
    """Synthesize speech for *text* and save it as ``out.wav``.

    Args:
        text: The sentence to synthesize.
        lang: Language selector; only ``"gronings"`` is supported.

    Returns:
        A 2-tuple ``("out.wav", "out.wav")``: the same file path twice, one
        entry per output component of the interface (audio player and
        file download).

    Raises:
        ValueError: If *lang* is not ``"gronings"``.
    """
    # Fail fast on an unknown language. Falling through here would return
    # "out.wav" without writing it, i.e. a stale file from an earlier
    # request or a path that does not exist at all.
    if lang != "gronings":
        raise ValueError(f"unsupported language: {lang!r}")

    output_path = "out.wav"
    # Gradients are not needed for synthesis.
    with torch.no_grad():
        wav = gos_text2speech(text)["wav"]
        # Flatten the waveform tensor to 1-D, move it to the CPU and convert
        # it to a NumPy array; the model's `fs` attribute is used as the
        # sample rate of the written file.
        samples = wav.view(-1).cpu().numpy()
        scipy.io.wavfile.write(output_path, gos_text2speech.fs, samples)

    return output_path, output_path

# Title passed to the interface, and one example row (text, language) in the
# same order as the input components below.
title = "GroTTS"
examples = [
    ['Ze gingen mit klas noar waddendiek, over en deur bragel lopen.', 'gronings'],
]

# Input components: free text plus a single-choice language selector.
_text_input = gr.inputs.Textbox(label="input text", lines=3)
_language_input = gr.inputs.Radio(
    choices=["gronings"],
    type="value",
    default="gronings",
    label="language",
)

# Output components: inference() returns one file path for each of them.
_audio_output = gr.outputs.Audio(type="file", label="Output")
_file_output = gr.outputs.File()

_demo = gr.Interface(
    inference,
    [_text_input, _language_input],
    [_audio_output, _file_output],
    title=title,
    examples=examples,
)
_demo.launch(enable_queue=True)