Spaces:

ahnafsamin
/

GroTTS-FastSpeech2

Build error

File size: 2,232 Bytes

04c250d
 
 
 
 
 
 
293d0d2
 
04c250d
4aa9e1f
 
 
 
 
 
 
293d0d2
4aa9e1f
293d0d2
04c250d
 
 
 
293d0d2
4aa9e1f
 
293d0d2
4aa9e1f
 
 
 
04c250d
293d0d2
04c250d
 
4aa9e1f
293d0d2
4aa9e1f
04c250d
4aa9e1f
 
 
04c250d
4aa9e1f

import gradio as gr
import time
import torch
import scipy.io.wavfile
from espnet2.bin.tts_inference import Text2Speech
from espnet2.utils.types import str_or_none

# Model identifiers. The acoustic model is given as a URL to a .zip archive;
# the vocoder is given as a tag string.
vocoder_tagen = "parallel_wavegan/ljspeech_parallel_wavegan.v3"
tagen = "https://huggingface.co/wietsedv/tacotron2-gronings/resolve/main/tts_ljspeech_finetune_tacotron2.v5_train.loss.ave.zip"

# Options forwarded verbatim to Text2Speech.from_pretrained().
# NOTE(review): threshold / *lenratio / attention-window settings look like
# decoding controls for an attention-based model -- confirm against the
# ESPnet Text2Speech documentation before changing them.
_TTS_OPTIONS = dict(
    device="cpu",
    threshold=0.5,
    minlenratio=0.0,
    maxlenratio=10.0,
    use_att_constraint=True,
    backward_window=1,
    forward_window=4,
)

# Loaded once at import time and reused by every inference() call.
text2speechen = Text2Speech.from_pretrained(
    model_tag=str_or_none(tagen),
    vocoder_tag=str_or_none(vocoder_tagen),
    **_TTS_OPTIONS,
)

def inference(text, lang):
    """Synthesize ``text`` to speech and return the path of the WAV file.

    Args:
        text: Text to synthesize.
        lang: Language selected in the UI. Only ``"gronings"`` is supported.

    Returns:
        The string ``"out.wav"``; the file at that path is overwritten on
        every successful call.

    Raises:
        ValueError: If ``lang`` is not ``"gronings"``. Without this check the
            function would skip synthesis and still return ``"out.wav"``,
            i.e. a stale file from an earlier request or a missing file.
    """
    if lang != "gronings":
        raise ValueError(
            f"Unsupported language {lang!r}; only 'gronings' is available."
        )

    # Pure inference: disable gradient tracking while running the model.
    with torch.no_grad():
        wav = text2speechen(text)["wav"]
        # Flatten to a 1-D sample array and write it at the model's sample rate.
        scipy.io.wavfile.write(
            "out.wav", text2speechen.fs, wav.view(-1).cpu().numpy()
        )

    return "out.wav"
# Static text shown on the demo page.
title = "ESPnet2-TTS"
# The demo takes text as input (not audio), so the instructions say so.
description = "Gradio demo for ESPnet2-TTS: Extending the Edge of TTS Research. To use it, simply enter your text, or click one of the examples to load them. Read more at the links below."
article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2110.07840' target='_blank'>ESPnet2-TTS: Extending the Edge of TTS Research</a> | <a href='https://github.com/espnet/espnet' target='_blank'>Github Repo</a></p>"

# Each example row is [input text, language]. The language must be a value
# that inference() actually synthesizes ("gronings"); the previous value
# "english" is not offered by the UI and produced no audio.
examples = [['This paper describes ESPnet2-TTS, an end-to-end text-to-speech (E2E-TTS) toolkit. ESPnet2-TTS extends our earlier version, ESPnet-TTS, by adding many new features, including: on-the-fly flexible pre-processing, joint training with neural vocoders, and state-of-the-art TTS models with extensions like full-band E2E text-to-waveform modeling, which simplify the training pipeline and further enhance TTS performance. The unified design of our recipes enables users to quickly reproduce state-of-the-art E2E-TTS results', "gronings"]]

# Assemble the web UI: a multi-line text box and a language selector as
# inputs, and an audio output fed by the file path that inference() returns.
demo = gr.Interface(
    fn=inference,
    inputs=[
        gr.inputs.Textbox(label="input text", lines=10),
        gr.inputs.Radio(
            choices=["gronings"],
            type="value",
            default="gronings",
            label="language",
        ),
    ],
    outputs=gr.outputs.Audio(type="file", label="Output"),
    title=title,
    description=description,
    article=article,
    enable_queue=True,
    examples=examples,
)

# Start serving the interface.
demo.launch(debug=True)