rutsam's picture
use 1000 words
99a10fe
raw
history blame
2.43 kB
import os
import subprocess
import tempfile
from typing import Optional

import gradio as gr
import numpy as np
from TTS.utils.synthesizer import Synthesizer
# Longest input, in characters, that is handed to the synthesizer; longer
# text is truncated to this length.
MAX_TXT_LEN = 1000

# Where the model checkpoints come from and the directory they are cloned into
# (relative to the current working directory).
_MODEL_REPO_URL = "https://huggingface.co/DigitalUmuganda/Kinyarwanda_YourTTS"
_MODEL_DIR = "Kinyarwanda_YourTTS"

# Argument lists (no shell) avoid an extra shell process and any quoting issues.
subprocess.check_output(["git", "lfs", "install"])

# `git clone` refuses to clone into an existing non-empty directory, which
# would raise CalledProcessError and crash the app on every restart after the
# first one. Only clone when the checkout is not there yet.
# NOTE(review): an interrupted clone leaves a partial directory that is not
# repaired here -- delete the directory manually in that case.
if not os.path.isdir(_MODEL_DIR):
    subprocess.check_output(["git", "clone", _MODEL_REPO_URL, _MODEL_DIR])
# Synthesizer shared by all requests; created on first use by _get_synthesizer().
_synthesizer = None


def _get_synthesizer():
    """Return the shared Synthesizer, loading the checkpoints on first use."""
    global _synthesizer
    if _synthesizer is None:
        # Building a Synthesizer loads the model and speaker-encoder
        # checkpoints, so do it once instead of on every request.
        _synthesizer = Synthesizer(
            "./Kinyarwanda_YourTTS/model.pth",
            "Kinyarwanda_YourTTS/config.json",
            tts_speakers_file="Kinyarwanda_YourTTS/speakers.pth",
            encoder_checkpoint="Kinyarwanda_YourTTS/SE_checkpoint.pth.tar",
            encoder_config="Kinyarwanda_YourTTS/config_se.json",
        )
    return _synthesizer


def generate_audio(text):
    """Synthesize ``text`` to speech and return the path of the WAV file.

    Input longer than ``MAX_TXT_LEN`` characters is truncated to that length
    and a notice is printed. The voice is conditioned on the reference
    recording ``Kinyarwanda_YourTTS/conditioning_audio.wav``.

    Args:
        text: The text to speak.

    Returns:
        Path of a newly created temporary ``.wav`` file. The file is not
        deleted by this function.
    """
    if len(text) > MAX_TXT_LEN:
        text = text[:MAX_TXT_LEN]
        print(f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")

    synthesizer = _get_synthesizer()
    wav = synthesizer.tts(text, speaker_wav="Kinyarwanda_YourTTS/conditioning_audio.wav")

    # delete=False: the file has to outlive this function because only its
    # path is returned to the caller.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        synthesizer.save_wav(wav, fp)
        return fp.name
def _build_interface():
    """Assemble the Gradio UI: one text box in, one synthesized audio clip out."""
    text_box = gr.inputs.Textbox(
        label="Input Text",
        default="This sentence has been generated by a speech synthesis system.",
    )
    # The audio component receives a file path, which is what generate_audio returns.
    audio_player = gr.outputs.Audio(type="filepath", label="Output")

    return gr.Interface(
        fn=generate_audio,
        inputs=[text_box],
        outputs=audio_player,
        title="Kinyarwanda tts Demo",
        description="Kinyarwanda tts build with ",
        allow_flagging=False,
        flagging_options=['error', 'bad-quality', 'wrong-pronounciation'],
        layout="vertical",
        live=False,
    )


iface = _build_interface()
iface.launch(share=False)