rutsam's picture
upload file by file
a33e730
raw
history blame
2.65 kB
import tempfile
from typing import Optional
import gradio as gr
import numpy as np
from TTS.api import TTS
from huggingface_hub import hf_hub_download
import subprocess
# Upper bound on the number of input characters handled per request.
MAX_TXT_LEN = 100

# Hugging Face Hub repository that hosts the model artifacts.
REPO_NAME = "DigitalUmuganda/Kinyarwanda_YourTTS"

# Fetch each artifact from the Hub when the module is loaded. The values
# returned by hf_hub_download are not kept, and the files are requested in the
# order listed here.
for _artifact in (
    "config.json",
    "SE_checkpoint.pth.tar",
    "config_se.json",
    "model.pth",
    "speakers.pth",
    "conditioning_audio.wav",
):
    hf_hub_download(repo_id=REPO_NAME, filename=_artifact)
def generate_audio(text: str) -> str:
    """Handle one request coming from the Gradio interface.

    Speech synthesis is currently disabled (see the commented-out reference
    code in the body). As a placeholder the function returns the listing of
    the current working directory, repeated twice, as text.

    Args:
        text: Text typed by the user. It is cut to ``MAX_TXT_LEN`` characters,
            but the placeholder output does not depend on it.

    Returns:
        The output of ``ls`` for the current working directory, two times in
        a row, decoded as UTF-8.

    Raises:
        subprocess.CalledProcessError: If ``ls`` exits with a non-zero status.
        FileNotFoundError: If no ``ls`` executable can be found.
    """
    if len(text) > MAX_TXT_LEN:
        text = text[:MAX_TXT_LEN]
        print(f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")

    # ---- Disabled synthesis code, kept for reference -----------------------
    # First variant (Synthesizer based):
    # model_path, config_path, model_item = manager.download_model(model_name)
    # vocoder_name: Optional[str] = model_item["default_vocoder"]
    # vocoder_path = None
    # vocoder_config_path = None
    # if vocoder_name is not None:
    #     vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)
    # synthesizer = Synthesizer(
    #     model_path, config_path, None, None, vocoder_path, vocoder_config_path,
    # )
    # if synthesizer is None:
    #     raise NameError("model not found")
    #
    # Second variant (TTS API based):
    # NOTE(review): the directory is spelled "Kinyarwanda_YourTTS" in the
    # constructor but "kinyarwanda_YourTTS" in the speaker_wav path -- confirm
    # which one is right before re-enabling.
    # tts = TTS(model_path="Kinyarwanda_YourTTS/model.pth",
    #           config_path="Kinyarwanda_YourTTS/config.json",
    #           tts_speakers_file="Kinyarwanda_YourTTS/speakers.pth",
    #           encoder_checkpoint="Kinyarwanda_YourTTS/SE_checkpoint.pth.tar",
    #           encoder_config="Kinyarwanda_YourTTS/config_se.json",)
    # wav = tts.tts(text, speaker_wav="kinyarwanda_YourTTS/conditioning_audio.wav")
    # return wav
    #
    # Saving the waveform to a temporary file:
    # with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
    #     synthesizer.save_wav(wav, fp)
    # return fp.name
    # -------------------------------------------------------------------------

    # Run `ls` directly from an argument list: no shell is needed for a fixed
    # command, and this avoids shell=True altogether.
    listing = subprocess.check_output(["ls"])

    # The previous implementation invoked `ls` twice and concatenated both
    # outputs; repeating the bytes yields the same text with a single process.
    # errors="replace" keeps a file name that is not valid UTF-8 from turning
    # the whole request into a UnicodeDecodeError.
    return (listing * 2).decode("utf-8", errors="replace")
# Single text box in which the user types the sentence to process.
_text_input = gr.inputs.Textbox(
    label="Input Text",
    default="This sentence has been generated by a speech synthesis system.",
)

# Text box that shows whatever string generate_audio returns.
# NOTE(review): the label talks about recognized speech from a speechbrain
# model, which does not match a TTS demo -- confirm the intended wording.
# Earlier audio output, currently disabled:
# outputs=gr.outputs.Audio(type="numpy",label="Output"),
_text_output = gr.outputs.Textbox(label="Recognized speech from speechbrain model")

# Assemble the demo UI around generate_audio.
# NOTE(review): flagging_options is supplied although allow_flagging is False
# -- confirm whether flagging is meant to be enabled.
iface = gr.Interface(
    fn=generate_audio,
    inputs=[_text_input],
    outputs=_text_output,
    title="Kinyarwanda tts Demo",
    description="Kinyarwanda tts build with ",
    allow_flagging=False,
    flagging_options=['error', 'bad-quality', 'wrong-pronounciation'],
    layout="vertical",
    live=False,
)

# Start the web app; share=False is passed exactly as before.
iface.launch(share=False)