"""Gradio demo application for the Kinyarwanda YourTTS model.

At import time the script downloads the model assets from the Hugging Face
Hub repository named by ``REPO_NAME``, then builds and launches a Gradio
interface whose handler is :func:`generate_audio`.
"""
import subprocess
import tempfile
from typing import Optional

import gradio as gr
import numpy as np
from TTS.api import TTS
from huggingface_hub import hf_hub_download

MAX_TXT_LEN = 100
REPO_NAME = "DigitalUmuganda/Kinyarwanda_YourTTS"

# Assets fetched from the Hub repository at start-up, in this order. The
# paths returned by hf_hub_download are not kept; the calls are made only for
# their download side effect.
_MODEL_FILES = (
    "config.json",
    "SE_checkpoint.pth.tar",
    "config_se.json",
    "model.pth",
    "speakers.pth",
    "conditioning_audio.wav",
)

for _filename in _MODEL_FILES:
    hf_hub_download(repo_id=REPO_NAME, filename=_filename)


def generate_audio(text: str) -> str:
    """Handle one request from the Gradio interface.

    NOTE(review): despite its name, this function does not synthesize audio
    at the moment. The synthesis code is commented out below and the function
    returns a listing of the current working directory instead, which looks
    like a debugging aid for locating the downloaded files -- confirm before
    relying on it.

    Args:
        text: Input text. It is truncated to ``MAX_TXT_LEN`` characters (with
            a message printed to stdout), but is otherwise not used by the
            current implementation.

    Returns:
        The UTF-8 decoded output of two consecutive ``ls`` runs, concatenated.

    Raises:
        subprocess.CalledProcessError: If ``ls`` exits with a non-zero status.
    """
    if len(text) > MAX_TXT_LEN:
        text = text[:MAX_TXT_LEN]
        print(f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")

    # Disabled synthesis path, kept for reference:
    # model_path, config_path, model_item = manager.download_model(model_name)
    # vocoder_name: Optional[str] = model_item["default_vocoder"]
    # vocoder_path = None
    # vocoder_config_path = None
    # if vocoder_name is not None:
    #     vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)
    # synthesizer = Synthesizer(
    #     model_path, config_path, None, None, vocoder_path, vocoder_config_path,
    # )
    # if synthesizer is None:
    #     raise NameError("model not found")
    # tts = TTS(model_path="Kinyarwanda_YourTTS/model.pth",
    #           config_path="Kinyarwanda_YourTTS/config.json",
    #           tts_speakers_file="Kinyarwanda_YourTTS/speakers.pth",
    #           encoder_checkpoint="Kinyarwanda_YourTTS/SE_checkpoint.pth.tar",
    #           encoder_config="Kinyarwanda_YourTTS/config_se.json",)
    # wav = tts.tts(text, speaker_wav="kinyarwanda_YourTTS/conditioning_audio.wav")
    # return wav

    # The command is fixed, so it is run from an argument list without a
    # shell; the captured output is the same as with shell=True.
    listing = subprocess.check_output(["ls"]) + subprocess.check_output(["ls"])
    return listing.decode("utf-8")
    # with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
    #     synthesizer.save_wav(wav, fp)
    # return fp.name


iface = gr.Interface(
    fn=generate_audio,
    inputs=[
        gr.inputs.Textbox(
            label="Input Text",
            default="This sentence has been generated by a speech synthesis system.",
        ),
    ],
    # outputs=gr.outputs.Audio(type="numpy",label="Output"),
    outputs=gr.outputs.Textbox(label="Recognized speech from speechbrain model"),
    title="Kinyarwanda tts Demo",
    description="Kinyarwanda tts build with ",
    allow_flagging=False,
    flagging_options=['error', 'bad-quality', 'wrong-pronounciation'],
    layout="vertical",
    live=False
)
iface.launch(share=False)