import tempfile
from typing import Optional
import gradio as gr
import numpy as np
from TTS.api import TTS
from huggingface_hub import hf_hub_download
import subprocess


# Maximum number of input characters kept by generate_audio; longer input is
# truncated to this length.
MAX_TXT_LEN = 100

# Hugging Face Hub repository the files below are downloaded from.
REPO_NAME = "DigitalUmuganda/Kinyarwanda_YourTTS"

# Download every required file at import time, in this order. The local paths
# returned by hf_hub_download are not kept.
for _filename in (
    "config.json",
    "SE_checkpoint.pth.tar",
    "config_se.json",
    "model.pth",
    "speakers.pth",
    "conditioning_audio.wav",
):
    hf_hub_download(repo_id=REPO_NAME, filename=_filename)

def generate_audio(text):
    """Return the current directory listing, repeated twice, as text.

    Despite its name, this function does not synthesize speech at the
    moment: the synthesis call is disabled (see the TODO below) and the
    function returns the output of ``ls`` instead.

    Args:
        text: Input string. It is truncated to ``MAX_TXT_LEN`` characters
            (with a notice printed to stdout) but is not otherwise used by
            the current implementation.

    Returns:
        str: The UTF-8 decoded output of ``ls``, concatenated with itself.

    Raises:
        subprocess.CalledProcessError: If ``ls`` exits with a non-zero status.
        FileNotFoundError: If the ``ls`` executable cannot be found.
    """
    if len(text) > MAX_TXT_LEN:
        text = text[:MAX_TXT_LEN]
        print(f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")

    # TODO: re-enable synthesis. Disabled sketch kept for reference:
    # tts = TTS(model_path="Kinyarwanda_YourTTS/model.pth",
    #         config_path="Kinyarwanda_YourTTS/config.json",
    #         tts_speakers_file="Kinyarwanda_YourTTS/speakers.pth",
    #         encoder_checkpoint="Kinyarwanda_YourTTS/SE_checkpoint.pth.tar",
    #         encoder_config="Kinyarwanda_YourTTS/config_se.json",)
    # wav = tts.tts(text, speaker_wav="kinyarwanda_YourTTS/conditioning_audio.wav")
    # return wav
    # NOTE(review): the speaker_wav path above uses a lowercase "k" while the
    # other paths use "Kinyarwanda_YourTTS" - confirm the directory name
    # before re-enabling.

    # Run the fixed command directly via an argv list; no shell is needed.
    # One invocation is enough: the output is simply repeated to keep the
    # doubled listing this function has always returned.
    listing = subprocess.check_output(["ls"]).decode("utf-8")
    return listing + listing

# Single text input component passed to generate_audio.
text_input = gr.inputs.Textbox(
    label="Input Text",
    default="This sentence has been generated by a speech synthesis system.",
)

# Text output component. Disabled alternative kept for reference:
# gr.outputs.Audio(type="numpy", label="Output")
text_output = gr.outputs.Textbox(label="Recognized speech from speechbrain model")

# All Interface settings gathered in one place before construction.
interface_options = dict(
    fn=generate_audio,
    inputs=[text_input],
    outputs=text_output,
    title="Kinyarwanda tts Demo",
    description="Kinyarwanda tts build with ",
    allow_flagging=False,
    flagging_options=['error', 'bad-quality', 'wrong-pronounciation'],
    layout="vertical",
    live=False,
)

iface = gr.Interface(**interface_options)

# Start the app at import time without requesting a share link.
iface.launch(share=False)