import tempfile
from typing import Optional

import gradio as gr
import numpy as np

from TTS.config import load_config
from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer

# Extended manager class that automatically confirms the terms of use
class CustomModelManager(ModelManager):
    def ask_tos(self, output_path):
        print(f" > Accepting the terms of use for: {output_path}")
        print(" > You must confirm the following:")
        print(' | > "I have purchased a commercial license from Coqui: [email protected]"')
        print(' | > "Otherwise, I agree to the terms of the non-commercial CPML: https://coqui.ai/cpml" - [y/n]')
        answer = 'y'  # automatically accept the terms
        return answer.lower() == 'y'
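
# Note: ModelManager.ask_tos() normally prompts on stdin for the CPML terms, which a hosted,
# non-interactive Space cannot answer, so the override above returns "y" automatically
# (the printed lines mirror the library's own prompt text for CPML-licensed models such as XTTS).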

# Replace the default manager with the custom one
manager = CustomModelManager()

MODELS = {}
SPEAKERS = {}
MAX_TXT_LEN = 100

MODEL_NAMES = manager.list_tts_models()
# filter out multi-speaker models and slow wavegrad vocoders
filters = ["vctk", "your_tts", "ek1"]
MODEL_NAMES = [model_name for model_name in MODEL_NAMES if not any(f in model_name for f in filters)]

# reorder models: English models first, with a preferred English model swapped to the top
EN = [el for el in MODEL_NAMES if "/en/" in el]
OTHER = [el for el in MODEL_NAMES if "/en/" not in el]
EN[0], EN[5] = EN[5], EN[0]
MODEL_NAMES = EN + OTHER
print(MODEL_NAMES)


def tts(text: str, model_name: str):
    if len(text) > MAX_TXT_LEN:
        text = text[:MAX_TXT_LEN]
        print(f"Input text was cut off since it went over the {MAX_TXT_LEN} character limit.")
    print(text, model_name)

    # download model
    model_path, config_path, model_item = manager.download_model(model_name)
    print(f"Model path: {model_path}")
    print(f"Config path: {config_path}")
    print(f"Model item: {model_item}")

    # some models ship their config.json separately; fall back to fetching it from hf_url
    if config_path is None and 'config.json' in model_item['hf_url']:
        config_url = model_item['hf_url'][3]  # Assuming the 4th URL is always the config.json
        config_path = manager.download_from_url(config_url, model_name)
        print(f"Downloaded config path: {config_path}")

    if model_path is None or config_path is None:
        raise ValueError("Model path or config path is None")

    # download vocoder
    vocoder_name: Optional[str] = model_item["default_vocoder"]
    vocoder_path = None
    vocoder_config_path = None
    if vocoder_name is not None:
        vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)

    # init synthesizer
    synthesizer = Synthesizer(
        model_path, config_path, None, None, vocoder_path, vocoder_config_path,
    )

    # synthesize
    if synthesizer is None:
        raise NameError("model not found")
    wavs = synthesizer.tts(text, None)

    # return output
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        synthesizer.save_wav(wavs, fp)
        return fp.name
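
# Example of calling tts() directly (e.g. for local testing without the UI); the model name
# is the same one used as the dropdown default below:
#   wav_path = tts("This sentence has been generated by a speech synthesis system.",
#                  "tts_models/en/jenny/jenny")
#   print(wav_path)  # path to a temporary .wav file with the synthesized speech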
title = """<h1 align="center">🐸💬 CoquiTTS Playground </h1>"""

with gr.Blocks(analytics_enabled=False) as demo:
    with gr.Row():
        with gr.Column():
            gr.Markdown(
                """
                ## <img src="https://raw.githubusercontent.com/coqui-ai/TTS/main/images/coqui-log-green-TTS.png" height="56"/>
                """
            )
            gr.Markdown(
                """
                <br />
                ## 🐸Coqui.ai News
                - 📣 ⓍTTS, our production TTS model that can speak 13 languages, has been released: [Blog Post](https://coqui.ai/blog/tts/open_xtts), [Demo](https://huggingface.co/spaces/coqui/xtts), [Docs](https://tts.readthedocs.io/en/dev/models/xtts.html)
                - 📣 [🐶Bark](https://github.com/suno-ai/bark) is now available for inference with unconstrained voice cloning. [Docs](https://tts.readthedocs.io/en/dev/models/bark.html)
                - 📣 You can use [~1100 Fairseq models](https://github.com/facebookresearch/fairseq/tree/main/examples/mms) with 🐸TTS.
                - 📣 🐸TTS now supports 🐢Tortoise with faster inference. [Docs](https://tts.readthedocs.io/en/dev/models/tortoise.html)
                - 📣 The **Coqui Studio API** has landed in 🐸TTS. - [Example](https://github.com/coqui-ai/TTS/blob/dev/README.md#-python-api)
                - 📣 [**Coqui Studio API**](https://docs.coqui.ai/docs) is live.
                - 📣 Voice generation with prompts - **Prompt to Voice** - is live on [**Coqui Studio**](https://app.coqui.ai/auth/signin)!! - [Blog Post](https://coqui.ai/blog/tts/prompt-to-voice)
                - 📣 Voice generation with fusion - **Voice fusion** - is live on [**Coqui Studio**](https://app.coqui.ai/auth/signin).
                - 📣 Voice cloning is live on [**Coqui Studio**](https://app.coqui.ai/auth/signin).
                <br>
                """
            )
        with gr.Column():
            gr.Markdown(
                """
                <br/>
                💻 This space showcases some of the **[CoquiTTS](https://github.com/coqui-ai/TTS)** models.
                <br/>
                There are > 30 languages with single- and multi-speaker models, all thanks to our 👑 Contributors.
                <br/>
                Visit the links below for more.

                | | |
                | ------------------------------- | --------------------------------------- |
                | 🐸💬 **CoquiTTS** | [GitHub](https://github.com/coqui-ai/TTS) |
                | 💼 **Documentation** | [ReadTheDocs](https://tts.readthedocs.io/en/latest/) |
                | 👩‍💻 **Questions** | [GitHub Discussions] |
                | 🗯 **Community** | [Discord] |

                [github issue tracker]: https://github.com/coqui-ai/tts/issues
                [github discussions]: https://github.com/coqui-ai/TTS/discussions
                [discord]: https://discord.gg/5eXr5seRrv
                """
            )
    with gr.Row():
        gr.Markdown(
            """
            <details>
            <summary>👑 Model contributors</summary>

            - <a href="https://github.com/nmstoker/" target="_blank">@nmstoker</a>
            - <a href="https://github.com/kaiidams/" target="_blank">@kaiidams</a>
            - <a href="https://github.com/WeberJulian/" target="_blank">@WeberJulian</a>
            - <a href="https://github.com/Edresson/" target="_blank">@Edresson</a>
            - <a href="https://github.com/thorstenMueller/" target="_blank">@thorstenMueller</a>
            - <a href="https://github.com/r-dh/" target="_blank">@r-dh</a>
            - <a href="https://github.com/kirianguiller/" target="_blank">@kirianguiller</a>
            - <a href="https://github.com/robinhad/" target="_blank">@robinhad</a>
            - <a href="https://github.com/fkarabiber/" target="_blank">@fkarabiber</a>
            - <a href="https://github.com/nicolalandro/" target="_blank">@nicolalandro</a>
            - <a href="https://github.com/a-froghyar" target="_blank">@a-froghyar</a>
            - <a href="https://github.com/manmay-nakhashi" target="_blank">@manmay-nakhashi</a>
            - <a href="https://github.com/noml4u" target="_blank">@noml4u</a>
            </details>

            <br/>
            """
        )
    with gr.Row():
        with gr.Column():
            input_text = gr.inputs.Textbox(
                label="Input Text",
                default="This sentence has been generated by a speech synthesis system.",
            )
            model_select = gr.inputs.Dropdown(
                label="Pick Model: tts_models/<language>/<dataset>/<model_name>",
                choices=MODEL_NAMES,
                default="tts_models/en/jenny/jenny",
            )
            tts_button = gr.Button("Send", elem_id="send-btn", visible=True)

        with gr.Column():
            output_audio = gr.outputs.Audio(label="Output", type="filepath")

    tts_button.click(
        tts,
        inputs=[
            input_text,
            model_select,
        ],
        outputs=[output_audio],
    )
demo.queue(concurrency_count=16).launch(debug=True)
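
# Note: queue(concurrency_count=...) and the gr.inputs / gr.outputs classes above are the
# Gradio 3.x API; newer Gradio releases removed them in favor of gr.Textbox / gr.Dropdown /
# gr.Audio and per-event concurrency settings, so a 3.x Gradio pin is assumed here.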