Spaces:
Runtime error
Runtime error
| import io | |
| import os | |
| import tempfile | |
| from typing import List | |
| import TTS.api | |
| import TTS.utils.manage as manage | |
| import torch | |
| from pydub import AudioSegment | |
| import gradio as gr | |
| import config | |
# Run inference on the GPU when CUDA is available; otherwise use the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
def ask_tos_patch(self, output_path):
    """Stand-in for ``ModelManager.ask_tos`` that always reports acceptance.

    Both arguments are ignored; they exist only so the signature matches the
    method this function is assigned over.

    Returns:
        Always ``True``.
    """
    notice = "Automatically accepting the terms of service."
    print(notice)
    return True
# Swap the model manager's terms-of-service check for the auto-accepting
# patch so model downloads do not depend on the stock ask_tos behaviour
# (presumably an interactive prompt -- confirm against the TTS package).
manage.ModelManager.ask_tos = ask_tos_patch

# A bare TTS instance is used only to trigger the downloads.
tts = TTS.api.TTS()

# Maps each key of config.models to a loaded TTS instance on `device`.
# synthesize_tts looks up the 'multi' key, so config.models must define it.
models = {}
for model_key, model_name in config.models.items():
    # Loop variables are deliberately not called `id`: that would shadow the
    # builtin for the rest of the module.
    tts.download_model_by_name(model_name)
    models[model_key] = TTS.api.TTS(model_name).to(device)
def synthesize_tts(
    text: str = 'Hello, World!',
    speaker_wavs: List[gr.File] = None,
    speaker_idx: str = 'Ana Florence',
    language: str = 'ja',
    temperature: float = 0.65,
    length_penalty: float = 1.0,
    repetition_penalty: float = 1.9,
    top_k: int = 50,
    top_p: float = 0.8,
    speed: float = 1.0,
    enable_text_splitting: bool = True,
):
    """Synthesize ``text`` with the ``'multi'`` model and return WAV bytes.

    When reference recordings are uploaded they are each re-encoded to WAV in
    a temporary file and passed to the model as ``speaker_wav``; otherwise the
    named speaker ``speaker_idx`` is passed as ``speaker``.

    Args:
        text: Text to speak.
        speaker_wavs: Optional uploaded reference recordings. Each item may be
            an object exposing a ``.name`` path or a plain path string.
        speaker_idx: Speaker name used when no recordings are supplied.
        language: Language code forwarded to the model.
        temperature: Forwarded unchanged to ``tts_to_file``.
        length_penalty: Forwarded unchanged to ``tts_to_file``.
        repetition_penalty: Forwarded unchanged to ``tts_to_file``.
        top_k: Forwarded unchanged to ``tts_to_file``.
        top_p: Forwarded unchanged to ``tts_to_file``.
        speed: Forwarded unchanged to ``tts_to_file``.
        enable_text_splitting: Forwarded unchanged to ``tts_to_file``.

    Returns:
        The bytes that ``tts_to_file`` wrote to an in-memory buffer.

    Raises:
        gr.Error: If a reference recording cannot be decoded or re-encoded.
            Returning the message as a string would hand a non-audio value to
            the ``gr.Audio`` output, so the failure is raised instead.
    """
    temp_files = []
    try:
        for speaker_wav in speaker_wavs or []:
            # Accept both upload wrappers (with .name) and bare path strings.
            source_path = getattr(speaker_wav, "name", speaker_wav)
            with open(source_path, "rb") as f:
                speaker_wav_bytes = f.read()

            try:
                audio = AudioSegment.from_file(io.BytesIO(speaker_wav_bytes))
                wav_buffer = io.BytesIO()
                audio.export(wav_buffer, format="wav")
            except Exception as e:
                raise gr.Error(f"Error processing audio file: {e}") from e

            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_wav_file:
                # Register the path before writing so the finally-clause
                # removes the file even if the write fails.
                temp_files.append(temp_wav_file.name)
                temp_wav_file.write(wav_buffer.getvalue())

        # Voice cloning from the uploaded recordings takes precedence over
        # the named built-in speaker.
        if temp_files:
            speaker_kwargs = {"speaker_wav": temp_files}
        else:
            speaker_kwargs = {"speaker": speaker_idx}

        output_buffer = io.BytesIO()
        models['multi'].tts_to_file(
            text=text,
            language=language,
            file_path=output_buffer,
            temperature=temperature,
            length_penalty=length_penalty,
            repetition_penalty=repetition_penalty,
            top_k=top_k,
            top_p=top_p,
            speed=speed,
            enable_text_splitting=enable_text_splitting,
            **speaker_kwargs,
        )
        return output_buffer.getvalue()
    finally:
        for temp_file in temp_files:
            try:
                os.remove(temp_file)
            except FileNotFoundError:
                # Already gone; nothing left to clean up.
                pass
# Speaker names offered in the "Speaker Index" dropdown.
SPEAKER_CHOICES = [
    "Claribel Dervla", "Daisy Studious", "Gracie Wise", "Tammie Ema", "Alison Dietlinde", "Ana Florence",
    "Annmarie Nele", "Asya Anara", "Brenda Stern", "Gitta Nikolina", "Henriette Usha", "Sofia Hellen",
    "Tammy Grit", "Tanja Adelina", "Vjollca Johnnie", "Andrew Chipper", "Badr Odhiambo", "Dionisio Schuyler",
    "Royston Min", "Viktor Eka", "Abrahan Mack", "Adde Michal", "Baldur Sanjin", "Craig Gutsy",
    "Damien Black", "Gilberto Mathias", "Ilkin Urbano", "Kazuhiko Atallah", "Ludvig Milivoj", "Suad Qasim",
    "Torcull Diarmuid", "Viktor Menelaos", "Zacharie Aimilios", "Nova Hogarth", "Maja Ruoho", "Uta Obando",
    "Lidiya Szekeres", "Chandra MacFarland", "Szofi Granger", "Camilla Holmström", "Lilya Stainthorpe",
    "Zofija Kendrick", "Narelle Moon", "Barbora MacLean", "Alexandra Hisakawa", "Alma María", "Rosemary Okafor",
    "Ige Behringer", "Filip Traverse", "Damjan Chapman", "Wulf Carlevaro", "Aaron Dreschner", "Kumar Dahl",
    "Eugenio Mataracı", "Ferran Simen", "Xavier Hayasaka", "Luis Moray", "Marcos Rudaski",
]

# Language codes offered in the "Language" dropdown.
LANGUAGE_CHOICES = [
    "en", "es", "fr", "de", "it", "pt", "pl", "tr",
    "ru", "nl", "cs", "ar", "zh", "ja", "hu", "ko",
]

# Input components, listed in the same order as synthesize_tts's parameters.
inputs = [
    gr.Textbox(label="Text to Synthesize", value="Hello, World!"),
    gr.File(
        label="Speaker WAV files (optional)",
        file_count="multiple",
        file_types=["audio"],
    ),
    gr.Dropdown(label="Speaker Index", choices=SPEAKER_CHOICES, value="Ana Florence"),
    gr.Dropdown(label="Language", choices=LANGUAGE_CHOICES, value="en"),
    gr.Slider(minimum=0, maximum=1, value=0.65, step=0.01, label="Temperature"),
    gr.Slider(minimum=0.5, maximum=2, value=1.0, step=0.1, label="Length Penalty"),
    gr.Slider(minimum=1.0, maximum=10.0, value=1.9, step=0.1, label="Repetition Penalty"),
    gr.Slider(minimum=1, maximum=100, value=50, step=1, label="Top-K"),
    gr.Slider(minimum=0, maximum=1, value=0.8, step=0.01, label="Top-P"),
    gr.Slider(minimum=0.5, maximum=2, value=1.0, step=0.01, label="Speed"),
    gr.Checkbox(label="Enable Text Splitting", value=True),
]

# Single audio output that receives whatever synthesize_tts returns.
outputs = gr.Audio(label="Generated Speech")

# Build the interface and start serving it.
demo = gr.Interface(
    fn=synthesize_tts,
    inputs=inputs,
    outputs=outputs,
    title="Text-to-Speech Synthesis with Gradio",
)
demo.launch()