Spaces:
Runtime error
Runtime error
File size: 4,228 Bytes
5ca847f db3663c 5ca847f aa93b1b 5ca847f db3663c 5ca847f aa93b1b 5ca847f aa93b1b 5ca847f db3663c 51c71fc db3663c 5ca847f 51c71fc 5ca847f db3663c 5ca847f db3663c 5ca847f db3663c 5ca847f db3663c 783ad44 db3663c 5a12fa3 db3663c 5a12fa3 db3663c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 |
import io
import os
import tempfile
from typing import List
import TTS.api
import TTS.utils.manage as manage
import torch
from pydub import AudioSegment
import gradio as gr # Gradio库
import config
# Run on the GPU when PyTorch reports an available CUDA device, else on CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
# Stand-in for the terms-of-service prompt that always accepts.
def ask_tos_patch(self, output_path):
    """Accept the terms of service without asking.

    Both parameters are unused; they are present so the function can be
    installed as a method taking ``(self, output_path)``.

    Returns:
        Always ``True``.
    """
    notice = "Automatically accepting the terms of service."
    print(notice)
    return True
# Replace the original ask_tos method with the auto-accepting function
# defined above.
manage.ModelManager.ask_tos = ask_tos_patch

# Bare TTS instance, used here only to download the configured models.
tts = TTS.api.TTS()

# Loaded TTS models, keyed by the ids declared in config.models.
models = {}
# The loop variables are module-level globals, so they are deliberately not
# named ``id`` (that would shadow the builtin for the rest of the module).
for model_key, model_name in config.models.items():
    tts.download_model_by_name(model_name)
    models[model_key] = TTS.api.TTS(model_name).to(device)
def synthesize_tts(
    text: str = 'Hello, World!',
    speaker_wavs: List[gr.File] = None,
    speaker_idx: str = 'Ana Florence',
    language: str = 'ja',
    temperature: float = 0.65,
    length_penalty: float = 1.0,
    repetition_penalty: float = 2.0,
    top_k: int = 50,
    top_p: float = 0.8,
    speed: float = 1.0,
    enable_text_splitting: bool = True,
):
    """Synthesize speech for ``text`` with the ``models['multi']`` model.

    When reference recordings are uploaded, each one is converted to WAV with
    pydub, written to a temporary file, and the list of those files is passed
    to the model as ``speaker_wav``. Otherwise ``speaker_idx`` is passed as
    ``speaker``.

    Args:
        text: Text to synthesize.
        speaker_wavs: Optional uploaded reference recordings. Each item may be
            an object exposing its path through ``.name`` or a plain path
            string.
        speaker_idx: Speaker passed to the model when no recordings are given.
        language: Language passed to the model.
        temperature, length_penalty, repetition_penalty, top_k, top_p, speed,
        enable_text_splitting: Forwarded unchanged to ``tts_to_file``.

    Returns:
        The bytes that ``tts_to_file`` wrote into an in-memory buffer.

    Raises:
        gr.Error: If an uploaded recording cannot be decoded or converted to
            WAV. Raising (instead of returning the message) keeps a plain
            string from being handed to the audio output component.
    """
    temp_files = []
    try:
        for speaker_wav in speaker_wavs or []:
            # Accept both upload wrappers (path in ``.name``) and plain paths.
            source_path = getattr(speaker_wav, "name", speaker_wav)
            with open(source_path, "rb") as f:
                speaker_wav_bytes = f.read()

            # Convert the uploaded audio to WAV format using pydub.
            try:
                audio = AudioSegment.from_file(io.BytesIO(speaker_wav_bytes))
                wav_buffer = io.BytesIO()
                audio.export(wav_buffer, format="wav")
            except Exception as e:
                raise gr.Error(f"Error processing audio file: {e}") from e

            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_wav_file:
                # Register the file before writing so the cleanup below
                # removes it even if the write fails.
                temp_files.append(temp_wav_file.name)
                temp_wav_file.write(wav_buffer.getvalue())

        # The two synthesis modes differ only in how the voice is selected.
        if temp_files:
            voice_kwargs = {"speaker_wav": temp_files}
        else:
            voice_kwargs = {"speaker": speaker_idx}

        output_buffer = io.BytesIO()
        models['multi'].tts_to_file(
            text=text,
            language=language,
            file_path=output_buffer,
            temperature=temperature,
            length_penalty=length_penalty,
            repetition_penalty=repetition_penalty,
            top_k=top_k,
            top_p=top_p,
            speed=speed,
            enable_text_splitting=enable_text_splitting,
            **voice_kwargs,
        )
        return output_buffer.getvalue()
    finally:
        # Delete the temporary reference WAVs on success and on failure.
        for temp_file in temp_files:
            try:
                os.remove(temp_file)
            except FileNotFoundError:
                pass
# Build the Gradio interface.
# The input components appear in the same order as the parameters of
# synthesize_tts.
inputs = [
    gr.Textbox(label="Text to Synthesize", value="Hello, World!"),
    gr.File(
        label="Speaker WAV files (optional)",
        file_types=["audio"],
        file_count="multiple",
    ),
    gr.Textbox(label="Speaker Index", value="Ana Florence"),
    gr.Textbox(label="Language", value="en"),
    gr.Slider(minimum=0, maximum=1, value=0.65, step=0.01, label="Temperature"),
    gr.Slider(minimum=0.5, maximum=2, value=1.0, step=0.1, label="Length Penalty"),
    gr.Slider(minimum=1.0, maximum=10.0, value=2.0, step=0.1, label="Repetition Penalty"),
    gr.Slider(minimum=1, maximum=100, value=50, step=1, label="Top-K"),
    gr.Slider(minimum=0, maximum=1, value=0.8, step=0.01, label="Top-P"),
    gr.Slider(minimum=0.5, maximum=2, value=1.0, step=0.01, label="Speed"),
    gr.Checkbox(label="Enable Text Splitting", value=True),
]

# Single audio component that receives the return value of synthesize_tts.
outputs = gr.Audio(label="Generated Speech")

# Wire the callback to the components and start the app.
gr.Interface(
    title="Text-to-Speech Synthesis with Gradio",
    fn=synthesize_tts,
    inputs=inputs,
    outputs=outputs,
).launch()
|