|
import os |
|
import torchaudio |
|
import streamlit as st |
|
from tortoise.api import TextToSpeech |
|
|
|
|
|
@st.cache_resource
def _load_tts():
    """Build the Tortoise ``TextToSpeech`` engine once per server process.

    Streamlit re-executes this script from top to bottom on every widget
    interaction, so constructing the engine directly at module level would
    rebuild it on each rerun. ``st.cache_resource`` keeps a single instance
    and hands it back on subsequent reruns.

    NOTE(review): ``st.cache_resource`` needs a reasonably recent Streamlit
    (believed to be >= 1.18) -- confirm against the deployed version. The
    cached engine is shared by every session; confirm that is acceptable.
    """
    return TextToSpeech()


# Module-level handle kept under its original name so existing code that
# refers to ``tts`` keeps working.
tts = _load_tts()
|
|
|
|
|
def text_to_speech(
    text,
    preset="fast",
    output_path="output.wav",
    sample_rate=24000,
):
    """Synthesize ``text`` with the module-level ``tts`` engine and save it.

    A progress note is written to the Streamlit page, the text is passed to
    ``tts.tts_with_preset``, and the result is saved with ``torchaudio.save``.

    Args:
        text: The text to convert to speech.
        preset: Preset name forwarded unchanged to ``tts.tts_with_preset``.
        output_path: Destination file for the audio. Defaults to
            ``"output.wav"`` (the previously hard-coded value). Pass a
            distinct path per request if several users may generate audio
            at once, otherwise they overwrite each other's file.
        sample_rate: Sample rate handed to ``torchaudio.save``. Defaults to
            ``24000`` (the previously hard-coded value).

    Returns:
        ``output_path``, i.e. the location the audio was written to.
    """
    st.write("Generating speech... This may take a while.")

    generated = tts.tts_with_preset(text, preset=preset)

    # Drop the leading dimension and move the data to the CPU before saving.
    # NOTE(review): assumes the engine returns a tensor with a leading
    # batch dimension of size 1 -- confirm against the Tortoise API.
    waveform = generated.squeeze(0).cpu()
    torchaudio.save(output_path, waveform, sample_rate=sample_rate)

    return output_path
|
|
|
|
|
# --- Streamlit page: title, text box, and a button that triggers synthesis.
st.title("Text to Speech Converter")

text = st.text_input(
    "Enter text you want to convert to speech",
    "Hello, welcome to the Tortoise TTS text-to-speech demonstration.",
)

if st.button("Generate Speech"):
    if not text.strip():
        # Nothing to synthesize: tell the user instead of sending an empty
        # prompt to the model.
        st.warning("Please enter some text to convert to speech.")
    else:
        output_path = text_to_speech(text)

        # Read the generated file inside a context manager so the handle is
        # closed as soon as the bytes are in memory (the script is rerun on
        # every interaction, so an unclosed handle would leak each time).
        with open(output_path, "rb") as audio_file:
            audio_bytes = audio_file.read()

        st.audio(audio_bytes, format="audio/wav")