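# engdialogue / app.py
# Hugging Face Space file-viewer residue (kept for provenance):
#   author avatar: englissi's picture
#   commit: "Create app.py" (9e130e4, verified)
#   viewer links: raw | history | blame
#   size: 4.32 kB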
import tempfile
from io import BytesIO

import gradio as gr
from gtts import gTTS
from pydub import AudioSegment
def multilingual_tts(
    korean_text,
    british_text,
    american_text,
    british_text_add1,
    british_text_add2,
    australian_text_add1,
    australian_text_add2,
    american_text_add1,
    additional_english_text_9,
    additional_english_text_10
):
    """Synthesize each non-empty input with gTTS and join them into one MP3.

    The inputs are spoken in parameter order. Each one is rendered with the
    language code and gTTS ``tld`` listed in ``voices`` below, and a 500 ms
    pause is appended after every rendered clip (including the last one).
    Inputs that are ``None``, empty, or whitespace-only are skipped.

    Args:
        korean_text: Text rendered with ``lang="ko"``.
        british_text, british_text_add1, british_text_add2: English text
            rendered with ``tld="co.uk"``.
        australian_text_add1, australian_text_add2: English text rendered
            with ``tld="com.au"``.
        american_text, american_text_add1, additional_english_text_9,
        additional_english_text_10: English text rendered with ``tld="com"``.

    Returns:
        Path of the exported MP3 file. A fresh temporary file is created on
        every call so that simultaneous requests never overwrite each
        other's output; the file is not deleted by this function.
    """
    # One (language code, tld, text) tuple per voice, in playback order.
    voices = [
        ("ko", "com", korean_text),                  # Korean
        ("en", "co.uk", british_text),               # original British
        ("en", "com", american_text),                # original American
        ("en", "co.uk", british_text_add1),          # additional British 1
        ("en", "co.uk", british_text_add2),          # additional British 2
        ("en", "com.au", australian_text_add1),      # additional Australian 1
        ("en", "com.au", australian_text_add2),      # additional Australian 2
        ("en", "com", american_text_add1),           # additional American 1
        ("en", "com", additional_english_text_9),    # additional English 9
        ("en", "com", additional_english_text_10),   # additional English 10
    ]

    # The pause is identical for every clip, so build it once.
    pause = AudioSegment.silent(duration=500)
    combined_audio = AudioSegment.silent(duration=0)  # start from empty audio

    for lang, tld, text in voices:
        # Skip missing input; the falsy check also covers None, which would
        # otherwise raise AttributeError on .strip().
        if not text or not text.strip():
            continue

        # Render into memory so no intermediate per-clip file is needed.
        audio_buffer = BytesIO()
        gTTS(text, lang=lang, tld=tld).write_to_fp(audio_buffer)
        audio_buffer.seek(0)

        clip = AudioSegment.from_file(audio_buffer, format="mp3")
        combined_audio += clip + pause

    # Reserve a unique path, then close the handle before exporting so the
    # export can reopen the file by name on every platform.
    with tempfile.NamedTemporaryFile(
        prefix="combined_output_", suffix=".mp3", delete=False
    ) as tmp:
        output_file = tmp.name

    combined_audio.export(output_file, format="mp3")
    return output_file
# Gradio UI: ten text inputs (one Korean, nine English) feeding
# multilingual_tts, whose returned file path is played by the audio widget.
with gr.Blocks() as demo:
    # The Korean suffix reads "10 voices in total"; it and the Korean
    # placeholder below were restored from mis-decoded (mojibake) text.
    gr.Markdown("## Multilingual TTS: Generate a Single Audio File (총 10개 음성)")

    # Korean input field
    korean_input = gr.Textbox(label="Enter Korean Text:", placeholder="안녕하세요")

    # Original English input fields (British, American)
    with gr.Row():
        british_input = gr.Textbox(label="Enter British English Text:", placeholder="Hello (British)")
        american_input = gr.Textbox(label="Enter American English Text:", placeholder="Hello (American)")

    # Additional English input fields (British)
    with gr.Row():
        british_input_add1 = gr.Textbox(label="Enter Additional British English Text 1:", placeholder="Hi there (British)")
        british_input_add2 = gr.Textbox(label="Enter Additional British English Text 2:", placeholder="Good day (British)")

    # Additional English input fields (Australian)
    with gr.Row():
        australian_input_add1 = gr.Textbox(label="Enter Additional Australian English Text 1:", placeholder="G'day (Australian)")
        australian_input_add2 = gr.Textbox(label="Enter Additional Australian English Text 2:", placeholder="How ya going? (Australian)")

    # Additional English input fields (American and generic English)
    with gr.Row():
        american_input_add1 = gr.Textbox(label="Enter Additional American English Text 1:", placeholder="Hey (American)")
        additional_english_input_9 = gr.Textbox(label="Enter Additional English Text 9:", placeholder="Additional dialogue 9 (English)")

    # Last additional English input field
    additional_english_input_10 = gr.Textbox(label="Enter Additional English Text 10:", placeholder="Additional dialogue 10 (English)")

    output_audio = gr.Audio(label="Generated Speech", type="filepath")
    generate_button = gr.Button("Generate Speech")

    # The order of `inputs` must match multilingual_tts's positional
    # parameters, because Gradio passes the values positionally.
    generate_button.click(
        multilingual_tts,
        inputs=[
            korean_input,
            british_input,
            american_input,
            british_input_add1,
            british_input_add2,
            australian_input_add1,
            australian_input_add2,
            american_input_add1,
            additional_english_input_9,
            additional_english_input_10,
        ],
        outputs=output_audio,
    )

if __name__ == "__main__":
    demo.launch()