englissi committed on
Commit
9a60809
·
verified ·
1 Parent(s): 109b86f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -24
app.py CHANGED
@@ -3,51 +3,86 @@ from gtts import gTTS
3
  from pydub import AudioSegment
4
  from io import BytesIO
5
 
6
# Step 1: Define a function to generate and merge TTS audio for multiple languages
def multilingual_tts(korean_text, british_english_text, american_english_text):
    """Synthesize each non-blank text with gTTS and merge the clips into one MP3.

    The clips are concatenated in the order Korean, British English,
    American English, and 500 ms of silence is appended after every clip
    (including the last one).

    Args:
        korean_text: Text spoken with gTTS language code ``"ko"``.
        british_english_text: Text spoken with ``lang="en"``, ``tld="co.uk"``.
        american_english_text: Text spoken with ``lang="en"``, ``tld="com"``.

    Any argument may be ``None``, empty, or whitespace-only; such fields are
    skipped.

    Returns:
        The path ``"combined_output.mp3"`` once the merged audio has been
        written there, or ``None`` when no field contains any text (nothing is
        synthesized or written in that case).

    Note:
        The output path is fixed, so every call overwrites the previous file.
    """
    # (gTTS language code, Google TLD used for the request, text), in playback order.
    voices = [
        ("ko", "com", korean_text),
        ("en", "co.uk", british_english_text),  # British English
        ("en", "com", american_english_text),  # American English
    ]

    # Skip blank fields. `text and ...` also guards against None, on which
    # calling .strip() directly would raise AttributeError.
    pending = [
        (lang, tld, text)
        for lang, tld, text in voices
        if text and text.strip()
    ]
    if not pending:
        # Nothing to say: do not export an empty audio segment.
        return None

    combined_audio = AudioSegment.silent(duration=0)  # Empty audio to start

    for lang, tld, text in pending:
        tts = gTTS(text, lang=lang, tld=tld)
        # Keep the synthesized MP3 in memory instead of a per-clip temp file.
        audio_file = BytesIO()
        tts.write_to_fp(audio_file)
        audio_file.seek(0)  # rewind so the decoder reads from the start
        tts_audio = AudioSegment.from_file(audio_file, format="mp3")
        # Append the clip followed by a 500 ms pause.
        combined_audio += tts_audio + AudioSegment.silent(duration=500)

    # Save combined audio to a file
    output_file = "combined_output.mp3"
    combined_audio.export(output_file, format="mp3")

    return output_file
32
 
33
# Step 2: Create Gradio interface
# Builds the three-voice UI: one text box per voice, an audio player for the
# merged result, and a button that runs `multilingual_tts` on the three texts.
with gr.Blocks() as demo:
    gr.Markdown("## Multilingual TTS: Generate a Single Audio File")

    # The three text inputs share one row.
    with gr.Row():
        # Placeholder is the Korean greeting "안녕하세요" (it had been mis-decoded).
        korean_input = gr.Textbox(label="Enter Korean Text:", placeholder="안녕하세요")
        british_english_input = gr.Textbox(label="Enter British English Text:", placeholder="Hello (British)")
        american_english_input = gr.Textbox(label="Enter American English Text:", placeholder="Hello (American)")

    # type="filepath": the component is fed the file path returned by the handler.
    output_audio = gr.Audio(label="Generated Speech", type="filepath")
    generate_button = gr.Button("Generate Speech")

    # The order of `inputs` must match the handler's positional parameters.
    generate_button.click(
        multilingual_tts,
        inputs=[korean_input, british_english_input, american_english_input],
        outputs=output_audio
    )

# Run the app
if __name__ == "__main__":
    demo.launch()
 
3
  from pydub import AudioSegment
4
  from io import BytesIO
5
 
6
def multilingual_tts(
    korean_text,
    british_text,          # original British English field
    american_text,         # original American English field
    british_text_add1,     # additional British English field 1
    british_text_add2,     # additional British English field 2
    australian_text_add1,  # additional Australian English field 1
    australian_text_add2,  # additional Australian English field 2
    american_text_add1     # additional American English field 1
):
    """Synthesize each non-blank text with gTTS and merge the clips into one MP3.

    Up to eight clips are produced and concatenated in parameter order, with
    500 ms of silence appended after every clip (including the last one).

    Args:
        korean_text: Text spoken with gTTS ``lang="ko"``, ``tld="com"``.
        british_text: Text spoken with ``lang="en"``, ``tld="co.uk"``.
        american_text: Text spoken with ``lang="en"``, ``tld="com"``.
        british_text_add1: Extra text, ``lang="en"``, ``tld="co.uk"``.
        british_text_add2: Extra text, ``lang="en"``, ``tld="co.uk"``.
        australian_text_add1: Extra text, ``lang="en"``, ``tld="com.au"``.
        australian_text_add2: Extra text, ``lang="en"``, ``tld="com.au"``.
        american_text_add1: Extra text, ``lang="en"``, ``tld="com"``.

    Any argument may be ``None``, empty, or whitespace-only; such fields are
    skipped.

    Returns:
        The path ``"combined_output.mp3"`` once the merged audio has been
        written there, or ``None`` when no field contains any text (nothing is
        synthesized or written in that case).

    Note:
        The output path is fixed, so every call overwrites the previous file.
    """
    # One (language code, tld, text) tuple per voice, in playback order.
    voices = [
        ("ko", "com", korean_text),               # Korean
        ("en", "co.uk", british_text),            # original British
        ("en", "com", american_text),             # original American
        ("en", "co.uk", british_text_add1),       # additional British 1
        ("en", "co.uk", british_text_add2),       # additional British 2
        ("en", "com.au", australian_text_add1),   # additional Australian 1
        ("en", "com.au", australian_text_add2),   # additional Australian 2
        ("en", "com", american_text_add1),        # additional American 1
    ]

    # Process only fields that actually contain text. `text and ...` also
    # guards against None, on which calling .strip() would raise AttributeError.
    pending = [
        (lang, tld, text)
        for lang, tld, text in voices
        if text and text.strip()
    ]
    if not pending:
        # Nothing to say: do not export an empty audio segment.
        return None

    combined_audio = AudioSegment.silent(duration=0)  # start from empty audio

    for lang, tld, text in pending:
        tts = gTTS(text, lang=lang, tld=tld)
        # Keep the synthesized MP3 in memory instead of a per-clip temp file.
        audio_file = BytesIO()
        tts.write_to_fp(audio_file)
        audio_file.seek(0)  # rewind so the decoder reads from the start
        tts_audio = AudioSegment.from_file(audio_file, format="mp3")
        # Append the clip followed by 500 ms of silence.
        combined_audio += tts_audio + AudioSegment.silent(duration=500)

    # Save the final merged audio as an MP3 file.
    output_file = "combined_output.mp3"
    combined_audio.export(output_file, format="mp3")

    return output_file
45
 
 
# Builds the eight-voice UI: one text box per voice, an audio player for the
# merged result, and a button that runs `multilingual_tts` on all eight texts.
with gr.Blocks() as demo:
    # Heading suffix is Korean for "8 voices in total" (it had been mis-decoded).
    gr.Markdown("## Multilingual TTS: Generate a Single Audio File (총 8개 음성)")

    # Korean input field; the placeholder is the greeting "안녕하세요".
    korean_input = gr.Textbox(label="Enter Korean Text:", placeholder="안녕하세요")

    # Original English input fields (British, American)
    with gr.Row():
        british_input = gr.Textbox(label="Enter British English Text:", placeholder="Hello (British)")
        american_input = gr.Textbox(label="Enter American English Text:", placeholder="Hello (American)")

    # Additional English input fields
    with gr.Row():
        british_input_add1 = gr.Textbox(label="Enter Additional British English Text 1:", placeholder="Hi there (British)")
        british_input_add2 = gr.Textbox(label="Enter Additional British English Text 2:", placeholder="Good day (British)")

    with gr.Row():
        australian_input_add1 = gr.Textbox(label="Enter Additional Australian English Text 1:", placeholder="G'day (Australian)")
        australian_input_add2 = gr.Textbox(label="Enter Additional Australian English Text 2:", placeholder="How ya going? (Australian)")

    with gr.Row():
        american_input_add1 = gr.Textbox(label="Enter Additional American English Text:", placeholder="Hey (American)")

    # type="filepath": the component is fed the file path returned by the handler.
    output_audio = gr.Audio(label="Generated Speech", type="filepath")
    generate_button = gr.Button("Generate Speech")

    # The order of `inputs` must match the handler's positional parameters.
    generate_button.click(
        multilingual_tts,
        inputs=[
            korean_input,
            british_input,
            american_input,
            british_input_add1,
            british_input_add2,
            australian_input_add1,
            australian_input_add2,
            american_input_add1
        ],
        outputs=output_audio
    )

# Launch the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()