openfree committed on
Commit 2adfcbe · verified · 1 Parent(s): e49bf8d

Update app.py

Files changed (1)
  1. app.py +66 -28
app.py CHANGED
@@ -1,15 +1,15 @@
  import gradio as gr
- import openai, os, io, tempfile, wave, time
  from dotenv import load_dotenv
 
- # =============== Common initialization ==============================
  load_dotenv()
  api_key = os.getenv("OPENAI_API_KEY")
  if not api_key:
      raise RuntimeError("Set OPENAI_API_KEY in your .env file!")
  client = openai.OpenAI(api_key=api_key)
 
- # ---------- Supported languages --------------------------------------
  LANGUAGES = [
      "Korean", "English", "Japanese", "Chinese",
      "Thai", "Russian", "Vietnamese",
@@ -25,8 +25,9 @@ VOICE = {
      for lang in LANGUAGES
  }
 
- # ---------- Common utilities ------------------------------------------
  def _gpt_translate(text: str, src: str, tgt: str) -> str:
      rsp = client.chat.completions.create(
          model="gpt-3.5-turbo",
          messages=[
@@ -45,6 +46,7 @@ def _gpt_translate(text: str, src: str, tgt: str) -> str:
      return rsp.choices[0].message.content.strip()
 
  def _tts(text: str, lang: str) -> str:
      out = client.audio.speech.create(
          model="tts-1",
          voice=VOICE.get(lang, "alloy"),
@@ -55,9 +57,8 @@ def _tts(text: str, lang: str) -> str:
      tmp.close()
      return tmp.name
 
- # =============== 1) Shared mic/file handling =========================
  def translate_audio(audio_path, src, tgt):
-     """wav/mp3 path -> (original text, translation, translated-TTS path)"""
      with open(audio_path, "rb") as f:
          stt = client.audio.transcriptions.create(
              model="whisper-1",
@@ -72,24 +73,25 @@ def translate_audio(audio_path, src, tgt):
      tts_path = _tts(translated, tgt)
      return original, translated, tts_path
 
- # =============== 2) Real-time streaming (beta) =======================
- STREAM_CHUNK_SEC = 4  # call Whisper every 4 seconds
 
  def stream_generator(mic_stream, src, tgt):
-     """generator: yields (accumulated original, accumulated translation) for each chunk"""
      buffer = io.BytesIO()
      wav_header = None
      original_acc, translated_acc = "", ""
 
      while True:
-         chunk = mic_stream.recv()  # bytes
-         if chunk is None:  # stream ended
              break
 
          if not wav_header:
-             wav_header = chunk[:44]  # WAV header (PCM 16 kHz 16-bit mono)
          buffer.write(chunk)
 
          if buffer.getbuffer().nbytes > 16000 * 2 * STREAM_CHUNK_SEC:
              wav_bytes = wav_header + buffer.getvalue()
              with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
@@ -100,9 +102,9 @@ def stream_generator(mic_stream, src, tgt):
              original_acc += " " + o
              translated_acc += " " + t
              yield original_acc.strip(), translated_acc.strip()
-             buffer = io.BytesIO()  # reset buffer
 
-     # process the remaining data
      if buffer.getbuffer().nbytes:
          wav_bytes = wav_header + buffer.getvalue()
          with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
@@ -112,11 +114,11 @@ def stream_generator(mic_stream, src, tgt):
 
      yield (original_acc + " " + o).strip(), (translated_acc + " " + t).strip()
 
- # =============== 3) Simultaneous 4-language translation ==============
  FOUR_LANGS = ["English", "Chinese", "Thai", "Russian"]
 
  def translate_audio_four(audio_path, src):
-     """One STT pass, then simultaneous translation into 4 languages (En/Zh/Th/Ru)"""
      with open(audio_path, "rb") as f:
          stt = client.audio.transcriptions.create(
              model="whisper-1",
@@ -127,22 +129,20 @@ def translate_audio_four(audio_path, src):
      if not original:
          return ["⚠️ Speech recognition failed"] + [""] * 4
 
-     outputs = [original]
      for lang in FOUR_LANGS:
-         outputs.append(_gpt_translate(original, src, lang))
-     return outputs  # 5 items in total (original + 4 languages)
-
- # =============== Gradio UI ==========================================
- # … (shared initialization/functions above unchanged) …
 
  with gr.Blocks(title="SMARTok Demo", theme=gr.themes.Soft()) as app:
      with gr.Tabs():
-         # ① Mic + file translation ← modified
          with gr.TabItem("🎙️ Mic/File Translation"):
              src1 = gr.Dropdown(LANGUAGES, value="Korean", label="Input")
              tgt1 = gr.Dropdown(LANGUAGES, value="English", label="Output")
              mic1 = gr.Audio(
-                 sources=["microphone", "upload"],  # ✅ allow both
                  type="filepath",
                  label="🎤 Record or upload an audio file"
              )
@@ -157,7 +157,7 @@ with gr.Blocks(title="SMARTok Demo", theme=gr.themes.Soft()) as app:
                  outputs=[stt1, tlt1, out1]
              )
 
-         # ② File-only translation (unchanged)
          with gr.TabItem("🎧 File-Only Translation"):
              src2 = gr.Dropdown(LANGUAGES, value="Korean", label="Input")
              tgt2 = gr.Dropdown(LANGUAGES, value="English", label="Output")
@@ -177,10 +177,48 @@ with gr.Blocks(title="SMARTok Demo", theme=gr.themes.Soft()) as app:
                  outputs=[stt2, tlt2, out2]
              )
 
-         # ③ Real-time streaming translation (Beta) … same as before …
 
-         # ④ Simultaneous 4-language translation … same as before …
 
- # ===================== Run ==========================================
  if __name__ == "__main__":
      app.launch(server_name="0.0.0.0", server_port=7860, share=False, debug=True)
 
  import gradio as gr
+ import openai, os, io, tempfile
  from dotenv import load_dotenv
 
+ # ============== Environment variables & OpenAI initialization =======
  load_dotenv()
  api_key = os.getenv("OPENAI_API_KEY")
  if not api_key:
      raise RuntimeError("Set OPENAI_API_KEY in your .env file!")
  client = openai.OpenAI(api_key=api_key)
 
+ # ============== Language settings ===================================
  LANGUAGES = [
      "Korean", "English", "Japanese", "Chinese",
      "Thai", "Russian", "Vietnamese",
 
      for lang in LANGUAGES
  }
 
+ # ============== Common helpers ======================================
  def _gpt_translate(text: str, src: str, tgt: str) -> str:
+     """Translate with GPT-3.5"""
      rsp = client.chat.completions.create(
          model="gpt-3.5-turbo",
          messages=[
 
      return rsp.choices[0].message.content.strip()
 
  def _tts(text: str, lang: str) -> str:
+     """TTS-1 speech synthesis → returns the path to a temporary mp3"""
      out = client.audio.speech.create(
          model="tts-1",
          voice=VOICE.get(lang, "alloy"),
 
      tmp.close()
      return tmp.name
 
  def translate_audio(audio_path, src, tgt):
+     """Translate a single audio file (including TTS output)"""
      with open(audio_path, "rb") as f:
          stt = client.audio.transcriptions.create(
              model="whisper-1",
 
      tts_path = _tts(translated, tgt)
      return original, translated, tts_path
 
+ # ============== Real-time streaming transcription/translation =======
+ STREAM_CHUNK_SEC = 4  # Whisper call interval (seconds)
 
  def stream_generator(mic_stream, src, tgt):
+     """Mic stream -> periodic chunk translation (accumulated output)"""
      buffer = io.BytesIO()
      wav_header = None
      original_acc, translated_acc = "", ""
 
      while True:
+         chunk = mic_stream.recv()
+         if chunk is None:  # stream ended
              break
 
          if not wav_header:
+             wav_header = chunk[:44]  # WAV header (16 kHz 16-bit mono)
          buffer.write(chunk)
 
+         # Call Whisper once the configured duration has accumulated
          if buffer.getbuffer().nbytes > 16000 * 2 * STREAM_CHUNK_SEC:
              wav_bytes = wav_header + buffer.getvalue()
              with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
 
              original_acc += " " + o
              translated_acc += " " + t
              yield original_acc.strip(), translated_acc.strip()
+             buffer = io.BytesIO()  # reset the buffer
 
+     # Flush whatever is left in the buffer
      if buffer.getbuffer().nbytes:
          wav_bytes = wav_header + buffer.getvalue()
          with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
 
      yield (original_acc + " " + o).strip(), (translated_acc + " " + t).strip()
 
+ # ============== Simultaneous 4-language translation =================
  FOUR_LANGS = ["English", "Chinese", "Thai", "Russian"]
 
  def translate_audio_four(audio_path, src):
+     """Audio file → original text + simultaneous translation into 4 languages"""
      with open(audio_path, "rb") as f:
          stt = client.audio.transcriptions.create(
              model="whisper-1",
 
      if not original:
          return ["⚠️ Speech recognition failed"] + [""] * 4
 
+     outs = [original]
      for lang in FOUR_LANGS:
+         outs.append(_gpt_translate(original, src, lang))
+     return outs  # 5 items in total (original + 4 languages)
 
+ # ============== Gradio UI ===========================================
  with gr.Blocks(title="SMARTok Demo", theme=gr.themes.Soft()) as app:
      with gr.Tabs():
+         # ① Mic/file translation
          with gr.TabItem("🎙️ Mic/File Translation"):
              src1 = gr.Dropdown(LANGUAGES, value="Korean", label="Input")
              tgt1 = gr.Dropdown(LANGUAGES, value="English", label="Output")
              mic1 = gr.Audio(
+                 sources=["microphone", "upload"],
                  type="filepath",
                  label="🎤 Record or upload an audio file"
              )
 
                  outputs=[stt1, tlt1, out1]
              )
 
+         # ② File-only translation
          with gr.TabItem("🎧 File-Only Translation"):
              src2 = gr.Dropdown(LANGUAGES, value="Korean", label="Input")
              tgt2 = gr.Dropdown(LANGUAGES, value="English", label="Output")
 
                  outputs=[stt2, tlt2, out2]
              )
 
+         # ③ Real-time streaming translation (Beta)
+         with gr.TabItem("⏱️ Real-Time Translation (Beta)"):
+             gr.Markdown("Once the microphone is on, the captions refresh every 3-4 seconds.")
+             src3 = gr.Dropdown(LANGUAGES, value="Korean", label="Input")
+             tgt3 = gr.Dropdown(LANGUAGES, value="English", label="Output")
+             mic3 = gr.Audio(
+                 sources=["microphone"],
+                 streaming=True,
+                 label="🎤 Live microphone input"
+             )
+             stt3 = gr.Textbox(label="Original (live)", lines=8)
+             tlt3 = gr.Textbox(label="Translation (live)", lines=8)
+
+             def gen(audio, src_lang, tgt_lang):
+                 yield from stream_generator(audio, src_lang, tgt_lang)
+
+             mic3.stream(gen, inputs=[src3, tgt3], outputs=[stt3, tlt3])
 
+         # ④ Simultaneous 4-language translation
+         with gr.TabItem("🌍 4 Languages at Once"):
+             gr.Markdown("Translates the input speech into **English / Chinese (Simplified) / Thai / Russian** at the same time.")
+             src4 = gr.Dropdown(LANGUAGES, value="Korean", label="Input language")
+             aud4 = gr.Audio(
+                 sources=["microphone", "upload"],
+                 type="filepath",
+                 label="🎤 Record or upload an audio file"
+             )
+             btn4 = gr.Button("Translate")
+
+             with gr.Row():
+                 org4 = gr.Textbox(label="Original", lines=4)
+                 en4 = gr.Textbox(label="English", lines=4)
+                 zh4 = gr.Textbox(label="Chinese (Simplified)", lines=4)
+                 th4 = gr.Textbox(label="Thai", lines=4)
+                 ru4 = gr.Textbox(label="Russian", lines=4)
+
+             btn4.click(
+                 translate_audio_four,
+                 inputs=[aud4, src4],
+                 outputs=[org4, en4, zh4, th4, ru4]
+             )
 
+ # ============== Run the app =========================================
  if __name__ == "__main__":
      app.launch(server_name="0.0.0.0", server_port=7860, share=False, debug=True)
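
A quick way to exercise the two file-based helpers without the Gradio UI is to call them directly. The snippet below is a minimal sketch, not part of the commit: "sample.wav" is a hypothetical local recording, and importing app assumes OPENAI_API_KEY is set in .env, since app.py initializes the OpenAI client at import time.

# smoke_test.py — illustrative sketch only; sample.wav is a placeholder path
from app import translate_audio, translate_audio_four

if __name__ == "__main__":
    # Single-target translation: returns (original text, translation, path to TTS mp3)
    original, translated, tts_path = translate_audio("sample.wav", "Korean", "English")
    print(original)
    print(translated)
    print("TTS saved to:", tts_path)

    # One STT pass, then four translations: [original, English, Chinese, Thai, Russian]
    results = translate_audio_four("sample.wav", "Korean")
    for lang, text in zip(["Original", "English", "Chinese", "Thai", "Russian"], results):
        print(f"{lang}: {text}")

To run the full demo instead, use python app.py; as configured above, it serves on 0.0.0.0:7860 with sharing disabled and debug logging enabled.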