Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -1,15 +1,15 @@
|
|
1 |
import gradio as gr
|
2 |
-
import openai, os, io, tempfile
|
3 |
from dotenv import load_dotenv
|
4 |
|
5 |
-
#
|
6 |
load_dotenv()
|
7 |
api_key = os.getenv("OPENAI_API_KEY")
|
8 |
if not api_key:
|
9 |
raise RuntimeError("OPENAI_API_KEY๋ฅผ .env ํ์ผ์ ์ค์ ํ์ธ์!")
|
10 |
client = openai.OpenAI(api_key=api_key)
|
11 |
|
12 |
-
#
|
13 |
LANGUAGES = [
|
14 |
"Korean", "English", "Japanese", "Chinese",
|
15 |
"Thai", "Russian", "Vietnamese",
|
@@ -25,8 +25,9 @@ VOICE = {
|
|
25 |
for lang in LANGUAGES
|
26 |
}
|
27 |
|
28 |
-
#
|
29 |
def _gpt_translate(text: str, src: str, tgt: str) -> str:
|
|
|
30 |
rsp = client.chat.completions.create(
|
31 |
model="gpt-3.5-turbo",
|
32 |
messages=[
|
@@ -45,6 +46,7 @@ def _gpt_translate(text: str, src: str, tgt: str) -> str:
|
|
45 |
return rsp.choices[0].message.content.strip()
|
46 |
|
47 |
def _tts(text: str, lang: str) -> str:
|
|
|
48 |
out = client.audio.speech.create(
|
49 |
model="tts-1",
|
50 |
voice=VOICE.get(lang, "alloy"),
|
@@ -55,9 +57,8 @@ def _tts(text: str, lang: str) -> str:
|
|
55 |
tmp.close()
|
56 |
return tmp.name
|
57 |
|
58 |
-
# =============== 1) ๋ง์ดํฌยทํ์ผ ๊ณตํต ์ฒ๋ฆฌ ============================
|
59 |
def translate_audio(audio_path, src, tgt):
|
60 |
-
"""
|
61 |
with open(audio_path, "rb") as f:
|
62 |
stt = client.audio.transcriptions.create(
|
63 |
model="whisper-1",
|
@@ -72,24 +73,25 @@ def translate_audio(audio_path, src, tgt):
|
|
72 |
tts_path = _tts(translated, tgt)
|
73 |
return original, translated, tts_path
|
74 |
|
75 |
-
#
|
76 |
-
STREAM_CHUNK_SEC = 4 #
|
77 |
|
78 |
def stream_generator(mic_stream, src, tgt):
|
79 |
-
"""
|
80 |
buffer = io.BytesIO()
|
81 |
wav_header = None
|
82 |
original_acc, translated_acc = "", ""
|
83 |
|
84 |
while True:
|
85 |
-
chunk = mic_stream.recv()
|
86 |
-
if chunk is None:
|
87 |
break
|
88 |
|
89 |
if not wav_header:
|
90 |
-
wav_header = chunk[:44]
|
91 |
buffer.write(chunk)
|
92 |
|
|
|
93 |
if buffer.getbuffer().nbytes > 16000 * 2 * STREAM_CHUNK_SEC:
|
94 |
wav_bytes = wav_header + buffer.getvalue()
|
95 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
|
@@ -100,9 +102,9 @@ def stream_generator(mic_stream, src, tgt):
|
|
100 |
original_acc += " " + o
|
101 |
translated_acc += " " + t
|
102 |
yield original_acc.strip(), translated_acc.strip()
|
103 |
-
buffer = io.BytesIO()
|
104 |
|
105 |
-
# ๋จ์
|
106 |
if buffer.getbuffer().nbytes:
|
107 |
wav_bytes = wav_header + buffer.getvalue()
|
108 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
|
@@ -112,11 +114,11 @@ def stream_generator(mic_stream, src, tgt):
|
|
112 |
|
113 |
yield (original_acc + " " + o).strip(), (translated_acc + " " + t).strip()
|
114 |
|
115 |
-
#
|
116 |
FOUR_LANGS = ["English", "Chinese", "Thai", "Russian"]
|
117 |
|
118 |
def translate_audio_four(audio_path, src):
|
119 |
-
"""
|
120 |
with open(audio_path, "rb") as f:
|
121 |
stt = client.audio.transcriptions.create(
|
122 |
model="whisper-1",
|
@@ -127,22 +129,20 @@ def translate_audio_four(audio_path, src):
|
|
127 |
if not original:
|
128 |
return ["โ ๏ธ ์์ฑ ์ธ์ ์คํจ"] + [""] * 4
|
129 |
|
130 |
-
|
131 |
for lang in FOUR_LANGS:
|
132 |
-
|
133 |
-
return
|
134 |
-
|
135 |
-
# =============== Gradio UI ==========================================
|
136 |
-
# … (위쪽 공통 초기화/함수 동일) …
|
137 |
|
|
|
138 |
with gr.Blocks(title="SMARTok Demo", theme=gr.themes.Soft()) as app:
|
139 |
with gr.Tabs():
|
140 |
-
# โ
|
141 |
with gr.TabItem("๐๏ธ ๋ง์ดํฌ/ํ์ผ ๋ฒ์ญ"):
|
142 |
src1 = gr.Dropdown(LANGUAGES, value="Korean", label="์
๋ ฅ")
|
143 |
tgt1 = gr.Dropdown(LANGUAGES, value="English", label="์ถ๋ ฅ")
|
144 |
mic1 = gr.Audio(
|
145 |
-
sources=["microphone", "upload"],
|
146 |
type="filepath",
|
147 |
label="๐ค ๋
น์ ๋๋ ์ค๋์ค ํ์ผ ์
๋ก๋"
|
148 |
)
|
@@ -157,7 +157,7 @@ with gr.Blocks(title="SMARTok Demo", theme=gr.themes.Soft()) as app:
|
|
157 |
outputs=[stt1, tlt1, out1]
|
158 |
)
|
159 |
|
160 |
-
# โก
|
161 |
with gr.TabItem("๐ง ํ์ผ ์ ์ฉ ๋ฒ์ญ"):
|
162 |
src2 = gr.Dropdown(LANGUAGES, value="Korean", label="์
๋ ฅ")
|
163 |
tgt2 = gr.Dropdown(LANGUAGES, value="English", label="์ถ๋ ฅ")
|
@@ -177,10 +177,48 @@ with gr.Blocks(title="SMARTok Demo", theme=gr.themes.Soft()) as app:
|
|
177 |
outputs=[stt2, tlt2, out2]
|
178 |
)
|
179 |
|
180 |
-
# ③ 실시간 스트리밍 번역
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
181 |
|
182 |
-
# ④ 4개 언어 동시 번역
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
183 |
|
184 |
-
#
|
185 |
if __name__ == "__main__":
|
186 |
app.launch(server_name="0.0.0.0", server_port=7860, share=False, debug=True)
|
|
|
1 |
import gradio as gr
|
2 |
+
import openai, os, io, tempfile
|
3 |
from dotenv import load_dotenv
|
4 |
|
5 |
+
# ============== ํ๊ฒฝ ๋ณ์ & OpenAI ์ด๊ธฐํ ===========================
|
6 |
load_dotenv()
|
7 |
api_key = os.getenv("OPENAI_API_KEY")
|
8 |
if not api_key:
|
9 |
raise RuntimeError("OPENAI_API_KEY๋ฅผ .env ํ์ผ์ ์ค์ ํ์ธ์!")
|
10 |
client = openai.OpenAI(api_key=api_key)
|
11 |
|
12 |
+
# ============== ์ธ์ด ์ค์ ===========================================
|
13 |
LANGUAGES = [
|
14 |
"Korean", "English", "Japanese", "Chinese",
|
15 |
"Thai", "Russian", "Vietnamese",
|
|
|
25 |
for lang in LANGUAGES
|
26 |
}
|
27 |
|
28 |
+
# ============== ๊ณตํต ํจ์ ===========================================
|
29 |
def _gpt_translate(text: str, src: str, tgt: str) -> str:
|
30 |
+
"""GPT-3.5 ๋ฒ์ญ"""
|
31 |
rsp = client.chat.completions.create(
|
32 |
model="gpt-3.5-turbo",
|
33 |
messages=[
|
|
|
46 |
return rsp.choices[0].message.content.strip()
|
47 |
|
48 |
def _tts(text: str, lang: str) -> str:
|
49 |
+
"""TTS-1 ์์ฑ ํฉ์ฑ โ ์์ mp3 ๊ฒฝ๋ก ๋ฐํ"""
|
50 |
out = client.audio.speech.create(
|
51 |
model="tts-1",
|
52 |
voice=VOICE.get(lang, "alloy"),
|
|
|
57 |
tmp.close()
|
58 |
return tmp.name
|
59 |
|
|
|
60 |
def translate_audio(audio_path, src, tgt):
|
61 |
+
"""๋จ์ผ ์์ฑ ํ์ผ ๋ฒ์ญ(TTS ํฌํจ)"""
|
62 |
with open(audio_path, "rb") as f:
|
63 |
stt = client.audio.transcriptions.create(
|
64 |
model="whisper-1",
|
|
|
73 |
tts_path = _tts(translated, tgt)
|
74 |
return original, translated, tts_path
|
75 |
|
76 |
+
# ============== ์ค์๊ฐ ์คํธ๋ฆฌ๋ฐ ์ ์ฌ/๋ฒ์ญ ============================
|
77 |
+
STREAM_CHUNK_SEC = 4 # Whisper ํธ์ถ ์ฃผ๊ธฐ(์ด)
|
78 |
|
79 |
def stream_generator(mic_stream, src, tgt):
|
80 |
+
"""๋ง์ดํฌ ์คํธ๋ฆผ -> ์ฃผ๊ธฐ์ ์ฒญํฌ ๋ฒ์ญ(๋์ ์ถ๋ ฅ)"""
|
81 |
buffer = io.BytesIO()
|
82 |
wav_header = None
|
83 |
original_acc, translated_acc = "", ""
|
84 |
|
85 |
while True:
|
86 |
+
chunk = mic_stream.recv()
|
87 |
+
if chunk is None: # ์คํธ๋ฆผ ์ข
๋ฃ
|
88 |
break
|
89 |
|
90 |
if not wav_header:
|
91 |
+
wav_header = chunk[:44] # WAV ํค๋(16kHz 16-bit mono)
|
92 |
buffer.write(chunk)
|
93 |
|
94 |
+
# 지정 시간만큼 쌓이면 Whisper 호출
|
95 |
if buffer.getbuffer().nbytes > 16000 * 2 * STREAM_CHUNK_SEC:
|
96 |
wav_bytes = wav_header + buffer.getvalue()
|
97 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
|
|
|
102 |
original_acc += " " + o
|
103 |
translated_acc += " " + t
|
104 |
yield original_acc.strip(), translated_acc.strip()
|
105 |
+
buffer = io.BytesIO() # ๋ฒํผ ์ด๊ธฐํ
|
106 |
|
107 |
+
# 마지막 남은 버퍼 처리
|
108 |
if buffer.getbuffer().nbytes:
|
109 |
wav_bytes = wav_header + buffer.getvalue()
|
110 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
|
|
|
114 |
|
115 |
yield (original_acc + " " + o).strip(), (translated_acc + " " + t).strip()
|
116 |
|
117 |
+
# ============== 4๊ฐ๊ตญ ๋์ ๋ฒ์ญ =====================================
|
118 |
FOUR_LANGS = ["English", "Chinese", "Thai", "Russian"]
|
119 |
|
120 |
def translate_audio_four(audio_path, src):
|
121 |
+
"""์์ฑ ํ์ผ โ ์๋ฌธ + 4๊ฐ ์ธ์ด ๋์ ๋ฒ์ญ"""
|
122 |
with open(audio_path, "rb") as f:
|
123 |
stt = client.audio.transcriptions.create(
|
124 |
model="whisper-1",
|
|
|
129 |
if not original:
|
130 |
return ["โ ๏ธ ์์ฑ ์ธ์ ์คํจ"] + [""] * 4
|
131 |
|
132 |
+
outs = [original]
|
133 |
for lang in FOUR_LANGS:
|
134 |
+
outs.append(_gpt_translate(original, src, lang))
|
135 |
+
return outs # ์ด 5๊ฐ(์๋ฌธ+4์ธ์ด)
|
|
|
|
|
|
|
136 |
|
137 |
+
# ============== Gradio UI ===========================================
|
138 |
with gr.Blocks(title="SMARTok Demo", theme=gr.themes.Soft()) as app:
|
139 |
with gr.Tabs():
|
140 |
+
# ① 마이크/파일 번역
|
141 |
with gr.TabItem("๐๏ธ ๋ง์ดํฌ/ํ์ผ ๋ฒ์ญ"):
|
142 |
src1 = gr.Dropdown(LANGUAGES, value="Korean", label="์
๋ ฅ")
|
143 |
tgt1 = gr.Dropdown(LANGUAGES, value="English", label="์ถ๋ ฅ")
|
144 |
mic1 = gr.Audio(
|
145 |
+
sources=["microphone", "upload"],
|
146 |
type="filepath",
|
147 |
label="๐ค ๋
น์ ๋๋ ์ค๋์ค ํ์ผ ์
๋ก๋"
|
148 |
)
|
|
|
157 |
outputs=[stt1, tlt1, out1]
|
158 |
)
|
159 |
|
160 |
+
# ② 파일 전용 번역
|
161 |
with gr.TabItem("๐ง ํ์ผ ์ ์ฉ ๋ฒ์ญ"):
|
162 |
src2 = gr.Dropdown(LANGUAGES, value="Korean", label="์
๋ ฅ")
|
163 |
tgt2 = gr.Dropdown(LANGUAGES, value="English", label="์ถ๋ ฅ")
|
|
|
177 |
outputs=[stt2, tlt2, out2]
|
178 |
)
|
179 |
|
180 |
+
# ③ 실시간 스트리밍 번역(Beta)
|
181 |
+
with gr.TabItem("โฑ๏ธ ์ค์๊ฐ ๋ฒ์ญ (Beta)"):
|
182 |
+
gr.Markdown("๋ง์ดํฌ๋ฅผ ์ผ๋ฉด 3~4์ด ๊ฐ๊ฒฉ์ผ๋ก ์๋ง์ด ๊ฐฑ์ ๋ฉ๋๋ค.")
|
183 |
+
src3 = gr.Dropdown(LANGUAGES, value="Korean", label="์
๋ ฅ")
|
184 |
+
tgt3 = gr.Dropdown(LANGUAGES, value="English", label="์ถ๋ ฅ")
|
185 |
+
mic3 = gr.Audio(
|
186 |
+
sources=["microphone"],
|
187 |
+
streaming=True,
|
188 |
+
label="๐ค ์ค์๊ฐ ๋ง์ดํฌ ์
๋ ฅ"
|
189 |
+
)
|
190 |
+
stt3 = gr.Textbox(label="์๋ฌธ(์ค์๊ฐ)", lines=8)
|
191 |
+
tlt3 = gr.Textbox(label="๋ฒ์ญ(์ค์๊ฐ)", lines=8)
|
192 |
+
|
193 |
+
def gen(audio, src_lang, tgt_lang):
|
194 |
+
yield from stream_generator(audio, src_lang, tgt_lang)
|
195 |
+
|
196 |
+
mic3.stream(gen, inputs=[src3, tgt3], outputs=[stt3, tlt3])
|
197 |
|
198 |
+
# ④ 4개 언어 동시 번역
|
199 |
+
with gr.TabItem("๐ 4๊ฐ ์ธ์ด ๋์"):
|
200 |
+
gr.Markdown("์
๋ ฅ ์์ฑ์ **English / Chinese(็ฎไฝ) / Thai / Russian** 4๊ฐ ์ธ์ด๋ก ๋์์ ๋ฒ์ญํฉ๋๋ค.")
|
201 |
+
src4 = gr.Dropdown(LANGUAGES, value="Korean", label="์
๋ ฅ ์ธ์ด")
|
202 |
+
aud4 = gr.Audio(
|
203 |
+
sources=["microphone", "upload"],
|
204 |
+
type="filepath",
|
205 |
+
label="๐ค ๋
น์ ๋๋ ์ค๋์ค ํ์ผ ์
๋ก๋"
|
206 |
+
)
|
207 |
+
btn4 = gr.Button("๋ฒ์ญ")
|
208 |
+
|
209 |
+
with gr.Row():
|
210 |
+
org4 = gr.Textbox(label="์๋ฌธ", lines=4)
|
211 |
+
en4 = gr.Textbox(label="English", lines=4)
|
212 |
+
zh4 = gr.Textbox(label="Chinese (็ฎไฝ)", lines=4)
|
213 |
+
th4 = gr.Textbox(label="Thai", lines=4)
|
214 |
+
ru4 = gr.Textbox(label="Russian", lines=4)
|
215 |
+
|
216 |
+
btn4.click(
|
217 |
+
translate_audio_four,
|
218 |
+
inputs=[aud4, src4],
|
219 |
+
outputs=[org4, en4, zh4, th4, ru4]
|
220 |
+
)
|
221 |
|
222 |
+
# ============== ์ฑ ์คํ =============================================
|
223 |
if __name__ == "__main__":
|
224 |
app.launch(server_name="0.0.0.0", server_port=7860, share=False, debug=True)
|