Spaces:
Running
on
Zero
Running
on
Zero
File size: 8,558 Bytes
b3067c5 2adfcbe 6bdc489 b3067c5 2adfcbe 6bdc489 a609646 6bdc489 2adfcbe 32b3c75 a609646 32b3c75 5897b48 2adfcbe 5897b48 2adfcbe a609646 5897b48 a609646 5897b48 a609646 5897b48 2adfcbe 5897b48 a609646 2adfcbe 5897b48 a609646 5897b48 a609646 5897b48 a609646 2adfcbe 5897b48 a609646 2adfcbe a609646 2adfcbe a609646 5897b48 a609646 2adfcbe a609646 5897b48 2adfcbe 5897b48 a609646 5897b48 a609646 5897b48 a609646 2adfcbe 5897b48 2adfcbe a609646 5897b48 a609646 5897b48 2adfcbe 5897b48 2adfcbe 5897b48 2adfcbe 5897b48 2adfcbe e49bf8d 2adfcbe 7cce69a 2adfcbe e49bf8d 5897b48 e49bf8d 2adfcbe e49bf8d 5897b48 a609646 2adfcbe e49bf8d 5897b48 e49bf8d a609646 5897b48 a609646 2adfcbe e49bf8d 2adfcbe b3067c5 2adfcbe b3067c5 5897b48 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 |
import gradio as gr
import openai, os, io, tempfile
from dotenv import load_dotenv
# ============== ํ๊ฒฝ ๋ณ์ & OpenAI ์ด๊ธฐํ ===========================
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
raise RuntimeError("OPENAI_API_KEY๋ฅผ .env ํ์ผ์ ์ค์ ํ์ธ์!")
client = openai.OpenAI(api_key=api_key)
# ============== Language settings ==============
# Display name -> language code handed to Whisper as the `language` hint.
LANG_CODE = {
    "Korean": "ko",
    "English": "en",
    "Japanese": "ja",
    "Chinese": "zh",
    "Thai": "th",
    "Russian": "ru",
    "Vietnamese": "vi",
    "Spanish": "es",
    "French": "fr",
}

# Dropdown choices, in the same order as the mapping above.
LANGUAGES = list(LANG_CODE)

# TTS voice per language: "alloy" everywhere, overridden by "nova" for
# Korean, Japanese and Chinese.
VOICE = dict.fromkeys(LANGUAGES, "alloy")
VOICE.update(dict.fromkeys(("Korean", "Japanese", "Chinese"), "nova"))
# ============== ๊ณตํต ํจ์ ===========================================
def _gpt_translate(text: str, src: str, tgt: str) -> str:
"""GPT-3.5 ๋ฒ์ญ"""
rsp = client.chat.completions.create(
model="gpt-3.5-turbo",
messages=[
{
"role": "system",
"content": (
f"You are a professional translator. Translate the following {src} text to {tgt}. "
f"Only provide the translation without additional commentary."
)
},
{"role": "user", "content": text}
],
temperature=0.3,
max_tokens=2048
)
return rsp.choices[0].message.content.strip()
def _tts(text: str, lang: str) -> str:
    """Synthesise *text* with the tts-1 model and return a temp mp3 path.

    Args:
        text: Text to speak; only the first 4096 characters are sent.
        lang: Language name used to pick the voice from ``VOICE``
            (falls back to "alloy" for unknown languages).

    Returns:
        Path of a newly created ``.mp3`` temporary file. The file is not
        deleted automatically; the caller owns it.
    """
    out = client.audio.speech.create(
        model="tts-1",
        voice=VOICE.get(lang, "alloy"),
        # NOTE(review): 4096 is presumably the endpoint's input limit — confirm.
        input=text[:4096],
    )
    # delete=False: the path must outlive this function so the UI can play it.
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
    try:
        # The context manager closes the handle even when the write fails.
        with tmp:
            tmp.write(out.content)
    except Exception:
        # Do not leave a partially written file behind.
        try:
            os.remove(tmp.name)
        except OSError:
            pass  # best-effort cleanup; the original error is what matters
        raise
    return tmp.name
def translate_audio(audio_path, src, tgt):
    """Transcribe one audio file, translate the text and synthesise speech.

    Args:
        audio_path: Path of the audio file to transcribe. May be ``None`` or
            empty when the UI is submitted without any audio.
        src: Source language name (a key of ``LANG_CODE``).
        tgt: Target language name.

    Returns:
        A 3-tuple ``(original_text, translated_text, tts_mp3_path)``. When no
        audio was supplied or nothing was recognised, the tuple is
        ``(failure_message, "", None)``.
    """
    # User-facing message (Korean): "speech recognition failed".
    # The literal was mis-decoded (UTF-8 read as TIS-620); restored to Korean.
    failure = ("⚠️ 음성 인식 실패", "", None)

    # Without this guard open(None) raises TypeError when the button is
    # pressed before anything was recorded or uploaded.
    if not audio_path:
        return failure

    with open(audio_path, "rb") as f:
        stt = client.audio.transcriptions.create(
            model="whisper-1",
            file=f,
            language=LANG_CODE.get(src),
        )
    original = stt.text.strip()
    if not original:
        return failure

    translated = _gpt_translate(original, src, tgt)
    tts_path = _tts(translated, tgt)
    return original, translated, tts_path
# ============== Real-time streaming transcription / translation ==============
STREAM_CHUNK_SEC = 4  # seconds of audio accumulated between Whisper calls


def _translate_pcm_segment(wav_header, pcm_bytes, src, tgt):
    """Write header + PCM to a temp WAV, translate it, and remove temp files."""
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
        tmp.write(wav_header + pcm_bytes)
    tts_path = None
    try:
        original, translated, tts_path = translate_audio(tmp.name, src, tgt)
    finally:
        # The WAV segment and the synthesised speech are not used by the
        # streaming view; delete them so a long session does not fill the
        # temp directory.
        for path in (tmp.name, tts_path):
            if path:
                try:
                    os.remove(path)
                except OSError:
                    pass  # best-effort cleanup
    return original, translated


def stream_generator(mic_stream, src, tgt):
    """Translate a microphone stream in periodic segments, yielding running text.

    Args:
        mic_stream: Object whose ``recv()`` returns the next chunk of bytes,
            or ``None`` when the stream has ended. The first chunk is taken
            to start with a 44-byte WAV header.
        src: Source language name.
        tgt: Target language name.

    Yields:
        ``(original_so_far, translated_so_far)`` after each processed segment,
        including the remainder left when the stream ends.
    """
    buffer = io.BytesIO()
    wav_header = None
    original_acc, translated_acc = "", ""
    # Bytes per segment; assumes 16 kHz, 16-bit mono as in the original code.
    threshold = 16000 * 2 * STREAM_CHUNK_SEC

    while True:
        chunk = mic_stream.recv()
        if chunk is None:  # end of stream
            break
        if not wav_header:
            # Split the header off the first chunk. It is prepended to every
            # segment later, so it must not also be stored in the buffer
            # (the original wrote it twice into the first segment).
            wav_header, chunk = chunk[:44], chunk[44:]
        buffer.write(chunk)

        # Enough audio accumulated: transcribe and translate this segment.
        if buffer.getbuffer().nbytes > threshold:
            o, t = _translate_pcm_segment(wav_header, buffer.getvalue(), src, tgt)
            original_acc += " " + o
            translated_acc += " " + t
            yield original_acc.strip(), translated_acc.strip()
            buffer = io.BytesIO()  # start a fresh segment

    # Flush whatever is left after the stream ended.
    if buffer.getbuffer().nbytes:
        o, t = _translate_pcm_segment(wav_header, buffer.getvalue(), src, tgt)
        yield (original_acc + " " + o).strip(), (translated_acc + " " + t).strip()
# ============== Simultaneous translation into four languages ==============
FOUR_LANGS = ["English", "Chinese", "Thai", "Russian"]


def translate_audio_four(audio_path, src):
    """Transcribe an audio file and translate it into every ``FOUR_LANGS`` entry.

    Args:
        audio_path: Path of the audio file to transcribe. May be ``None`` or
            empty when the UI is submitted without any audio.
        src: Source language name (a key of ``LANG_CODE``).

    Returns:
        A list ``[original, *translations]`` with one translation per entry
        of ``FOUR_LANGS``, in that order. When no audio was supplied or
        nothing was recognised, the list is the failure message followed by
        empty strings.
    """
    # User-facing message (Korean): "speech recognition failed".
    # The literal was mis-decoded (UTF-8 read as TIS-620); restored to Korean.
    # The padding follows FOUR_LANGS so the list always matches the outputs.
    failure = ["⚠️ 음성 인식 실패"] + [""] * len(FOUR_LANGS)

    # Without this guard open(None) raises TypeError when the button is
    # pressed before anything was recorded or uploaded.
    if not audio_path:
        return failure

    with open(audio_path, "rb") as f:
        stt = client.audio.transcriptions.create(
            model="whisper-1",
            file=f,
            language=LANG_CODE.get(src),
        )
    original = stt.text.strip()
    if not original:
        return failure

    # Original text first, then one translation per target language.
    return [original] + [_gpt_translate(original, src, lang) for lang in FOUR_LANGS]
# ============== Gradio UI ===========================================
# The Korean labels below were mis-decoded (UTF-8 read as TIS-620) and several
# were split across lines; they are restored to Korean here.
with gr.Blocks(title="SMARTok Demo", theme=gr.themes.Soft()) as app:
    with gr.Tabs():
        # (1) Microphone / file translation
        with gr.TabItem("🎙️ 마이크/파일 번역"):
            src1 = gr.Dropdown(LANGUAGES, value="Korean", label="입력")
            tgt1 = gr.Dropdown(LANGUAGES, value="English", label="출력")
            mic1 = gr.Audio(
                sources=["microphone", "upload"],
                type="filepath",
                label="🎤 녹음 또는 오디오 파일 업로드",
            )
            btn1 = gr.Button("번역")
            stt1 = gr.Textbox(label="원문", lines=5)
            tlt1 = gr.Textbox(label="번역", lines=5)
            out1 = gr.Audio(label="TTS", type="filepath", autoplay=True)
            btn1.click(
                translate_audio,
                inputs=[mic1, src1, tgt1],
                outputs=[stt1, tlt1, out1],
            )

        # (2) File-only translation
        with gr.TabItem("🎧 파일 전용 번역"):
            src2 = gr.Dropdown(LANGUAGES, value="Korean", label="입력")
            tgt2 = gr.Dropdown(LANGUAGES, value="English", label="출력")
            file2 = gr.Audio(
                sources=["upload"],
                type="filepath",
                label="오디오 파일 업로드",
            )
            btn2 = gr.Button("번역")
            stt2 = gr.Textbox(label="원문", lines=5)
            tlt2 = gr.Textbox(label="번역", lines=5)
            out2 = gr.Audio(label="TTS", type="filepath", autoplay=True)
            btn2.click(
                translate_audio,
                inputs=[file2, src2, tgt2],
                outputs=[stt2, tlt2, out2],
            )

        # (3) Real-time streaming translation (beta)
        with gr.TabItem("⏱️ 실시간 번역 (Beta)"):
            gr.Markdown("마이크를 켜면 3~4초 간격으로 자막이 갱신됩니다.")
            src3 = gr.Dropdown(LANGUAGES, value="Korean", label="입력")
            tgt3 = gr.Dropdown(LANGUAGES, value="English", label="출력")
            mic3 = gr.Audio(
                sources=["microphone"],
                streaming=True,
                label="🎤 실시간 마이크 입력",
            )
            stt3 = gr.Textbox(label="원문(실시간)", lines=8)
            tlt3 = gr.Textbox(label="번역(실시간)", lines=8)

            def gen(audio, src_lang, tgt_lang):
                """Forward the streamed audio and languages to stream_generator."""
                yield from stream_generator(audio, src_lang, tgt_lang)

            # The handler takes the audio value first, so the streaming
            # component itself must be listed as an input; with only the two
            # dropdowns the handler is called with too few arguments.
            # NOTE(review): stream_generator() calls .recv() on its first
            # argument — confirm that the value Gradio delivers for a
            # streaming Audio component supports that, otherwise an adapter
            # is needed here.
            mic3.stream(gen, inputs=[mic3, src3, tgt3], outputs=[stt3, tlt3])

        # (4) Simultaneous translation into four languages
        # NOTE(review): the emoji of this tab title could not be recovered
        # from the damaged source; a globe is used — confirm.
        with gr.TabItem("🌐 4개 언어 동시"):
            gr.Markdown(
                "입력 음성을 **English / Chinese(简体) / Thai / Russian** 4개 언어로 동시에 번역합니다."
            )
            src4 = gr.Dropdown(LANGUAGES, value="Korean", label="입력 언어")
            aud4 = gr.Audio(
                sources=["microphone", "upload"],
                type="filepath",
                label="🎤 녹음 또는 오디오 파일 업로드",
            )
            btn4 = gr.Button("번역")
            with gr.Row():
                org4 = gr.Textbox(label="원문", lines=4)
                en4 = gr.Textbox(label="English", lines=4)
                zh4 = gr.Textbox(label="Chinese (简体)", lines=4)
                th4 = gr.Textbox(label="Thai", lines=4)
                ru4 = gr.Textbox(label="Russian", lines=4)
            # Output order matches translate_audio_four(): original first,
            # then English, Chinese, Thai, Russian.
            btn4.click(
                translate_audio_four,
                inputs=[aud4, src4],
                outputs=[org4, en4, zh4, th4, ru4],
            )
# ============== App entry point ==============
if __name__ == "__main__":
    # Listen on all interfaces, port 7860, no public share link, debug mode on.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "debug": True,
    }
    app.launch(**launch_options)
|