# NOTE: hosting-page residue captured with the source ("Spaces: Sleeping"); not part of the program.
import os, asyncio, json, tempfile, websockets, pdfplumber | |
import gradio as gr | |
import openai | |
from dotenv import load_dotenv | |
# ─── 0. Initialization ───────────────────────────────────────
# Load variables from a local .env file into the process environment.
load_dotenv()

# The OpenAI key is mandatory: fail at import time when it is missing.
openai.api_key = os.getenv("OPENAI_API_KEY")
if not openai.api_key:
    raise RuntimeError("OPENAI_API_KEY ๊ฐ .env ์ ์์ต๋๋ค!")

# Languages offered in every source/target dropdown.
LANG = [
    "Korean", "English", "Japanese", "Chinese", "Thai",
    "Russian", "Vietnamese", "Spanish", "French",
]

# TTS voice per language: "nova" for Korean/Japanese/Chinese, "alloy" for
# everything else. Key order follows LANG.
VOICE = dict.fromkeys(LANG, "alloy")
VOICE.update(dict.fromkeys(("Korean", "Japanese", "Chinese"), "nova"))

# Target languages of the four-way realtime tab.
FOUR = ["English", "Chinese", "Thai", "Russian"]

# WebSocket endpoint used by the streaming speech-to-text helper.
WS_URL = "wss://api.openai.com/v1/audio/transcriptions/stream"
# ─── 1. Shared GPT translation / TTS ─────────────────────────
async def gpt_translate(text, src, tgt):
    """Translate ``text`` from ``src`` to ``tgt`` via the OpenAI chat API.

    Args:
        text: Text to translate.
        src: Source language name, interpolated into the system prompt.
        tgt: Target language name, interpolated into the system prompt.

    Returns:
        The model's reply with surrounding whitespace removed, or ``""`` when
        ``text`` is empty/blank or the reply carries no content.
    """
    # Nothing to translate: skip the network round-trip entirely.
    if not text or not text.strip():
        return ""

    # Scope the client to this call so its HTTP resources are released
    # instead of being leaked on every invocation.
    async with openai.AsyncClient() as client:
        rsp = await client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system",
                 "content": f"Translate {src} โ {tgt}. Return only the text."},
                {"role": "user", "content": text},
            ],
            temperature=0.3,
            max_tokens=2048,
        )

    # The reply content may be None; guard before stripping.
    return (rsp.choices[0].message.content or "").strip()
async def gpt_tts(text, lang):
    """Synthesize speech for ``text`` and store it in a temporary MP3 file.

    Args:
        text: Text to speak; only the first 4096 characters are sent.
        lang: Language name used to pick the voice from ``VOICE``.

    Returns:
        Path of the created ``.mp3`` file. The file is created with
        ``delete=False``, so removing it is the caller's responsibility.

    Raises:
        KeyError: If ``lang`` has no entry in ``VOICE``.
    """
    # Scope the client to this call so it is closed instead of leaked, and
    # read the audio bytes while the client is still open.
    async with openai.AsyncClient() as client:
        rsp = await client.audio.speech.create(
            model="tts-1", voice=VOICE[lang], input=text[:4096])
        audio = rsp.content

    # The context manager closes the handle even if the write fails.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
        tmp.write(audio)
    return tmp.name
# ─── 2. PDF translation ──────────────────────────────────────
def translate_pdf(file, src, tgt, max_pages=5):
    """Extract text from the first pages of a PDF and translate it.

    Args:
        file: Uploaded file; either a path string or an object exposing the
            path as ``.name``. A falsy value means nothing was uploaded.
        src: Source language name.
        tgt: Target language name.
        max_pages: Number of leading pages to read (default 5, the previous
            fixed limit). ``None`` reads every page.

    Returns:
        ``(extracted_text, translation)``. When no file is given or no text
        can be extracted, ``(warning_message, "")`` is returned instead.
    """
    if not file:
        return "โ ๏ธ PDF ์ ๋ก๋ ํ์", ""

    # Accept both a plain path and an upload object carrying ``.name``.
    path = getattr(file, "name", file)
    with pdfplumber.open(path) as pdf:
        pages = pdf.pages if max_pages is None else pdf.pages[:max_pages]
        # extract_text() may yield None for a page; treat that as empty.
        text = "\n".join(p.extract_text() or "" for p in pages).strip()

    if not text:
        return "โ ๏ธ ํ ์คํธ ์ถ์ถ ์คํจ", ""

    # This handler is synchronous, so drive the async translator to completion.
    return text, asyncio.run(gpt_translate(text, src, tgt))
# ─── 3. WebSocket STT helper ─────────────────────────────────
async def ws_stt_generator(audio_queue: asyncio.Queue):
    """Stream queued audio to the STT WebSocket and yield transcripts.

    A background task drains ``audio_queue`` and sends each chunk over the
    socket; a ``None`` item is the end-of-stream sentinel and makes it send a
    ``{"terminate": true}`` JSON message and stop. Meanwhile every message
    received from the server is decoded as JSON and yielded.

    Args:
        audio_queue: Queue of audio chunks, terminated by ``None``.

    Yields:
        ``(text, final)`` where ``text`` is the message's ``"text"`` field and
        ``final`` is its ``"final"`` field (``False`` when absent).

    Raises:
        KeyError: If a server message has no ``"text"`` field.
    """
    # NOTE(review): ``extra_headers`` is the keyword this file was written
    # against; newer ``websockets`` releases may expect a different name —
    # confirm against the pinned version.
    async with websockets.connect(
        WS_URL,
        extra_headers={"Authorization": f"Bearer {openai.api_key}"},
        max_size=None,
    ) as ws:

        async def sender():
            while True:
                chunk = await audio_queue.get()
                if chunk is None:  # end-of-stream sentinel
                    await ws.send(json.dumps({"terminate": True}))
                    break
                await ws.send(chunk)

        # Keep a reference to the task so it cannot be garbage-collected
        # mid-flight, and so it can be stopped when the generator exits.
        send_task = asyncio.create_task(sender())
        try:
            async for msg in ws:
                data = json.loads(msg)
                yield data["text"], data.get("final", False)
        finally:
            # Without this the sender would stay blocked on the queue forever
            # once the socket closes or the consumer stops iterating.
            send_task.cancel()
# ─── 4. Gradio stream handlers ───────────────────────────────
async def realtime_single(mic, src, tgt, state): | |
""" | |
mic: bytes chunk (Gradio ์๋) | |
state: {"queue": Queue, "task": Task, "orig": str, "trans": str} | |
""" | |
if state is None: | |
state = {"queue": asyncio.Queue(), "task": None, "orig":"", "trans":""} | |
if mic is None: # ์คํธ๋ฆผ ์ข ๋ฃ | |
await state["queue"].put(None) | |
return state["orig"], state["trans"], state | |
# ์ฒซ ํธ์ถ์ด๋ฉด WS ํ์คํฌ ์์ | |
if state["task"] is None: | |
async def run_ws(): | |
async for text, final in ws_stt_generator(state["queue"]): | |
state["orig"] += (" " if state["orig"] else "") + text | |
add = await gpt_translate(text, src, tgt) | |
state["trans"] += (" " if state["trans"] else "") + add | |
state["task"] = asyncio.create_task(run_ws()) | |
# ๋ง์ดํฌ chunk enqueue | |
await state["queue"].put(mic) | |
return state["orig"], state["trans"], state | |
async def realtime_four(mic, src, state): | |
if state is None: | |
state = {"queue": asyncio.Queue(), "task": None, | |
"orig":"", "English":"", "Chinese":"", "Thai":"", "Russian":""} | |
if mic is None: | |
await state["queue"].put(None) | |
return tuple(state[k] for k in | |
["orig","English","Chinese","Thai","Russian"]) + (state,) | |
if state["task"] is None: | |
async def run_ws(): | |
async for text, _ in ws_stt_generator(state["queue"]): | |
state["orig"] += (" "+text) | |
for lang in FOUR: | |
state[lang] += (" "+ await gpt_translate(text, src, lang)) | |
state["task"] = asyncio.create_task(run_ws()) | |
await state["queue"].put(mic) | |
return tuple(state[k] for k in | |
["orig","English","Chinese","Thai","Russian"]) + (state,) | |
# ─── 5. UI ───────────────────────────────────────────────────
async def _translate_audio(audio_path, src, tgt):
    """Transcribe an audio file, translate the transcript and synthesize it.

    Returns ``(transcript, translation, tts_path)``; empty strings and
    ``None`` when there is no audio or nothing could be transcribed.
    """
    if not audio_path:
        return "", "", None
    async with openai.AsyncClient() as client:
        with open(audio_path, "rb") as fh:
            stt = await client.audio.transcriptions.create(
                model="whisper-1", file=fh)
    original = (stt.text or "").strip()
    if not original:
        return "", "", None
    translated = await gpt_translate(original, src, tgt)
    # Skip synthesis when the translation came back empty.
    speech = await gpt_tts(translated, tgt) if translated else None
    return original, translated, speech


with gr.Blocks(title="SMARTok Demo") as demo:
    with gr.Tabs():
        # Tab 1 — audio translation
        with gr.TabItem("๐๏ธ ์ค๋์ค"):
            src1 = gr.Dropdown(LANG, value="Korean", label="์ ๋ ฅ")
            tgt1 = gr.Dropdown(LANG, value="English", label="์ถ๋ ฅ")
            aud1 = gr.Audio(sources=["microphone", "upload"], type="filepath")
            btn1 = gr.Button("๋ฒ์ญ")
            o1 = gr.Textbox(label="์๋ฌธ")
            t1 = gr.Textbox(label="๋ฒ์ญ")
            a1 = gr.Audio(label="TTS", type="filepath", autoplay=True)
            # The previous placeholder lambda returned a function object for
            # three outputs; wire a handler that returns one value per output.
            btn1.click(_translate_audio, [aud1, src1, tgt1], [o1, t1, a1])

        # Tab 2 — PDF translation
        with gr.TabItem("๐ PDF"):
            src2 = gr.Dropdown(LANG, value="Korean", label="์ ๋ ฅ")
            tgt2 = gr.Dropdown(LANG, value="English", label="์ถ๋ ฅ")
            pdf = gr.File(file_types=[".pdf"])
            btn2 = gr.Button("๋ฒ์ญ")
            o2 = gr.Textbox(label="์ถ์ถ ์๋ฌธ", lines=15)
            t2 = gr.Textbox(label="๋ฒ์ญ ๊ฒฐ๊ณผ", lines=15)
            btn2.click(translate_pdf, [pdf, src2, tgt2], [o2, t2])

        # Tab 3 — realtime, single target language
        with gr.TabItem("โฑ๏ธ ์ค์๊ฐ 1"):
            src3 = gr.Dropdown(LANG, value="Korean", label="์ ๋ ฅ")
            tgt3 = gr.Dropdown(LANG, value="English", label="์ถ๋ ฅ")
            mic3 = gr.Audio(sources=["microphone"], streaming=True)
            o3 = gr.Textbox(label="์๋ฌธ(์ค์๊ฐ)", lines=8)
            t3 = gr.Textbox(label="๋ฒ์ญ(์ค์๊ฐ)", lines=8)
            st3 = gr.State()
            # The handler's first parameter is the audio chunk, so the mic
            # component has to be the first input.
            mic3.stream(realtime_single, inputs=[mic3, src3, tgt3, st3],
                        outputs=[o3, t3, st3])

        # Tab 4 — realtime, four target languages
        with gr.TabItem("๐ ์ค์๊ฐ 4"):
            src4 = gr.Dropdown(LANG, value="Korean", label="์ ๋ ฅ")
            mic4 = gr.Audio(sources=["microphone"], streaming=True)
            o4 = gr.Textbox(label="์๋ฌธ", lines=8)
            e4 = gr.Textbox(label="English", lines=8)
            c4 = gr.Textbox(label="Chinese(็ฎไฝ)", lines=8)
            th4 = gr.Textbox(label="Thai", lines=8)
            r4 = gr.Textbox(label="Russian", lines=8)
            st4 = gr.State()
            # Same as above: the mic component must be passed as an input.
            mic4.stream(realtime_four, inputs=[mic4, src4, st4],
                        outputs=[o4, e4, c4, th4, r4, st4])

demo.launch(server_name="0.0.0.0", server_port=7860, debug=True)