Spaces:
Running
on
Zero
Running
on
Zero
File size: 8,238 Bytes
effad1c 364ce74 effad1c b3067c5 364ce74 6bdc489 6b6f26e 364ce74 b3067c5 364ce74 6bdc489 364ce74 6bdc489 364ce74 6b6f26e 364ce74 fd022eb 364ce74 392a5eb 364ce74 a609646 364ce74 a609646 364ce74 a609646 364ce74 6b6f26e 364ce74 6b6f26e 364ce74 fd022eb 364ce74 6b6f26e 364ce74 fd022eb 364ce74 6b6f26e 364ce74 6b6f26e 364ce74 6b6f26e 364ce74 6b6f26e 364ce74 6b6f26e 364ce74 7cce69a 364ce74 392a5eb 364ce74 6b6f26e 364ce74 6b6f26e 364ce74 6b6f26e 364ce74 392a5eb 364ce74 6b6f26e 364ce74 6b6f26e 364ce74 6b6f26e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 |
"""
SMARTok demo – image OCR / real-time tab bug-fix revision
──────────────────────────────────────────
• Images → ocrmypdf (+ghostscript) first; on failure, direct pytesseract OCR
• Real-time 1-/4-language tabs: State argument/output counts aligned to remove warnings
"""
import gradio as gr
import openai, os, io, tempfile, mimetypes
from dotenv import load_dotenv
from PIL import Image
import pdfplumber, pytesseract, ocrmypdf, subprocess, shlex
# ───── 0. Init ───────────────────────────────────────────────────
# Read variables from a local .env file (if any) before the API key lookup.
load_dotenv()

# Shared OpenAI client; an unset OPENAI_API_KEY becomes an empty string.
client = openai.OpenAI(api_key=os.environ.get("OPENAI_API_KEY", ""))

# Languages offered in the UI dropdowns, in display order.
LANG = [
    "Korean",
    "English",
    "Japanese",
    "Chinese",
    "Thai",
    "Russian",
    "Vietnamese",
    "Spanish",
    "French",
]

# Display name -> two-letter code passed as `language` to the transcription API.
LC = {
    "Korean": "ko",
    "English": "en",
    "Japanese": "ja",
    "Chinese": "zh",
    "Thai": "th",
    "Russian": "ru",
    "Vietnamese": "vi",
    "Spanish": "es",
    "French": "fr",
}

# TTS voice per language: "nova" for Korean/Japanese/Chinese, "alloy" otherwise.
VOICE = {
    **dict.fromkeys(LANG, "alloy"),
    "Korean": "nova",
    "Japanese": "nova",
    "Chinese": "nova",
}

# Target languages of the four-language real-time tab.
FOUR = ["English", "Chinese", "Thai", "Russian"]

# Chunk length in seconds (not referenced elsewhere in the visible source).
CHUNK = 4
# ───── 1. Helpers ────────────────────────────────────────────────
def _safe(v): return None if v is None else (v["name"] if isinstance(v,dict) else v)
def _gpt(txt, src, tgt):
    """Translate ``txt`` from ``src`` to ``tgt`` via the chat-completions API.

    Args:
        txt: Text to translate (sent as the user message).
        src: Source language name, interpolated into the system prompt.
        tgt: Target language name, interpolated into the system prompt.

    Returns:
        The first choice's message content with surrounding whitespace
        removed, or ``""`` when the response carries no text content.
    """
    rsp = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {
                "role": "system",
                # The arrow was mojibake in the prompt; restored to "→".
                "content": f"Translate {src} → {tgt}. Return only the translation.",
            },
            {"role": "user", "content": txt},
        ],
        temperature=0.3,
        max_tokens=4096,
    )
    # The SDK types message.content as optional; guard so .strip() cannot
    # raise AttributeError on a content-less reply.
    return (rsp.choices[0].message.content or "").strip()
def _tts(txt, lang):
    """Synthesize speech for ``txt`` and return the path of the MP3 file.

    Args:
        txt: Text to speak; only the first 4096 characters are sent.
        lang: Language name used to pick a voice from ``VOICE``
            (``"alloy"`` when the language is not listed).

    Returns:
        Path of a temporary ``.mp3`` file. The file is created with
        ``delete=False`` and is left on disk for the caller to use.
    """
    out = client.audio.speech.create(
        model="tts-1",
        voice=VOICE.get(lang, "alloy"),
        input=txt[:4096],
    )
    # The context manager closes the handle even if the write fails.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
        f.write(out.content)
    return f.name
# ───── 2. Single Audio translate ─────────────────────────────────
def trans_audio(inp, src, tgt):
    """Transcribe an audio file, translate the transcript and speak the result.

    Args:
        inp: Audio input value; normalized to a path with ``_safe``.
        src: Source language name (also selects the transcription language).
        tgt: Target language name.

    Returns:
        ``(original_text, translated_text, tts_path)``. When the file is
        missing or nothing was recognized, the first element is a warning
        message, the second is ``""`` and the third is ``None``.
    """
    p = _safe(inp)
    if not p or not os.path.exists(p):
        # "File required" (message text restored from mojibake).
        return "⚠️ 파일 필요", "", None

    with open(p, "rb") as f:
        stt = client.audio.transcriptions.create(
            model="whisper-1", file=f, language=LC.get(src)
        )

    orig = stt.text.strip()
    if not orig:
        # "Recognition failed" (message text restored from mojibake).
        return "⚠️ 인식 실패", "", None

    trans = _gpt(orig, src, tgt)
    # Skip speech synthesis when there is nothing to speak.
    return orig, trans, (_tts(trans, tgt) if trans else None)
# ───── 3. Doc/Image translate ────────────────────────────────────
# Tesseract traineddata codes per UI language. The two-letter codes in LC
# ("ko", "en", ...) are not Tesseract language names, so OCR needs its own map.
_TESS_LANG = {
    "Korean": "kor",
    "English": "eng",
    "Japanese": "jpn",
    "Chinese": "chi_sim",
    "Thai": "tha",
    "Russian": "rus",
    "Vietnamese": "vie",
    "Spanish": "spa",
    "French": "fra",
}


def _ocr_image(path, tess_lang):
    """Return the text recognized in the image at ``path``.

    Tries ocrmypdf first (image -> PDF -> OCR'd PDF -> text). If any step of
    that pipeline raises, falls back to running pytesseract on the image.
    Exceptions from the fallback itself propagate to the caller.
    """
    try:
        # All intermediate PDFs live in a directory that is removed on exit.
        with tempfile.TemporaryDirectory() as tmp:
            src_pdf = os.path.join(tmp, "input.pdf")
            ocr_pdf = os.path.join(tmp, "ocr.pdf")
            with Image.open(path) as img:
                # Convert to RGB first: the PDF writer rejects some modes
                # (e.g. RGBA).
                img.convert("RGB").save(src_pdf, "PDF")
            ocrmypdf.ocr(
                src_pdf,
                ocr_pdf,
                language=[tess_lang],
                deskew=True,
                optimize=0,
                progress_bar=False,
            )
            with pdfplumber.open(ocr_pdf) as pdf:
                return "\n".join(pg.extract_text() or "" for pg in pdf.pages)
    except Exception:
        # Ghostscript missing or ocrmypdf failed -> direct OCR.
        with Image.open(path) as img:
            return pytesseract.image_to_string(img, lang=tess_lang)


def trans_doc(file_in, src, tgt):
    """Extract text from a PDF or image and translate it.

    Args:
        file_in: Uploaded file value; normalized to a path with ``_safe``.
        src: Source language name (also selects the OCR language for images).
        tgt: Target language name.

    Returns:
        ``(extracted_text, translated_text)``. When the file is missing,
        extraction raises, or no text is found, the first element is a
        warning/error message and the second is ``""``.
    """
    p = _safe(file_in)
    if not p or not os.path.exists(p):
        # "Upload a file" (message text restored from mojibake).
        return "⚠️ 파일 업로드", ""

    ext = os.path.splitext(p)[1].lower()
    mime = mimetypes.guess_type(p)[0] or ""
    try:
        if ext == ".pdf" or "pdf" in mime:
            # PDF: read the embedded text of the first five pages only.
            with pdfplumber.open(p) as pdf:
                txt = "\n".join(pg.extract_text() or "" for pg in pdf.pages[:5])
        else:
            # Anything else is treated as an image and OCR'd.
            txt = _ocr_image(p, _TESS_LANG.get(src, "eng"))
    except Exception as e:
        # "Extraction error" (message text restored from mojibake).
        return f"❌ 추출 오류: {e}", ""

    txt = txt.strip()
    if not txt:
        # "Text extraction failed" (message text restored from mojibake).
        return "⚠️ 텍스트 추출 실패", ""
    return txt, _gpt(txt, src, tgt)
# ───── 4. Real-time single lang ──────────────────────────────────
def stream_one(path, src, tgt, state):
    """Streaming callback: transcribe ``path`` and translate the new text.

    Args:
        path: Audio file path for the current stream event.
        src: Source language name.
        tgt: Target language name.
        state: Dict with keys ``"o"`` (transcript so far) and ``"t"``
            (translation so far); a falsy value starts a fresh dict.

    Returns:
        ``(transcript, translation, state)``. The state dict is updated in
        place and returned so it can be passed to the next call.
    """
    if not state:
        state = {"o": "", "t": ""}

    # No usable audio file: report what has been accumulated so far.
    if not (path and os.path.exists(path)):
        return state["o"], state["t"], state

    with open(path, "rb") as audio:
        result = client.audio.transcriptions.create(
            model="whisper-1", file=audio, language=LC.get(src)
        )

    transcript = result.text.strip()
    # Only the part beyond the previously stored transcript length is new.
    delta = transcript[len(state["o"]):]
    if delta:
        state["o"] = transcript
        state["t"] = state["t"] + " " + _gpt(delta, src, tgt)

    return state["o"], state["t"].strip(), state
# ───── 5. Real-time 4 langs ──────────────────────────────────────
def stream_four(path, src, state):
    """Streaming callback: transcribe ``path`` and translate into ``FOUR``.

    Args:
        path: Audio file path for the current stream event.
        src: Source language name.
        state: Dict keyed by ``"o"`` (transcript so far) plus one key per
            language in ``FOUR``; a falsy value starts a fresh dict.

    Returns:
        ``(transcript, english, chinese, thai, russian, state)``. The state
        dict is updated in place and returned for the next call.
    """
    if not state:
        state = {key: "" for key in ["o"] + FOUR}

    # No usable audio file: return the accumulated values untouched.
    if not (path and os.path.exists(path)):
        return (
            state["o"],
            state["English"],
            state["Chinese"],
            state["Thai"],
            state["Russian"],
            state,
        )

    with open(path, "rb") as audio:
        result = client.audio.transcriptions.create(
            model="whisper-1", file=audio, language=LC.get(src)
        )

    transcript = result.text.strip()
    # Only the part beyond the previously stored transcript length is new.
    delta = transcript[len(state["o"]):]
    if delta:
        state["o"] = transcript
        for target in FOUR:
            state[target] = state[target] + " " + _gpt(delta, src, target)

    shown = [
        state[key].strip()
        for key in ("o", "English", "Chinese", "Thai", "Russian")
    ]
    return (*shown, state)
# ───── 6. UI ─────────────────────────────────────────────────────
# Four-tab Gradio UI. Labels are Korean; they were mojibake (some split across
# two lines) and have been restored.
# NOTE(review): the emoji on tabs 1, 2 and 4 were reconstructed from lossy
# mojibake and are best guesses — confirm against the original file.
with gr.Blocks(title="SMARTok Demo", theme=gr.themes.Soft()) as app:
    with gr.Tabs():
        # Tab 1: one-shot audio translation with TTS playback.
        with gr.TabItem("🎙️ 오디오 번역"):
            s1 = gr.Dropdown(LANG, value="Korean", label="입력")    # input language
            t1 = gr.Dropdown(LANG, value="English", label="출력")   # output language
            a1 = gr.Audio(sources=["microphone", "upload"], type="filepath")
            btn1 = gr.Button("번역")                                 # translate
            o1 = gr.Textbox(label="원문", lines=5)                   # original text
            tr1 = gr.Textbox(label="번역", lines=5)                  # translation
            aud1 = gr.Audio(label="TTS", type="filepath", autoplay=True)
            btn1.click(trans_audio, [a1, s1, t1], [o1, tr1, aud1])

        # Tab 2: document / image translation.
        with gr.TabItem("📄 문서·이미지 번역"):
            s2 = gr.Dropdown(LANG, value="Korean", label="입력")
            t2 = gr.Dropdown(LANG, value="English", label="출력")
            f2 = gr.File(
                file_types=[".pdf", ".png", ".jpg", ".jpeg", ".bmp", ".tiff", ".gif"]
            )
            btn2 = gr.Button("번역")
            o2 = gr.Textbox(label="추출 원문", lines=15)             # extracted text
            tr2 = gr.Textbox(label="번역 결과", lines=15)            # translation result
            btn2.click(trans_doc, [f2, s2, t2], [o2, tr2])

        # Tab 3: real-time translation into one language.
        with gr.TabItem("⏱️ 실시간 1언어"):
            s3 = gr.Dropdown(LANG, value="Korean", label="입력")
            t3 = gr.Dropdown(LANG, value="English", label="출력")
            # stream_one checks os.path.exists(path), so it needs a file path.
            mic3 = gr.Audio(sources=["microphone"], streaming=True, type="filepath")
            # Two separate boxes: binding one Textbox to both outputs would
            # let the translation overwrite the transcript.
            o3 = gr.Textbox(lines=8, label="원문")
            tr3 = gr.Textbox(lines=8, label="번역")
            st3 = gr.State()
            # The microphone must be the first input: stream_one takes
            # (path, src, tgt, state).
            mic3.stream(
                stream_one,
                inputs=[mic3, s3, t3, st3],
                outputs=[o3, tr3, st3],
            )

        # Tab 4: real-time translation into four languages.
        with gr.TabItem("🌏 실시간 4언어"):
            s4 = gr.Dropdown(LANG, value="Korean", label="입력 언어")  # input language
            mic4 = gr.Audio(sources=["microphone"], streaming=True, type="filepath")
            o4 = gr.Textbox(label="원문", lines=8)
            e4 = gr.Textbox(label="English", lines=8)
            c4 = gr.Textbox(label="Chinese(简体)", lines=8)
            th4 = gr.Textbox(label="Thai", lines=8)
            r4 = gr.Textbox(label="Russian", lines=8)
            st4 = gr.State()
            # stream_four takes (path, src, state); include the microphone.
            mic4.stream(
                stream_four,
                inputs=[mic4, s4, st4],
                outputs=[o4, e4, c4, th4, r4, st4],
            )
# ───── 7. Run ────────────────────────────────────────────────────
if __name__ == "__main__":
    # Serve on all interfaces, port 7860, without a public share link and
    # with Gradio's debug mode switched on.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "debug": True,
    }
    app.launch(**launch_options)
|