Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -1,203 +1,155 @@
|
|
1 |
"""
|
2 |
-
SMARTok
|
3 |
-
|
4 |
-
โข
|
5 |
-
โข ํญ
|
6 |
-
โข ํญ3 โฑ๏ธ ์ค์๊ฐ 1์ธ์ด ๋ฒ์ญ : ๋ง์ดํฌ โ 1๊ฐ ์ธ์ด ์ค์๊ฐ ์๋ง
|
7 |
-
โข ํญ4 ๐ ์ค์๊ฐ 4๊ฐ ์ธ์ด ๋ฒ์ญ : ๋ง์ดํฌ โ ์ยท์คยทํยท๋ฌ ๋์ ์๋ง
|
8 |
-
โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
9 |
-
ํ์ apt : tesseract-ocr libtesseract-dev ocrmypdf ffmpeg
|
10 |
-
ํ์ pip : gradio>=5.33 openai python-dotenv pdfplumber ocrmypdf pillow
|
11 |
"""
|
12 |
|
13 |
import gradio as gr
|
14 |
-
import openai, os, io, tempfile, mimetypes
|
15 |
from dotenv import load_dotenv
|
16 |
-
import pdfplumber, ocrmypdf
|
17 |
from PIL import Image
|
|
|
18 |
|
19 |
-
#
|
20 |
load_dotenv()
|
21 |
-
|
22 |
-
if not api_key:
|
23 |
-
raise RuntimeError("OPENAI_API_KEY๋ฅผ .env ํ์ผ์ ์ค์ ํ์ธ์!")
|
24 |
-
client = openai.OpenAI(api_key=api_key)
|
25 |
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
]
|
31 |
-
LANG_CODE = {
|
32 |
-
"Korean":"ko","English":"en","Japanese":"ja","Chinese":"zh",
|
33 |
-
"Thai":"th","Russian":"ru","Vietnamese":"vi",
|
34 |
-
"Spanish":"es","French":"fr"
|
35 |
-
}
|
36 |
-
VOICE = {l: ("nova" if l in ["Korean","Japanese","Chinese"] else "alloy")
|
37 |
-
for l in LANGUAGES}
|
38 |
FOUR = ["English","Chinese","Thai","Russian"]
|
39 |
-
|
40 |
|
41 |
-
#
|
42 |
-
def _safe(v):
|
43 |
-
if v is None:
|
44 |
-
return None
|
45 |
-
return v["name"] if isinstance(v, dict) else v
|
46 |
|
47 |
-
def _gpt(
|
48 |
rsp = client.chat.completions.create(
|
49 |
model="gpt-3.5-turbo",
|
50 |
-
messages=[
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
{"role":"user","content":text}
|
55 |
-
],
|
56 |
-
temperature=0.3,max_tokens=4096
|
57 |
-
)
|
58 |
return rsp.choices[0].message.content.strip()
|
59 |
|
60 |
-
def _tts(
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
)
|
66 |
-
tmp = tempfile.NamedTemporaryFile(delete=False,suffix=".mp3")
|
67 |
-
tmp.write(rsp.content); tmp.close()
|
68 |
-
return tmp.name
|
69 |
|
70 |
-
#
|
71 |
-
def
|
72 |
-
p
|
73 |
-
if not p or not os.path.exists(p):
|
74 |
-
return "โ ๏ธ ์์ฑ ํ์ผ ํ์", "", None
|
75 |
with open(p,"rb") as f:
|
76 |
-
stt
|
77 |
-
|
78 |
-
orig
|
79 |
-
if not orig:
|
80 |
-
|
81 |
-
|
82 |
-
return orig, trans, _tts(trans, tgt)
|
83 |
|
84 |
-
#
|
85 |
-
def
|
86 |
-
p
|
87 |
-
if not p or not os.path.exists(p):
|
88 |
-
|
89 |
-
|
90 |
-
mime = mimetypes.guess_type(p)[0] or ""
|
91 |
try:
|
92 |
-
# PDF
|
93 |
-
if ext==".pdf" or "pdf" in mime:
|
94 |
with pdfplumber.open(p) as pdf:
|
95 |
-
txt
|
96 |
-
# ์ด๋ฏธ์ง
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
|
|
|
|
106 |
except Exception as e:
|
107 |
-
return f"โ ์ถ์ถ ์ค๋ฅ: {e}",
|
108 |
-
txt
|
109 |
-
if not txt:
|
110 |
-
|
111 |
-
return txt, _gpt(txt, src, tgt)
|
112 |
|
113 |
-
#
|
114 |
-
def
|
115 |
-
state
|
116 |
-
if not
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
full = stt.text.strip()
|
122 |
-
new = full[len(state["orig"]):]
|
123 |
if new:
|
124 |
-
state["
|
125 |
-
state["
|
126 |
-
return state["
|
127 |
|
128 |
-
#
|
129 |
-
def
|
130 |
-
state
|
131 |
-
if not
|
132 |
-
return state["
|
133 |
-
with open(
|
134 |
-
stt
|
135 |
-
|
136 |
-
full
|
137 |
-
new = full[len(state["orig"]):]
|
138 |
if new:
|
139 |
-
state["
|
140 |
-
for
|
141 |
-
state[
|
142 |
-
return (state["
|
143 |
-
state["
|
144 |
-
state["Chinese"].strip(),
|
145 |
-
state["Thai"].strip(),
|
146 |
-
state["Russian"].strip(),
|
147 |
-
state)
|
148 |
|
149 |
-
#
|
150 |
-
with gr.Blocks(title="SMARTok Demo",
|
151 |
with gr.Tabs():
|
152 |
-
# ํญ
|
153 |
with gr.TabItem("๐๏ธ ์ค๋์ค ๋ฒ์ญ"):
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
o1
|
159 |
-
|
160 |
-
|
161 |
-
res1.click(translate_audio,[aud1,src1,tgt1],[o1,t1,a1])
|
162 |
|
163 |
-
# ํญ
|
164 |
with gr.TabItem("๐ ๋ฌธ์ยท์ด๋ฏธ์ง ๋ฒ์ญ"):
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
o2 = gr.Textbox(label="์ถ์ถ ์๋ฌธ",lines=15)
|
172 |
-
t2 = gr.Textbox(label="๋ฒ์ญ ๊ฒฐ๊ณผ",lines=15)
|
173 |
-
doc2.click(translate_doc,[file2,src2,tgt2],[o2,t2])
|
174 |
|
175 |
-
# ํญ
|
176 |
with gr.TabItem("โฑ๏ธ ์ค์๊ฐ 1์ธ์ด"):
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
st3 = gr.State()
|
183 |
-
mic3.stream(stream_single,
|
184 |
-
inputs=[src3,tgt3,st3],
|
185 |
-
outputs=[o3,t3,st3])
|
186 |
|
187 |
-
# ํญ
|
188 |
-
with gr.TabItem("๐ ์ค์๊ฐ 4
|
189 |
-
|
190 |
-
mic4
|
191 |
-
o4
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
r4 = gr.Textbox(label="Russian",lines=8)
|
196 |
-
st4 = gr.State()
|
197 |
-
mic4.stream(stream_multi,
|
198 |
-
inputs=[src4,st4],
|
199 |
outputs=[o4,e4,c4,th4,r4,st4])
|
200 |
|
201 |
-
#
|
202 |
-
if __name__
|
203 |
app.launch(server_name="0.0.0.0",server_port=7860,share=False,debug=True)
|
|
|
1 |
"""
|
2 |
+
SMARTok ๋ฐ๋ชจ โ ์ด๋ฏธ์ง OCRยท์ค์๊ฐ ํญ ์ค๋ฅ ์์ ๋ณธ
|
3 |
+
โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
4 |
+
โข ์ด๋ฏธ์ง โ ocrmypdf (+ghostscript) ์ฐ์ , ์คํจ ์ pytesseract ์ง์ OCR
|
5 |
+
โข ์ค์๊ฐ 1ยท4์ธ์ด ํญ : State ์ธ์/์ถ๋ ฅ ๊ฐ์ ๋ง์ถฐ ๊ฒฝ๊ณ ์ ๊ฑฐ
|
|
|
|
|
|
|
|
|
|
|
6 |
"""
|
7 |
|
8 |
import gradio as gr
|
9 |
+
import openai, os, io, tempfile, mimetypes
|
10 |
from dotenv import load_dotenv
|
|
|
11 |
from PIL import Image
|
12 |
+
import pdfplumber, pytesseract, ocrmypdf, subprocess, shlex
|
13 |
|
14 |
+
# ───── 0. Init ─────────────────────────────────────────────
|
15 |
# Read a local .env file (if any) before the API key is looked up.
load_dotenv()

# A missing OPENAI_API_KEY falls back to "" so that constructing the client
# does not depend on the variable being set.
# NOTE(review): requests made with an empty key are presumably rejected by
# the API at call time — confirm this is the intended failure mode.
client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY", ""))
|
|
|
|
|
|
|
17 |
|
18 |
+
# Language names offered in the UI dropdowns.
LANG = [
    "Korean", "English", "Japanese", "Chinese",
    "Thai", "Russian", "Vietnamese", "Spanish", "French",
]

# Two-letter code for each UI language name.
LC = {
    "Korean": "ko", "English": "en", "Japanese": "ja", "Chinese": "zh",
    "Thai": "th", "Russian": "ru", "Vietnamese": "vi",
    "Spanish": "es", "French": "fr",
}

# TTS voice per language: "nova" for Korean/Japanese/Chinese, "alloy" otherwise.
VOICE = {
    name: ("nova" if name in ("Korean", "Japanese", "Chinese") else "alloy")
    for name in LANG
}

# Target languages of the four-language real-time tab.
FOUR = ["English", "Chinese", "Thai", "Russian"]

CHUNK = 4  # sec
|
25 |
|
26 |
+
# ───── 1. Helpers ──────────────────────────────────────────
|
27 |
+
def _safe(v): return None if v is None else (v["name"] if isinstance(v,dict) else v)
|
|
|
|
|
|
|
28 |
|
29 |
+
def _gpt(txt, src, tgt):
    """Translate ``txt`` from ``src`` to ``tgt`` via the chat-completions API.

    Args:
        txt: Text to translate.
        src: Source language name (e.g. ``"Korean"``).
        tgt: Target language name.

    Returns:
        The model's reply with surrounding whitespace removed, or ``""``
        when the reply carries no text content.
    """
    rsp = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {
                "role": "system",
                # The arrow was mis-encoded in the scraped source; restored.
                "content": f"Translate {src} → {tgt}. Return only the translation.",
            },
            {"role": "user", "content": txt},
        ],
        temperature=0.3,
        max_tokens=4096,
    )
    # ``content`` is optional on the response message; guard before .strip().
    return (rsp.choices[0].message.content or "").strip()
|
37 |
|
38 |
+
def _tts(txt, lang):
|
39 |
+
out = client.audio.speech.create(model="tts-1",voice=VOICE.get(lang,"alloy"),
|
40 |
+
input=txt[:4096])
|
41 |
+
f = tempfile.NamedTemporaryFile(delete=False,suffix=".mp3")
|
42 |
+
f.write(out.content); f.close(); return f.name
|
|
|
|
|
|
|
|
|
43 |
|
44 |
+
# ───── 2. Single Audio translate ───────────────────────────
|
45 |
+
def trans_audio(inp, src, tgt):
    """Transcribe an audio file, translate the transcript and speak it.

    Args:
        inp: Value of the audio input component (unwrapped with ``_safe``).
        src: Source language name; its ``LC`` code is passed to the
            transcription call.
        tgt: Target language name.

    Returns:
        ``(original_text, translated_text, tts_path)``. When the file is
        missing or nothing was recognized, the first element is a warning
        message, the second is ``""`` and the third is ``None``.
    """
    path = _safe(inp)
    if not path or not os.path.exists(path):
        return "⚠️ 파일 필요", "", None  # "file required"

    with open(path, "rb") as audio:
        stt = client.audio.transcriptions.create(
            model="whisper-1",
            file=audio,
            language=LC.get(src),
        )

    orig = stt.text.strip()
    if not orig:
        return "⚠️ 인식 실패", "", None  # "recognition failed"

    trans = _gpt(orig, src, tgt)
    return orig, trans, _tts(trans, tgt)
|
|
|
55 |
|
56 |
+
# ───── 3. Doc/Image translate ──────────────────────────────
|
57 |
+
# Tesseract language-pack names per UI language. Tesseract (and ocrmypdf,
# which drives it) selects packs by names such as "kor"/"eng"; the
# two-letter codes used for transcription are not valid here.
_TESS_LANG = {
    "Korean": "kor", "English": "eng", "Japanese": "jpn", "Chinese": "chi_sim",
    "Thai": "tha", "Russian": "rus", "Vietnamese": "vie",
    "Spanish": "spa", "French": "fra",
}


def _ocr_image(path, tess_lang):
    """Return the text recognized in the image at ``path``.

    ocrmypdf is tried first (image -> PDF -> OCR text layer -> pdfplumber).
    If any step of that route fails, the image is passed straight to
    pytesseract instead.
    """
    # Intermediate PDFs live in a directory that is removed on exit.
    with tempfile.TemporaryDirectory() as workdir:
        raw_pdf = os.path.join(workdir, "input.pdf")
        ocr_pdf = os.path.join(workdir, "ocr.pdf")
        try:
            with Image.open(path) as img:
                # PDF export rejects some modes (e.g. RGBA), so normalize.
                img.convert("RGB").save(raw_pdf, "PDF")
            ocrmypdf.ocr(
                raw_pdf,
                ocr_pdf,
                language=[tess_lang],
                deskew=True,
                optimize=0,
                progress_bar=False,
            )
            with pdfplumber.open(ocr_pdf) as pdf:
                return "\n".join(pg.extract_text() or "" for pg in pdf.pages)
        except Exception:
            # Deliberate best-effort: Ghostscript missing or ocrmypdf failed.
            with Image.open(path) as img:
                return pytesseract.image_to_string(img, lang=tess_lang)


def trans_doc(file_in, src, tgt):
    """Extract text from a PDF or image and translate it.

    Args:
        file_in: Value of the file input component (unwrapped with ``_safe``).
        src: Source language name; also selects the OCR language pack.
        tgt: Target language name.

    Returns:
        ``(extracted_text, translated_text)``. On a missing file, an
        extraction error or empty extraction, the first element is a
        message and the second is ``""``.
    """
    path = _safe(file_in)
    if not path or not os.path.exists(path):
        return "⚠️ 파일 업로드", ""  # "upload a file"

    ext = os.path.splitext(path)[1].lower()
    mime = mimetypes.guess_type(path)[0] or ""
    try:
        if ext == ".pdf" or "pdf" in mime:
            with pdfplumber.open(path) as pdf:
                # Only the first five pages are read.
                txt = "\n".join(pg.extract_text() or "" for pg in pdf.pages[:5])
        else:
            txt = _ocr_image(path, _TESS_LANG.get(src, "eng"))
    except Exception as e:
        # NOTE(review): the leading symbol was unrecoverable from the
        # mis-encoded source; "❌" is assumed — confirm against upstream.
        return f"❌ 추출 오류: {e}", ""  # "extraction error"

    txt = txt.strip()
    if not txt:
        return "⚠️ 텍스트 추출 실패", ""  # "text extraction failed"
    return txt, _gpt(txt, src, tgt)
|
|
|
83 |
|
84 |
+
# ───── 4. Real-time single lang ────────────────────────────
|
85 |
+
def stream_one(path, src, tgt, state):
    """Handle one streamed audio file for the single-language tab.

    Args:
        path: Path of the audio file to transcribe.
        src: Source language name.
        tgt: Target language name.
        state: Dict with keys ``"o"`` (running original) and ``"t"``
            (running translation), or a falsy value on the first call.

    Returns:
        ``(original_text, translated_text, state)``.
    """
    state = state or {"o": "", "t": ""}
    if not path or not os.path.exists(path):
        return state["o"], state["t"], state

    with open(path, "rb") as audio:
        stt = client.audio.transcriptions.create(
            model="whisper-1",
            file=audio,
            language=LC.get(src),
        )

    heard = stt.text.strip()
    prev = state["o"]
    if prev and heard.startswith(prev):
        # Cumulative transcript: only the tail after the known text is new.
        fresh = heard[len(prev):].strip()
        state["o"] = heard
    else:
        # Transcript of a separate chunk: slicing it by the length of the
        # previous text would drop words, so append it whole.
        fresh = heard
        if fresh:
            state["o"] = f"{prev} {heard}".strip()

    if fresh:
        state["t"] += " " + _gpt(fresh, src, tgt)
    return state["o"], state["t"].strip(), state
|
96 |
|
97 |
+
# ───── 5. Real-time 4 langs ────────────────────────────────
|
98 |
+
def stream_four(path, src, state):
    """Handle one streamed audio file for the four-language tab.

    Args:
        path: Path of the audio file to transcribe.
        src: Source language name.
        state: Dict with key ``"o"`` (running original) plus one key per
            language in ``FOUR``, or a falsy value on the first call.

    Returns:
        ``(original, english, chinese, thai, russian, state)``.
    """
    state = state or {key: "" for key in ["o"] + FOUR}
    if not path or not os.path.exists(path):
        return (state["o"], state["English"], state["Chinese"],
                state["Thai"], state["Russian"], state)

    with open(path, "rb") as audio:
        stt = client.audio.transcriptions.create(
            model="whisper-1",
            file=audio,
            language=LC.get(src),
        )

    heard = stt.text.strip()
    prev = state["o"]
    if prev and heard.startswith(prev):
        # Cumulative transcript: only the tail after the known text is new.
        fresh = heard[len(prev):].strip()
        state["o"] = heard
    else:
        # Transcript of a separate chunk: slicing it by the length of the
        # previous text would drop words, so append it whole.
        fresh = heard
        if fresh:
            state["o"] = f"{prev} {heard}".strip()

    if fresh:
        for target in FOUR:
            state[target] += " " + _gpt(fresh, src, target)

    return (state["o"].strip(), state["English"].strip(), state["Chinese"].strip(),
            state["Thai"].strip(), state["Russian"].strip(), state)
|
|
|
|
|
|
|
|
|
112 |
|
113 |
+
# ───── 6. UI ───────────────────────────────────────────────
|
114 |
+
# Four-tab UI. Korean labels were restored from the mis-encoded source;
# English glosses are given in the comments.
with gr.Blocks(title="SMARTok Demo", theme=gr.themes.Soft()) as app:
    with gr.Tabs():
        # Tab 1: translate a recorded or uploaded audio file.
        # NOTE(review): the tab emoji was unrecoverable; "🎙️" is assumed.
        with gr.TabItem("🎙️ 오디오 번역"):  # "audio translation"
            s1 = gr.Dropdown(LANG, value="Korean", label="입력")  # "input"
            t1 = gr.Dropdown(LANG, value="English", label="출력")  # "output"
            a1 = gr.Audio(sources=["microphone", "upload"], type="filepath")
            btn1 = gr.Button("번역")  # "translate"
            o1 = gr.Textbox(label="원문", lines=5)  # "original"
            tr1 = gr.Textbox(label="번역", lines=5)  # "translation"
            aud1 = gr.Audio(label="TTS", type="filepath", autoplay=True)
            btn1.click(trans_audio, [a1, s1, t1], [o1, tr1, aud1])

        # Tab 2: translate a PDF or image.
        # NOTE(review): the tab emoji was unrecoverable; "📄" is assumed.
        with gr.TabItem("📄 문서·이미지 번역"):  # "document/image translation"
            s2 = gr.Dropdown(LANG, value="Korean", label="입력")
            t2 = gr.Dropdown(LANG, value="English", label="출력")
            f2 = gr.File(
                file_types=[".pdf", ".png", ".jpg", ".jpeg", ".bmp", ".tiff", ".gif"]
            )
            btn2 = gr.Button("번역")
            o2 = gr.Textbox(label="추출 원문", lines=15)  # "extracted original"
            tr2 = gr.Textbox(label="번역 결과", lines=15)  # "translation result"
            btn2.click(trans_doc, [f2, s2, t2], [o2, tr2])

        # Tab 3: real-time, one target language.
        with gr.TabItem("⏱️ 실시간 1언어"):  # "real-time, 1 language"
            s3 = gr.Dropdown(LANG, value="Korean", label="입력")
            t3 = gr.Dropdown(LANG, value="English", label="출력")
            # stream_one opens its first argument as a file, so the
            # component must deliver a path.
            mic3 = gr.Audio(sources=["microphone"], streaming=True, type="filepath")
            # Separate boxes: one shared component bound to both outputs
            # would show only the value written last.
            o3 = gr.Textbox(label="원문", lines=8)
            tr3 = gr.Textbox(label="번역", lines=8)
            st3 = gr.State()
            # The microphone itself must be the first input so that the
            # handler's (path, src, tgt, state) parameters line up.
            mic3.stream(
                stream_one,
                inputs=[mic3, s3, t3, st3],
                outputs=[o3, tr3, st3],
            )

        # Tab 4: real-time, four target languages.
        # NOTE(review): the tab emoji was unrecoverable; "🌐" is assumed.
        with gr.TabItem("🌐 실시간 4언어"):  # "real-time, 4 languages"
            s4 = gr.Dropdown(LANG, value="Korean", label="입력 언어")  # "input language"
            mic4 = gr.Audio(sources=["microphone"], streaming=True, type="filepath")
            o4 = gr.Textbox(label="원문", lines=8)
            e4 = gr.Textbox(label="English", lines=8)
            c4 = gr.Textbox(label="Chinese(简体)", lines=8)
            th4 = gr.Textbox(label="Thai", lines=8)
            r4 = gr.Textbox(label="Russian", lines=8)
            st4 = gr.State()
            # As in tab 3: the handler is (path, src, state).
            mic4.stream(
                stream_four,
                inputs=[mic4, s4, st4],
                outputs=[o4, e4, c4, th4, r4, st4],
            )
|
152 |
|
153 |
+
# ───── 7. Run ──────────────────────────────────────────────
|
154 |
+
# Start the server only when the file is executed as a script.
if __name__ == "__main__":
    app.launch(server_name="0.0.0.0", server_port=7860, share=False, debug=True)
|