openfree committed on
Commit
364ce74
·
verified ·
1 Parent(s): 15d8a01

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +118 -166
app.py CHANGED
@@ -1,203 +1,155 @@
1
  """
2
- SMARTok ์‹ค์‹œ๊ฐ„ ๋‹ค๊ตญ์–ด ๋ฐ๋ชจ (์™„์ „ ์ˆ˜์ •๋ณธ)
3
- โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
4
- โ€ข ํƒญ1 ๐ŸŽ™๏ธ ์˜ค๋””์˜ค ๋ฒˆ์—ญ : ๋งˆ์ดํฌ/ํŒŒ์ผ โ†’ ๋ฒˆ์—ญ + TTS
5
- โ€ข ํƒญ2 ๐Ÿ“„ ๋ฌธ์„œยท์ด๋ฏธ์ง€ ๋ฒˆ์—ญ : PDF / ์ด๋ฏธ์ง€(OCR) โ†’ ๋ฒˆ์—ญ
6
- โ€ข ํƒญ3 โฑ๏ธ ์‹ค์‹œ๊ฐ„ 1์–ธ์–ด ๋ฒˆ์—ญ : ๋งˆ์ดํฌ โ†’ 1๊ฐœ ์–ธ์–ด ์‹ค์‹œ๊ฐ„ ์ž๋ง‰
7
- โ€ข ํƒญ4 ๐ŸŒ ์‹ค์‹œ๊ฐ„ 4๊ฐœ ์–ธ์–ด ๋ฒˆ์—ญ : ๋งˆ์ดํฌ โ†’ ์˜ยท์ค‘ยทํƒœยท๋Ÿฌ ๋™์‹œ ์ž๋ง‰
8
- โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
9
- ํ•„์ˆ˜ apt : tesseract-ocr libtesseract-dev ocrmypdf ffmpeg
10
- ํ•„์ˆ˜ pip : gradio>=5.33 openai python-dotenv pdfplumber ocrmypdf pillow
11
  """
12
 
13
  import gradio as gr
14
- import openai, os, io, tempfile, mimetypes, json, uuid
15
  from dotenv import load_dotenv
16
- import pdfplumber, ocrmypdf
17
  from PIL import Image
 
18
 
19
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 0. ๊ณตํ†ต ์ดˆ๊ธฐํ™” โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
20
  load_dotenv()
21
- api_key = os.getenv("OPENAI_API_KEY")
22
- if not api_key:
23
- raise RuntimeError("OPENAI_API_KEY๋ฅผ .env ํŒŒ์ผ์— ์„ค์ •ํ•˜์„ธ์š”!")
24
- client = openai.OpenAI(api_key=api_key)
25
 
26
- LANGUAGES = [
27
- "Korean", "English", "Japanese", "Chinese",
28
- "Thai", "Russian", "Vietnamese",
29
- "Spanish", "French"
30
- ]
31
- LANG_CODE = {
32
- "Korean":"ko","English":"en","Japanese":"ja","Chinese":"zh",
33
- "Thai":"th","Russian":"ru","Vietnamese":"vi",
34
- "Spanish":"es","French":"fr"
35
- }
36
- VOICE = {l: ("nova" if l in ["Korean","Japanese","Chinese"] else "alloy")
37
- for l in LANGUAGES}
38
  FOUR = ["English","Chinese","Thai","Russian"]
39
- STREAM_SEC = 4 # Whisper ํ˜ธ์ถœ ์ฃผ๊ธฐ
40
 
41
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 1. ์œ ํ‹ธ ํ•จ์ˆ˜ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
42
- def _safe(v):
43
- if v is None:
44
- return None
45
- return v["name"] if isinstance(v, dict) else v
46
 
47
- def _gpt(text, src, tgt):
48
  rsp = client.chat.completions.create(
49
  model="gpt-3.5-turbo",
50
- messages=[
51
- {"role":"system",
52
- "content":f"Translate the following {src} text to {tgt}. "
53
- "Return only the translation."},
54
- {"role":"user","content":text}
55
- ],
56
- temperature=0.3,max_tokens=4096
57
- )
58
  return rsp.choices[0].message.content.strip()
59
 
60
- def _tts(text, lang):
61
- rsp = client.audio.speech.create(
62
- model="tts-1",
63
- voice=VOICE.get(lang,"alloy"),
64
- input=text[:4096]
65
- )
66
- tmp = tempfile.NamedTemporaryFile(delete=False,suffix=".mp3")
67
- tmp.write(rsp.content); tmp.close()
68
- return tmp.name
69
 
70
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 2. ์˜ค๋””์˜ค(๋‹จ๊ฑด) ๋ฒˆ์—ญ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
71
- def translate_audio(audio_in, src, tgt):
72
- p = _safe(audio_in)
73
- if not p or not os.path.exists(p):
74
- return "โš ๏ธ ์Œ์„ฑ ํŒŒ์ผ ํ•„์š”", "", None
75
  with open(p,"rb") as f:
76
- stt = client.audio.transcriptions.create(
77
- model="whisper-1", file=f, language=LANG_CODE.get(src))
78
- orig = stt.text.strip()
79
- if not orig:
80
- return "โš ๏ธ ์Œ์„ฑ ์ธ์‹ ์‹คํŒจ", "", None
81
- trans = _gpt(orig, src, tgt)
82
- return orig, trans, _tts(trans, tgt)
83
 
84
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 3. ๋ฌธ์„œ / ์ด๋ฏธ์ง€ ๋ฒˆ์—ญ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
85
- def translate_doc(file_in, src, tgt):
86
- p = _safe(file_in)
87
- if not p or not os.path.exists(p):
88
- return "โš ๏ธ PDF/์ด๋ฏธ์ง€ ์—…๋กœ๋“œ", ""
89
- ext = os.path.splitext(p)[1].lower()
90
- mime = mimetypes.guess_type(p)[0] or ""
91
  try:
92
- # PDF ๊ทธ๋Œ€๋กœ
93
- if ext==".pdf" or "pdf" in mime:
94
  with pdfplumber.open(p) as pdf:
95
- txt = "\n".join(pg.extract_text() or "" for pg in pdf.pages[:5])
96
- # ์ด๋ฏธ์ง€ โ†’ OCR PDF
97
- else:
98
- img_pdf = tempfile.NamedTemporaryFile(delete=False,suffix=".pdf").name
99
- Image.open(p).save(img_pdf,"PDF")
100
- ocr_pdf = tempfile.NamedTemporaryFile(delete=False,suffix=".pdf").name
101
- ocrmypdf.ocr(img_pdf, ocr_pdf,
102
- lang=LANG_CODE.get(src,"eng"),
103
- deskew=True,optimize=0,progress_bar=False)
104
- with pdfplumber.open(ocr_pdf) as pdf:
105
- txt = "\n".join(pg.extract_text() or "" for pg in pdf.pages)
 
 
106
  except Exception as e:
107
- return f"โŒ ์ถ”์ถœ ์˜ค๋ฅ˜: {e}", ""
108
- txt = txt.strip()
109
- if not txt:
110
- return "โš ๏ธ ํ…์ŠคํŠธ ์ถ”์ถœ ์‹คํŒจ", ""
111
- return txt, _gpt(txt, src, tgt)
112
 
113
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 4. ์‹ค์‹œ๊ฐ„ 1์–ธ์–ด โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
114
- def stream_single(audio_path, src, tgt, state):
115
- state = state or {"orig":"", "trans":""}
116
- if not audio_path or not os.path.exists(audio_path):
117
- return state["orig"], state["trans"], state
118
- with open(audio_path,"rb") as f:
119
- stt = client.audio.transcriptions.create(
120
- model="whisper-1", file=f, language=LANG_CODE.get(src))
121
- full = stt.text.strip()
122
- new = full[len(state["orig"]):]
123
  if new:
124
- state["orig"] = full
125
- state["trans"] += " " + _gpt(new, src, tgt)
126
- return state["orig"], state["trans"].strip(), state
127
 
128
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€๏ฟฝ๏ฟฝ๏ฟฝโ”€โ”€ 5. ์‹ค์‹œ๊ฐ„ 4์–ธ์–ด โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
129
- def stream_multi(audio_path, src, state):
130
- state = state or {k:"" for k in ["orig"]+FOUR}
131
- if not audio_path or not os.path.exists(audio_path):
132
- return state["orig"],state["English"],state["Chinese"],state["Thai"],state["Russian"],state
133
- with open(audio_path,"rb") as f:
134
- stt = client.audio.transcriptions.create(
135
- model="whisper-1", file=f, language=LANG_CODE.get(src))
136
- full = stt.text.strip()
137
- new = full[len(state["orig"]):]
138
  if new:
139
- state["orig"] = full
140
- for lang in FOUR:
141
- state[lang] += " " + _gpt(new, src, lang)
142
- return (state["orig"].strip(),
143
- state["English"].strip(),
144
- state["Chinese"].strip(),
145
- state["Thai"].strip(),
146
- state["Russian"].strip(),
147
- state)
148
 
149
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 6. Gradio UI โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
150
- with gr.Blocks(title="SMARTok Demo", theme=gr.themes.Soft()) as app:
151
  with gr.Tabs():
152
- # ํƒญ 1
153
  with gr.TabItem("๐ŸŽ™๏ธ ์˜ค๋””์˜ค ๋ฒˆ์—ญ"):
154
- src1 = gr.Dropdown(LANGUAGES,value="Korean",label="์ž…๋ ฅ ์–ธ์–ด")
155
- tgt1 = gr.Dropdown(LANGUAGES,value="English",label="์ถœ๋ ฅ ์–ธ์–ด")
156
- aud1 = gr.Audio(sources=["microphone","upload"],type="filepath")
157
- res1 = gr.Button("๋ฒˆ์—ญ")
158
- o1 = gr.Textbox(label="์›๋ฌธ",lines=5)
159
- t1 = gr.Textbox(label="๋ฒˆ์—ญ",lines=5)
160
- a1 = gr.Audio(label="TTS",type="filepath",autoplay=True)
161
- res1.click(translate_audio,[aud1,src1,tgt1],[o1,t1,a1])
162
 
163
- # ํƒญ 2
164
  with gr.TabItem("๐Ÿ“„ ๋ฌธ์„œยท์ด๋ฏธ์ง€ ๋ฒˆ์—ญ"):
165
- src2 = gr.Dropdown(LANGUAGES,value="Korean",label="์ž…๋ ฅ ์–ธ์–ด")
166
- tgt2 = gr.Dropdown(LANGUAGES,value="English",label="์ถœ๋ ฅ ์–ธ์–ด")
167
- file2= gr.File(label="PDF/์ด๋ฏธ์ง€ ์—…๋กœ๋“œ",
168
- file_types=[".pdf",".png",".jpg",".jpeg",
169
- ".bmp",".tiff",".gif"])
170
- doc2 = gr.Button("๋ฒˆ์—ญ")
171
- o2 = gr.Textbox(label="์ถ”์ถœ ์›๋ฌธ",lines=15)
172
- t2 = gr.Textbox(label="๋ฒˆ์—ญ ๊ฒฐ๊ณผ",lines=15)
173
- doc2.click(translate_doc,[file2,src2,tgt2],[o2,t2])
174
 
175
- # ํƒญ 3
176
  with gr.TabItem("โฑ๏ธ ์‹ค์‹œ๊ฐ„ 1์–ธ์–ด"):
177
- src3 = gr.Dropdown(LANGUAGES,value="Korean",label="์ž…๋ ฅ ์–ธ์–ด")
178
- tgt3 = gr.Dropdown(LANGUAGES,value="English",label="์ถœ๋ ฅ ์–ธ์–ด")
179
- mic3 = gr.Audio(sources=["microphone"],streaming=True)
180
- o3 = gr.Textbox(label="์›๋ฌธ(์‹ค์‹œ๊ฐ„)",lines=8)
181
- t3 = gr.Textbox(label="๋ฒˆ์—ญ(์‹ค์‹œ๊ฐ„)",lines=8)
182
- st3 = gr.State()
183
- mic3.stream(stream_single,
184
- inputs=[src3,tgt3,st3],
185
- outputs=[o3,t3,st3])
186
 
187
- # ํƒญ 4
188
- with gr.TabItem("๐ŸŒ ์‹ค์‹œ๊ฐ„ 4๊ฐœ ์–ธ์–ด"):
189
- src4 = gr.Dropdown(LANGUAGES,value="Korean",label="์ž…๋ ฅ ์–ธ์–ด")
190
- mic4 = gr.Audio(sources=["microphone"],streaming=True)
191
- o4 = gr.Textbox(label="์›๋ฌธ",lines=8)
192
- e4 = gr.Textbox(label="English",lines=8)
193
- c4 = gr.Textbox(label="Chinese(็ฎ€ไฝ“)",lines=8)
194
- th4 = gr.Textbox(label="Thai",lines=8)
195
- r4 = gr.Textbox(label="Russian",lines=8)
196
- st4 = gr.State()
197
- mic4.stream(stream_multi,
198
- inputs=[src4,st4],
199
  outputs=[o4,e4,c4,th4,r4,st4])
200
 
201
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 7. ์‹คํ–‰ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
202
- if __name__ == "__main__":
203
  app.launch(server_name="0.0.0.0",server_port=7860,share=False,debug=True)
 
1
  """
2
+ SMARTok 데모 – 이미지 OCR·실시간 탭 오류 수정본
3
+ ───────────────────────────────────────────
4
+ • 이미지 → ocrmypdf (+ghostscript) 우선, 실패 시 pytesseract 직접 OCR
5
+ • 실시간 1·4언어 탭 : State 인자/출력 개수 맞춰 경고 제거
 
 
 
 
 
6
  """
7
 
8
  import gradio as gr
9
+ import openai, os, io, tempfile, mimetypes
10
  from dotenv import load_dotenv
 
11
  from PIL import Image
12
+ import pdfplumber, pytesseract, ocrmypdf, subprocess, shlex
13
 
14
+ # โ”€โ”€โ”€โ”€โ”€ 0. Init โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
15
  load_dotenv()
16
+ client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY", ""))
 
 
 
17
 
18
# Language names offered in every dropdown of the UI.
LANG = [
    "Korean", "English", "Japanese", "Chinese",
    "Thai", "Russian", "Vietnamese", "Spanish", "French",
]

# Language name -> two-letter code, passed as `language` to the
# transcription calls.
LC = dict(zip(LANG, ["ko", "en", "ja", "zh", "th", "ru", "vi", "es", "fr"]))

# TTS voice per language: "nova" for Korean/Japanese/Chinese, "alloy" otherwise.
VOICE = {
    name: "nova" if name in ("Korean", "Japanese", "Chinese") else "alloy"
    for name in LANG
}

# Target languages of the four-language real-time tab.
FOUR = ["English", "Chinese", "Thai", "Russian"]

CHUNK = 4  # sec
25
 
26
+ # โ”€โ”€โ”€โ”€โ”€ 1. Helpers โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
27
+ def _safe(v): return None if v is None else (v["name"] if isinstance(v,dict) else v)
 
 
 
28
 
29
def _gpt(txt, src, tgt):
    """Translate ``txt`` from ``src`` to ``tgt`` with one chat-completion call.

    The system message names the language pair and asks for the translation
    only; ``txt`` is sent as the user message. Returns the content of the
    first choice with surrounding whitespace stripped.
    """
    system_msg = {
        "role": "system",
        "content": f"Translate {src} โ†’ {tgt}. Return only the translation.",
    }
    user_msg = {"role": "user", "content": txt}
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[system_msg, user_msg],
        temperature=0.3,
        max_tokens=4096,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content.strip()
37
 
38
def _tts(txt, lang):
    """Synthesize speech for ``txt`` and return the path of the mp3 file.

    The voice is looked up in ``VOICE`` by language name ("alloy" when the
    language is not listed) and the input is cut to its first 4096
    characters. The audio is written to a named temporary file created with
    ``delete=False`` so the returned path remains valid after this function
    returns.
    """
    speech = client.audio.speech.create(
        model="tts-1",
        voice=VOICE.get(lang, "alloy"),
        input=txt[:4096],
    )
    # The context manager closes the handle even when the write fails.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as handle:
        handle.write(speech.content)
    return handle.name
 
 
 
 
43
 
44
+ # โ”€โ”€โ”€โ”€โ”€ 2. Single Audio translate โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
45
def trans_audio(inp, src, tgt):
    """Transcribe an audio file, translate the transcript, and voice the result.

    Returns a 3-tuple ``(original text, translated text, TTS file path)``.
    When no existing file is supplied, or the transcript comes back empty,
    the first element carries a warning message and the other two are ``""``
    and ``None``.
    """
    audio_path = _safe(inp)
    has_file = bool(audio_path) and os.path.exists(audio_path)
    if not has_file:
        return "โš ๏ธ ํŒŒ์ผ ํ•„์š”", "", None

    with open(audio_path, "rb") as audio_file:
        transcript = client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_file,
            language=LC.get(src),
        )

    original = transcript.text.strip()
    if original == "":
        return "โš ๏ธ ์ธ์‹ ์‹คํŒจ", "", None

    translated = _gpt(original, src, tgt)
    speech_path = _tts(translated, tgt)
    return original, translated, speech_path
 
55
 
56
+ # โ”€โ”€โ”€โ”€โ”€ 3. Doc/Image translate โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
57
# Tesseract traineddata names for the UI languages. The two-letter codes in
# LC are not valid Tesseract language names, so OCR needs its own mapping.
_TESS_LANG = {
    "Korean": "kor", "English": "eng", "Japanese": "jpn", "Chinese": "chi_sim",
    "Thai": "tha", "Russian": "rus", "Vietnamese": "vie",
    "Spanish": "spa", "French": "fra",
}


def _ocr_image(path, tess_lang):
    """OCR one image file and return the recognized text.

    First attempt: convert the image to PDF, run ocrmypdf on it, and read the
    text layer back with pdfplumber. If any step of that fails (conversion
    included), fall back to running pytesseract directly on the image.
    Intermediate PDFs live in a temporary directory that is removed on exit.
    """
    try:
        with tempfile.TemporaryDirectory() as workdir:
            raw_pdf = os.path.join(workdir, "image.pdf")
            ocr_pdf = os.path.join(workdir, "ocr.pdf")
            with Image.open(path) as img:
                # Not every image mode can be written as PDF; RGB always can.
                img.convert("RGB").save(raw_pdf, "PDF")
            ocrmypdf.ocr(
                raw_pdf, ocr_pdf,
                language=[tess_lang],
                deskew=True, optimize=0, progress_bar=False,
            )
            with pdfplumber.open(ocr_pdf) as pdf:
                return "\n".join(pg.extract_text() or "" for pg in pdf.pages)
    except Exception:
        # ocrmypdf / ghostscript unavailable or failed -> direct OCR.
        with Image.open(path) as img:
            return pytesseract.image_to_string(img, lang=tess_lang)


def trans_doc(file_in, src, tgt):
    """Extract text from a PDF or image upload and translate it.

    PDFs are read with pdfplumber (first five pages only); every other file
    is treated as an image and OCR'd. Returns ``(extracted text,
    translation)``; on a missing file, an extraction error, or empty text the
    first element is a status message and the second is ``""``.
    """
    p = _safe(file_in)
    if not p or not os.path.exists(p):
        return "โš ๏ธ ํŒŒ์ผ ์—…๋กœ๋“œ", ""
    ext = os.path.splitext(p)[1].lower()
    mime = mimetypes.guess_type(p)[0] or ""
    try:
        if ext == ".pdf" or "pdf" in mime:
            with pdfplumber.open(p) as pdf:
                txt = "\n".join(pg.extract_text() or "" for pg in pdf.pages[:5])
        else:
            txt = _ocr_image(p, _TESS_LANG.get(src, "eng"))
    except Exception as e:
        return f"โŒ ์ถ”์ถœ ์˜ค๋ฅ˜: {e}", ""
    txt = txt.strip()
    if not txt:
        return "โš ๏ธ ํ…์ŠคํŠธ ์ถ”์ถœ ์‹คํŒจ", ""
    return txt, _gpt(txt, src, tgt)
 
83
 
84
+ # โ”€โ”€โ”€โ”€โ”€ 4. Real-time single lang โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
85
def stream_one(path, src, tgt, state):
    """Transcribe one streamed audio file and extend the running translation.

    ``state`` is a dict with keys ``"o"`` (accumulated original text) and
    ``"t"`` (accumulated translation); a fresh one is created when it is
    falsy. Returns ``(original, translation, state)``. When ``path`` is empty
    or does not exist, the current state is returned untouched.

    The new transcript is merged in one of two ways:
    * if it starts with the text already accumulated, it is treated as a
      cumulative transcript and only the added tail is translated;
    * otherwise it is treated as a separate segment and appended.
    NOTE(review): Gradio streaming appears to deliver each chunk on its own,
    which is the second case - confirm against the Gradio version in use.
    """
    state = state or {"o": "", "t": ""}
    if not path or not os.path.exists(path):
        return state["o"], state["t"], state

    with open(path, "rb") as f:
        stt = client.audio.transcriptions.create(
            model="whisper-1", file=f, language=LC.get(src))
    full = stt.text.strip()

    prev = state["o"]
    if full.startswith(prev):
        new, merged = full[len(prev):], full
    else:
        # Independent chunk: slicing by the old length would drop text.
        new, merged = full, f"{prev} {full}"

    if new:
        state["o"] = merged
        state["t"] += " " + _gpt(new, src, tgt)
    return state["o"], state["t"].strip(), state
96
 
97
+ # โ”€โ”€โ”€โ”€โ”€ 5. Real-time 4 langs โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
98
def stream_four(path, src, state):
    """Transcribe one streamed audio file and extend four running translations.

    ``state`` is a dict with key ``"o"`` (accumulated original text) plus one
    key per language in ``FOUR``; a fresh one is created when it is falsy.
    Returns ``(original, English, Chinese, Thai, Russian, state)``. When
    ``path`` is empty or does not exist, the current state is returned
    untouched.

    The new transcript is merged in one of two ways:
    * if it starts with the text already accumulated, it is treated as a
      cumulative transcript and only the added tail is translated;
    * otherwise it is treated as a separate segment and appended.
    NOTE(review): Gradio streaming appears to deliver each chunk on its own,
    which is the second case - confirm against the Gradio version in use.
    """
    state = state or {k: "" for k in ["o"] + FOUR}
    if not path or not os.path.exists(path):
        return (state["o"], state["English"], state["Chinese"],
                state["Thai"], state["Russian"], state)

    with open(path, "rb") as f:
        stt = client.audio.transcriptions.create(
            model="whisper-1", file=f, language=LC.get(src))
    full = stt.text.strip()

    prev = state["o"]
    if full.startswith(prev):
        new, merged = full[len(prev):], full
    else:
        # Independent chunk: slicing by the old length would drop text.
        new, merged = full, f"{prev} {full}"

    if new:
        state["o"] = merged
        for target in FOUR:
            state[target] += " " + _gpt(new, src, target)
    return (state["o"].strip(), state["English"].strip(),
            state["Chinese"].strip(), state["Thai"].strip(),
            state["Russian"].strip(), state)
 
 
 
 
112
 
113
+ # โ”€โ”€โ”€โ”€โ”€ 6. UI โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
114
# Gradio UI: four tabs wired to trans_audio, trans_doc, stream_one and
# stream_four. Component variables are module-level names.
with gr.Blocks(title="SMARTok Demo", theme=gr.themes.Soft()) as app:
    with gr.Tabs():
        # Tab 1: one-shot audio translation (microphone or uploaded file)
        with gr.TabItem("๐ŸŽ™๏ธ ์˜ค๋””์˜ค ๋ฒˆ์—ญ"):
            s1 = gr.Dropdown(LANG, value="Korean", label="์ž…๋ ฅ")
            t1 = gr.Dropdown(LANG, value="English", label="์ถœ๋ ฅ")
            a1 = gr.Audio(sources=["microphone", "upload"], type="filepath")
            btn1 = gr.Button("๋ฒˆ์—ญ")
            o1 = gr.Textbox(label="์›๋ฌธ", lines=5)
            tr1 = gr.Textbox(label="๋ฒˆ์—ญ", lines=5)
            aud1 = gr.Audio(label="TTS", type="filepath", autoplay=True)
            btn1.click(trans_audio, [a1, s1, t1], [o1, tr1, aud1])

        # Tab 2: document / image translation
        with gr.TabItem("๐Ÿ“„ ๋ฌธ์„œยท์ด๋ฏธ์ง€ ๋ฒˆ์—ญ"):
            s2 = gr.Dropdown(LANG, value="Korean", label="์ž…๋ ฅ")
            t2 = gr.Dropdown(LANG, value="English", label="์ถœ๋ ฅ")
            f2 = gr.File(file_types=[".pdf", ".png", ".jpg", ".jpeg",
                                     ".bmp", ".tiff", ".gif"])
            btn2 = gr.Button("๋ฒˆ์—ญ")
            o2 = gr.Textbox(label="์ถ”์ถœ ์›๋ฌธ", lines=15)
            tr2 = gr.Textbox(label="๋ฒˆ์—ญ ๊ฒฐ๊ณผ", lines=15)
            btn2.click(trans_doc, [f2, s2, t2], [o2, tr2])

        # Tab 3: real-time, single target language
        with gr.TabItem("โฑ๏ธ ์‹ค์‹œ๊ฐ„ 1์–ธ์–ด"):
            s3 = gr.Dropdown(LANG, value="Korean", label="์ž…๋ ฅ")
            t3 = gr.Dropdown(LANG, value="English", label="์ถœ๋ ฅ")
            # type="filepath": stream_one opens the value as a file path.
            mic3 = gr.Audio(sources=["microphone"], streaming=True,
                            type="filepath")
            # Two separate boxes: stream_one returns original and translation
            # as distinct values, so each needs its own output component.
            o3 = gr.Textbox(label="์›๋ฌธ", lines=8)
            tr3 = gr.Textbox(label="๋ฒˆ์—ญ", lines=8)
            st3 = gr.State()
            # The microphone must be the first input: stream_one's signature
            # is (path, src, tgt, state).
            mic3.stream(stream_one,
                        inputs=[mic3, s3, t3, st3],
                        outputs=[o3, tr3, st3],
                        stream_every=CHUNK)

        # Tab 4: real-time, four target languages at once
        with gr.TabItem("๐ŸŒ ์‹ค์‹œ๊ฐ„ 4์–ธ์–ด"):
            s4 = gr.Dropdown(LANG, value="Korean", label="์ž…๋ ฅ ์–ธ์–ด")
            mic4 = gr.Audio(sources=["microphone"], streaming=True,
                            type="filepath")
            o4 = gr.Textbox(label="์›๋ฌธ", lines=8)
            e4 = gr.Textbox(label="English", lines=8)
            c4 = gr.Textbox(label="Chinese(็ฎ€ไฝ“)", lines=8)
            th4 = gr.Textbox(label="Thai", lines=8)
            r4 = gr.Textbox(label="Russian", lines=8)
            st4 = gr.State()
            # stream_four's signature is (path, src, state).
            mic4.stream(stream_four,
                        inputs=[mic4, s4, st4],
                        outputs=[o4, e4, c4, th4, r4, st4],
                        stream_every=CHUNK)
152
 
153
+ # โ”€โ”€โ”€โ”€โ”€ 7. Run โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
154
if __name__ == "__main__":
    # Bind to all interfaces on port 7860, no public share link, debug on.
    launch_options = dict(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        debug=True,
    )
    app.launch(**launch_options)