openfree committed on
Commit
0fdb888
ยท
verified ยท
1 Parent(s): f73d82f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +232 -130
app.py CHANGED
@@ -1,155 +1,257 @@
1
  """
2
- SMARTok ๋ฐ๋ชจ โ€“ ์ด๋ฏธ์ง€ OCRยท์‹ค์‹œ๊ฐ„ ํƒญ ์˜ค๋ฅ˜ ์ˆ˜์ •๋ณธ
3
- โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
4
- โ€ข ์ด๋ฏธ์ง€ โ†’ ocrmypdf (+ghostscript) ์šฐ์„ , ์‹คํŒจ ์‹œ pytesseract ์ง์ ‘ OCR
5
- โ€ข ์‹ค์‹œ๊ฐ„ 1ยท4์–ธ์–ด ํƒญ : State ์ธ์ž/์ถœ๋ ฅ ๊ฐœ์ˆ˜ ๋งž์ถฐ ๊ฒฝ๊ณ  ์ œ๊ฑฐ
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  """
7
 
8
  import gradio as gr
9
  import openai, os, io, tempfile, mimetypes
10
  from dotenv import load_dotenv
11
  from PIL import Image
12
- import pdfplumber, pytesseract, ocrmypdf, subprocess, shlex
13
 
14
- # โ”€โ”€โ”€โ”€โ”€ 0. Init โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
15
  load_dotenv()
16
- client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY", ""))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
- LANG = ["Korean","English","Japanese","Chinese",
19
- "Thai","Russian","Vietnamese","Spanish","French"]
20
- LC = {"Korean":"ko","English":"en","Japanese":"ja","Chinese":"zh",
21
- "Thai":"th","Russian":"ru","Vietnamese":"vi","Spanish":"es","French":"fr"}
22
- VOICE= {l:("nova" if l in ["Korean","Japanese","Chinese"] else "alloy") for l in LANG}
23
- FOUR = ["English","Chinese","Thai","Russian"]
24
- CHUNK = 4 # sec
25
 
26
- # โ”€โ”€โ”€โ”€โ”€ 1. Helpers โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
27
- def _safe(v): return None if v is None else (v["name"] if isinstance(v,dict) else v)
 
 
 
 
28
 
29
- def _gpt(txt, src, tgt):
30
- rsp = client.chat.completions.create(
 
31
  model="gpt-3.5-turbo",
32
- messages=[{"role":"system",
33
- "content":f"Translate {src} โ†’ {tgt}. Return only the translation."},
34
- {"role":"user","content":txt}],
35
- temperature=0.3,max_tokens=4096)
36
- return rsp.choices[0].message.content.strip()
37
-
38
- def _tts(txt, lang):
39
- out = client.audio.speech.create(model="tts-1",voice=VOICE.get(lang,"alloy"),
40
- input=txt[:4096])
41
- f = tempfile.NamedTemporaryFile(delete=False,suffix=".mp3")
42
- f.write(out.content); f.close(); return f.name
43
-
44
- # โ”€โ”€โ”€โ”€โ”€ 2. Single Audio translate โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
45
- def trans_audio(inp, src, tgt):
46
- p=_safe(inp)
47
- if not p or not os.path.exists(p): return "โš ๏ธ ํŒŒ์ผ ํ•„์š”","",None
48
- with open(p,"rb") as f:
49
- stt=client.audio.transcriptions.create(model="whisper-1",file=f,
50
- language=LC.get(src))
51
- orig=stt.text.strip();
52
- if not orig: return "โš ๏ธ ์ธ์‹ ์‹คํŒจ","",None
53
- trans=_gpt(orig,src,tgt)
54
- return orig,trans,_tts(trans,tgt)
55
-
56
- # โ”€โ”€โ”€โ”€โ”€ 3. Doc/Image translate โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
57
- def trans_doc(file_in, src, tgt):
58
- p=_safe(file_in)
59
- if not p or not os.path.exists(p): return "โš ๏ธ ํŒŒ์ผ ์—…๋กœ๋“œ",""
60
- ext=os.path.splitext(p)[1].lower()
61
- mime=mimetypes.guess_type(p)[0] or ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  try:
63
- if ext==".pdf" or "pdf" in mime: # PDF
64
- with pdfplumber.open(p) as pdf:
65
- txt="\n".join(pg.extract_text() or "" for pg in pdf.pages[:5])
66
- else: # ์ด๋ฏธ์ง€
67
- tmp_pdf=tempfile.NamedTemporaryFile(delete=False,suffix=".pdf").name
68
- Image.open(p).save(tmp_pdf,"PDF")
69
- ocr_pdf=tempfile.NamedTemporaryFile(delete=False,suffix=".pdf").name
 
 
 
 
70
  try:
71
- ocrmypdf.ocr(tmp_pdf,ocr_pdf,
72
- lang=LC.get(src,"eng"),deskew=True,optimize=0,
73
- progress_bar=False)
 
 
 
74
  with pdfplumber.open(ocr_pdf) as pdf:
75
- txt="\n".join(pg.extract_text() or "" for pg in pdf.pages)
76
- except Exception: # gs ์—†๊ฑฐ๋‚˜ ocrmypdf ์‹คํŒจ โ†’ ์ง์ ‘ OCR
77
- txt=pytesseract.image_to_string(Image.open(p), lang=LC.get(src,"eng"))
 
 
 
78
  except Exception as e:
79
- return f"โŒ ์ถ”์ถœ ์˜ค๋ฅ˜: {e}",""
80
- txt=txt.strip()
81
- if not txt: return "โš ๏ธ ํ…์ŠคํŠธ ์ถ”์ถœ ์‹คํŒจ",""
82
- return txt,_gpt(txt,src,tgt)
83
-
84
- # โ”€โ”€โ”€โ”€โ”€ 4. Real-time single lang โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
85
- def stream_one(path, src, tgt, state):
86
- state=state or {"o":"","t":""}
87
- if not path or not os.path.exists(path): return state["o"],state["t"],state
88
- with open(path,"rb") as f:
89
- stt=client.audio.transcriptions.create(model="whisper-1",file=f,
90
- language=LC.get(src))
91
- full=stt.text.strip(); new=full[len(state["o"]):]
 
 
 
 
 
 
 
 
 
 
92
  if new:
93
- state["o"]=full
94
- state["t"]+=" "+_gpt(new,src,tgt)
95
- return state["o"],state["t"].strip(),state
96
-
97
- # โ”€โ”€โ”€โ”€โ”€ 5. Real-time 4 langs โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
98
- def stream_four(path, src, state):
99
- state=state or {k:"" for k in ["o"]+FOUR}
100
- if not path or not os.path.exists(path):
101
- return state["o"],state["English"],state["Chinese"],state["Thai"],state["Russian"],state
102
- with open(path,"rb") as f:
103
- stt=client.audio.transcriptions.create(model="whisper-1",file=f,
104
- language=LC.get(src))
105
- full=stt.text.strip(); new=full[len(state["o"]):]
 
 
 
 
 
 
 
 
 
 
106
  if new:
107
- state["o"]=full
108
- for l in FOUR:
109
- state[l]+=" "+_gpt(new,src,l)
110
- return (state["o"].strip(),state["English"].strip(),state["Chinese"].strip(),
111
- state["Thai"].strip(),state["Russian"].strip(),state)
112
-
113
- # โ”€โ”€โ”€โ”€โ”€ 6. UI โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
114
- with gr.Blocks(title="SMARTok Demo",theme=gr.themes.Soft()) as app:
 
115
  with gr.Tabs():
116
- # ํƒญ1
117
  with gr.TabItem("๐ŸŽ™๏ธ ์˜ค๋””์˜ค ๋ฒˆ์—ญ"):
118
- s1=gr.Dropdown(LANG,value="Korean",label="์ž…๋ ฅ")
119
- t1=gr.Dropdown(LANG,value="English",label="์ถœ๋ ฅ")
120
- a1=gr.Audio(sources=["microphone","upload"],type="filepath")
121
- btn1=gr.Button("๋ฒˆ์—ญ")
122
- o1=gr.Textbox(label="์›๋ฌธ",lines=5); tr1=gr.Textbox(label="๋ฒˆ์—ญ",lines=5)
123
- aud1=gr.Audio(label="TTS",type="filepath",autoplay=True)
124
- btn1.click(trans_audio,[a1,s1,t1],[o1,tr1,aud1])
125
-
126
- # ํƒญ2
 
127
  with gr.TabItem("๐Ÿ“„ ๋ฌธ์„œยท์ด๋ฏธ์ง€ ๋ฒˆ์—ญ"):
128
- s2=gr.Dropdown(LANG,value="Korean",label="์ž…๋ ฅ")
129
- t2=gr.Dropdown(LANG,value="English",label="์ถœ๋ ฅ")
130
- f2=gr.File(file_types=[".pdf",".png",".jpg",".jpeg",".bmp",".tiff",".gif"])
131
- btn2=gr.Button("๋ฒˆ์—ญ")
132
- o2=gr.Textbox(label="์ถ”์ถœ ์›๋ฌธ",lines=15); tr2=gr.Textbox(label="๋ฒˆ์—ญ ๊ฒฐ๊ณผ",lines=15)
133
- btn2.click(trans_doc,[f2,s2,t2],[o2,tr2])
134
-
135
- # ํƒญ3
 
 
 
136
  with gr.TabItem("โฑ๏ธ ์‹ค์‹œ๊ฐ„ 1์–ธ์–ด"):
137
- s3=gr.Dropdown(LANG,value="Korean",label="์ž…๋ ฅ"); t3=gr.Dropdown(LANG,value="English",label="์ถœ๋ ฅ")
138
- mic3=gr.Audio(sources=["microphone"],streaming=True)
139
- o3=tr3=gr.Textbox(lines=8,label="์›๋ฌธ / ๋ฒˆ์—ญ")
140
- st3=gr.State()
141
- mic3.stream(stream_one,inputs=[s3,t3,st3],outputs=[o3,tr3,st3])
 
 
 
142
 
143
- # ํƒญ4
144
  with gr.TabItem("๐ŸŒ ์‹ค์‹œ๊ฐ„ 4์–ธ์–ด"):
145
- s4=gr.Dropdown(LANG,value="Korean",label="์ž…๋ ฅ ์–ธ์–ด")
146
- mic4=gr.Audio(sources=["microphone"],streaming=True)
147
- o4=gr.Textbox(label="์›๋ฌธ",lines=8); e4=gr.Textbox(label="English",lines=8)
148
- c4=gr.Textbox(label="Chinese(็ฎ€ไฝ“)",lines=8); th4=gr.Textbox(label="Thai",lines=8); r4=gr.Textbox(label="Russian",lines=8)
149
- st4=gr.State()
150
- mic4.stream(stream_four,inputs=[s4,st4],
151
- outputs=[o4,e4,c4,th4,r4,st4])
152
-
153
- # โ”€โ”€โ”€โ”€โ”€ 7. Run โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
154
- if __name__=="__main__":
155
- app.launch(server_name="0.0.0.0",server_port=7860,share=False,debug=True)
 
 
 
 
 
 
 
1
  """
2
+ SMARTok demo - final stable build (2025-06-09)
3
+ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
4
+ โ— ํƒญ1 ๐ŸŽ™๏ธ ์˜ค๋””์˜ค ๋ฒˆ์—ญ : ๋งˆ์ดํฌ/ํŒŒ์ผ โ†’ ๋ฒˆ์—ญ + TTS
5
+ โ— ํƒญ2 ๐Ÿ“„ ๋ฌธ์„œโ€ง์ด๋ฏธ์ง€ ๋ฒˆ์—ญ : PDF / ์ด๋ฏธ์ง€(OCR) โ†’ ๋ฒˆ์—ญ
6
+ โ— ํƒญ3 โฑ๏ธ ์‹ค์‹œ๊ฐ„ 1์–ธ์–ด ๋ฒˆ์—ญ : ๋งˆ์ดํฌ โ†’ ์‹ค์‹œ๊ฐ„ ์ž๋ง‰(1๊ฐœ ์–ธ์–ด)
7
+ โ— ํƒญ4 ๐ŸŒ ์‹ค์‹œ๊ฐ„ 4์–ธ์–ด ๋ฒˆ์—ญ : ๋งˆ์ดํฌ โ†’ ์˜ยท์ค‘ยทํƒœยท๋Ÿฌ ๋™์‹œ ์ž๋ง‰
8
+ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
9
+ Required APT packages (packages.txt)
10
+ tesseract-ocr
11
+ libtesseract-dev
12
+ ghostscript
13
+ tesseract-ocr-kor tesseract-ocr-eng
14
+ tesseract-ocr-rus tesseract-ocr-tha
15
+ tesseract-ocr-chi-sim
16
+ ffmpeg
17
+
18
+ Required PIP packages (requirements.txt)
19
+ gradio>=5.33
20
+ openai
21
+ python-dotenv
22
+ pdfplumber
23
+ ocrmypdf
24
+ pytesseract
25
+ pillow
26
  """
27
 
28
  import gradio as gr
29
  import openai, os, io, tempfile, mimetypes
30
  from dotenv import load_dotenv
31
  from PIL import Image
32
+ import pdfplumber, ocrmypdf, pytesseract
33
 
34
+ # ------------------- 0. Initialization -------------------
35
  load_dotenv()
36
# The API key comes from the environment (populated from .env by load_dotenv);
# start-up is aborted when it is missing or empty.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    raise RuntimeError("OPENAI_API_KEY ๊ฐ€ .env ์— ์—†์Šต๋‹ˆ๋‹ค!")
client = openai.OpenAI(api_key=OPENAI_API_KEY)

# UI language name -> three-letter language code in Tesseract's naming
# (e.g. "chi_sim"). Insertion order is the order shown in the dropdowns.
LANG_CODE = {
    "Korean": "kor",
    "English": "eng",
    "Japanese": "jpn",
    "Chinese": "chi_sim",
    "Thai": "tha",
    "Russian": "rus",
    "Vietnamese": "vie",
    "Spanish": "spa",
    "French": "fra",
}
LANGUAGES = list(LANG_CODE)

# TTS voice per language: "nova" for Korean/Japanese/Chinese, "alloy" otherwise.
VOICE = dict.fromkeys(LANGUAGES, "alloy")
VOICE.update(dict.fromkeys(("Korean", "Japanese", "Chinese"), "nova"))

FOUR = ["English", "Chinese", "Thai", "Russian"]  # targets of the four-language tab
STREAM_SEC = 4  # real-time chunk length, in seconds
 
 
 
 
 
56
 
57
+ # ------------------- 1. Shared helpers -------------------
58
+ def _safe(val):
59
+ """Gradio File/Audio โ†’ ๊ฒฝ๋กœ"""
60
+ if val is None:
61
+ return None
62
+ return val["name"] if isinstance(val, dict) else val
63
 
64
+ def _gpt_translate(text: str, src: str, tgt: str) -> str:
65
+ """GPT-3.5 ๋ฒˆ์—ญ"""
66
+ resp = client.chat.completions.create(
67
  model="gpt-3.5-turbo",
68
+ messages=[
69
+ {"role": "system",
70
+ "content": f"Translate the following {src} text to {tgt}. "
71
+ "Return only the translated text."},
72
+ {"role": "user", "content": text}
73
+ ],
74
+ temperature=0.3,
75
+ max_tokens=4096
76
+ )
77
+ return resp.choices[0].message.content.strip()
78
+
79
def _tts(text: str, lang: str) -> str:
    """Synthesize *text* with the tts-1 model and return the mp3 file path.

    Args:
        text: Text to speak; only the first 4096 characters are sent.
        lang: Language name used to choose the voice from VOICE
              ("alloy" when the language has no entry).

    Returns:
        Path of a temporary ``.mp3`` file. The file is created with
        ``delete=False`` so it outlives this call; nothing here removes it.
    """
    out = client.audio.speech.create(
        model="tts-1",
        voice=VOICE.get(lang, "alloy"),
        input=text[:4096],
    )
    # The context manager closes the handle even if the write fails.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
        tmp.write(out.content)
    return tmp.name
90
+
91
+ # ------------------- 2. Audio (one-shot) translation -------------------
92
+ def translate_audio(audio_in, src, tgt):
93
+ path = _safe(audio_in)
94
+ if not path or not os.path.exists(path):
95
+ return "โš ๏ธ ์Œ์„ฑ ํŒŒ์ผ์„ ๋…น์Œํ•˜๊ฑฐ๋‚˜ ์—…๋กœ๋“œํ•˜์„ธ์š”.", "", None
96
+
97
+ with open(path, "rb") as f:
98
+ stt = client.audio.transcriptions.create(
99
+ model="whisper-1", file=f, language=LANG_CODE.get(src, "eng")
100
+ )
101
+
102
+ origin = stt.text.strip()
103
+ if not origin:
104
+ return "โš ๏ธ ์Œ์„ฑ ์ธ์‹ ์‹คํŒจ", "", None
105
+
106
+ translated = _gpt_translate(origin, src, tgt)
107
+ tts_path = _tts(translated, tgt)
108
+ return origin, translated, tts_path
109
+
110
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 3. ๋ฌธ์„œ/์ด๋ฏธ์ง€ ๋ฒˆ์—ญ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
111
+ def translate_doc(file_in, src, tgt):
112
+ path = _safe(file_in)
113
+ if not path or not os.path.exists(path):
114
+ return "โš ๏ธ PDF ๋˜๋Š” ์ด๋ฏธ์ง€๋ฅผ ์—…๋กœ๋“œํ•˜์„ธ์š”.", ""
115
+
116
+ ext = os.path.splitext(path)[1].lower()
117
+ mime = mimetypes.guess_type(path)[0] or ""
118
+ text = ""
119
+
120
  try:
121
+ # (A) PDF ์ง์ ‘ ํ…์ŠคํŠธ ์ถ”์ถœ
122
+ if ext == ".pdf" or "pdf" in mime:
123
+ with pdfplumber.open(path) as pdf:
124
+ text = "\n".join(page.extract_text() or "" for page in pdf.pages[:5])
125
+
126
+ # (B) ์ด๋ฏธ์ง€ โ†’ OCR PDF โ†’ ํ…์ŠคํŠธ
127
+ else:
128
+ tmp_pdf = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf").name
129
+ Image.open(path).save(tmp_pdf, "PDF")
130
+
131
+ ocr_pdf = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf").name
132
  try:
133
+ # OCR ๋ ˆ์ด์–ด ์‚ฝ์ž… (์–ธ์–ด ๋ฐ์ดํ„ฐ ์—†๋Š” ๊ฒฝ์šฐ ์‹คํŒจํ•  ์ˆ˜ ์žˆ์Œ)
134
+ ocrmypdf.ocr(
135
+ tmp_pdf, ocr_pdf,
136
+ lang=f"{LANG_CODE.get(src,'eng')}+eng",
137
+ deskew=True, optimize=0, progress_bar=False
138
+ )
139
  with pdfplumber.open(ocr_pdf) as pdf:
140
+ text = "\n".join(p.extract_text() or "" for p in pdf.pages)
141
+ except Exception:
142
+ # ocrmypdf ์‹คํŒจ โ†’ pytesseract ์ง์ ‘
143
+ text = pytesseract.image_to_string(
144
+ Image.open(path), lang=LANG_CODE.get(src, "eng")
145
+ )
146
  except Exception as e:
147
+ return f"โŒ ํ…์ŠคํŠธ ์ถ”์ถœ ์˜ค๋ฅ˜: {e}", ""
148
+
149
+ text = text.strip()
150
+ if not text:
151
+ return "โš ๏ธ ํ…์ŠคํŠธ๋ฅผ ์ถ”์ถœํ•˜์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค.", ""
152
+
153
+ return text, _gpt_translate(text, src, tgt)
154
+
155
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 4. ์‹ค์‹œ๊ฐ„ 1์–ธ์–ด ์ŠคํŠธ๋ฆผ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
156
+ def stream_one(audio_path, src, tgt, state):
157
+ """state = {'orig': str, 'trans': str}"""
158
+ state = state or {"orig": "", "trans": ""}
159
+
160
+ if not audio_path or not os.path.exists(audio_path):
161
+ return state["orig"], state["trans"], state
162
+
163
+ with open(audio_path, "rb") as f:
164
+ stt = client.audio.transcriptions.create(
165
+ model="whisper-1", file=f, language=LANG_CODE.get(src, "eng")
166
+ )
167
+
168
+ full = stt.text.strip()
169
+ new = full[len(state["orig"]):]
170
  if new:
171
+ state["orig"] = full
172
+ state["trans"] += " " + _gpt_translate(new, src, tgt)
173
+
174
+ return state["orig"], state["trans"].strip(), state
175
+
176
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 5. ์‹ค์‹œ๊ฐ„ 4์–ธ์–ด ์ŠคํŠธ๋ฆผ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
177
+ def stream_four(audio_path, src, state):
178
+ """
179
+ state keys: orig / English / Chinese / Thai / Russian
180
+ """
181
+ state = state or {k: "" for k in ["orig"] + FOUR}
182
+
183
+ if not audio_path or not os.path.exists(audio_path):
184
+ return (state["orig"], state["English"], state["Chinese"],
185
+ state["Thai"], state["Russian"], state)
186
+
187
+ with open(audio_path, "rb") as f:
188
+ stt = client.audio.transcriptions.create(
189
+ model="whisper-1", file=f, language=LANG_CODE.get(src, "eng")
190
+ )
191
+
192
+ full = stt.text.strip()
193
+ new = full[len(state["orig"]):]
194
  if new:
195
+ state["orig"] = full
196
+ for tgt in FOUR:
197
+ state[tgt] += " " + _gpt_translate(new, src, tgt)
198
+
199
+ return (state["orig"].strip(), state["English"].strip(), state["Chinese"].strip(),
200
+ state["Thai"].strip(), state["Russian"].strip(), state)
201
+
202
+ # ------------------- 6. Gradio UI -------------------
203
with gr.Blocks(title="SMARTok Demo", theme=gr.themes.Soft()) as app:
    with gr.Tabs():
        # Tab 1 - one-shot audio translation (microphone or uploaded file)
        with gr.TabItem("๐ŸŽ™๏ธ ์˜ค๋””์˜ค ๋ฒˆ์—ญ"):
            s1 = gr.Dropdown(LANGUAGES, value="Korean", label="์ž…๋ ฅ ์–ธ์–ด")
            t1 = gr.Dropdown(LANGUAGES, value="English", label="์ถœ๋ ฅ ์–ธ์–ด")
            aud1 = gr.Audio(sources=["microphone", "upload"], type="filepath")
            btn1 = gr.Button("๋ฒˆ์—ญ")
            o1 = gr.Textbox(label="์›๋ฌธ", lines=5)
            tr1 = gr.Textbox(label="๋ฒˆ์—ญ", lines=5)
            a1 = gr.Audio(label="TTS", type="filepath", autoplay=True)
            btn1.click(translate_audio, [aud1, s1, t1], [o1, tr1, a1])

        # Tab 2 - document / image translation
        with gr.TabItem("๐Ÿ“„ ๋ฌธ์„œยท์ด๋ฏธ์ง€ ๋ฒˆ์—ญ"):
            s2 = gr.Dropdown(LANGUAGES, value="Korean", label="์ž…๋ ฅ ์–ธ์–ด")
            t2 = gr.Dropdown(LANGUAGES, value="English", label="์ถœ๋ ฅ ์–ธ์–ด")
            file2 = gr.File(label="PDF / ์ด๋ฏธ์ง€ ์—…๋กœ๋“œ",
                            file_types=[".pdf", ".png", ".jpg", ".jpeg",
                                        ".bmp", ".tiff", ".gif"])
            btn2 = gr.Button("๋ฒˆ์—ญ")
            o2 = gr.Textbox(label="์ถ”์ถœ ์›๋ฌธ", lines=15)
            tr2 = gr.Textbox(label="๋ฒˆ์—ญ ๊ฒฐ๊ณผ", lines=15)
            btn2.click(translate_doc, [file2, s2, t2], [o2, tr2])

        # Tab 3 - real-time translation into one language
        with gr.TabItem("โฑ๏ธ ์‹ค์‹œ๊ฐ„ 1์–ธ์–ด"):
            s3 = gr.Dropdown(LANGUAGES, value="Korean", label="์ž…๋ ฅ ์–ธ์–ด")
            t3 = gr.Dropdown(LANGUAGES, value="English", label="์ถœ๋ ฅ ์–ธ์–ด")
            # type="filepath": stream_one opens its first argument as a file,
            # so the component must hand over a path, not a numpy tuple.
            mic3 = gr.Audio(sources=["microphone"], streaming=True,
                            type="filepath")
            o3 = gr.Textbox(label="์›๋ฌธ(์‹ค์‹œ๊ฐ„)", lines=8)
            tr3 = gr.Textbox(label="๋ฒˆ์—ญ(์‹ค์‹œ๊ฐ„)", lines=8)
            st3 = gr.State()
            # The microphone itself must be listed in `inputs`; otherwise
            # stream_one(audio_path, src, tgt, state) is called with only
            # three values. STREAM_SEC sets how often a chunk is delivered.
            mic3.stream(stream_one, inputs=[mic3, s3, t3, st3],
                        outputs=[o3, tr3, st3],
                        stream_every=STREAM_SEC)

        # Tab 4 - real-time translation into four languages
        with gr.TabItem("๐ŸŒ ์‹ค์‹œ๊ฐ„ 4์–ธ์–ด"):
            s4 = gr.Dropdown(LANGUAGES, value="Korean", label="์ž…๋ ฅ ์–ธ์–ด")
            mic4 = gr.Audio(sources=["microphone"], streaming=True,
                            type="filepath")
            o4 = gr.Textbox(label="์›๋ฌธ", lines=8)
            e4 = gr.Textbox(label="English", lines=8)
            c4 = gr.Textbox(label="Chinese(็ฎ€ไฝ“)", lines=8)
            th4 = gr.Textbox(label="Thai", lines=8)
            r4 = gr.Textbox(label="Russian", lines=8)
            st4 = gr.State()
            # Same wiring fix as tab 3: stream_four(audio_path, src, state).
            mic4.stream(stream_four, inputs=[mic4, s4, st4],
                        outputs=[o4, e4, c4, th4, r4, st4],
                        stream_every=STREAM_SEC)
251
+
252
+ # ------------------- 7. Run -------------------
253
if __name__ == "__main__":
    # Serve on all interfaces at port 7860, without a public share link,
    # with Gradio's debug mode switched on.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "debug": True,
    }
    app.launch(**launch_options)