openfree committed on
Commit
6b6f26e
ยท
verified ยท
1 Parent(s): 0b2bf70

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +146 -208
app.py CHANGED
@@ -1,19 +1,22 @@
1
  """
2
- SMARTok ํ•ต์‹ฌ ๋ฐ๋ชจ
3
- โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
4
- โœ“ ํƒญ 1 : ์˜ค๋””์˜ค(๋…น์ŒยทํŒŒ์ผ) ๋‹จ๊ฑด ๋ฒˆ์—ญ + TTS ์žฌ์ƒ
5
- โœ“ ํƒญ 2 : PDF / ์ด๋ฏธ์ง€ ๋ฒˆ์—ญ (Tesseract ์—†์œผ๋ฉด PDF๋งŒ ์•ˆ๋‚ด)
6
- โœ“ ํƒญ 3 : ์‹ค์‹œ๊ฐ„ 1๊ฐœ ์–ธ์–ด(์„ ํƒํ˜•) ๋ฒˆ์—ญ
7
- โœ“ ํƒญ 4 : ์‹ค์‹œ๊ฐ„ 4๊ฐœ ์–ธ์–ด(์˜ยท์ค‘(๊ฐ„)ยทํƒœยท๋Ÿฌ) ๋™์‹œ ๋ฒˆ์—ญ
8
- โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
9
- Python โ‰ฅ3.10, Gradio 4.x, OpenAI Python SDK ํ•„์š”
 
10
  """
11
 
12
  import gradio as gr
13
- import openai, os, io, tempfile, mimetypes
14
  from dotenv import load_dotenv
 
 
15
 
16
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 0. ๊ณตํ†ต ์ดˆ๊ธฐํ™” โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
17
  load_dotenv()
18
  api_key = os.getenv("OPENAI_API_KEY")
19
  if not api_key:
@@ -32,234 +35,169 @@ LANG_CODE = {
32
  }
33
  VOICE = {l: ("nova" if l in ["Korean","Japanese","Chinese"] else "alloy")
34
  for l in LANGUAGES}
 
 
35
 
36
- FOUR_LANGS = ["English", "Chinese", "Thai", "Russian"] # ์‹ค์‹œ๊ฐ„ ๋™์‹œ ๋ฒˆ์—ญ์šฉ
37
- STREAM_SEC = 4 # Whisper ํ˜ธ์ถœ ๊ฐ„๊ฒฉ(์ดˆ) โ€“ 3~4 ์ดˆ ์ •๋„ ์ง€์—ฐ
38
-
39
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 1. ์œ ํ‹ธ ํ•จ์ˆ˜ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
40
- def _safe_path(v):
41
- """Gradio File/Audio ์ž…๋ ฅ โ†’ ์‹ค์ œ ๊ฒฝ๋กœ ์ถ”์ถœ"""
42
  if v is None:
43
  return None
44
- return v.get("name") if isinstance(v, dict) else v
45
 
46
- def _gpt_translate(text: str, src: str, tgt: str) -> str:
47
- """GPT-3.5-turbo ๋ฒˆ์—ญ (์„ค๋ช… ์—†์ด ๊ฒฐ๊ณผ๋งŒ)"""
48
  rsp = client.chat.completions.create(
49
  model="gpt-3.5-turbo",
50
  messages=[
51
  {"role":"system",
52
- "content":f"You are a professional translator. Translate the following {src} text to {tgt}. "
53
- f"Only provide the translated text."},
54
  {"role":"user","content":text}
55
  ],
56
  temperature=0.3,max_tokens=4096
57
  )
58
  return rsp.choices[0].message.content.strip()
59
 
60
- def _tts(text: str, lang: str) -> str:
61
- """OpenAI TTS-1 โ‡’ MP3 ํŒŒ์ผ ๊ฒฝ๋กœ ๋ฐ˜ํ™˜"""
62
- out = client.audio.speech.create(
63
  model="tts-1",
64
  voice=VOICE.get(lang,"alloy"),
65
  input=text[:4096]
66
  )
67
- tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
68
- tmp.write(out.content)
69
- tmp.close()
70
  return tmp.name
71
 
72
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 2. ๋‹จ๊ฑด ์˜ค๋””์˜ค ๋ฒˆ์—ญ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
73
  def translate_audio(audio_in, src, tgt):
74
- path = _safe_path(audio_in)
75
- if not path or not os.path.exists(path):
76
- return "โš ๏ธ ์Œ์„ฑ ํŒŒ์ผ์„ ๋…น์Œ-์—…๋กœ๋“œํ•˜์„ธ์š”.", "", None
77
-
78
- with open(path,"rb") as f:
79
  stt = client.audio.transcriptions.create(
80
- model="whisper-1",
81
- file=f,
82
- language=LANG_CODE.get(src)
83
- )
84
- original = stt.text.strip()
85
- if not original:
86
  return "โš ๏ธ ์Œ์„ฑ ์ธ์‹ ์‹คํŒจ", "", None
87
-
88
- translated = _gpt_translate(original, src, tgt)
89
- tts_path = _tts(translated, tgt)
90
- return original, translated, tts_path
91
-
92
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 3. PDF / ์ด๋ฏธ์ง€ ๋ฒˆ์—ญ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
93
- def translate_document(file_in, src, tgt):
94
- path = _safe_path(file_in)
95
- if not path or not os.path.exists(path):
96
- return "โš ๏ธ PDF(๋˜๋Š” ์ด๋ฏธ์ง€) ํŒŒ์ผ์„ ์—…๋กœ๋“œํ•˜์„ธ์š”.", ""
97
-
98
- ext = os.path.splitext(path)[1].lower()
99
- mime = mimetypes.guess_type(path)[0] or ""
100
- text = ""
101
-
102
  try:
103
- if ext == ".pdf" or "pdf" in mime:
104
- import pdfplumber
105
- with pdfplumber.open(path) as pdf:
106
- pages = pdf.pages[:5] # ๋ฐ๋ชจ: 5์ชฝ ์ œํ•œ
107
- text = "\n".join(p.extract_text() or "" for p in pages)
108
  else:
109
- # ์ด๋ฏธ์ง€์˜ ๊ฒฝ์šฐ Tesseract ํ•„์š”
110
- try:
111
- from PIL import Image
112
- import pytesseract
113
- text = pytesseract.image_to_string(Image.open(path))
114
- except Exception:
115
- return "โš ๏ธ ์„œ๋ฒ„์— Tesseract OCR๊ฐ€ ์—†์–ด์„œ ์ด๋ฏธ์ง€ OCR์€ ์ง€์›๋˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค. PDF๋งŒ ์‚ฌ์šฉํ•˜์„ธ์š”.", ""
 
116
  except Exception as e:
117
- return f"โŒ ํ…์ŠคํŠธ ์ถ”์ถœ ์‹คํŒจ: {e}", ""
118
-
119
- text = text.strip()
120
- if not text:
121
- return "โš ๏ธ ํ…์ŠคํŠธ๊ฐ€ ์ถ”์ถœ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.", ""
122
-
123
- translated = _gpt_translate(text, src, tgt)
124
- return text, translated
125
-
126
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 4. ์‹ค์‹œ๊ฐ„ 1๊ฐœ ์–ธ์–ด ๋ฒˆ์—ญ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
127
- def stream_single(mic_stream, src, tgt):
128
- buf, header = io.BytesIO(), None
129
- o_acc, t_acc = "", ""
130
- while True:
131
- chunk = mic_stream.recv()
132
- if chunk is None:
133
- break
134
- if header is None:
135
- header = chunk[:44]
136
- buf.write(chunk)
137
- if buf.getbuffer().nbytes > 16000*2*STREAM_SEC:
138
- wav = header + buf.getvalue()
139
- with tempfile.NamedTemporaryFile(delete=False,suffix=".wav") as tmp:
140
- tmp.write(wav); tmp.close()
141
- o, t, _ = translate_audio(tmp.name, src, tgt)
142
- o_acc += " " + o
143
- t_acc += " " + t
144
- yield o_acc.strip(), t_acc.strip()
145
- buf = io.BytesIO()
146
-
147
- if buf.getbuffer().nbytes:
148
- wav = header + buf.getvalue()
149
- with tempfile.NamedTemporaryFile(delete=False,suffix=".wav") as tmp:
150
- tmp.write(wav); tmp.close()
151
- o, t, _ = translate_audio(tmp.name, src, tgt)
152
- yield (o_acc+" "+o).strip(), (t_acc+" "+t).strip()
153
-
154
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 5. ์‹ค์‹œ๊ฐ„ 4๊ฐœ ์–ธ์–ด ๋ฒˆ์—ญ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
155
- def stream_multi(mic_stream, src):
156
- buf, header = io.BytesIO(), None
157
- acc = {lang:"" for lang in ["orig"]+FOUR_LANGS}
158
-
159
- while True:
160
- chunk = mic_stream.recv()
161
- if chunk is None:
162
- break
163
- if header is None:
164
- header = chunk[:44]
165
- buf.write(chunk)
166
- if buf.getbuffer().nbytes > 16000*2*STREAM_SEC:
167
- wav = header + buf.getvalue()
168
- with tempfile.NamedTemporaryFile(delete=False,suffix=".wav") as tmp:
169
- tmp.write(wav); tmp.close()
170
- with open(tmp.name,"rb") as f:
171
- stt = client.audio.transcriptions.create(
172
- model="whisper-1", file=f,
173
- language=LANG_CODE.get(src)
174
- )
175
- orig = stt.text.strip()
176
- if orig:
177
- acc["orig"] += " " + orig
178
- for lang in FOUR_LANGS:
179
- acc[lang] += " " + _gpt_translate(orig, src, lang)
180
- yield (acc["orig"].strip(),
181
- acc["English"].strip(),
182
- acc["Chinese"].strip(),
183
- acc["Thai"].strip(),
184
- acc["Russian"].strip())
185
- buf = io.BytesIO()
186
-
187
- if buf.getbuffer().nbytes:
188
- wav = header + buf.getvalue()
189
- with tempfile.NamedTemporaryFile(delete=False,suffix=".wav") as tmp:
190
- tmp.write(wav); tmp.close()
191
- with open(tmp.name,"rb") as f:
192
- stt = client.audio.transcriptions.create(
193
- model="whisper-1", file=f,
194
- language=LANG_CODE.get(src)
195
- )
196
- orig = stt.text.strip()
197
- if orig:
198
- acc["orig"] += " " + orig
199
- for lang in FOUR_LANGS:
200
- acc[lang] += " " + _gpt_translate(orig, src, lang)
201
- yield (acc["orig"].strip(),
202
- acc["English"].strip(),
203
- acc["Chinese"].strip(),
204
- acc["Thai"].strip(),
205
- acc["Russian"].strip())
206
-
207
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 6. Gradio UI โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
208
  with gr.Blocks(title="SMARTok Demo", theme=gr.themes.Soft()) as app:
209
  with gr.Tabs():
210
- # ํƒญ 1 โ”€ ์˜ค๋””์˜ค ๋ฒˆ์—ญ
211
  with gr.TabItem("๐ŸŽ™๏ธ ์˜ค๋””์˜ค ๋ฒˆ์—ญ"):
212
- src1 = gr.Dropdown(LANGUAGES,value="Korean",label="์ž…๋ ฅ")
213
- tgt1 = gr.Dropdown(LANGUAGES,value="English",label="์ถœ๋ ฅ")
214
- aud1 = gr.Audio(sources=["microphone","upload"],
215
- type="filepath",
216
- label="๋…น์Œ ๋˜๋Š” ์˜ค๋””์˜ค ํŒŒ์ผ ์—…๋กœ๋“œ")
217
- btn1 = gr.Button("๋ฒˆ์—ญ")
218
- stt1 = gr.Textbox(label="์›๋ฌธ", lines=5)
219
- tlt1 = gr.Textbox(label="๋ฒˆ์—ญ", lines=5)
220
- out1 = gr.Audio(label="TTS",type="filepath",autoplay=True)
221
- btn1.click(translate_audio,[aud1,src1,tgt1],[stt1,tlt1,out1])
222
-
223
- # ํƒญ 2 โ”€ ๋ฌธ์„œ/์ด๋ฏธ์ง€ ๋ฒˆ์—ญ
224
- with gr.TabItem("๐Ÿ“„ ๋ฌธ์„œ/์ด๋ฏธ์ง€ ๋ฒˆ์—ญ"):
225
- src2 = gr.Dropdown(LANGUAGES,value="Korean",label="์ž…๋ ฅ")
226
- tgt2 = gr.Dropdown(LANGUAGES,value="English",label="์ถœ๋ ฅ")
227
- file2= gr.File(label="PDF ๋˜๋Š” ์ด๋ฏธ์ง€ ์—…๋กœ๋“œ",
228
- file_types=[".pdf",".png",".jpg",".jpeg",".bmp",".tiff",".gif"])
229
- btn2 = gr.Button("๋ฒˆ์—ญ")
230
- org2 = gr.Textbox(label="์ถ”์ถœ ์›๋ฌธ",lines=15)
231
- trs2 = gr.Textbox(label="๋ฒˆ์—ญ ๊ฒฐ๊ณผ",lines=15)
232
- btn2.click(translate_document,[file2,src2,tgt2],[org2,trs2])
233
-
234
- # ํƒญ 3 โ”€ ์‹ค์‹œ๊ฐ„ 1์–ธ์–ด ๋ฒˆ์—ญ
235
  with gr.TabItem("โฑ๏ธ ์‹ค์‹œ๊ฐ„ 1์–ธ์–ด"):
236
- src3 = gr.Dropdown(LANGUAGES,value="Korean",label="์ž…๋ ฅ")
237
- tgt3 = gr.Dropdown(LANGUAGES,value="English",label="์ถœ๋ ฅ")
238
- mic3 = gr.Audio(sources=["microphone"],
239
- streaming=True,
240
- label="์‹ค์‹œ๊ฐ„ ๋งˆ์ดํฌ")
241
- stt3 = gr.Textbox(label="์›๋ฌธ(์‹ค์‹œ๊ฐ„)",lines=8)
242
- tlt3 = gr.Textbox(label="๋ฒˆ์—ญ(์‹ค์‹œ๊ฐ„)",lines=8)
243
- mic3.stream(stream_single,inputs=[src3,tgt3],outputs=[stt3,tlt3])
244
-
245
- # ํƒญ 4 โ”€ ์‹ค์‹œ๊ฐ„ 4๊ฐœ ์–ธ์–ด ๋ฒˆ์—ญ
 
246
  with gr.TabItem("๐ŸŒ ์‹ค์‹œ๊ฐ„ 4๊ฐœ ์–ธ์–ด"):
247
- gr.Markdown("๋งˆ์ดํฌ ์ž…๋ ฅ์„ 3-4 ์ดˆ ๊ฐ„๊ฒฉ์œผ๋กœ **English / Chinese(็ฎ€ไฝ“) / Thai / Russian** 4๊ฐœ ์–ธ์–ด๋กœ ๋™์‹œ ๋ฒˆ์—ญํ•ฉ๋‹ˆ๋‹ค.")
248
  src4 = gr.Dropdown(LANGUAGES,value="Korean",label="์ž…๋ ฅ ์–ธ์–ด")
249
- mic4 = gr.Audio(sources=["microphone"],
250
- streaming=True,
251
- label="์‹ค์‹œ๊ฐ„ ๋งˆ์ดํฌ")
252
- o4 = gr.Textbox(label="์›๋ฌธ",lines=8)
253
- e4 = gr.Textbox(label="English",lines=8)
254
- z4 = gr.Textbox(label="Chinese(็ฎ€ไฝ“)",lines=8)
255
- t4 = gr.Textbox(label="Thai",lines=8)
256
- r4 = gr.Textbox(label="Russian",lines=8)
257
- mic4.stream(stream_multi,inputs=[src4],
258
- outputs=[o4,e4,z4,t4,r4])
259
-
260
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 7. ์‹คํ–‰ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
261
  if __name__ == "__main__":
262
- app.launch(server_name="0.0.0.0",
263
- server_port=7860,
264
- share=False,
265
- debug=True)
 
1
  """
2
+ SMARTok 실시간 다국어 데모 (완전 수정본)
3
+ ────────────────────────────────────────────
4
+ • 탭1 🎙️ 오디오 번역 : 마이크/파일 → 번역 + TTS
5
+ • 탭2 📄 문서·이미지 번역 : PDF / 이미지(OCR) → 번역
6
+ • 탭3 ⏱️ 실시간 1언어 번역 : 마이크 → 1개 언어 실시간 자막
7
+ • 탭4 🌏 실시간 4개 언어 번역 : 마이크 → 영·중·태·러 동시 자막
8
+ ────────────────────────────────────────────
9
+ 필수 apt : tesseract-ocr libtesseract-dev ocrmypdf ffmpeg
10
+ 필수 pip : gradio>=5.33 openai python-dotenv pdfplumber ocrmypdf pillow
11
  """
12
 
13
  import gradio as gr
14
+ import openai, os, io, tempfile, mimetypes, json, uuid
15
  from dotenv import load_dotenv
16
+ import pdfplumber, ocrmypdf
17
+ from PIL import Image
18
 
19
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 0. ๊ณตํ†ต ์ดˆ๊ธฐํ™” โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
20
  load_dotenv()
21
  api_key = os.getenv("OPENAI_API_KEY")
22
  if not api_key:
 
35
  }
36
  VOICE = {l: ("nova" if l in ["Korean","Japanese","Chinese"] else "alloy")
37
  for l in LANGUAGES}
38
+ FOUR = ["English","Chinese","Thai","Russian"]
39
+ STREAM_SEC = 4 # Whisper ํ˜ธ์ถœ ์ฃผ๊ธฐ
40
 
41
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 1. ์œ ํ‹ธ ํ•จ์ˆ˜ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
42
+ def _safe(v):
 
 
 
 
43
  if v is None:
44
  return None
45
+ return v["name"] if isinstance(v, dict) else v
46
 
47
def _gpt(text, src, tgt):
    """Translate ``text`` from ``src`` to ``tgt`` with ``gpt-3.5-turbo``.

    Args:
        text: Source text, sent as the user message.
        src: Source language name, interpolated into the system prompt.
        tgt: Target language name, interpolated into the system prompt.

    Returns:
        The content of the first completion choice, whitespace-stripped.
    """
    system_prompt = (f"Translate the following {src} text to {tgt}. "
                     "Return only the translation.")
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=conversation,
        temperature=0.3,
        max_tokens=4096,
    )
    reply = completion.choices[0].message.content
    return reply.strip()
59
 
60
def _tts(text, lang):
    """Synthesize ``text`` with the ``tts-1`` model and save it as an MP3.

    Args:
        text: Text to speak; only the first 4096 characters are sent.
        lang: Language name used to pick a voice from ``VOICE``
            (``"alloy"`` when the language has no entry).

    Returns:
        Path of the temporary ``.mp3`` file. The file is created with
        ``delete=False``, so it is not removed automatically.
    """
    speech = client.audio.speech.create(
        model="tts-1",
        voice=VOICE.get(lang, "alloy"),
        input=text[:4096],
    )
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as mp3_file:
        mp3_file.write(speech.content)
    return mp3_file.name
69
 
70
# ---------------- 2. Audio (single-shot) translation ----------------
def translate_audio(audio_in, src, tgt):
    """Transcribe one audio file, translate the text, and synthesize speech.

    Args:
        audio_in: Gradio audio input value; resolved to a path by ``_safe``.
        src: Source language name (looked up in ``LANG_CODE`` for Whisper).
        tgt: Target language name.

    Returns:
        ``(original_text, translated_text, tts_mp3_path)`` on success, or
        ``(warning_message, "", None)`` when the file is missing or the
        transcript is empty.
    """
    audio_path = _safe(audio_in)
    if not (audio_path and os.path.exists(audio_path)):
        return "โš ๏ธ ์Œ์„ฑ ํŒŒ์ผ ํ•„์š”", "", None

    with open(audio_path, "rb") as audio_file:
        transcript = client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_file,
            language=LANG_CODE.get(src),
        )

    original = transcript.text.strip()
    if not original:
        return "โš ๏ธ ์Œ์„ฑ ์ธ์‹ ์‹คํŒจ", "", None

    translated = _gpt(original, src, tgt)
    speech_path = _tts(translated, tgt)
    return original, translated, speech_path
83
+
84
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 3. ๋ฌธ์„œ / ์ด๋ฏธ์ง€ ๋ฒˆ์—ญ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
85
def translate_doc(file_in, src, tgt):
    """Extract text from an uploaded PDF or image and translate it.

    PDFs are read directly with pdfplumber (first 5 pages only). Any other
    file is treated as an image: it is converted to a one-page PDF, run
    through OCRmyPDF, and the resulting text layer is read back.

    Args:
        file_in: Gradio file input value; resolved to a path by ``_safe``.
        src: Source language name (selects the OCR language and is passed
            on to the translator).
        tgt: Target language name.

    Returns:
        ``(extracted_text, translated_text)`` on success, or
        ``(message, "")`` when the file is missing, extraction raises, or
        no text is found.
    """
    p = _safe(file_in)
    if not p or not os.path.exists(p):
        return "โš ๏ธ PDF/์ด๋ฏธ์ง€ ์—…๋กœ๋“œ", ""
    ext = os.path.splitext(p)[1].lower()
    mime = mimetypes.guess_type(p)[0] or ""

    # LANG_CODE values are also sent to Whisper's `language` parameter, so
    # they are presumably ISO-639-1 codes, while Tesseract expects its own
    # codes ("kor", "eng", ...). NOTE(review): confirm against LANG_CODE.
    # Codes not listed here are passed through unchanged.
    tesseract_codes = {
        "ko": "kor", "en": "eng", "ja": "jpn", "zh": "chi_sim",
        "th": "tha", "ru": "rus", "es": "spa", "fr": "fra",
        "de": "deu", "vi": "vie",
    }
    code = LANG_CODE.get(src, "eng")
    ocr_lang = tesseract_codes.get(code, code)

    try:
        if ext == ".pdf" or "pdf" in mime:
            # PDF: use the embedded text layer as-is.
            with pdfplumber.open(p) as pdf:
                txt = "\n".join(pg.extract_text() or "" for pg in pdf.pages[:5])
        else:
            # Image: wrap in a PDF, OCR it, then read the text layer.
            # A TemporaryDirectory removes both intermediate PDFs, which
            # previously accumulated on disk (delete=False, never unlinked).
            with tempfile.TemporaryDirectory() as workdir:
                img_pdf = os.path.join(workdir, "image.pdf")
                ocr_pdf = os.path.join(workdir, "ocr.pdf")
                with Image.open(p) as img:
                    # Pillow cannot write modes such as RGBA to PDF.
                    if img.mode not in ("1", "L", "RGB"):
                        img = img.convert("RGB")
                    img.save(img_pdf, "PDF")
                ocrmypdf.ocr(img_pdf, ocr_pdf,
                             language=ocr_lang,
                             deskew=True, optimize=0, progress_bar=False)
                with pdfplumber.open(ocr_pdf) as pdf:
                    txt = "\n".join(pg.extract_text() or "" for pg in pdf.pages)
    except Exception as e:
        # UI boundary: report the failure in the output box, do not raise.
        return f"โŒ ์ถ”์ถœ ์˜ค๋ฅ˜: {e}", ""

    txt = txt.strip()
    if not txt:
        return "โš ๏ธ ํ…์ŠคํŠธ ์ถ”์ถœ ์‹คํŒจ", ""
    return txt, _gpt(txt, src, tgt)
112
+
113
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 4. ์‹ค์‹œ๊ฐ„ 1์–ธ์–ด โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
114
def stream_single(audio_path, src, tgt, state):
    """Handle one streamed microphone chunk: transcribe, translate, accumulate.

    Each call is assumed to deliver only the newest audio chunk, which is
    how Gradio streaming inputs behave (NOTE(review): confirm for the
    installed Gradio version). The transcript of the chunk is therefore
    appended to the running text. The previous implementation sliced it by
    the length of the accumulated transcript, which dropped or truncated
    new text and overwrote the running original.

    Args:
        audio_path: Path of the audio chunk file, or a falsy value.
        src: Source language name (looked up in ``LANG_CODE`` for Whisper).
        tgt: Target language name.
        state: Session dict with keys ``"orig"`` and ``"trans"``, or
            ``None`` on the first call.

    Returns:
        ``(original_so_far, translation_so_far, state)``.
    """
    state = state or {"orig": "", "trans": ""}
    if audio_path and os.path.exists(audio_path):
        with open(audio_path, "rb") as f:
            stt = client.audio.transcriptions.create(
                model="whisper-1", file=f, language=LANG_CODE.get(src))
        new = stt.text.strip()
        if new:
            state["orig"] = f'{state["orig"]} {new}'.strip()
            state["trans"] = f'{state["trans"]} {_gpt(new, src, tgt)}'.strip()
    return state["orig"], state["trans"], state
127
+
128
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 5. ์‹ค์‹œ๊ฐ„ 4์–ธ์–ด โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
129
def stream_multi(audio_path, src, state):
    """Handle one streamed chunk and translate it into the four ``FOUR`` languages.

    Each call is assumed to deliver only the newest audio chunk, which is
    how Gradio streaming inputs behave (NOTE(review): confirm for the
    installed Gradio version). The chunk transcript is appended to the
    running text instead of being sliced by the length of the accumulated
    transcript, which lost or truncated new text.

    Args:
        audio_path: Path of the audio chunk file, or a falsy value.
        src: Source language name (looked up in ``LANG_CODE`` for Whisper).
        state: Session dict with keys ``"orig"`` plus every name in
            ``FOUR``, or ``None`` on the first call.

    Returns:
        ``(original, english, chinese, thai, russian, state)`` with the
        accumulated text for each output box.
    """
    state = state or {k: "" for k in ["orig"] + FOUR}
    if audio_path and os.path.exists(audio_path):
        with open(audio_path, "rb") as f:
            stt = client.audio.transcriptions.create(
                model="whisper-1", file=f, language=LANG_CODE.get(src))
        new = stt.text.strip()
        if new:
            state["orig"] = f'{state["orig"]} {new}'.strip()
            for lang in FOUR:
                state[lang] = f'{state[lang]} {_gpt(new, src, lang)}'.strip()
    return (state["orig"].strip(),
            state["English"].strip(),
            state["Chinese"].strip(),
            state["Thai"].strip(),
            state["Russian"].strip(),
            state)
148
+
149
# ---------------- 6. Gradio UI ----------------
with gr.Blocks(title="SMARTok Demo", theme=gr.themes.Soft()) as app:
    with gr.Tabs():
        # Tab 1: one-shot audio translation with TTS playback.
        with gr.TabItem("๐ŸŽ™๏ธ ์˜ค๋””์˜ค ๋ฒˆ์—ญ"):
            src1 = gr.Dropdown(LANGUAGES, value="Korean", label="์ž…๋ ฅ ์–ธ์–ด")
            tgt1 = gr.Dropdown(LANGUAGES, value="English", label="์ถœ๋ ฅ ์–ธ์–ด")
            aud1 = gr.Audio(sources=["microphone", "upload"], type="filepath")
            res1 = gr.Button("๋ฒˆ์—ญ")
            o1 = gr.Textbox(label="์›๋ฌธ", lines=5)
            t1 = gr.Textbox(label="๋ฒˆ์—ญ", lines=5)
            a1 = gr.Audio(label="TTS", type="filepath", autoplay=True)
            res1.click(translate_audio, [aud1, src1, tgt1], [o1, t1, a1])

        # Tab 2: PDF / image translation.
        with gr.TabItem("๐Ÿ“„ ๋ฌธ์„œยท์ด๋ฏธ์ง€ ๋ฒˆ์—ญ"):
            src2 = gr.Dropdown(LANGUAGES, value="Korean", label="์ž…๋ ฅ ์–ธ์–ด")
            tgt2 = gr.Dropdown(LANGUAGES, value="English", label="์ถœ๋ ฅ ์–ธ์–ด")
            file2 = gr.File(label="PDF/์ด๋ฏธ์ง€ ์—…๋กœ๋“œ",
                            file_types=[".pdf", ".png", ".jpg", ".jpeg",
                                        ".bmp", ".tiff", ".gif"])
            doc2 = gr.Button("๋ฒˆ์—ญ")
            o2 = gr.Textbox(label="์ถ”์ถœ ์›๋ฌธ", lines=15)
            t2 = gr.Textbox(label="๋ฒˆ์—ญ ๊ฒฐ๊ณผ", lines=15)
            doc2.click(translate_doc, [file2, src2, tgt2], [o2, t2])

        # Tab 3: live microphone, one target language.
        with gr.TabItem("โฑ๏ธ ์‹ค์‹œ๊ฐ„ 1์–ธ์–ด"):
            src3 = gr.Dropdown(LANGUAGES, value="Korean", label="์ž…๋ ฅ ์–ธ์–ด")
            tgt3 = gr.Dropdown(LANGUAGES, value="English", label="์ถœ๋ ฅ ์–ธ์–ด")
            # type="filepath": stream_single opens its first argument as a
            # file, so the default numpy payload would not work.
            mic3 = gr.Audio(sources=["microphone"], streaming=True,
                            type="filepath")
            o3 = gr.Textbox(label="์›๋ฌธ(์‹ค์‹œ๊ฐ„)", lines=8)
            t3 = gr.Textbox(label="๋ฒˆ์—ญ(์‹ค์‹œ๊ฐ„)", lines=8)
            st3 = gr.State()
            # The microphone must be listed first in `inputs`: Gradio passes
            # exactly the listed components, and the handler signature is
            # (audio_path, src, tgt, state). STREAM_SEC sets how often a
            # chunk (and thus a Whisper request) is sent.
            mic3.stream(stream_single,
                        inputs=[mic3, src3, tgt3, st3],
                        outputs=[o3, t3, st3],
                        stream_every=STREAM_SEC)

        # Tab 4: live microphone, four simultaneous target languages.
        with gr.TabItem("๐ŸŒ ์‹ค์‹œ๊ฐ„ 4๊ฐœ ์–ธ์–ด"):
            src4 = gr.Dropdown(LANGUAGES, value="Korean", label="์ž…๋ ฅ ์–ธ์–ด")
            mic4 = gr.Audio(sources=["microphone"], streaming=True,
                            type="filepath")
            o4 = gr.Textbox(label="์›๋ฌธ", lines=8)
            e4 = gr.Textbox(label="English", lines=8)
            c4 = gr.Textbox(label="Chinese(็ฎ€ไฝ“)", lines=8)
            th4 = gr.Textbox(label="Thai", lines=8)
            r4 = gr.Textbox(label="Russian", lines=8)
            st4 = gr.State()
            # Same fix as tab 3: handler signature is (audio_path, src, state).
            mic4.stream(stream_multi,
                        inputs=[mic4, src4, st4],
                        outputs=[o4, e4, c4, th4, r4, st4],
                        stream_every=STREAM_SEC)
200
+
201
# ---------------- 7. Entry point ----------------
if __name__ == "__main__":
    # Listen on all interfaces on port 7860, no public share link, debug on.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "debug": True,
    }
    app.launch(**launch_options)