openfree committed on
Commit
effad1c
·
verified ·
1 Parent(s): 392a5eb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -62
app.py CHANGED
@@ -1,8 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  import openai, os, io, tempfile, mimetypes
3
  from dotenv import load_dotenv
4
 
5
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ ๊ณตํ†ต ์ดˆ๊ธฐํ™” โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
6
  load_dotenv()
7
  api_key = os.getenv("OPENAI_API_KEY")
8
  if not api_key:
@@ -19,16 +30,21 @@ LANG_CODE = {
19
  "Thai":"th","Russian":"ru","Vietnamese":"vi",
20
  "Spanish":"es","French":"fr"
21
  }
22
- FOUR_LANGS = ["English", "Chinese", "Thai", "Russian"] # ์‹ค์‹œ๊ฐ„ ๋™์‹œ ๋ฒˆ์—ญ ์–ธ์–ด
23
- VOICE = {l:("nova" if l in ["Korean","Japanese","Chinese"] else "alloy")
24
  for l in LANGUAGES}
25
 
26
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ ์œ ํ‹ธ ํ•จ์ˆ˜ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
 
 
 
27
  def _safe_path(v):
28
- if v is None: return None
 
 
29
  return v.get("name") if isinstance(v, dict) else v
30
 
31
- def _gpt_translate(text, src, tgt):
 
32
  rsp = client.chat.completions.create(
33
  model="gpt-3.5-turbo",
34
  messages=[
@@ -37,25 +53,27 @@ def _gpt_translate(text, src, tgt):
37
  f"Only provide the translated text."},
38
  {"role":"user","content":text}
39
  ],
40
- temperature=0.3, max_tokens=2048
41
  )
42
  return rsp.choices[0].message.content.strip()
43
 
44
- def _tts(text, lang):
 
45
  out = client.audio.speech.create(
46
  model="tts-1",
47
  voice=VOICE.get(lang,"alloy"),
48
  input=text[:4096]
49
  )
50
  tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
51
- tmp.write(out.content); tmp.close()
 
52
  return tmp.name
53
 
54
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ‘  ์Œ์„ฑ(MicยทFile) ๋‹จ๊ฑด ๋ฒˆ์—ญ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
55
  def translate_audio(audio_in, src, tgt):
56
  path = _safe_path(audio_in)
57
  if not path or not os.path.exists(path):
58
- return "โš ๏ธ ์Œ์„ฑ ํŒŒ์ผ์„ ๋…น์Œ/์—…๋กœ๋“œํ•˜์„ธ์š”.", "", None
59
 
60
  with open(path,"rb") as f:
61
  stt = client.audio.transcriptions.create(
@@ -71,13 +89,13 @@ def translate_audio(audio_in, src, tgt):
71
  tts_path = _tts(translated, tgt)
72
  return original, translated, tts_path
73
 
74
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ‘ก PDF / ์ด๋ฏธ์ง€ ๋ฒˆ์—ญ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
75
  def translate_document(file_in, src, tgt):
76
  path = _safe_path(file_in)
77
  if not path or not os.path.exists(path):
78
- return "โš ๏ธ PDF ๋˜๋Š” ์ด๋ฏธ์ง€๋ฅผ ์—…๋กœ๋“œํ•˜์„ธ์š”.", ""
79
 
80
- ext = os.path.splitext(path)[1].lower()
81
  mime = mimetypes.guess_type(path)[0] or ""
82
  text = ""
83
 
@@ -85,14 +103,16 @@ def translate_document(file_in, src, tgt):
85
  if ext == ".pdf" or "pdf" in mime:
86
  import pdfplumber
87
  with pdfplumber.open(path) as pdf:
88
- pages = pdf.pages[:5] # ๋ฐ๋ชจ: ์ตœ๋Œ€ 5์ชฝ
89
- text = "\n".join(p.extract_text() or "" for p in pages)
90
- elif ext in [".png",".jpg",".jpeg",".bmp",".tiff",".gif"] or "image" in mime:
91
- from PIL import Image
92
- import pytesseract
93
- text = pytesseract.image_to_string(Image.open(path))
94
  else:
95
- return "โš ๏ธ ์ง€์›ํ•˜์ง€ ์•Š๋Š” ํŒŒ์ผ ํ˜•์‹์ž…๋‹ˆ๋‹ค.", ""
 
 
 
 
 
 
96
  except Exception as e:
97
  return f"โŒ ํ…์ŠคํŠธ ์ถ”์ถœ ์‹คํŒจ: {e}", ""
98
 
@@ -103,25 +123,27 @@ def translate_document(file_in, src, tgt):
103
  translated = _gpt_translate(text, src, tgt)
104
  return text, translated
105
 
106
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ‘ข ์‹ค์‹œ๊ฐ„ 1๊ฐœ ์–ธ์–ด ๋ฒˆ์—ญ (์˜ต์…˜) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
107
- STREAM_SEC = 4 # Whisper ํ˜ธ์ถœ ์ฃผ๊ธฐ
108
-
109
  def stream_single(mic_stream, src, tgt):
110
  buf, header = io.BytesIO(), None
111
  o_acc, t_acc = "", ""
112
  while True:
113
  chunk = mic_stream.recv()
114
- if chunk is None: break
115
- if header is None: header = chunk[:44]
 
 
116
  buf.write(chunk)
117
  if buf.getbuffer().nbytes > 16000*2*STREAM_SEC:
118
  wav = header + buf.getvalue()
119
  with tempfile.NamedTemporaryFile(delete=False,suffix=".wav") as tmp:
120
  tmp.write(wav); tmp.close()
121
  o, t, _ = translate_audio(tmp.name, src, tgt)
122
- o_acc += " " + o; t_acc += " " + t
 
123
  yield o_acc.strip(), t_acc.strip()
124
  buf = io.BytesIO()
 
125
  if buf.getbuffer().nbytes:
126
  wav = header + buf.getvalue()
127
  with tempfile.NamedTemporaryFile(delete=False,suffix=".wav") as tmp:
@@ -129,17 +151,18 @@ def stream_single(mic_stream, src, tgt):
129
  o, t, _ = translate_audio(tmp.name, src, tgt)
130
  yield (o_acc+" "+o).strip(), (t_acc+" "+t).strip()
131
 
132
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ‘ฃ ์‹ค์‹œ๊ฐ„ 4๊ฐœ ์–ธ์–ด ๋™์‹œ ๋ฒˆ์—ญ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
133
  def stream_multi(mic_stream, src):
134
  buf, header = io.BytesIO(), None
135
- acc = {lang: "" for lang in ["original"] + FOUR_LANGS}
136
 
137
  while True:
138
  chunk = mic_stream.recv()
139
- if chunk is None: break
140
- if header is None: header = chunk[:44]
 
 
141
  buf.write(chunk)
142
-
143
  if buf.getbuffer().nbytes > 16000*2*STREAM_SEC:
144
  wav = header + buf.getvalue()
145
  with tempfile.NamedTemporaryFile(delete=False,suffix=".wav") as tmp:
@@ -151,17 +174,16 @@ def stream_multi(mic_stream, src):
151
  )
152
  orig = stt.text.strip()
153
  if orig:
154
- acc["original"] += " " + orig
155
  for lang in FOUR_LANGS:
156
  acc[lang] += " " + _gpt_translate(orig, src, lang)
157
- yield (acc["original"].strip(),
158
  acc["English"].strip(),
159
  acc["Chinese"].strip(),
160
  acc["Thai"].strip(),
161
  acc["Russian"].strip())
162
  buf = io.BytesIO()
163
 
164
- # ๋‚จ์€ ๋ฒ„ํผ
165
  if buf.getbuffer().nbytes:
166
  wav = header + buf.getvalue()
167
  with tempfile.NamedTemporaryFile(delete=False,suffix=".wav") as tmp:
@@ -173,19 +195,19 @@ def stream_multi(mic_stream, src):
173
  )
174
  orig = stt.text.strip()
175
  if orig:
176
- acc["original"] += " " + orig
177
  for lang in FOUR_LANGS:
178
  acc[lang] += " " + _gpt_translate(orig, src, lang)
179
- yield (acc["original"].strip(),
180
  acc["English"].strip(),
181
  acc["Chinese"].strip(),
182
  acc["Thai"].strip(),
183
  acc["Russian"].strip())
184
 
185
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ Gradio UI โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
186
  with gr.Blocks(title="SMARTok Demo", theme=gr.themes.Soft()) as app:
187
  with gr.Tabs():
188
- # 1) ์˜ค๋””์˜ค(๋…น์Œยท์—…๋กœ๋“œ) ๋ฒˆ์—ญ
189
  with gr.TabItem("๐ŸŽ™๏ธ ์˜ค๋””์˜ค ๋ฒˆ์—ญ"):
190
  src1 = gr.Dropdown(LANGUAGES,value="Korean",label="์ž…๋ ฅ")
191
  tgt1 = gr.Dropdown(LANGUAGES,value="English",label="์ถœ๋ ฅ")
@@ -196,51 +218,46 @@ with gr.Blocks(title="SMARTok Demo", theme=gr.themes.Soft()) as app:
196
  stt1 = gr.Textbox(label="์›๋ฌธ", lines=5)
197
  tlt1 = gr.Textbox(label="๋ฒˆ์—ญ", lines=5)
198
  out1 = gr.Audio(label="TTS",type="filepath",autoplay=True)
199
- btn1.click(translate_audio, [aud1,src1,tgt1],[stt1,tlt1,out1])
200
 
201
- # 2) PDF / ์ด๋ฏธ์ง€ ๋ฒˆ์—ญ
202
  with gr.TabItem("๐Ÿ“„ ๋ฌธ์„œ/์ด๋ฏธ์ง€ ๋ฒˆ์—ญ"):
203
  src2 = gr.Dropdown(LANGUAGES,value="Korean",label="์ž…๋ ฅ")
204
  tgt2 = gr.Dropdown(LANGUAGES,value="English",label="์ถœ๋ ฅ")
205
  file2= gr.File(label="PDF ๋˜๋Š” ์ด๋ฏธ์ง€ ์—…๋กœ๋“œ",
206
- file_types=[".pdf",".png",".jpg",".jpeg",
207
- ".bmp",".tiff",".gif"])
208
  btn2 = gr.Button("๋ฒˆ์—ญ")
209
  org2 = gr.Textbox(label="์ถ”์ถœ ์›๋ฌธ",lines=15)
210
  trs2 = gr.Textbox(label="๋ฒˆ์—ญ ๊ฒฐ๊ณผ",lines=15)
211
  btn2.click(translate_document,[file2,src2,tgt2],[org2,trs2])
212
 
213
- # 3) ์‹ค์‹œ๊ฐ„ 1๊ฐœ ์–ธ์–ด ๋ฒˆ์—ญ(์„ ํƒ)
214
  with gr.TabItem("โฑ๏ธ ์‹ค์‹œ๊ฐ„ 1์–ธ์–ด"):
215
  src3 = gr.Dropdown(LANGUAGES,value="Korean",label="์ž…๋ ฅ")
216
  tgt3 = gr.Dropdown(LANGUAGES,value="English",label="์ถœ๋ ฅ")
217
  mic3 = gr.Audio(sources=["microphone"],
218
- streaming=True,label="์‹ค์‹œ๊ฐ„ ๋งˆ์ดํฌ")
 
219
  stt3 = gr.Textbox(label="์›๋ฌธ(์‹ค์‹œ๊ฐ„)",lines=8)
220
  tlt3 = gr.Textbox(label="๋ฒˆ์—ญ(์‹ค์‹œ๊ฐ„)",lines=8)
221
- mic3.stream(lambda a,s,t: stream_single(a,s,t),
222
- inputs=[src3,tgt3],
223
- outputs=[stt3,tlt3])
224
 
225
- # 4) **์‹ค์‹œ๊ฐ„ 4๊ฐœ ์–ธ์–ด ๋™์‹œ ๋ฒˆ์—ญ** โ† ํ•ต์‹ฌ ๋ฐ๋ชจ
226
  with gr.TabItem("๐ŸŒ ์‹ค์‹œ๊ฐ„ 4๊ฐœ ์–ธ์–ด"):
227
- gr.Markdown("๋งˆ์ดํฌ ์ž…๋ ฅ์„ **English / Chinese(็ฎ€ไฝ“) / Thai / Russian** "
228
- "4๊ฐœ ์–ธ์–ด๋กœ ์‹ค์‹œ๊ฐ„(3-4 ์ดˆ ์ง€์—ฐ) ๋™์‹œ ๋ฒˆ์—ญํ•ฉ๋‹ˆ๋‹ค.")
229
  src4 = gr.Dropdown(LANGUAGES,value="Korean",label="์ž…๋ ฅ ์–ธ์–ด")
230
  mic4 = gr.Audio(sources=["microphone"],
231
- streaming=True,label="์‹ค์‹œ๊ฐ„ ๋งˆ์ดํฌ")
232
- o4 = gr.Textbox(label="์›๋ฌธ", lines=8)
233
- e4 = gr.Textbox(label="English", lines=8)
234
- z4 = gr.Textbox(label="Chinese(็ฎ€ไฝ“)", lines=8)
235
- t4 = gr.Textbox(label="Thai", lines=8)
236
- r4 = gr.Textbox(label="Russian", lines=8)
237
-
238
- # Audio.stream โ†’ 5๊ฐœ ์ถœ๋ ฅ
239
- mic4.stream(lambda a,s: stream_multi(a,s),
240
- inputs=[src4],
241
  outputs=[o4,e4,z4,t4,r4])
242
 
243
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ ์‹คํ–‰ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
244
  if __name__ == "__main__":
245
  app.launch(server_name="0.0.0.0",
246
  server_port=7860,
 
1
+ """
2
+ SMARTok 핵심 데모
3
+ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
4
+ โœ“ ํƒญ 1 : ์˜ค๋””์˜ค(๋…น์ŒยทํŒŒ์ผ) ๋‹จ๊ฑด ๋ฒˆ์—ญ + TTS ์žฌ์ƒ
5
+ โœ“ ํƒญ 2 : PDF / ์ด๋ฏธ์ง€ ๋ฒˆ์—ญ (Tesseract ์—†์œผ๋ฉด PDF๋งŒ ์•ˆ๋‚ด)
6
+ โœ“ ํƒญ 3 : ์‹ค์‹œ๊ฐ„ 1๊ฐœ ์–ธ์–ด(์„ ํƒํ˜•) ๋ฒˆ์—ญ
7
+ โœ“ ํƒญ 4 : ์‹ค์‹œ๊ฐ„ 4๊ฐœ ์–ธ์–ด(์˜ยท์ค‘(๊ฐ„)ยทํƒœยท๋Ÿฌ) ๋™์‹œ ๋ฒˆ์—ญ
8
+ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
9
+ Python ≥3.10, Gradio 4.x, OpenAI Python SDK 필요
10
+ """
11
+
12
  import gradio as gr
13
  import openai, os, io, tempfile, mimetypes
14
  from dotenv import load_dotenv
15
 
16
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 0. ๊ณตํ†ต ์ดˆ๊ธฐํ™” โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
17
  load_dotenv()
18
  api_key = os.getenv("OPENAI_API_KEY")
19
  if not api_key:
 
30
  "Thai":"th","Russian":"ru","Vietnamese":"vi",
31
  "Spanish":"es","French":"fr"
32
  }
33
+ VOICE = {l: ("nova" if l in ["Korean","Japanese","Chinese"] else "alloy")
 
34
  for l in LANGUAGES}
35
 
36
+ FOUR_LANGS = ["English", "Chinese", "Thai", "Russian"] # ์‹ค์‹œ๊ฐ„ ๋™์‹œ ๋ฒˆ์—ญ์šฉ
37
+ STREAM_SEC = 4 # Whisper ํ˜ธ์ถœ ๊ฐ„๊ฒฉ(์ดˆ) โ€“ 3~4 ์ดˆ ์ •๋„ ์ง€์—ฐ
38
+
39
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 1. ์œ ํ‹ธ ํ•จ์ˆ˜ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
40
  def _safe_path(v):
41
+ """Gradio File/Audio ์ž…๋ ฅ โ†’ ์‹ค์ œ ๊ฒฝ๋กœ ์ถ”์ถœ"""
42
+ if v is None:
43
+ return None
44
  return v.get("name") if isinstance(v, dict) else v
45
 
46
+ def _gpt_translate(text: str, src: str, tgt: str) -> str:
47
+ """GPT-3.5-turbo ๋ฒˆ์—ญ (์„ค๋ช… ์—†์ด ๊ฒฐ๊ณผ๋งŒ)"""
48
  rsp = client.chat.completions.create(
49
  model="gpt-3.5-turbo",
50
  messages=[
 
53
  f"Only provide the translated text."},
54
  {"role":"user","content":text}
55
  ],
56
+ temperature=0.3,max_tokens=4096
57
  )
58
  return rsp.choices[0].message.content.strip()
59
 
60
+ def _tts(text: str, lang: str) -> str:
61
+ """OpenAI TTS-1 โ‡’ MP3 ํŒŒ์ผ ๊ฒฝ๋กœ ๋ฐ˜ํ™˜"""
62
  out = client.audio.speech.create(
63
  model="tts-1",
64
  voice=VOICE.get(lang,"alloy"),
65
  input=text[:4096]
66
  )
67
  tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
68
+ tmp.write(out.content)
69
+ tmp.close()
70
  return tmp.name
71
 
72
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 2. ๋‹จ๊ฑด ์˜ค๋””์˜ค ๋ฒˆ์—ญ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
73
  def translate_audio(audio_in, src, tgt):
74
  path = _safe_path(audio_in)
75
  if not path or not os.path.exists(path):
76
+ return "โš ๏ธ ์Œ์„ฑ ํŒŒ์ผ์„ ๋…น์Œ-์—…๋กœ๋“œํ•˜์„ธ์š”.", "", None
77
 
78
  with open(path,"rb") as f:
79
  stt = client.audio.transcriptions.create(
 
89
  tts_path = _tts(translated, tgt)
90
  return original, translated, tts_path
91
 
92
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 3. PDF / ์ด๋ฏธ์ง€ ๋ฒˆ์—ญ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
93
  def translate_document(file_in, src, tgt):
94
  path = _safe_path(file_in)
95
  if not path or not os.path.exists(path):
96
+ return "โš ๏ธ PDF(๋˜๋Š” ์ด๋ฏธ์ง€) ํŒŒ์ผ์„ ์—…๋กœ๋“œํ•˜์„ธ์š”.", ""
97
 
98
+ ext = os.path.splitext(path)[1].lower()
99
  mime = mimetypes.guess_type(path)[0] or ""
100
  text = ""
101
 
 
103
  if ext == ".pdf" or "pdf" in mime:
104
  import pdfplumber
105
  with pdfplumber.open(path) as pdf:
106
+ pages = pdf.pages[:5] # ๋ฐ๋ชจ: 5์ชฝ ์ œํ•œ
107
+ text = "\n".join(p.extract_text() or "" for p in pages)
 
 
 
 
108
  else:
109
+ # ์ด๋ฏธ์ง€์˜ ๊ฒฝ์šฐ Tesseract ํ•„์š”
110
+ try:
111
+ from PIL import Image
112
+ import pytesseract
113
+ text = pytesseract.image_to_string(Image.open(path))
114
+ except Exception:
115
+ return "โš ๏ธ ์„œ๋ฒ„์— Tesseract OCR๊ฐ€ ์—†์–ด์„œ ์ด๋ฏธ์ง€ OCR์€ ์ง€์›๋˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค. PDF๋งŒ ์‚ฌ์šฉํ•˜์„ธ์š”.", ""
116
  except Exception as e:
117
  return f"โŒ ํ…์ŠคํŠธ ์ถ”์ถœ ์‹คํŒจ: {e}", ""
118
 
 
123
  translated = _gpt_translate(text, src, tgt)
124
  return text, translated
125
 
126
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 4. ์‹ค์‹œ๊ฐ„ 1๊ฐœ ์–ธ์–ด ๋ฒˆ์—ญ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
 
 
127
  def stream_single(mic_stream, src, tgt):
128
  buf, header = io.BytesIO(), None
129
  o_acc, t_acc = "", ""
130
  while True:
131
  chunk = mic_stream.recv()
132
+ if chunk is None:
133
+ break
134
+ if header is None:
135
+ header = chunk[:44]
136
  buf.write(chunk)
137
  if buf.getbuffer().nbytes > 16000*2*STREAM_SEC:
138
  wav = header + buf.getvalue()
139
  with tempfile.NamedTemporaryFile(delete=False,suffix=".wav") as tmp:
140
  tmp.write(wav); tmp.close()
141
  o, t, _ = translate_audio(tmp.name, src, tgt)
142
+ o_acc += " " + o
143
+ t_acc += " " + t
144
  yield o_acc.strip(), t_acc.strip()
145
  buf = io.BytesIO()
146
+
147
  if buf.getbuffer().nbytes:
148
  wav = header + buf.getvalue()
149
  with tempfile.NamedTemporaryFile(delete=False,suffix=".wav") as tmp:
 
151
  o, t, _ = translate_audio(tmp.name, src, tgt)
152
  yield (o_acc+" "+o).strip(), (t_acc+" "+t).strip()
153
 
154
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 5. ์‹ค์‹œ๊ฐ„ 4๊ฐœ ์–ธ์–ด ๋ฒˆ์—ญ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
155
  def stream_multi(mic_stream, src):
156
  buf, header = io.BytesIO(), None
157
+ acc = {lang:"" for lang in ["orig"]+FOUR_LANGS}
158
 
159
  while True:
160
  chunk = mic_stream.recv()
161
+ if chunk is None:
162
+ break
163
+ if header is None:
164
+ header = chunk[:44]
165
  buf.write(chunk)
 
166
  if buf.getbuffer().nbytes > 16000*2*STREAM_SEC:
167
  wav = header + buf.getvalue()
168
  with tempfile.NamedTemporaryFile(delete=False,suffix=".wav") as tmp:
 
174
  )
175
  orig = stt.text.strip()
176
  if orig:
177
+ acc["orig"] += " " + orig
178
  for lang in FOUR_LANGS:
179
  acc[lang] += " " + _gpt_translate(orig, src, lang)
180
+ yield (acc["orig"].strip(),
181
  acc["English"].strip(),
182
  acc["Chinese"].strip(),
183
  acc["Thai"].strip(),
184
  acc["Russian"].strip())
185
  buf = io.BytesIO()
186
 
 
187
  if buf.getbuffer().nbytes:
188
  wav = header + buf.getvalue()
189
  with tempfile.NamedTemporaryFile(delete=False,suffix=".wav") as tmp:
 
195
  )
196
  orig = stt.text.strip()
197
  if orig:
198
+ acc["orig"] += " " + orig
199
  for lang in FOUR_LANGS:
200
  acc[lang] += " " + _gpt_translate(orig, src, lang)
201
+ yield (acc["orig"].strip(),
202
  acc["English"].strip(),
203
  acc["Chinese"].strip(),
204
  acc["Thai"].strip(),
205
  acc["Russian"].strip())
206
 
207
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€๏ฟฝ๏ฟฝ๏ฟฝโ”€ 6. Gradio UI โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
208
  with gr.Blocks(title="SMARTok Demo", theme=gr.themes.Soft()) as app:
209
  with gr.Tabs():
210
+ # ํƒญ 1 โ”€ ์˜ค๋””์˜ค ๋ฒˆ์—ญ
211
  with gr.TabItem("๐ŸŽ™๏ธ ์˜ค๋””์˜ค ๋ฒˆ์—ญ"):
212
  src1 = gr.Dropdown(LANGUAGES,value="Korean",label="์ž…๋ ฅ")
213
  tgt1 = gr.Dropdown(LANGUAGES,value="English",label="์ถœ๋ ฅ")
 
218
  stt1 = gr.Textbox(label="์›๋ฌธ", lines=5)
219
  tlt1 = gr.Textbox(label="๋ฒˆ์—ญ", lines=5)
220
  out1 = gr.Audio(label="TTS",type="filepath",autoplay=True)
221
+ btn1.click(translate_audio,[aud1,src1,tgt1],[stt1,tlt1,out1])
222
 
223
+ # ํƒญ 2 โ”€ ๋ฌธ์„œ/์ด๋ฏธ์ง€ ๋ฒˆ์—ญ
224
  with gr.TabItem("๐Ÿ“„ ๋ฌธ์„œ/์ด๋ฏธ์ง€ ๋ฒˆ์—ญ"):
225
  src2 = gr.Dropdown(LANGUAGES,value="Korean",label="์ž…๋ ฅ")
226
  tgt2 = gr.Dropdown(LANGUAGES,value="English",label="์ถœ๋ ฅ")
227
  file2= gr.File(label="PDF ๋˜๋Š” ์ด๋ฏธ์ง€ ์—…๋กœ๋“œ",
228
+ file_types=[".pdf",".png",".jpg",".jpeg",".bmp",".tiff",".gif"])
 
229
  btn2 = gr.Button("๋ฒˆ์—ญ")
230
  org2 = gr.Textbox(label="์ถ”์ถœ ์›๋ฌธ",lines=15)
231
  trs2 = gr.Textbox(label="๋ฒˆ์—ญ ๊ฒฐ๊ณผ",lines=15)
232
  btn2.click(translate_document,[file2,src2,tgt2],[org2,trs2])
233
 
234
+ # ํƒญ 3 โ”€ ์‹ค์‹œ๊ฐ„ 1์–ธ์–ด ๋ฒˆ์—ญ
235
  with gr.TabItem("โฑ๏ธ ์‹ค์‹œ๊ฐ„ 1์–ธ์–ด"):
236
  src3 = gr.Dropdown(LANGUAGES,value="Korean",label="์ž…๋ ฅ")
237
  tgt3 = gr.Dropdown(LANGUAGES,value="English",label="์ถœ๋ ฅ")
238
  mic3 = gr.Audio(sources=["microphone"],
239
+ streaming=True,
240
+ label="์‹ค์‹œ๊ฐ„ ๋งˆ์ดํฌ")
241
  stt3 = gr.Textbox(label="์›๋ฌธ(์‹ค์‹œ๊ฐ„)",lines=8)
242
  tlt3 = gr.Textbox(label="๋ฒˆ์—ญ(์‹ค์‹œ๊ฐ„)",lines=8)
243
+ mic3.stream(stream_single,inputs=[src3,tgt3],outputs=[stt3,tlt3])
 
 
244
 
245
+ # ํƒญ 4 โ”€ ์‹ค์‹œ๊ฐ„ 4๊ฐœ ์–ธ์–ด ๋ฒˆ์—ญ
246
  with gr.TabItem("๐ŸŒ ์‹ค์‹œ๊ฐ„ 4๊ฐœ ์–ธ์–ด"):
247
+ gr.Markdown("๋งˆ์ดํฌ ์ž…๋ ฅ์„ 3-4 ์ดˆ ๊ฐ„๊ฒฉ์œผ๋กœ **English / Chinese(็ฎ€ไฝ“) / Thai / Russian** 4๊ฐœ ์–ธ์–ด๋กœ ๋™์‹œ ๋ฒˆ์—ญํ•ฉ๋‹ˆ๋‹ค.")
 
248
  src4 = gr.Dropdown(LANGUAGES,value="Korean",label="์ž…๋ ฅ ์–ธ์–ด")
249
  mic4 = gr.Audio(sources=["microphone"],
250
+ streaming=True,
251
+ label="์‹ค์‹œ๊ฐ„ ๋งˆ์ดํฌ")
252
+ o4 = gr.Textbox(label="์›๋ฌธ",lines=8)
253
+ e4 = gr.Textbox(label="English",lines=8)
254
+ z4 = gr.Textbox(label="Chinese(็ฎ€ไฝ“)",lines=8)
255
+ t4 = gr.Textbox(label="Thai",lines=8)
256
+ r4 = gr.Textbox(label="Russian",lines=8)
257
+ mic4.stream(stream_multi,inputs=[src4],
 
 
258
  outputs=[o4,e4,z4,t4,r4])
259
 
260
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 7. ์‹คํ–‰ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
261
  if __name__ == "__main__":
262
  app.launch(server_name="0.0.0.0",
263
  server_port=7860,