Update app.py
app.py CHANGED
@@ -1,15 +1,15 @@
 import gradio as gr
-import openai, os, io, tempfile
+import openai, os, io, tempfile, mimetypes
 from dotenv import load_dotenv
 
-#
+# ============ 환경 변수 & OpenAI 초기화 ==============================
 load_dotenv()
 api_key = os.getenv("OPENAI_API_KEY")
 if not api_key:
     raise RuntimeError("OPENAI_API_KEY를 .env 파일에 설정하세요!")
 client = openai.OpenAI(api_key=api_key)
 
-#
+# ============ 언어 매핑 ==============================================
 LANGUAGES = [
     "Korean", "English", "Japanese", "Chinese",
     "Thai", "Russian", "Vietnamese",
@@ -20,33 +20,34 @@ LANG_CODE = {
     "Thai": "th", "Russian": "ru", "Vietnamese": "vi",
     "Spanish": "es", "French": "fr"
 }
-VOICE = {
-
-
-
+VOICE = {l: ("nova" if l in ["Korean", "Japanese", "Chinese"] else "alloy")
+         for l in LANGUAGES}
+
+# ============ 공통 유틸 함수 =========================================
+def _safe_path(x):
+    """Gradio File/Audio 컴포넌트 값에서 실제 파일 경로 추출"""
+    if x is None:
+        return None
+    if isinstance(x, dict):
+        return x.get("name")          # 4.x 형식
+    return x
 
-# ============== 공통 함수 ===========================================
 def _gpt_translate(text: str, src: str, tgt: str) -> str:
-    """GPT-3.5 번역"""
+    """GPT-3.5-Turbo 번역"""
    rsp = client.chat.completions.create(
         model="gpt-3.5-turbo",
         messages=[
-            {
-
-
-                f"You are a professional translator. Translate the following {src} text to {tgt}. "
-                f"Only provide the translation without additional commentary."
-            )
-            },
+            {"role": "system",
+             "content": f"You are a professional translator. Translate the following {src} text to {tgt}. "
+                        f"Only provide the translated text."},
             {"role": "user", "content": text}
         ],
-        temperature=0.3,
-        max_tokens=2048
+        temperature=0.3, max_tokens=4096
     )
     return rsp.choices[0].message.content.strip()
 
 def _tts(text: str, lang: str) -> str:
-    """TTS-1
+    """OpenAI TTS-1 → mp3 파일 경로"""
     out = client.audio.speech.create(
         model="tts-1",
         voice=VOICE.get(lang, "alloy"),
@@ -57,9 +58,13 @@ def _tts(text: str, lang: str) -> str:
     tmp.close()
     return tmp.name
 
-
-
-
+# ============ ① 음성 파일/마이크 번역 ================================
+def translate_audio(audio_in, src, tgt):
+    path = _safe_path(audio_in)
+    if not path or not os.path.exists(path):
+        return "⚠️ 음성 파일을 녹음하거나 업로드하세요.", "", None
+
+    with open(path, "rb") as f:
         stt = client.audio.transcriptions.create(
             model="whisper-1",
             file=f,
@@ -73,53 +78,82 @@ def translate_audio(audio_path, src, tgt):
     tts_path = _tts(translated, tgt)
     return original, translated, tts_path
 
-#
-
-
-
-
+# ============ ② PDF / 이미지 번역 ===================================
+def translate_document(file_in, src, tgt):
+    path = _safe_path(file_in)
+    if not path or not os.path.exists(path):
+        return "⚠️ PDF 또는 이미지를 업로드하세요.", ""
+
+    text = ""
+    ext = os.path.splitext(path)[1].lower()
+    mime = mimetypes.guess_type(path)[0] or ""
+
+    try:
+        if ext == ".pdf" or "pdf" in mime:
+            import pdfplumber
+            with pdfplumber.open(path) as pdf:
+                pages = pdf.pages[:5]          # 데모: 최대 5쪽
+                text = "\n".join(p.extract_text() or "" for p in pages)
+        elif ext in [".png", ".jpg", ".jpeg", ".bmp", ".tiff", ".gif"] or "image" in mime:
+            from PIL import Image
+            import pytesseract
+            text = pytesseract.image_to_string(Image.open(path))
+        else:
+            return "⚠️ 지원하지 않는 파일 형식입니다.", ""
+    except Exception as e:
+        return f"❌ 텍스트 추출 실패: {type(e).__name__}: {e}", ""
+
+    text = text.strip()
+    if not text:
+        return "⚠️ 텍스트가 추출되지 않았습니다.", ""
+
+    translated = _gpt_translate(text, src, tgt)
+    return text, translated
+
+# ============ ③ 실시간 스트리밍 전사/번역 ===========================
+STREAM_CHUNK_SEC = 4
+
+def stream_generator(mic_stream, src, tgt="English"):
     buffer = io.BytesIO()
     wav_header = None
-
+    orig_acc, trans_acc = "", ""
 
     while True:
         chunk = mic_stream.recv()
-        if chunk is None:
+        if chunk is None:
             break
-
         if not wav_header:
-            wav_header = chunk[:44]
+            wav_header = chunk[:44]          # WAV 헤더
         buffer.write(chunk)
 
-        # 지정 시간만큼 쌓이면 Whisper 호출
         if buffer.getbuffer().nbytes > 16000 * 2 * STREAM_CHUNK_SEC:
             wav_bytes = wav_header + buffer.getvalue()
             with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
                 tmp.write(wav_bytes)
                 tmp.close()
                 o, t, _ = translate_audio(tmp.name, src, tgt)
+            orig_acc += " " + o
+            trans_acc += " " + t
+            yield orig_acc.strip(), trans_acc.strip()
+            buffer = io.BytesIO()
 
-            original_acc += " " + o
-            translated_acc += " " + t
-            yield original_acc.strip(), translated_acc.strip()
-            buffer = io.BytesIO()  # 버퍼 초기화
-
-    # 마지막 남은 버퍼 처리
     if buffer.getbuffer().nbytes:
         wav_bytes = wav_header + buffer.getvalue()
         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
             tmp.write(wav_bytes)
             tmp.close()
            o, t, _ = translate_audio(tmp.name, src, tgt)
+        yield (orig_acc + " " + o).strip(), (trans_acc + " " + t).strip()
 
-
-
-# ============== 4개국 동시 번역 =====================================
+# ============ ④ 4개국 동시 번역 ====================================
 FOUR_LANGS = ["English", "Chinese", "Thai", "Russian"]
 
-def translate_audio_four(
-
-
+def translate_audio_four(audio_in, src):
+    path = _safe_path(audio_in)
+    if not path or not os.path.exists(path):
+        return ["⚠️ 음성 파일을 녹음하거나 업로드하세요."] + [""] * 4
+
+    with open(path, "rb") as f:
         stt = client.audio.transcriptions.create(
             model="whisper-1",
             file=f,
@@ -132,13 +166,13 @@ def translate_audio_four(audio_path, src):
     outs = [original]
     for lang in FOUR_LANGS:
         outs.append(_gpt_translate(original, src, lang))
-    return outs
+    return outs
 
-#
+# ============ Gradio UI =============================================
 with gr.Blocks(title="SMARTok Demo", theme=gr.themes.Soft()) as app:
     with gr.Tabs():
-        #
-        with gr.TabItem("🎙️
+        # 1) 마이크/파일(오디오) 번역
+        with gr.TabItem("🎙️ 마이크/오디오 번역"):
             src1 = gr.Dropdown(LANGUAGES, value="Korean", label="입력")
             tgt1 = gr.Dropdown(LANGUAGES, value="English", label="출력")
             mic1 = gr.Audio(
@@ -157,29 +191,26 @@ with gr.Blocks(title="SMARTok Demo", theme=gr.themes.Soft()) as app:
                 outputs=[stt1, tlt1, out1]
             )
 
-        #
-        with gr.TabItem("
+        # 2) PDF / 이미지 번역
+        with gr.TabItem("📄 문서/이미지 번역"):
             src2 = gr.Dropdown(LANGUAGES, value="Korean", label="입력")
             tgt2 = gr.Dropdown(LANGUAGES, value="English", label="출력")
-            file2 = gr.
-
-
-                label="오디오 파일 업로드"
+            file2 = gr.File(
+                label="PDF 또는 이미지 업로드",
+                file_types=[".pdf", ".png", ".jpg", ".jpeg", ".bmp", ".tiff", ".gif"]
             )
             btn2 = gr.Button("번역")
-
-
-            out2 = gr.Audio(label="TTS", type="filepath", autoplay=True)
+            original_doc = gr.Textbox(label="추출된 원문", lines=15)
+            translated_doc = gr.Textbox(label="번역 결과", lines=15)
 
             btn2.click(
-
+                translate_document,
                 inputs=[file2, src2, tgt2],
-                outputs=[
+                outputs=[original_doc, translated_doc]
             )
 
-        #
+        # 3) 실시간 번역(Beta)
         with gr.TabItem("⏱️ 실시간 번역 (Beta)"):
-            gr.Markdown("마이크를 켜면 3~4초 간격으로 자막이 갱신됩니다.")
             src3 = gr.Dropdown(LANGUAGES, value="Korean", label="입력")
             tgt3 = gr.Dropdown(LANGUAGES, value="English", label="출력")
             mic3 = gr.Audio(
@@ -195,9 +226,8 @@ with gr.Blocks(title="SMARTok Demo", theme=gr.themes.Soft()) as app:
 
             mic3.stream(gen, inputs=[src3, tgt3], outputs=[stt3, tlt3])
 
-        #
+        # 4) 4개 언어 동시 번역
         with gr.TabItem("🌏 4개 언어 동시"):
-            gr.Markdown("입력 음성을 **English / Chinese(简体) / Thai / Russian** 4개 언어로 동시에 번역합니다.")
             src4 = gr.Dropdown(LANGUAGES, value="Korean", label="입력 언어")
             aud4 = gr.Audio(
                 sources=["microphone", "upload"],
@@ -219,6 +249,6 @@ with gr.Blocks(title="SMARTok Demo", theme=gr.themes.Soft()) as app:
                 outputs=[org4, en4, zh4, th4, ru4]
            )
 
-#
+# ============ 실행 ===================================================
 if __name__ == "__main__":
     app.launch(server_name="0.0.0.0", server_port=7860, share=False, debug=True)
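Note: translate_document imports pdfplumber, PIL, and pytesseract lazily, so the Space presumably also needs pdfplumber, Pillow, and pytesseract in requirements.txt (and the tesseract-ocr system package, e.g. via packages.txt) for the new 📄 tab to work. A minimal local check of the new helpers might look like the sketch below; "sample.pdf" is a hypothetical file, and the run assumes OPENAI_API_KEY is set in .env exactly as app.py requires.

# Minimal local smoke test of the new helpers (sketch, not part of the commit).
# Assumes: OPENAI_API_KEY in .env, pdfplumber/Pillow/pytesseract installed,
# and a hypothetical "sample.pdf" next to app.py.
from app import _safe_path, translate_document

# _safe_path accepts None, a Gradio 4.x dict value, or a plain path string.
assert _safe_path(None) is None
assert _safe_path({"name": "x.pdf"}) == "x.pdf"
assert _safe_path("x.pdf") == "x.pdf"

# On success this returns (extracted_text, translation); on failure it returns
# the warning strings defined in translate_document.
original, translated = translate_document("sample.pdf", "Korean", "English")
print(original[:300])
print(translated[:300])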