Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -4,7 +4,7 @@ import os
|
|
4 |
import tempfile
|
5 |
from dotenv import load_dotenv
|
6 |
|
7 |
-
# ===== 공통 초기화
|
8 |
load_dotenv()
|
9 |
api_key = os.getenv("OPENAI_API_KEY")
|
10 |
if not api_key:
|
@@ -18,8 +18,35 @@ except Exception as e:
|
|
18 |
print(f"โ OpenAI ํด๋ผ์ด์ธํธ ์ด๊ธฐํ ์คํจ: {e}")
|
19 |
client = None
|
20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
# ----------------------------------------------------------
|
22 |
-
# (1)
|
23 |
# ----------------------------------------------------------
|
24 |
def translate_audio(audio_file, source_lang, target_lang):
|
25 |
if not audio_file:
|
@@ -30,16 +57,19 @@ def translate_audio(audio_file, source_lang, target_lang):
|
|
30 |
return "⚠️ 입력 언어와 출력 언어가 같습니다.", "", None
|
31 |
|
32 |
try:
|
|
|
|
|
33 |
with open(audio_file, "rb") as f:
|
34 |
transcript = client.audio.transcriptions.create(
|
35 |
model="whisper-1",
|
36 |
file=f,
|
37 |
-
language=
|
38 |
)
|
39 |
original_text = transcript.text.strip()
|
40 |
if not original_text:
|
41 |
return "โ ๏ธ ์์ฑ์ด ์ธ์๋์ง ์์์ต๋๋ค.", "", None
|
42 |
|
|
|
43 |
response = client.chat.completions.create(
|
44 |
model="gpt-3.5-turbo",
|
45 |
messages=[
|
@@ -53,11 +83,10 @@ def translate_audio(audio_file, source_lang, target_lang):
|
|
53 |
)
|
54 |
translated_text = response.choices[0].message.content.strip()
|
55 |
|
56 |
-
|
57 |
-
"Chinese": "nova", "Spanish": "nova", "French": "nova"}
|
58 |
tts_response = client.audio.speech.create(
|
59 |
model="tts-1",
|
60 |
-
voice=
|
61 |
input=translated_text[:4096]
|
62 |
)
|
63 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
|
@@ -69,9 +98,8 @@ def translate_audio(audio_file, source_lang, target_lang):
|
|
69 |
except Exception as e:
|
70 |
return f"โ ์ค๋ฅ: {type(e).__name__}: {str(e)}", "", None
|
71 |
|
72 |
-
|
73 |
# ----------------------------------------------------------
|
74 |
-
# (2)
|
75 |
# ----------------------------------------------------------
|
76 |
def translate_document(file_obj, source_lang, target_lang):
|
77 |
if not file_obj:
|
@@ -83,12 +111,12 @@ def translate_document(file_obj, source_lang, target_lang):
|
|
83 |
|
84 |
ext = os.path.splitext(file_obj.name)[1].lower()
|
85 |
try:
|
86 |
-
# ---
|
87 |
if ext == ".pdf":
|
88 |
import pdfplumber
|
89 |
text_chunks = []
|
90 |
with pdfplumber.open(file_obj.name) as pdf:
|
91 |
-
for page in pdf.pages[:5]:
|
92 |
text_chunks.append(page.extract_text() or "")
|
93 |
original_text = "\n".join(text_chunks).strip()
|
94 |
|
@@ -121,13 +149,12 @@ def translate_document(file_obj, source_lang, target_lang):
|
|
121 |
except Exception as e:
|
122 |
return f"โ ์ค๋ฅ: {type(e).__name__}: {str(e)}", ""
|
123 |
|
124 |
-
|
125 |
# ==========================================================
|
126 |
-
# Gradio UI
|
127 |
# ==========================================================
|
128 |
with gr.Blocks(title="SMARTok Demo", theme=gr.themes.Soft()) as app:
|
129 |
with gr.Tabs():
|
130 |
-
# -----
|
131 |
with gr.TabItem("๐๏ธ ์์ฑ ๋ฒ์ญ"):
|
132 |
gr.Markdown("""
|
133 |
# ๐๏ธ AI ์์ฑ ๋ฒ์ญ๊ธฐ
|
@@ -135,14 +162,8 @@ with gr.Blocks(title="SMARTok Demo", theme=gr.themes.Soft()) as app:
|
|
135 |
""")
|
136 |
|
137 |
with gr.Row():
|
138 |
-
src_lang_a = gr.Dropdown(
|
139 |
-
|
140 |
-
value="Korean", label="입력 언어"
|
141 |
-
)
|
142 |
-
tgt_lang_a = gr.Dropdown(
|
143 |
-
["Korean", "English", "Japanese", "Chinese", "Spanish", "French"],
|
144 |
-
value="English", label="์ถ๋ ฅ ์ธ์ด"
|
145 |
-
)
|
146 |
|
147 |
audio_in = gr.Audio(
|
148 |
sources=["microphone", "upload"],
|
@@ -163,7 +184,7 @@ with gr.Blocks(title="SMARTok Demo", theme=gr.themes.Soft()) as app:
|
|
163 |
outputs=[stt_text, tlt_text, audio_out]
|
164 |
)
|
165 |
|
166 |
-
# -----
|
167 |
with gr.TabItem("๐ ์๋ฃ ๋ฒ์ญ"):
|
168 |
gr.Markdown("""
|
169 |
# ๐ PDF / ์ด๋ฏธ์ง ๋ฒ์ญ ๋ฐ๋ชจ
|
@@ -171,14 +192,8 @@ with gr.Blocks(title="SMARTok Demo", theme=gr.themes.Soft()) as app:
|
|
171 |
""")
|
172 |
|
173 |
with gr.Row():
|
174 |
-
src_lang_d = gr.Dropdown(
|
175 |
-
|
176 |
-
value="Korean", label="입력 언어"
|
177 |
-
)
|
178 |
-
tgt_lang_d = gr.Dropdown(
|
179 |
-
["Korean", "English", "Japanese", "Chinese", "Spanish", "French"],
|
180 |
-
value="English", label="์ถ๋ ฅ ์ธ์ด"
|
181 |
-
)
|
182 |
|
183 |
file_in = gr.File(label="PDF / 이미지 업로드")
|
184 |
btn_doc = gr.Button("๐ ๋ฒ์ญํ๊ธฐ")
|
|
|
4 |
import tempfile
|
5 |
from dotenv import load_dotenv
|
6 |
|
7 |
+
# ===== ๊ณตํต ์ด๊ธฐํ =========================================
|
8 |
load_dotenv()
|
9 |
api_key = os.getenv("OPENAI_API_KEY")
|
10 |
if not api_key:
|
|
|
18 |
print(f"❌ OpenAI 클라이언트 초기화 실패: {e}")
|
19 |
client = None
|
20 |
|
21 |
+
# ===== ์ธ์ด ์ค์ ===========================================
|
22 |
+
LANGUAGES = [
|
23 |
+
"Korean", "English", "Japanese", "Chinese", # 기존
|
24 |
+
"Thai", "Russian", "Vietnamese", # 추가
|
25 |
+
"Spanish", "French" # 선택
|
26 |
+
]
|
27 |
+
|
28 |
+
# Whisper์ฉ ISO-639 ์ฝ๋ ๋งคํ
|
29 |
+
LANG_CODE_MAP = {
|
30 |
+
"Korean": "ko", "English": "en", "Japanese": "ja", "Chinese": "zh",
|
31 |
+
"Thai": "th", "Russian": "ru", "Vietnamese": "vi",
|
32 |
+
"Spanish": "es", "French": "fr"
|
33 |
+
}
|
34 |
+
|
35 |
+
# TTS 음성 매핑(OpenAI tts-1: alloy, nova 두 가지)
|
36 |
+
VOICE_MAP = {
|
37 |
+
"Korean": "nova",
|
38 |
+
"English": "alloy",
|
39 |
+
"Japanese": "nova",
|
40 |
+
"Chinese": "nova",
|
41 |
+
"Thai": "alloy",
|
42 |
+
"Russian": "alloy",
|
43 |
+
"Vietnamese": "alloy",
|
44 |
+
"Spanish": "alloy",
|
45 |
+
"French": "alloy"
|
46 |
+
}
|
47 |
+
|
48 |
# ----------------------------------------------------------
|
49 |
+
# (1) 음성(STT) → 번역 → 음성(TTS)
|
50 |
# ----------------------------------------------------------
|
51 |
def translate_audio(audio_file, source_lang, target_lang):
|
52 |
if not audio_file:
|
|
|
57 |
return "⚠️ 입력 언어와 출력 언어가 같습니다.", "", None
|
58 |
|
59 |
try:
|
60 |
+
# ---------- Whisper STT ----------
|
61 |
+
lang_code = LANG_CODE_MAP.get(source_lang, None)
|
62 |
with open(audio_file, "rb") as f:
|
63 |
transcript = client.audio.transcriptions.create(
|
64 |
model="whisper-1",
|
65 |
file=f,
|
66 |
+
language=lang_code if lang_code else None # 못 찾으면 자동감지
|
67 |
)
|
68 |
original_text = transcript.text.strip()
|
69 |
if not original_text:
|
70 |
return "⚠️ 음성이 인식되지 않았습니다.", "", None
|
71 |
|
72 |
+
# ---------- GPT 번역 ----------
|
73 |
response = client.chat.completions.create(
|
74 |
model="gpt-3.5-turbo",
|
75 |
messages=[
|
|
|
83 |
)
|
84 |
translated_text = response.choices[0].message.content.strip()
|
85 |
|
86 |
+
# ---------- TTS ----------
|
|
|
87 |
tts_response = client.audio.speech.create(
|
88 |
model="tts-1",
|
89 |
+
voice=VOICE_MAP.get(target_lang, "alloy"),
|
90 |
input=translated_text[:4096]
|
91 |
)
|
92 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
|
|
|
98 |
except Exception as e:
|
99 |
return f"❌ 오류: {type(e).__name__}: {str(e)}", "", None
|
100 |
|
|
|
101 |
# ----------------------------------------------------------
|
102 |
+
# (2) PDF / 이미지 → 번역
|
103 |
# ----------------------------------------------------------
|
104 |
def translate_document(file_obj, source_lang, target_lang):
|
105 |
if not file_obj:
|
|
|
111 |
|
112 |
ext = os.path.splitext(file_obj.name)[1].lower()
|
113 |
try:
|
114 |
+
# --- 텍스트 추출 ---
|
115 |
if ext == ".pdf":
|
116 |
import pdfplumber
|
117 |
text_chunks = []
|
118 |
with pdfplumber.open(file_obj.name) as pdf:
|
119 |
+
for page in pdf.pages[:5]: # 데모: 앞 5쪽만
|
120 |
text_chunks.append(page.extract_text() or "")
|
121 |
original_text = "\n".join(text_chunks).strip()
|
122 |
|
|
|
149 |
except Exception as e:
|
150 |
return f"❌ 오류: {type(e).__name__}: {str(e)}", ""
|
151 |
|
|
|
152 |
# ==========================================================
|
153 |
+
# Gradio UI
|
154 |
# ==========================================================
|
155 |
with gr.Blocks(title="SMARTok Demo", theme=gr.themes.Soft()) as app:
|
156 |
with gr.Tabs():
|
157 |
+
# ----- 🎙️ 음성 번역 -----
|
158 |
with gr.TabItem("🎙️ 음성 번역"):
|
159 |
gr.Markdown("""
|
160 |
# 🎙️ AI 음성 번역기
|
|
|
162 |
""")
|
163 |
|
164 |
with gr.Row():
|
165 |
+
src_lang_a = gr.Dropdown(LANGUAGES, value="Korean", label="입력 언어")
|
166 |
+
tgt_lang_a = gr.Dropdown(LANGUAGES, value="English", label="출력 언어")
|
|
|
|
|
|
|
|
|
|
|
|
|
167 |
|
168 |
audio_in = gr.Audio(
|
169 |
sources=["microphone", "upload"],
|
|
|
184 |
outputs=[stt_text, tlt_text, audio_out]
|
185 |
)
|
186 |
|
187 |
+
# ----- ๐ ์๋ฃ ๋ฒ์ญ -----
|
188 |
with gr.TabItem("๐ ์๋ฃ ๋ฒ์ญ"):
|
189 |
gr.Markdown("""
|
190 |
# ๐ PDF / ์ด๋ฏธ์ง ๋ฒ์ญ ๋ฐ๋ชจ
|
|
|
192 |
""")
|
193 |
|
194 |
with gr.Row():
|
195 |
+
src_lang_d = gr.Dropdown(LANGUAGES, value="Korean", label="입력 언어")
|
196 |
+
tgt_lang_d = gr.Dropdown(LANGUAGES, value="English", label="출력 언어")
|
|
|
|
|
|
|
|
|
|
|
|
|
197 |
|
198 |
file_in = gr.File(label="PDF / 이미지 업로드")
|
199 |
btn_doc = gr.Button("๐ ๋ฒ์ญํ๊ธฐ")
|