openfree committed on
Commit
32b3c75
·
verified ·
1 Parent(s): 7cce69a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -30
app.py CHANGED
@@ -4,7 +4,7 @@ import os
4
  import tempfile
5
  from dotenv import load_dotenv
6
 
7
- # ===== 공통 초기화 =====
8
  load_dotenv()
9
  api_key = os.getenv("OPENAI_API_KEY")
10
  if not api_key:
@@ -18,8 +18,35 @@ except Exception as e:
18
  print(f"❌ OpenAI 클라이언트 초기화 실패: {e}")
19
  client = None
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  # ----------------------------------------------------------
22
- # (1) 기존: 음성(STT) → 번역 → 음성(TTS)
23
  # ----------------------------------------------------------
24
  def translate_audio(audio_file, source_lang, target_lang):
25
  if not audio_file:
@@ -30,16 +57,19 @@ def translate_audio(audio_file, source_lang, target_lang):
30
  return "⚠️ 입력 언어와 출력 언어가 같습니다.", "", None
31
 
32
  try:
 
 
33
  with open(audio_file, "rb") as f:
34
  transcript = client.audio.transcriptions.create(
35
  model="whisper-1",
36
  file=f,
37
- language=source_lang[:2].lower() if source_lang != "Chinese" else "zh"
38
  )
39
  original_text = transcript.text.strip()
40
  if not original_text:
41
  return "⚠️ 음성이 인식되지 않았습니다.", "", None
42
 
 
43
  response = client.chat.completions.create(
44
  model="gpt-3.5-turbo",
45
  messages=[
@@ -53,11 +83,10 @@ def translate_audio(audio_file, source_lang, target_lang):
53
  )
54
  translated_text = response.choices[0].message.content.strip()
55
 
56
- voice_map = {"Korean": "nova", "English": "alloy", "Japanese": "nova",
57
- "Chinese": "nova", "Spanish": "nova", "French": "nova"}
58
  tts_response = client.audio.speech.create(
59
  model="tts-1",
60
- voice=voice_map.get(target_lang, "alloy"),
61
  input=translated_text[:4096]
62
  )
63
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
@@ -69,9 +98,8 @@ def translate_audio(audio_file, source_lang, target_lang):
69
  except Exception as e:
70
  return f"❌ 오류: {type(e).__name__}: {str(e)}", "", None
71
 
72
-
73
  # ----------------------------------------------------------
74
- # (2) 신규 탭: PDF / 이미지 → 번역 텍스트
75
  # ----------------------------------------------------------
76
  def translate_document(file_obj, source_lang, target_lang):
77
  if not file_obj:
@@ -83,12 +111,12 @@ def translate_document(file_obj, source_lang, target_lang):
83
 
84
  ext = os.path.splitext(file_obj.name)[1].lower()
85
  try:
86
- # --- 원본 텍스트 추출 ---
87
  if ext == ".pdf":
88
  import pdfplumber
89
  text_chunks = []
90
  with pdfplumber.open(file_obj.name) as pdf:
91
- for page in pdf.pages[:5]: # 데모: 앞 5쪽만
92
  text_chunks.append(page.extract_text() or "")
93
  original_text = "\n".join(text_chunks).strip()
94
 
@@ -121,13 +149,12 @@ def translate_document(file_obj, source_lang, target_lang):
121
  except Exception as e:
122
  return f"❌ 오류: {type(e).__name__}: {str(e)}", ""
123
 
124
-
125
  # ==========================================================
126
- # Gradio UI (Tabs 구조)
127
  # ==========================================================
128
  with gr.Blocks(title="SMARTok Demo", theme=gr.themes.Soft()) as app:
129
  with gr.Tabs():
130
- # ----- ① 기존 음성 번역 -----
131
  with gr.TabItem("🎙️ 음성 번역"):
132
  gr.Markdown("""
133
  # 🎙️ AI 음성 번역기
@@ -135,14 +162,8 @@ with gr.Blocks(title="SMARTok Demo", theme=gr.themes.Soft()) as app:
135
  """)
136
 
137
  with gr.Row():
138
- src_lang_a = gr.Dropdown(
139
- ["Korean", "English", "Japanese", "Chinese", "Spanish", "French"],
140
- value="Korean", label="입력 언어"
141
- )
142
- tgt_lang_a = gr.Dropdown(
143
- ["Korean", "English", "Japanese", "Chinese", "Spanish", "French"],
144
- value="English", label="출력 언어"
145
- )
146
 
147
  audio_in = gr.Audio(
148
  sources=["microphone", "upload"],
@@ -163,7 +184,7 @@ with gr.Blocks(title="SMARTok Demo", theme=gr.themes.Soft()) as app:
163
  outputs=[stt_text, tlt_text, audio_out]
164
  )
165
 
166
- # ----- ② 신규 자료 번역 -----
167
  with gr.TabItem("📄 자료 번역"):
168
  gr.Markdown("""
169
  # 📄 PDF / 이미지 번역 데모
@@ -171,14 +192,8 @@ with gr.Blocks(title="SMARTok Demo", theme=gr.themes.Soft()) as app:
171
  """)
172
 
173
  with gr.Row():
174
- src_lang_d = gr.Dropdown(
175
- ["Korean", "English", "Japanese", "Chinese", "Spanish", "French"],
176
- value="Korean", label="입력 언어"
177
- )
178
- tgt_lang_d = gr.Dropdown(
179
- ["Korean", "English", "Japanese", "Chinese", "Spanish", "French"],
180
- value="English", label="출력 언어"
181
- )
182
 
183
  file_in = gr.File(label="PDF / 이미지 업로드")
184
  btn_doc = gr.Button("🔄 번역하기")
 
4
  import tempfile
5
  from dotenv import load_dotenv
6
 
7
+ # ===== ๊ณตํ†ต ์ดˆ๊ธฐํ™” =========================================
8
  load_dotenv()
9
  api_key = os.getenv("OPENAI_API_KEY")
10
  if not api_key:
 
18
  print(f"❌ OpenAI 클라이언트 초기화 실패: {e}")
19
  client = None
20
 
21
+ # ===== ์–ธ์–ด ์„ค์ • ===========================================
22
+ LANGUAGES = [
23
+ "Korean", "English", "Japanese", "Chinese", # 기존
24
+ "Thai", "Russian", "Vietnamese", # 추가
25
+ "Spanish", "French" # 선택
26
+ ]
27
+
28
+ # Whisper์šฉ ISO-639 ์ฝ”๋“œ ๋งคํ•‘
29
+ LANG_CODE_MAP = {
30
+ "Korean": "ko", "English": "en", "Japanese": "ja", "Chinese": "zh",
31
+ "Thai": "th", "Russian": "ru", "Vietnamese": "vi",
32
+ "Spanish": "es", "French": "fr"
33
+ }
34
+
35
+ # TTS 음성 매핑(OpenAI tts-1: alloy, nova 두 가지)
36
+ VOICE_MAP = {
37
+ "Korean": "nova",
38
+ "English": "alloy",
39
+ "Japanese": "nova",
40
+ "Chinese": "nova",
41
+ "Thai": "alloy",
42
+ "Russian": "alloy",
43
+ "Vietnamese": "alloy",
44
+ "Spanish": "alloy",
45
+ "French": "alloy"
46
+ }
47
+
48
  # ----------------------------------------------------------
49
+ # (1) 음성(STT) → 번역 → 음성(TTS)
50
  # ----------------------------------------------------------
51
  def translate_audio(audio_file, source_lang, target_lang):
52
  if not audio_file:
 
57
  return "⚠️ 입력 언어와 출력 언어가 같습니다.", "", None
58
 
59
  try:
60
+ # ---------- Whisper STT ----------
61
+ lang_code = LANG_CODE_MAP.get(source_lang, None)
62
  with open(audio_file, "rb") as f:
63
  transcript = client.audio.transcriptions.create(
64
  model="whisper-1",
65
  file=f,
66
+ language=lang_code if lang_code else None # 못 찾으면 자동감지
67
  )
68
  original_text = transcript.text.strip()
69
  if not original_text:
70
  return "⚠️ 음성이 인식되지 않았습니다.", "", None
71
 
72
+ # ---------- GPT 번역 ----------
73
  response = client.chat.completions.create(
74
  model="gpt-3.5-turbo",
75
  messages=[
 
83
  )
84
  translated_text = response.choices[0].message.content.strip()
85
 
86
+ # ---------- TTS ----------
 
87
  tts_response = client.audio.speech.create(
88
  model="tts-1",
89
+ voice=VOICE_MAP.get(target_lang, "alloy"),
90
  input=translated_text[:4096]
91
  )
92
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
 
98
  except Exception as e:
99
  return f"❌ 오류: {type(e).__name__}: {str(e)}", "", None
100
 
 
101
  # ----------------------------------------------------------
102
+ # (2) PDF / 이미지 → 번역
103
  # ----------------------------------------------------------
104
  def translate_document(file_obj, source_lang, target_lang):
105
  if not file_obj:
 
111
 
112
  ext = os.path.splitext(file_obj.name)[1].lower()
113
  try:
114
+ # --- 텍스트 추출 ---
115
  if ext == ".pdf":
116
  import pdfplumber
117
  text_chunks = []
118
  with pdfplumber.open(file_obj.name) as pdf:
119
+ for page in pdf.pages[:5]: # 데모: 앞 5쪽만
120
  text_chunks.append(page.extract_text() or "")
121
  original_text = "\n".join(text_chunks).strip()
122
 
 
149
  except Exception as e:
150
  return f"❌ 오류: {type(e).__name__}: {str(e)}", ""
151
 
 
152
  # ==========================================================
153
+ # Gradio UI
154
  # ==========================================================
155
  with gr.Blocks(title="SMARTok Demo", theme=gr.themes.Soft()) as app:
156
  with gr.Tabs():
157
+ # ----- 🎙️ 음성 번역 -----
158
  with gr.TabItem("🎙️ 음성 번역"):
159
  gr.Markdown("""
160
  # 🎙️ AI 음성 번역기
 
162
  """)
163
 
164
  with gr.Row():
165
+ src_lang_a = gr.Dropdown(LANGUAGES, value="Korean", label="입력 언어")
166
+ tgt_lang_a = gr.Dropdown(LANGUAGES, value="English", label="출력 언어")
 
 
 
 
 
 
167
 
168
  audio_in = gr.Audio(
169
  sources=["microphone", "upload"],
 
184
  outputs=[stt_text, tlt_text, audio_out]
185
  )
186
 
187
+ # ----- 📄 자료 번역 -----
188
  with gr.TabItem("📄 자료 번역"):
189
  gr.Markdown("""
190
  # 📄 PDF / 이미지 번역 데모
 
192
  """)
193
 
194
  with gr.Row():
195
+ src_lang_d = gr.Dropdown(LANGUAGES, value="Korean", label="입력 언어")
196
+ tgt_lang_d = gr.Dropdown(LANGUAGES, value="English", label="출력 언어")
 
 
 
 
 
 
197
 
198
  file_in = gr.File(label="PDF / 이미지 업로드")
199
  btn_doc = gr.Button("🔄 번역하기")