openfree committed on
Commit
a609646
·
verified ·
1 Parent(s): 32b3c75

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +121 -200
app.py CHANGED
@@ -1,213 +1,134 @@
1
  import gradio as gr
2
- import openai
3
- import os
4
- import tempfile
5
  from dotenv import load_dotenv
6
 
7
- # ===== ๊ณตํ†ต ์ดˆ๊ธฐํ™” =========================================
8
  load_dotenv()
9
  api_key = os.getenv("OPENAI_API_KEY")
10
  if not api_key:
11
- print("โš ๏ธ OPENAI_API_KEY๋ฅผ .env ํŒŒ์ผ์— ์„ค์ •ํ•˜์„ธ์š”!")
12
- else:
13
- print(f"โœ… API Key ๋กœ๋“œ๋จ: {api_key[:10]}...")
14
 
15
- try:
16
- client = openai.OpenAI(api_key=api_key)
17
- except Exception as e:
18
- print(f"โŒ OpenAI ํด๋ผ์ด์–ธํŠธ ์ดˆ๊ธฐํ™” ์‹คํŒจ: {e}")
19
- client = None
20
-
21
- # ===== ์–ธ์–ด ์„ค์ • ===========================================
22
  LANGUAGES = [
23
- "Korean", "English", "Japanese", "Chinese", # ๊ธฐ์กด
24
- "Thai", "Russian", "Vietnamese", # ์ถ”๊ฐ€
25
- "Spanish", "French" # ์„ ํƒ
26
  ]
27
-
28
- # Whisper์šฉ ISO-639 ์ฝ”๋“œ ๋งคํ•‘
29
- LANG_CODE_MAP = {
30
- "Korean": "ko", "English": "en", "Japanese": "ja", "Chinese": "zh",
31
- "Thai": "th", "Russian": "ru", "Vietnamese": "vi",
32
- "Spanish": "es", "French": "fr"
33
- }
34
-
35
- # TTS ์Œ์„ฑ ๋งคํ•‘(OpenAI tts-1: alloy, nova ๋‘ ๊ฐ€์ง€)
36
- VOICE_MAP = {
37
- "Korean": "nova",
38
- "English": "alloy",
39
- "Japanese": "nova",
40
- "Chinese": "nova",
41
- "Thai": "alloy",
42
- "Russian": "alloy",
43
- "Vietnamese": "alloy",
44
- "Spanish": "alloy",
45
- "French": "alloy"
46
- }
47
-
48
- # ----------------------------------------------------------
49
- # (1) ์Œ์„ฑ(STT) โ†’ ๋ฒˆ์—ญ โ†’ ์Œ์„ฑ(TTS)
50
- # ----------------------------------------------------------
51
- def translate_audio(audio_file, source_lang, target_lang):
52
- if not audio_file:
53
- return "โš ๏ธ ์˜ค๋””์˜ค ํŒŒ์ผ์„ ์—…๋กœ๋“œํ•˜๊ฑฐ๋‚˜ ๋…น์Œํ•˜์„ธ์š”.", "", None
54
- if not api_key or not client:
55
- return "โŒ API ์ดˆ๊ธฐํ™” ์˜ค๋ฅ˜", "", None
56
- if source_lang == target_lang:
57
- return "โš ๏ธ ์ž…๋ ฅ ์–ธ์–ด์™€ ์ถœ๋ ฅ ์–ธ์–ด๊ฐ€ ๊ฐ™์Šต๋‹ˆ๋‹ค.", "", None
58
-
59
- try:
60
- # ---------- Whisper STT ----------
61
- lang_code = LANG_CODE_MAP.get(source_lang, None)
62
- with open(audio_file, "rb") as f:
63
- transcript = client.audio.transcriptions.create(
64
- model="whisper-1",
65
- file=f,
66
- language=lang_code if lang_code else None # ๋ชป ์ฐพ์œผ๋ฉด ์ž๋™๊ฐ์ง€
67
- )
68
- original_text = transcript.text.strip()
69
- if not original_text:
70
- return "โš ๏ธ ์Œ์„ฑ์ด ์ธ์‹๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.", "", None
71
-
72
- # ---------- GPT ๋ฒˆ์—ญ ----------
73
- response = client.chat.completions.create(
74
- model="gpt-3.5-turbo",
75
- messages=[
76
- {"role": "system",
77
- "content": f"You are a professional translator. Translate the following {source_lang} text to {target_lang}. "
78
- f"Only provide the translation without any explanation or additional text."},
79
- {"role": "user", "content": original_text}
80
- ],
81
- temperature=0.3,
82
- max_tokens=2000
83
- )
84
- translated_text = response.choices[0].message.content.strip()
85
-
86
- # ---------- TTS ----------
87
- tts_response = client.audio.speech.create(
88
- model="tts-1",
89
- voice=VOICE_MAP.get(target_lang, "alloy"),
90
- input=translated_text[:4096]
91
- )
92
- with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
93
- tmp.write(tts_response.content)
94
- output_audio = tmp.name
95
-
96
- return original_text, translated_text, output_audio
97
-
98
- except Exception as e:
99
- return f"โŒ ์˜ค๋ฅ˜: {type(e).__name__}: {str(e)}", "", None
100
-
101
- # ----------------------------------------------------------
102
- # (2) PDF / ์ด๋ฏธ์ง€ โ†’ ๋ฒˆ์—ญ
103
- # ----------------------------------------------------------
104
- def translate_document(file_obj, source_lang, target_lang):
105
- if not file_obj:
106
- return "โš ๏ธ ํŒŒ์ผ์„ ์—…๋กœ๋“œํ•˜์„ธ์š”.", ""
107
- if not api_key or not client:
108
- return "โŒ API ์ดˆ๊ธฐํ™” ์˜ค๋ฅ˜", ""
109
- if source_lang == target_lang:
110
- return "โš ๏ธ ์ž…๋ ฅ ์–ธ์–ด์™€ ์ถœ๋ ฅ ์–ธ์–ด๊ฐ€ ๊ฐ™์Šต๋‹ˆ๋‹ค.", ""
111
-
112
- ext = os.path.splitext(file_obj.name)[1].lower()
113
- try:
114
- # --- ํ…์ŠคํŠธ ์ถ”์ถœ ---
115
- if ext == ".pdf":
116
- import pdfplumber
117
- text_chunks = []
118
- with pdfplumber.open(file_obj.name) as pdf:
119
- for page in pdf.pages[:5]: # ๋ฐ๋ชจ: ์•ž 5์ชฝ๋งŒ
120
- text_chunks.append(page.extract_text() or "")
121
- original_text = "\n".join(text_chunks).strip()
122
-
123
- elif ext in [".png", ".jpg", ".jpeg", ".webp", ".bmp", ".tiff"]:
124
- from PIL import Image
125
- import pytesseract
126
- original_text = pytesseract.image_to_string(Image.open(file_obj.name))
127
-
128
- else:
129
- return "โš ๏ธ ์ง€์›ํ•˜์ง€ ์•Š๋Š” ํ˜•์‹์ž…๋‹ˆ๋‹ค.", ""
130
-
131
- if not original_text:
132
- return "โš ๏ธ ํ…์ŠคํŠธ๋ฅผ ์ถ”์ถœํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.", ""
133
-
134
- # --- ๋ฒˆ์—ญ ---
135
- response = client.chat.completions.create(
136
- model="gpt-3.5-turbo",
137
- messages=[
138
- {"role": "system",
139
- "content": f"You are a professional translator. Translate the following {source_lang} text to {target_lang}. "
140
- f"Only provide the translation without any explanation or additional text."},
141
- {"role": "user", "content": original_text}
142
- ],
143
- temperature=0.3,
144
- max_tokens=4096
145
- )
146
- translated_text = response.choices[0].message.content.strip()
147
- return original_text, translated_text
148
-
149
- except Exception as e:
150
- return f"โŒ ์˜ค๋ฅ˜: {type(e).__name__}: {str(e)}", ""
151
-
152
- # ==========================================================
153
- # Gradio UI
154
- # ==========================================================
155
  with gr.Blocks(title="SMARTok Demo", theme=gr.themes.Soft()) as app:
156
  with gr.Tabs():
157
- # ----- ๐ŸŽ™๏ธ ์Œ์„ฑ ๋ฒˆ์—ญ -----
158
- with gr.TabItem("๐ŸŽ™๏ธ ์Œ์„ฑ ๋ฒˆ์—ญ"):
159
- gr.Markdown("""
160
- # ๐ŸŽ™๏ธ AI ์Œ์„ฑ ๋ฒˆ์—ญ๊ธฐ
161
- ๋งˆ์ดํฌ๋กœ ๋…น์Œํ•˜๊ฑฐ๋‚˜ ์˜ค๋””์˜ค ํŒŒ์ผ์„ ์—…๋กœ๋“œํ•˜๋ฉด **์‹ค์‹œ๊ฐ„ ์ž๋ง‰ + ๋ฒˆ์—ญ + ์Œ์„ฑํ•ฉ์„ฑ**๊นŒ์ง€ ํ•œ ๋ฒˆ์—!
162
- """)
163
-
164
- with gr.Row():
165
- src_lang_a = gr.Dropdown(LANGUAGES, value="Korean", label="์ž…๋ ฅ ์–ธ์–ด")
166
- tgt_lang_a = gr.Dropdown(LANGUAGES, value="English", label="์ถœ๋ ฅ ์–ธ์–ด")
167
-
168
- audio_in = gr.Audio(
169
- sources=["microphone", "upload"],
170
- type="filepath",
171
- label="์Œ์„ฑ ์ž…๋ ฅ (๋…น์Œ ๋˜๋Š” ํŒŒ์ผ ์—…๋กœ๋“œ)"
172
- )
173
- btn_audio = gr.Button("๐Ÿ”„ ๋ฒˆ์—ญํ•˜๊ธฐ")
174
-
175
- with gr.Row():
176
- stt_text = gr.Textbox(label="๐Ÿ“ ์›๋ณธ ํ…์ŠคํŠธ", lines=5)
177
- tlt_text = gr.Textbox(label="๐ŸŒ ๋ฒˆ์—ญ๋œ ํ…์ŠคํŠธ", lines=5)
178
-
179
- audio_out = gr.Audio(label="๐Ÿ”Š ๋ฒˆ์—ญ๋œ ์Œ์„ฑ", type="filepath", autoplay=True)
180
-
181
- btn_audio.click(
182
- translate_audio,
183
- inputs=[audio_in, src_lang_a, tgt_lang_a],
184
- outputs=[stt_text, tlt_text, audio_out]
185
- )
186
-
187
- # ----- ๐Ÿ“„ ์ž๋ฃŒ ๋ฒˆ์—ญ -----
188
- with gr.TabItem("๐Ÿ“„ ์ž๋ฃŒ ๋ฒˆ์—ญ"):
189
- gr.Markdown("""
190
- # ๐Ÿ“„ PDF / ์ด๋ฏธ์ง€ ๋ฒˆ์—ญ ๋ฐ๋ชจ
191
- ๊ต์œก์ž๋ฃŒยท๋ฐœํ‘œ์ž๋ฃŒ ๋“ฑ **PDF ์ตœ๋Œ€ 5์ชฝ** ๋˜๋Š” ์ด๋ฏธ์ง€ 1์žฅ์„ ์—…๋กœ๋“œํ•˜๋ฉด ํ…์ŠคํŠธ ์ถ”์ถœ ํ›„ ๋ฒˆ์—ญํ•ด์ค๋‹ˆ๋‹ค.
192
- """)
193
-
194
- with gr.Row():
195
- src_lang_d = gr.Dropdown(LANGUAGES, value="Korean", label="์ž…๋ ฅ ์–ธ์–ด")
196
- tgt_lang_d = gr.Dropdown(LANGUAGES, value="English", label="์ถœ๋ ฅ ์–ธ์–ด")
197
-
198
- file_in = gr.File(label="PDF / ์ด๋ฏธ์ง€ ์—…๋กœ๋“œ")
199
- btn_doc = gr.Button("๐Ÿ”„ ๋ฒˆ์—ญํ•˜๊ธฐ")
200
-
201
- original_doc = gr.Textbox(label="๐Ÿ“ ์ถ”์ถœ๋œ ์›๋ฌธ", lines=15)
202
- translated_doc = gr.Textbox(label="๐ŸŒ ๋ฒˆ์—ญ ๊ฒฐ๊ณผ", lines=15)
203
-
204
- btn_doc.click(
205
- translate_document,
206
- inputs=[file_in, src_lang_d, tgt_lang_d],
207
- outputs=[original_doc, translated_doc]
208
- )
209
 
210
- # ==========================================================
211
  if __name__ == "__main__":
212
- print("๐Ÿš€ ์„œ๋ฒ„ ์‹œ์ž‘ ์ค‘...")
213
- app.launch(server_name="0.0.0.0", server_port=7860, share=False, debug=True)
 
1
  import gradio as gr
2
+ import openai, os, io, tempfile, wave, time, threading
 
 
3
  from dotenv import load_dotenv
4
 
5
+ # =============== ๊ณตํ†ต ์ดˆ๊ธฐํ™” ========================================
6
  load_dotenv()
7
  api_key = os.getenv("OPENAI_API_KEY")
8
  if not api_key:
9
+ raise RuntimeError("OPENAI_API_KEY๋ฅผ .env ํŒŒ์ผ์— ์„ค์ •ํ•˜์„ธ์š”!")
10
+ client = openai.OpenAI(api_key=api_key)
 
11
 
 
 
 
 
 
 
 
12
  LANGUAGES = [
13
+ "Korean", "English", "Japanese", "Chinese",
14
+ "Thai", "Russian", "Vietnamese",
15
+ "Spanish", "French"
16
  ]
17
+ LANG_CODE = {"Korean":"ko","English":"en","Japanese":"ja","Chinese":"zh",
18
+ "Thai":"th","Russian":"ru","Vietnamese":"vi",
19
+ "Spanish":"es","French":"fr"}
20
+ VOICE = {lang: ("nova" if lang in ["Korean","Japanese","Chinese"] else "alloy")
21
+ for lang in LANGUAGES}
22
+
23
+ # ---------------- ๊ณตํ†ต ๋ฒˆ์—ญ/ํ•ฉ์„ฑ ------------------------------------
24
def _gpt_translate(text, src, tgt):
    """Translate ``text`` from ``src`` to ``tgt`` with a chat completion.

    Args:
        text: Text to translate; sent verbatim as the user message.
        src: Source language name, interpolated into the system prompt.
        tgt: Target language name, interpolated into the system prompt.

    Returns:
        The content of the first returned choice with surrounding
        whitespace removed, or an empty string when the response carries
        no text content.
    """
    rsp = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {
                "role": "system",
                "content": (
                    f"You are a professional translator. Translate {src} to {tgt}. "
                    f"Only give the translated text."
                ),
            },
            {"role": "user", "content": text},
        ],
        temperature=0.3,
        max_tokens=2048,
    )
    # ``message.content`` is optional in the response schema; calling
    # ``.strip()`` on ``None`` would raise AttributeError.
    content = rsp.choices[0].message.content
    return (content or "").strip()
35
+
36
def _tts(text, lang):
    """Synthesize ``text`` to speech and return the path of the MP3 file.

    Args:
        text: Text to speak. Only the first 4096 characters are sent.
        lang: Language name used to pick a voice from ``VOICE``; unknown
            languages fall back to ``"alloy"``.

    Returns:
        Path of a temporary ``.mp3`` file. The file is created with
        ``delete=False``, so it stays on disk until the caller removes it.
    """
    out = client.audio.speech.create(
        model="tts-1",
        voice=VOICE.get(lang, "alloy"),
        input=text[:4096],
    )
    # The context manager closes the handle even if the write fails.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
        tmp.write(out.content)
    return tmp.name
43
+
44
+ # =============== 1) ๋งˆ์ดํฌยทํŒŒ์ผ ๊ณตํ†ต ์ฒ˜๋ฆฌ ============================
45
def translate_audio(audio_path, src, tgt):
    """Transcribe an audio file, translate the transcript, and voice the result.

    Args:
        audio_path: Path of the audio file to transcribe. A falsy value
            (``None`` or ``""``, e.g. the button was pressed before anything
            was recorded or uploaded) is reported as a warning instead of
            raising.
        src: Source language name; its code from ``LANG_CODE`` is passed to
            the transcription call (``None`` when the name is unknown).
        tgt: Target language name for the translation and the TTS voice.

    Returns:
        ``(original, translated, tts_path)``. When there is no input or the
        transcript is empty, returns ``(warning_message, "", None)``.
    """
    if not audio_path:
        # Without this guard ``open(None)`` raises TypeError.
        return "⚠️ 오디오 파일을 업로드하거나 녹음하세요.", "", None

    with open(audio_path, "rb") as f:
        stt = client.audio.transcriptions.create(
            model="whisper-1",
            file=f,
            language=LANG_CODE.get(src),
        )

    original = stt.text.strip()
    if not original:
        return "⚠️ 음성 인식 실패", "", None

    translated = _gpt_translate(original, src, tgt)
    tts_path = _tts(translated, tgt)
    return original, translated, tts_path
58
+
59
+ # =============== 2) ์‹ค์‹œ๊ฐ„ ์ŠคํŠธ๋ฆฌ๋ฐ(๋ฒ ํƒ€) ============================
60
STREAM_CHUNK_SEC = 4  # seconds of audio buffered before each Whisper call

_WAV_HEADER_LEN = 44  # bytes sliced off the first chunk and reused as the header


def _discard_file(path):
    """Remove ``path`` if it is set; a file that cannot be removed is ignored."""
    if not path:
        return
    try:
        os.unlink(path)
    except OSError:
        # Best-effort cleanup of a temp file; nothing useful to do on failure.
        pass


def _translate_segment(wav_bytes, src, tgt):
    """Run ``translate_audio`` on in-memory WAV bytes; return (original, translated)."""
    # Written with delete=False and closed before use so it can be reopened
    # by path; removed explicitly below.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
        tmp.write(wav_bytes)
    tts_path = None
    try:
        original, translated, tts_path = translate_audio(tmp.name, src, tgt)
    finally:
        _discard_file(tmp.name)
        # The synthesized audio is not used by the streaming path.
        _discard_file(tts_path)
    return original, translated


def stream_generator(mic_stream, src, tgt):
    """Transcribe and translate audio pulled from ``mic_stream`` in segments.

    Chunks are read with ``mic_stream.recv()`` until it returns ``None``.
    The first ``_WAV_HEADER_LEN`` bytes of the first non-empty chunk are kept
    as the WAV header and prepended to every segment. A segment is processed
    once more than ``STREAM_CHUNK_SEC`` seconds of audio are buffered, sized
    on the assumption of 16 kHz 16-bit mono input; whatever remains when the
    stream ends is processed as a final segment.

    NOTE(review): the reused header's size fields are not rewritten per
    segment — presumably the decoder tolerates that; confirm.

    Args:
        mic_stream: Object whose ``recv()`` returns ``bytes`` chunks, or
            ``None`` at end of stream.
        src: Source language name, forwarded to ``translate_audio``.
        tgt: Target language name, forwarded to ``translate_audio``.

    Yields:
        ``(original_so_far, translated_so_far)`` after each processed segment.
    """
    wav_header = None
    pending = bytearray()
    original_acc, translated_acc = "", ""
    flush_threshold = 16000 * 2 * STREAM_CHUNK_SEC  # bytes: 16 kHz, 16-bit mono

    while True:
        chunk = mic_stream.recv()
        if chunk is None:  # end of stream
            break
        if not wav_header:
            # Split the header off so it is not duplicated when it is
            # prepended to the segment below.
            wav_header = chunk[:_WAV_HEADER_LEN]
            chunk = chunk[_WAV_HEADER_LEN:]
        pending += chunk
        if len(pending) > flush_threshold:
            o, t = _translate_segment(wav_header + bytes(pending), src, tgt)
            original_acc += " " + o
            translated_acc += " " + t
            yield original_acc.strip(), translated_acc.strip()
            pending.clear()

    # Flush whatever is left after the stream ended.
    if pending:
        o, t = _translate_segment(wav_header + bytes(pending), src, tgt)
        yield (original_acc + " " + o).strip(), (translated_acc + " " + t).strip()
91
+
92
+ # =============== Gradio UI ==========================================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  with gr.Blocks(title="SMARTok Demo", theme=gr.themes.Soft()) as app:
94
  with gr.Tabs():
95
+ # โ‘  ๋งˆ์ดํฌ ๋ฒˆ์—ญ (๋…น์Œ ํ›„ ์ผ๊ด„)
96
+ with gr.TabItem("๐ŸŽ™๏ธ ๋งˆ์ดํฌ ๋ฒˆ์—ญ"):
97
+ src1 = gr.Dropdown(LANGUAGES,value="Korean",label="์ž…๋ ฅ")
98
+ tgt1 = gr.Dropdown(LANGUAGES,value="English",label="์ถœ๋ ฅ")
99
+ mic1 = gr.Audio(sources=["microphone"],type="filepath",label="๐ŸŽค ๋…น์Œ ํ›„ Stop")
100
+ btn1 = gr.Button("๋ฒˆ์—ญ")
101
+ stt1 = gr.Textbox(label="์›๋ฌธ",lines=5)
102
+ tlt1 = gr.Textbox(label="๋ฒˆ์—ญ",lines=5)
103
+ out1 = gr.Audio(label="TTS",type="filepath",autoplay=True)
104
+ btn1.click(translate_audio,inputs=[mic1,src1,tgt1],
105
+ outputs=[stt1,tlt1,out1])
106
+
107
+ # โ‘ก ์˜ค๋””์˜ค ํŒŒ์ผ ๋ฒˆ์—ญ
108
+ with gr.TabItem("๐ŸŽง ํŒŒ์ผ ๋ฒˆ์—ญ"):
109
+ src2 = gr.Dropdown(LANGUAGES,value="Korean",label="์ž…๋ ฅ")
110
+ tgt2 = gr.Dropdown(LANGUAGES,value="English",label="์ถœ๋ ฅ")
111
+ file2= gr.Audio(sources=["upload"],type="filepath",label="์˜ค๋””์˜ค ํŒŒ์ผ ์—…๋กœ๋“œ")
112
+ btn2 = gr.Button("๋ฒˆ์—ญ")
113
+ stt2 = gr.Textbox(label="์›๋ฌธ",lines=5)
114
+ tlt2 = gr.Textbox(label="๋ฒˆ์—ญ",lines=5)
115
+ out2 = gr.Audio(label="TTS",type="filepath",autoplay=True)
116
+ btn2.click(translate_audio,inputs=[file2,src2,tgt2],
117
+ outputs=[stt2,tlt2,out2])
118
+
119
+ # โ‘ข ์‹ค์‹œ๊ฐ„ ์ŠคํŠธ๋ฆฌ๋ฐ ์ „์‚ฌยท๋ฒˆ์—ญ (Beta)
120
+ with gr.TabItem("โฑ๏ธ ์‹ค์‹œ๊ฐ„ ๋ฒˆ์—ญ (Beta)"):
121
+ gr.Markdown("๋งˆ์ดํฌ๋ฅผ ์ผœ๋ฉด 3-4์ดˆ ๋‹จ์œ„๋กœ ์ž๋ง‰์ด ๊ฐฑ์‹ ๋ฉ๋‹ˆ๋‹ค.")
122
+ src3 = gr.Dropdown(LANGUAGES,value="Korean",label="์ž…๋ ฅ")
123
+ tgt3 = gr.Dropdown(LANGUAGES,value="English",label="์ถœ๋ ฅ")
124
+ mic3 = gr.Audio(sources=["microphone"],streaming=True,label="๐ŸŽค ์‹ค์‹œ๊ฐ„")
125
+ stt3 = gr.Textbox(label="์›๋ฌธ(์‹ค์‹œ๊ฐ„)",lines=8)
126
+ tlt3 = gr.Textbox(label="๋ฒˆ์—ญ(์‹ค์‹œ๊ฐ„)",lines=8)
127
+
128
def gen(audio, src_lang, tgt_lang):
    """Relay every ``(original, translated)`` pair from ``stream_generator``."""
    # NOTE(review): stream_generator calls ``.recv()`` on its first argument;
    # a streaming gr.Audio presumably delivers audio values rather than an
    # object with ``recv`` — confirm against the Gradio version in use.
    yield from stream_generator(audio, src_lang, tgt_lang)


# The microphone component has to be listed in ``inputs``: the handler only
# receives the values of the listed components, and ``gen`` takes three.
mic3.stream(gen, inputs=[mic3, src3, tgt3], outputs=[stt3, tlt3])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
 
 
133
  if __name__ == "__main__":
134
+ app.launch(server_name="0.0.0.0",server_port=7860,share=False,debug=True)