File size: 7,293 Bytes
b3067c5
6bdc489
b3067c5
77b322a
6bdc489
b3067c5
6bdc489
 
 
 
 
 
 
 
77b322a
 
6bdc489
77b322a
 
 
 
 
6bdc489
 
 
77b322a
 
6bdc489
77b322a
6bdc489
 
77b322a
 
 
 
 
 
 
 
6bdc489
 
77b322a
 
 
6bdc489
77b322a
6bdc489
 
 
77b322a
 
b3067c5
6bdc489
77b322a
 
 
 
 
6bdc489
 
77b322a
6bdc489
77b322a
6bdc489
77b322a
 
 
 
 
 
 
 
6bdc489
77b322a
 
6bdc489
77b322a
 
b3067c5
6bdc489
77b322a
 
 
 
 
 
 
 
 
 
 
 
 
6bdc489
 
77b322a
 
6bdc489
b3067c5
77b322a
 
 
 
b3067c5
77b322a
6bdc489
b3067c5
77b322a
 
 
 
 
 
 
 
 
 
 
 
6bdc489
77b322a
 
 
 
 
b3067c5
6bdc489
77b322a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6bdc489
 
 
 
 
77b322a
 
6bdc489
 
 
 
77b322a
 
6bdc489
b3067c5
6bdc489
 
 
77b322a
 
6bdc489
b3067c5
77b322a
b3067c5
6bdc489
77b322a
 
 
 
 
 
 
 
 
 
54c3f0f
 
 
77b322a
54c3f0f
 
77b322a
54c3f0f
 
77b322a
 
 
 
 
 
 
 
 
 
 
 
b3067c5
6bdc489
 
 
 
 
b3067c5
 
77b322a
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
import gradio as gr
import openai
import os
import tempfile
from dotenv import load_dotenv

# Load environment variables from a local .env file, if one exists.
load_dotenv()

# Read the OpenAI API key and build the shared client used by translate_audio.
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    print("⚠️  OPENAI_API_KEYλ₯Ό .env νŒŒμΌμ— μ„€μ •ν•˜μ„Έμš”!")
    print("예: OPENAI_API_KEY=sk-...")
else:
    # Confirm the key was found without echoing any part of the secret:
    # console output is easily captured in shared or persisted logs.
    print("βœ… API Key λ‘œλ“œλ¨: ********")

try:
    client = openai.OpenAI(api_key=api_key)
except Exception as e:
    # Keep the module importable so the UI can still start and report the
    # problem; translate_audio checks for a missing client before any call.
    print(f"❌ OpenAI ν΄λΌμ΄μ–ΈνŠΈ μ΄ˆκΈ°ν™” μ‹€νŒ¨: {e}")
    client = None

# ISO-639-1 codes passed to Whisper for each language offered in the UI.
# Slicing the English name only works by coincidence for some entries
# ("Spanish"[:2] yields "sp", not "es"), so the codes are spelled out.
_WHISPER_LANGUAGE_CODES = {
    "Korean": "ko",
    "English": "en",
    "Japanese": "ja",
    "Chinese": "zh",
    "Spanish": "es",
    "French": "fr",
}


def _whisper_language_code(source_lang):
    """Return the transcription language code for a UI language name.

    Names missing from the table fall back to the first two letters,
    lower-cased, which is what was previously done for every language.
    """
    return _WHISPER_LANGUAGE_CODES.get(source_lang, source_lang[:2].lower())


def translate_audio(audio_file, source_lang, target_lang):
    """Transcribe an audio file, translate the text, and synthesize speech.

    Args:
        audio_file: Path of the recorded or uploaded audio file.
        source_lang: Language name of the input speech (e.g. "Korean").
        target_lang: Language name to translate into (e.g. "English").

    Returns:
        A 3-tuple ``(original_text, translated_text, output_file)``.
        On success ``output_file`` is the path of a temporary .mp3 file.
        On any validation or API failure the first element carries a
        user-facing message, the second is ``""`` and the third is ``None``;
        no exception is propagated to the caller.
    """

    # Input validation
    if not audio_file:
        return "⚠️ μ˜€λ””μ˜€ νŒŒμΌμ„ μ—…λ‘œλ“œν•˜κ±°λ‚˜ λ…ΉμŒν•˜μ„Έμš”.", "", None

    if not api_key:
        return "❌ API ν‚€κ°€ μ„€μ •λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€. .env νŒŒμΌμ„ ν™•μΈν•˜μ„Έμš”.", "", None

    if not client:
        return "❌ OpenAI ν΄λΌμ΄μ–ΈνŠΈκ°€ μ΄ˆκΈ°ν™”λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€.", "", None

    # Nothing to do when source and target languages are the same
    if source_lang == target_lang:
        return "⚠️ μž…λ ₯ 언어와 좜λ ₯ μ–Έμ–΄κ°€ κ°™μŠ΅λ‹ˆλ‹€.", "", None

    try:
        print(f"🎀 μ˜€λ””μ˜€ 파일 처리 쀑: {audio_file}")
        print(f"πŸ“Š 파일 크기: {os.path.getsize(audio_file) / 1024 / 1024:.2f} MB")

        # 1. Speech-to-text with Whisper
        print("1️⃣ μŒμ„± 인식 μ‹œμž‘...")
        with open(audio_file, "rb") as f:
            transcript = client.audio.transcriptions.create(
                model="whisper-1",
                file=f,
                language=_whisper_language_code(source_lang),
            )
        original_text = transcript.text
        print(f"βœ… μŒμ„± 인식 μ™„λ£Œ: {original_text[:50]}...")

        # Stop early when nothing was recognized
        if not original_text.strip():
            return "⚠️ μŒμ„±μ΄ μΈμ‹λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€. λ‹€μ‹œ λ…ΉμŒν•΄μ£Όμ„Έμš”.", "", None

        # 2. Translate the transcript with the chat model
        print("2️⃣ λ²ˆμ—­ μ‹œμž‘...")
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {
                    "role": "system",
                    "content": f"You are a professional translator. Translate the following {source_lang} text to {target_lang}. Only provide the translation without any explanation or additional text."
                },
                {
                    "role": "user",
                    "content": original_text
                }
            ],
            temperature=0.3,
            max_tokens=2000
        )
        # message.content may be None; treat that as an empty translation
        # instead of failing on None.strip().
        translated_text = (response.choices[0].message.content or "").strip()
        print(f"βœ… λ²ˆμ—­ μ™„λ£Œ: {translated_text[:50]}...")

        # Without any translated text there is nothing to synthesize;
        # hand back the transcript and skip the speech request.
        if not translated_text:
            return original_text, "", None

        # 3. Text-to-speech for the translated text
        print("3️⃣ μŒμ„± ν•©μ„± μ‹œμž‘...")

        # Voice selection per target language
        voice_map = {
            "Korean": "nova",
            "English": "alloy",
            "Japanese": "nova",
            "Chinese": "nova",
            "Spanish": "nova",
            "French": "nova"
        }
        voice = voice_map.get(target_lang, "alloy")

        tts_response = client.audio.speech.create(
            model="tts-1",
            voice=voice,
            input=translated_text[:4096]  # stay within the TTS input length limit
        )

        # Persist the audio to a temporary file. delete=False keeps the file
        # on disk after the handle closes so its path can be returned.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
            tmp_file.write(tts_response.content)
            output_file = tmp_file.name

        print("βœ… λͺ¨λ“  처리 μ™„λ£Œ!")
        return original_text, translated_text, output_file

    # AuthenticationError and RateLimitError derive from APIError, so they
    # must be handled first or their dedicated messages are never shown.
    except openai.AuthenticationError:
        error_msg = "❌ API ν‚€κ°€ μ˜¬λ°”λ₯΄μ§€ μ•ŠμŠ΅λ‹ˆλ‹€. .env νŒŒμΌμ„ ν™•μΈν•˜μ„Έμš”."
        print(error_msg)
        return error_msg, "", None
    except openai.RateLimitError:
        error_msg = "❌ API μ‚¬μš© ν•œλ„λ₯Ό μ΄ˆκ³Όν–ˆμŠ΅λ‹ˆλ‹€. μž μ‹œ ν›„ λ‹€μ‹œ μ‹œλ„ν•˜μ„Έμš”."
        print(error_msg)
        return error_msg, "", None
    except openai.APIError as e:
        error_msg = f"❌ OpenAI API 였λ₯˜: {str(e)}"
        print(error_msg)
        return error_msg, "", None
    except Exception as e:
        # Last-resort boundary for the UI callback: report the failure in
        # the output box and log the traceback instead of raising.
        error_msg = f"❌ μ˜ˆμƒμΉ˜ λͺ»ν•œ 였λ₯˜: {type(e).__name__}: {str(e)}"
        print(error_msg)
        import traceback
        traceback.print_exc()
        return error_msg, "", None

# Gradio interface: language pickers, audio input, translate button,
# transcript/translation text boxes and a player for the synthesized audio.
with gr.Blocks(title="μŒμ„± λ²ˆμ—­κΈ°", theme=gr.themes.Soft()) as app:
    gr.Markdown(
        """
        # πŸŽ™οΈ AI μŒμ„± λ²ˆμ—­κΈ°
        μŒμ„±μ„ λ…ΉμŒν•˜κ±°λ‚˜ μ—…λ‘œλ“œν•˜λ©΄ μžλ™μœΌλ‘œ λ²ˆμ—­ν•©λ‹ˆλ‹€.
        
        **지원 ν˜•μ‹**: MP3, WAV, M4A, WEBM (μ΅œλŒ€ 25MB)
        """
    )

    # API key status. No part of the key is rendered: the page is served to
    # every client that can reach the server, so it must not expose secrets.
    if api_key:
        gr.Markdown("βœ… API μ—°κ²° μƒνƒœ: 정상")
    else:
        gr.Markdown("❌ API μ—°κ²° μƒνƒœ: API ν‚€λ₯Ό μ„€μ •ν•˜μ„Έμš”")

    # Both dropdowns offer the same set of languages.
    languages = ["Korean", "English", "Japanese", "Chinese", "Spanish", "French"]

    with gr.Row():
        source_lang = gr.Dropdown(
            languages,
            value="Korean",
            label="μž…λ ₯ μ–Έμ–΄",
            info="μŒμ„±μ˜ μ–Έμ–΄λ₯Ό μ„ νƒν•˜μ„Έμš”"
        )
        target_lang = gr.Dropdown(
            languages,
            value="English",
            label="좜λ ₯ μ–Έμ–΄",
            info="λ²ˆμ—­ν•  μ–Έμ–΄λ₯Ό μ„ νƒν•˜μ„Έμš”"
        )

    # Single audio input (microphone or upload). gr.Audio is created without
    # an `info` argument, which this component does not take.
    audio_input = gr.Audio(
        sources=["microphone", "upload"],
        type="filepath",
        label="μŒμ„± μž…λ ₯ (λ…ΉμŒ λ˜λŠ” 파일 μ—…λ‘œλ“œ)"
    )

    translate_btn = gr.Button("πŸ”„ λ²ˆμ—­ν•˜κΈ°", variant="primary", size="lg")

    with gr.Row():
        original_text = gr.Textbox(
            label="πŸ“ 원본 ν…μŠ€νŠΈ",
            lines=5,
            placeholder="μŒμ„± 인식 κ²°κ³Όκ°€ 여기에 ν‘œμ‹œλ©λ‹ˆλ‹€..."
        )
        translated_text = gr.Textbox(
            label="🌐 λ²ˆμ—­λœ ν…μŠ€νŠΈ",
            lines=5,
            placeholder="λ²ˆμ—­ κ²°κ³Όκ°€ 여기에 ν‘œμ‹œλ©λ‹ˆλ‹€..."
        )

    # Player for the synthesized translation. translate_audio returns a file
    # path as its third value, hence type="filepath".
    audio_output = gr.Audio(
        type="filepath",
        label="Translated audio"
    )

    # Example language pairs that pre-fill the two dropdowns
    gr.Examples(
        examples=[
            ["Korean", "English"],
            ["English", "Korean"],
            ["Japanese", "English"],
            ["Chinese", "Korean"]
        ],
        inputs=[source_lang, target_lang],
        label="μ–Έμ–΄ μ‘°ν•© μ˜ˆμ‹œ"
    )

    # Wire the button: the three return values of translate_audio map onto
    # the transcript box, the translation box and the audio player.
    translate_btn.click(
        translate_audio,
        inputs=[audio_input, source_lang, target_lang],
        outputs=[original_text, translated_text, audio_output]
    )

if __name__ == "__main__":
    # Script entry point: announce start-up, then launch the Gradio app.
    print("πŸš€ μ„œλ²„ μ‹œμž‘ 쀑...")
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,  # run locally only
        "debug": True,   # enable debug mode
    }
    app.launch(**launch_options)