voice-trans / app.py
openfree's picture
Update app.py
54c3f0f verified
raw
history blame
7.29 kB
import gradio as gr
import openai
import os
import tempfile
from dotenv import load_dotenv
# Load variables from a local .env file (if present) into the environment.
load_dotenv()

# Configure the OpenAI client from the OPENAI_API_KEY environment variable.
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    print("⚠️ OPENAI_API_KEYλ₯Ό .env νŒŒμΌμ— μ„€μ •ν•˜μ„Έμš”!")
    print("예: OPENAI_API_KEY=sk-...")
else:
    # Report only that a key was found; never echo any part of the secret
    # into logs, where it could be collected and shared.
    print("βœ… API Key λ‘œλ“œλ¨")

# `client` is always bound after this point: either a usable client or None,
# so later code can test it without risking a NameError.
try:
    client = openai.OpenAI(api_key=api_key)
except Exception as e:
    print(f"❌ OpenAI ν΄λΌμ΄μ–ΈνŠΈ μ΄ˆκΈ°ν™” μ‹€νŒ¨: {e}")
    client = None
# Language codes sent to Whisper, keyed by the names offered in the UI.
# Taking the first two letters of the English name is not reliable:
# "Spanish" would become "sp", but the ISO-639-1 code is "es".
_WHISPER_LANGUAGE_CODES = {
    "Korean": "ko",
    "English": "en",
    "Japanese": "ja",
    "Chinese": "zh",
    "Spanish": "es",
    "French": "fr",
}


def _whisper_language_code(language_name):
    """Return the language code to send to Whisper for a UI language name."""
    # Names missing from the table keep the previous prefix-based guess.
    return _WHISPER_LANGUAGE_CODES.get(language_name, language_name[:2].lower())


def translate_audio(audio_file, source_lang, target_lang):
    """Transcribe an audio file, translate the text, and synthesize speech.

    Args:
        audio_file: Path of the recorded/uploaded audio file; falsy when the
            user supplied nothing.
        source_lang: Name of the spoken language (e.g. "Korean").
        target_lang: Name of the language to translate into.

    Returns:
        A 3-tuple ``(original_text, translated_text, output_audio_path)``.
        On any failure the first element carries a user-facing message and
        the remaining two are ``""`` and ``None``. Exceptions are reported
        through the return value rather than raised.
    """
    # Input validation
    if not audio_file:
        return "⚠️ μ˜€λ””μ˜€ νŒŒμΌμ„ μ—…λ‘œλ“œν•˜κ±°λ‚˜ λ…ΉμŒν•˜μ„Έμš”.", "", None
    if not api_key:
        return "❌ API ν‚€κ°€ μ„€μ •λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€. .env νŒŒμΌμ„ ν™•μΈν•˜μ„Έμš”.", "", None
    if not client:
        return "❌ OpenAI ν΄λΌμ΄μ–ΈνŠΈκ°€ μ΄ˆκΈ°ν™”λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€.", "", None
    # Translating a language into itself is rejected up front.
    if source_lang == target_lang:
        return "⚠️ μž…λ ₯ 언어와 좜λ ₯ μ–Έμ–΄κ°€ κ°™μŠ΅λ‹ˆλ‹€.", "", None

    try:
        print(f"🎀 μ˜€λ””μ˜€ 파일 처리 쀑: {audio_file}")
        print(f"πŸ“Š 파일 크기: {os.path.getsize(audio_file) / 1024 / 1024:.2f} MB")

        # 1. Speech-to-text with Whisper
        print("1️⃣ μŒμ„± 인식 μ‹œμž‘...")
        with open(audio_file, "rb") as f:
            transcript = client.audio.transcriptions.create(
                model="whisper-1",
                file=f,
                language=_whisper_language_code(source_lang),
            )
        original_text = transcript.text
        print(f"βœ… μŒμ„± 인식 μ™„λ£Œ: {original_text[:50]}...")

        # Nothing was recognized in the audio
        if not original_text.strip():
            return "⚠️ μŒμ„±μ΄ μΈμ‹λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€. λ‹€μ‹œ λ…ΉμŒν•΄μ£Όμ„Έμš”.", "", None

        # 2. Translate the transcript with the chat completions API
        print("2️⃣ λ²ˆμ—­ μ‹œμž‘...")
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {
                    "role": "system",
                    "content": f"You are a professional translator. Translate the following {source_lang} text to {target_lang}. Only provide the translation without any explanation or additional text."
                },
                {
                    "role": "user",
                    "content": original_text
                }
            ],
            temperature=0.3,
            max_tokens=2000
        )
        # `content` may be None; treat that the same as an empty reply
        # instead of failing on `.strip()`.
        translated_text = (response.choices[0].message.content or "").strip()
        print(f"βœ… λ²ˆμ—­ μ™„λ£Œ: {translated_text[:50]}...")

        # With no translated text there is nothing to synthesize; return the
        # transcript alone rather than sending empty input to the TTS call.
        if not translated_text:
            return original_text, "", None

        # 3. Text-to-speech for the translated text
        print("3️⃣ μŒμ„± ν•©μ„± μ‹œμž‘...")
        # Voice selection per target language
        voice_map = {
            "Korean": "nova",
            "English": "alloy",
            "Japanese": "nova",
            "Chinese": "nova",
            "Spanish": "nova",
            "French": "nova"
        }
        voice = voice_map.get(target_lang, "alloy")
        tts_response = client.audio.speech.create(
            model="tts-1",
            voice=voice,
            input=translated_text[:4096]  # stay within the TTS input length limit
        )

        # Save to a temporary file. delete=False keeps the file on disk after
        # the handle closes so its path can be returned to the caller.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
            tmp_file.write(tts_response.content)
            output_file = tmp_file.name

        print("βœ… λͺ¨λ“  처리 μ™„λ£Œ!")
        return original_text, translated_text, output_file

    # The specific OpenAI errors must be listed before openai.APIError:
    # they are subclasses of it and would otherwise never be reached.
    except openai.AuthenticationError:
        error_msg = "❌ API ν‚€κ°€ μ˜¬λ°”λ₯΄μ§€ μ•ŠμŠ΅λ‹ˆλ‹€. .env νŒŒμΌμ„ ν™•μΈν•˜μ„Έμš”."
        print(error_msg)
        return error_msg, "", None
    except openai.RateLimitError:
        error_msg = "❌ API μ‚¬μš© ν•œλ„λ₯Ό μ΄ˆκ³Όν–ˆμŠ΅λ‹ˆλ‹€. μž μ‹œ ν›„ λ‹€μ‹œ μ‹œλ„ν•˜μ„Έμš”."
        print(error_msg)
        return error_msg, "", None
    except openai.APIError as e:
        error_msg = f"❌ OpenAI API 였λ₯˜: {str(e)}"
        print(error_msg)
        return error_msg, "", None
    except Exception as e:
        # Last-resort boundary: log the traceback and report to the caller.
        error_msg = f"❌ μ˜ˆμƒμΉ˜ λͺ»ν•œ 였λ₯˜: {type(e).__name__}: {str(e)}"
        print(error_msg)
        import traceback
        traceback.print_exc()
        return error_msg, "", None
# Gradio interface: language selectors, audio input, a translate button, and
# three outputs (transcript, translation, synthesized audio).
with gr.Blocks(title="μŒμ„± λ²ˆμ—­κΈ°", theme=gr.themes.Soft()) as app:
    gr.Markdown(
        """
# πŸŽ™οΈ AI μŒμ„± λ²ˆμ—­κΈ°
μŒμ„±μ„ λ…ΉμŒν•˜κ±°λ‚˜ μ—…λ‘œλ“œν•˜λ©΄ μžλ™μœΌλ‘œ λ²ˆμ—­ν•©λ‹ˆλ‹€.
**지원 ν˜•μ‹**: MP3, WAV, M4A, WEBM (μ΅œλŒ€ 25MB)
"""
    )

    # API key status banner. Only presence is shown; no part of the key is
    # rendered, because the page is visible to every visitor of the app.
    if api_key:
        gr.Markdown("βœ… API μ—°κ²° μƒνƒœ: 정상")
    else:
        gr.Markdown("❌ API μ—°κ²° μƒνƒœ: API ν‚€λ₯Ό μ„€μ •ν•˜μ„Έμš”")

    # Both dropdowns offer the same set of languages.
    language_choices = ["Korean", "English", "Japanese", "Chinese", "Spanish", "French"]

    with gr.Row():
        source_lang = gr.Dropdown(
            language_choices,
            value="Korean",
            label="μž…λ ₯ μ–Έμ–΄",
            info="μŒμ„±μ˜ μ–Έμ–΄λ₯Ό μ„ νƒν•˜μ„Έμš”"
        )
        target_lang = gr.Dropdown(
            language_choices,
            value="English",
            label="좜λ ₯ μ–Έμ–΄",
            info="λ²ˆμ—­ν•  μ–Έμ–΄λ₯Ό μ„ νƒν•˜μ„Έμš”"
        )

    # Audio input. No `info` argument is passed here: gr.Audio does not take
    # one, unlike gr.Dropdown above.
    audio_input = gr.Audio(
        sources=["microphone", "upload"],
        type="filepath",
        label="μŒμ„± μž…λ ₯ (λ…ΉμŒ λ˜λŠ” 파일 μ—…λ‘œλ“œ)"
    )

    translate_btn = gr.Button("πŸ”„ λ²ˆμ—­ν•˜κΈ°", variant="primary", size="lg")

    with gr.Row():
        original_text = gr.Textbox(
            label="πŸ“ 원본 ν…μŠ€νŠΈ",
            lines=5,
            placeholder="μŒμ„± 인식 κ²°κ³Όκ°€ 여기에 ν‘œμ‹œλ©λ‹ˆλ‹€..."
        )
        translated_text = gr.Textbox(
            label="🌐 λ²ˆμ—­λœ ν…μŠ€νŠΈ",
            lines=5,
            placeholder="λ²ˆμ—­ κ²°κ³Όκ°€ 여기에 ν‘œμ‹œλ©λ‹ˆλ‹€..."
        )

    # Player for the synthesized translation. This must be a separate
    # component bound to `audio_output`, since the click handler below lists
    # it as the third output.
    audio_output = gr.Audio(
        type="filepath",
        label="Translated audio"
    )

    # Example language pairs that pre-fill the two dropdowns
    gr.Examples(
        examples=[
            ["Korean", "English"],
            ["English", "Korean"],
            ["Japanese", "English"],
            ["Chinese", "Korean"]
        ],
        inputs=[source_lang, target_lang],
        label="μ–Έμ–΄ μ‘°ν•© μ˜ˆμ‹œ"
    )

    # translate_audio returns (original text, translated text, audio path),
    # matching the order of `outputs`.
    translate_btn.click(
        translate_audio,
        inputs=[audio_input, source_lang, target_lang],
        outputs=[original_text, translated_text, audio_output]
    )
if __name__ == "__main__":
    # Launch settings for the Gradio server, gathered in one place.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,  # no public Gradio share link
        "debug": True,   # debug mode enabled
    }
    print("πŸš€ μ„œλ²„ μ‹œμž‘ 쀑...")
    app.launch(**launch_options)