Spaces:
Running
on
Zero
Running
on
Zero
import gradio as gr | |
import openai | |
import os | |
import tempfile | |
from dotenv import load_dotenv | |
# Load environment variables (e.g. from a local .env file).
load_dotenv()

# Configure the OpenAI client from the OPENAI_API_KEY environment variable.
api_key = os.getenv("OPENAI_API_KEY")
if api_key:
    print(f"β API Key λ‘λλ¨: {api_key[:10]}...")
else:
    print("β οΈ OPENAI_API_KEYλ₯Ό .env νμΌμ μ€μ νμΈμ!")
    print("μ: OPENAI_API_KEY=sk-...")

# `client` is always bound: it is None when construction fails, so the
# request handler can test it instead of hitting a NameError.
try:
    client = openai.OpenAI(api_key=api_key)
except Exception as exc:
    print(f"β OpenAI ν΄λΌμ΄μΈνΈ μ΄κΈ°ν μ€ν¨: {exc}")
    client = None
# UI language name -> ISO-639-1 code passed as the transcription `language`
# hint. An explicit table is required because slicing the first two letters
# of the name is wrong for some languages (e.g. "Spanish" -> "sp", not "es").
WHISPER_LANGUAGE_CODES = {
    "Korean": "ko",
    "English": "en",
    "Japanese": "ja",
    "Chinese": "zh",
    "Spanish": "es",
    "French": "fr",
}


def translate_audio(audio_file, source_lang, target_lang):
    """Transcribe an audio file, translate the text, and synthesize speech.

    Args:
        audio_file: Path to the recorded or uploaded audio file.
        source_lang: English name of the spoken language (e.g. "Korean").
        target_lang: English name of the language to translate into.

    Returns:
        A 3-tuple ``(original_text, translated_text, output_audio_path)``.
        When validation fails or an error occurs, the first element is a
        user-facing message, the second is ``""`` and the third is ``None``.
        Exceptions are reported through the return value, not raised.
    """
    # Input validation.
    if not audio_file:
        return "β οΈ μ€λμ€ νμΌμ μ λ‘λνκ±°λ λ ΉμνμΈμ.", "", None
    if not api_key:
        return "β API ν€κ° μ€μ λμ§ μμμ΅λλ€. .env νμΌμ νμΈνμΈμ.", "", None
    if not client:
        return "β OpenAI ν΄λΌμ΄μΈνΈκ° μ΄κΈ°νλμ§ μμμ΅λλ€.", "", None

    # Translating a language into itself is rejected up front.
    if source_lang == target_lang:
        return "β οΈ μ λ ₯ μΈμ΄μ μΆλ ₯ μΈμ΄κ° κ°μ΅λλ€.", "", None

    try:
        print(f"π€ μ€λμ€ νμΌ μ²λ¦¬ μ€: {audio_file}")
        print(f"π νμΌ ν¬κΈ°: {os.path.getsize(audio_file) / 1024 / 1024:.2f} MB")

        # 1. Speech-to-text with Whisper.
        print("1οΈβ£ μμ± μΈμ μμ...")
        # Unknown language names fall back to the previous two-letter
        # heuristic so callers passing other names keep working.
        language_code = WHISPER_LANGUAGE_CODES.get(
            source_lang, source_lang[:2].lower()
        )
        with open(audio_file, "rb") as f:
            transcript = client.audio.transcriptions.create(
                model="whisper-1",
                file=f,
                language=language_code,
            )
        original_text = transcript.text
        print(f"β μμ± μΈμ μλ£: {original_text[:50]}...")

        # Nothing was recognized: stop before spending a translation call.
        if not original_text.strip():
            return "β οΈ μμ±μ΄ μΈμλμ§ μμμ΅λλ€. λ€μ λ Ήμν΄μ£ΌμΈμ.", "", None

        # 2. Translate the transcript with a chat model.
        print("2οΈβ£ λ²μ μμ...")
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {
                    "role": "system",
                    "content": f"You are a professional translator. Translate the following {source_lang} text to {target_lang}. Only provide the translation without any explanation or additional text."
                },
                {
                    "role": "user",
                    "content": original_text
                }
            ],
            temperature=0.3,
            max_tokens=2000
        )
        translated_text = response.choices[0].message.content.strip()
        print(f"β λ²μ μλ£: {translated_text[:50]}...")

        # 3. Text-to-speech for the translated text.
        print("3οΈβ£ μμ± ν©μ± μμ...")
        # Voice per target language; anything unlisted uses "alloy".
        voice_map = {
            "Korean": "nova",
            "English": "alloy",
            "Japanese": "nova",
            "Chinese": "nova",
            "Spanish": "nova",
            "French": "nova"
        }
        voice = voice_map.get(target_lang, "alloy")
        tts_response = client.audio.speech.create(
            model="tts-1",
            voice=voice,
            input=translated_text[:4096]  # truncate to the TTS input length limit
        )

        # Persist the audio to a temp file; delete=False keeps it on disk so
        # the returned path is still valid after this function exits.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
            tmp_file.write(tts_response.content)
            output_file = tmp_file.name

        print("β λͺ¨λ μ²λ¦¬ μλ£!")
        return original_text, translated_text, output_file

    # AuthenticationError and RateLimitError are subclasses of APIError, so
    # they must be handled first or their dedicated messages never show.
    except openai.AuthenticationError:
        error_msg = "β API ν€κ° μ¬λ°λ₯΄μ§ μμ΅λλ€. .env νμΌμ νμΈνμΈμ."
        print(error_msg)
        return error_msg, "", None
    except openai.RateLimitError:
        error_msg = "β API μ¬μ© νλλ₯Ό μ΄κ³Όνμ΅λλ€. μ μ ν λ€μ μλνμΈμ."
        print(error_msg)
        return error_msg, "", None
    except openai.APIError as e:
        error_msg = f"β OpenAI API μ€λ₯: {str(e)}"
        print(error_msg)
        return error_msg, "", None
    except Exception as e:
        # Top-level boundary for the UI callback: report instead of crashing.
        error_msg = f"β μμμΉ λͺ»ν μ€λ₯: {type(e).__name__}: {str(e)}"
        print(error_msg)
        import traceback
        traceback.print_exc()
        return error_msg, "", None
# Gradio interface: language pickers, an audio input, a translate button,
# and three outputs (transcript, translation, synthesized audio).
with gr.Blocks(title="μμ± λ²μκΈ°", theme=gr.themes.Soft()) as app:
    gr.Markdown(
        """
        # ποΈ AI μμ± λ²μκΈ°
        μμ±μ λ Ήμνκ±°λ μ λ‘λνλ©΄ μλμΌλ‘ λ²μν©λλ€.
        **μ§μ νμ**: MP3, WAV, M4A, WEBM (μ΅λ 25MB)
        """
    )

    # API key status banner. It only reports whether a key is configured;
    # no part of the key is rendered, since the page is visible to every
    # visitor of the app.
    if api_key:
        gr.Markdown("β API μ°κ²° μν: μ μ")
    else:
        gr.Markdown("β API μ°κ²° μν: API ν€λ₯Ό μ€μ νμΈμ")

    with gr.Row():
        source_lang = gr.Dropdown(
            ["Korean", "English", "Japanese", "Chinese", "Spanish", "French"],
            value="Korean",
            label="μ λ ₯ μΈμ΄",
            info="μμ±μ μΈμ΄λ₯Ό μ ννμΈμ"
        )
        target_lang = gr.Dropdown(
            ["Korean", "English", "Japanese", "Chinese", "Spanish", "French"],
            value="English",
            label="μΆλ ₯ μΈμ΄",
            info="λ²μν μΈμ΄λ₯Ό μ ννμΈμ"
        )

    # gr.Audio does not accept an `info` keyword, so none is passed here.
    audio_input = gr.Audio(
        sources=["microphone", "upload"],
        type="filepath",
        label="μμ± μ λ ₯ (λ Ήμ λλ νμΌ μ λ‘λ)"
    )

    translate_btn = gr.Button("π λ²μνκΈ°", variant="primary", size="lg")

    with gr.Row():
        original_text = gr.Textbox(
            label="π μλ³Έ ν μ€νΈ",
            lines=5,
            placeholder="μμ± μΈμ κ²°κ³Όκ° μ¬κΈ°μ νμλ©λλ€..."
        )
        translated_text = gr.Textbox(
            label="π λ²μλ ν μ€νΈ",
            lines=5,
            placeholder="λ²μ κ²°κ³Όκ° μ¬κΈ°μ νμλ©λλ€..."
        )

    # Player for the synthesized translation. This must be a separate
    # component bound to `audio_output`: the click handler below writes the
    # generated file path into it.
    audio_output = gr.Audio(
        type="filepath",
        label="번역된 음성"
    )

    # Example language pairs that pre-fill the two dropdowns.
    gr.Examples(
        examples=[
            ["Korean", "English"],
            ["English", "Korean"],
            ["Japanese", "English"],
            ["Chinese", "Korean"]
        ],
        inputs=[source_lang, target_lang],
        label="μΈμ΄ μ‘°ν© μμ"
    )

    # translate_audio returns (transcript, translation, audio path), matching
    # the three output components in order.
    translate_btn.click(
        translate_audio,
        inputs=[audio_input, source_lang, target_lang],
        outputs=[original_text, translated_text, audio_output]
    )
# Script entry point: start the Gradio server when run directly.
if __name__ == "__main__":
    print("π μλ² μμ μ€...")
    launch_options = {
        "server_name": "0.0.0.0",  # listen on all network interfaces
        "server_port": 7860,
        "share": False,  # do not create a public share link
        "debug": True,  # enable debug mode
    }
    app.launch(**launch_options)