Spaces:
Running
on
Zero
Running
on
Zero
import gradio as gr | |
import openai | |
import os | |
import tempfile | |
from dotenv import load_dotenv | |
# ===== Common initialization =================================
# Read environment variables from a local .env file (if present), then pick up
# the OpenAI key from the environment.
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")

if not api_key:
    print("β οΈ OPENAI_API_KEYλ₯Ό .env νμΌμ μ€μ νμΈμ!")
else:
    # Confirm that a key was found without echoing any part of the secret:
    # stdout ends up in hosting logs, so even a key prefix should not be printed.
    print("β API Key λ‘λλ¨")

# `client` is always bound after this point (to a client object or to None),
# so the handlers below can safely test `not client`.
try:
    client = openai.OpenAI(api_key=api_key)
except Exception as e:
    # Best-effort start-up: report the problem and let the UI come up anyway;
    # the handlers return an error message when `client` is None.
    print(f"β OpenAI ν΄λΌμ΄μΈνΈ μ΄κΈ°ν μ€ν¨: {e}")
    client = None
# ===== Language settings ====================================
# Display names offered in the source/target language dropdowns.
LANGUAGES = [
    # original set
    "Korean",
    "English",
    "Japanese",
    "Chinese",
    # added
    "Thai",
    "Russian",
    "Vietnamese",
    # optional
    "Spanish",
    "French",
]

# ISO-639 language codes for Whisper, keyed by display name.
LANG_CODE_MAP = {
    "Korean": "ko",
    "English": "en",
    "Japanese": "ja",
    "Chinese": "zh",
    "Thai": "th",
    "Russian": "ru",
    "Vietnamese": "vi",
    "Spanish": "es",
    "French": "fr",
}

# TTS voice per language. Only two voices are used here: "nova" for Korean,
# Japanese and Chinese, and "alloy" for every other language.
VOICE_MAP = {
    name: ("nova" if name in ("Korean", "Japanese", "Chinese") else "alloy")
    for name in LANGUAGES
}
# ----------------------------------------------------------
# (1) Speech-to-text (STT) -> translation -> text-to-speech (TTS)
# ----------------------------------------------------------
def translate_audio(audio_file, source_lang, target_lang):
    """Transcribe speech, translate the transcript, and synthesize the result.

    Args:
        audio_file: Path of the audio file to read; a falsy value means no
            audio was supplied.
        source_lang: Name of the spoken language, looked up in
            ``LANG_CODE_MAP`` to give Whisper a language hint.
        target_lang: Name of the language to translate into, also looked up
            in ``VOICE_MAP`` to choose the TTS voice.

    Returns:
        A tuple ``(original_text, translated_text, audio_path)``. When input
        validation fails or any step raises, the first element is a
        user-facing message and the other two are ``""`` and ``None``. When
        the chat model returns no text, the transcript is still returned,
        followed by ``""`` and ``None``.
    """
    if not audio_file:
        return "β οΈ μ€λμ€ νμΌμ μ λ‘λνκ±°λ λ ΉμνμΈμ.", "", None
    if not api_key or not client:
        return "β API μ΄κΈ°ν μ€λ₯", "", None
    if source_lang == target_lang:
        return "β οΈ μ λ ₯ μΈμ΄μ μΆλ ₯ μΈμ΄κ° κ°μ΅λλ€.", "", None

    try:
        # ---------- Whisper STT ----------
        # Send a language hint only when the name maps to a known code;
        # otherwise the argument is omitted entirely rather than sent as None.
        stt_options = {"model": "whisper-1"}
        lang_code = LANG_CODE_MAP.get(source_lang)
        if lang_code:
            stt_options["language"] = lang_code
        with open(audio_file, "rb") as f:
            transcript = client.audio.transcriptions.create(file=f, **stt_options)
        original_text = transcript.text.strip()
        if not original_text:
            return "β οΈ μμ±μ΄ μΈμλμ§ μμμ΅λλ€.", "", None

        # ---------- GPT translation ----------
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system",
                 "content": f"You are a professional translator. Translate the following {source_lang} text to {target_lang}. "
                            f"Only provide the translation without any explanation or additional text."},
                {"role": "user", "content": original_text}
            ],
            temperature=0.3,
            max_tokens=2000
        )
        # `content` may be None; treat that as an empty translation instead of
        # failing on `.strip()` and discarding the transcript.
        translated_text = (response.choices[0].message.content or "").strip()
        if not translated_text:
            # Nothing to synthesize: keep the transcript visible to the user.
            return original_text, "", None

        # ---------- TTS ----------
        tts_response = client.audio.speech.create(
            model="tts-1",
            voice=VOICE_MAP.get(target_lang, "alloy"),
            input=translated_text[:4096]  # cap the text sent to the TTS call
        )
        # delete=False on purpose: the path is returned to the caller, so the
        # file has to outlive this function. Nothing here removes it later.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
            tmp.write(tts_response.content)
            output_audio = tmp.name
        return original_text, translated_text, output_audio
    except Exception as e:
        # UI boundary: surface any failure as text instead of raising.
        return f"β μ€λ₯: {type(e).__name__}: {str(e)}", "", None
# ----------------------------------------------------------
# (2) PDF / image -> translation
# ----------------------------------------------------------
# Tesseract language codes per display name, used as the OCR `lang` argument.
# NOTE(review): "Chinese" is mapped to Simplified Chinese (chi_sim) — confirm.
_TESSERACT_LANG_MAP = {
    "Korean": "kor",
    "English": "eng",
    "Japanese": "jpn",
    "Chinese": "chi_sim",
    "Thai": "tha",
    "Russian": "rus",
    "Vietnamese": "vie",
    "Spanish": "spa",
    "French": "fra",
}


def translate_document(file_obj, source_lang, target_lang):
    """Extract text from a PDF or image file and translate it.

    Args:
        file_obj: The uploaded file, either a path string or an object whose
            ``name`` attribute is the path; a falsy value means no upload.
        source_lang: Name of the document's language. Used in the translation
            prompt and to pick the OCR language for images.
        target_lang: Name of the language to translate into.

    Returns:
        A tuple ``(original_text, translated_text)``. When input validation
        fails, the format is unsupported, no text is found, or any step
        raises, the first element is a user-facing message and the second is
        ``""``. When the chat model returns no text, the extracted text is
        returned together with ``""``.
    """
    if not file_obj:
        return "β οΈ νμΌμ μ λ‘λνμΈμ.", ""
    if not api_key or not client:
        return "β API μ΄κΈ°ν μ€λ₯", ""
    if source_lang == target_lang:
        return "β οΈ μ λ ₯ μΈμ΄μ μΆλ ₯ μΈμ΄κ° κ°μ΅λλ€.", ""

    # Accept both a plain path string and an upload object exposing `.name`.
    file_path = getattr(file_obj, "name", file_obj)
    ext = os.path.splitext(file_path)[1].lower()
    try:
        # --- Text extraction ---
        if ext == ".pdf":
            import pdfplumber
            with pdfplumber.open(file_path) as pdf:
                # Demo limit: only the first 5 pages are read.
                page_texts = [page.extract_text() or "" for page in pdf.pages[:5]]
            original_text = "\n".join(page_texts).strip()
        elif ext in [".png", ".jpg", ".jpeg", ".webp", ".bmp", ".tiff"]:
            from PIL import Image
            import pytesseract
            ocr_lang = _TESSERACT_LANG_MAP.get(source_lang)
            # Context manager so the image file handle is closed after OCR.
            with Image.open(file_path) as image:
                try:
                    ocr_text = pytesseract.image_to_string(image, lang=ocr_lang)
                except pytesseract.TesseractError:
                    if not ocr_lang:
                        raise
                    # Presumably the language data is not installed; retry
                    # with Tesseract's default language.
                    ocr_text = pytesseract.image_to_string(image)
            # Strip so whitespace-only OCR output counts as "no text", the
            # same way the PDF branch behaves.
            original_text = ocr_text.strip()
        else:
            return "β οΈ μ§μνμ§ μλ νμμ λλ€.", ""
        if not original_text:
            return "β οΈ ν μ€νΈλ₯Ό μΆμΆν μ μμ΅λλ€.", ""

        # --- Translation ---
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system",
                 "content": f"You are a professional translator. Translate the following {source_lang} text to {target_lang}. "
                            f"Only provide the translation without any explanation or additional text."},
                {"role": "user", "content": original_text}
            ],
            temperature=0.3,
            max_tokens=4096
        )
        # `content` may be None; fall back to an empty translation instead of
        # failing on `.strip()` and discarding the extracted text.
        translated_text = (response.choices[0].message.content or "").strip()
        return original_text, translated_text
    except Exception as e:
        # UI boundary: surface any failure as text instead of raising.
        return f"β μ€λ₯: {type(e).__name__}: {str(e)}", ""
# ==========================================================
# Gradio UI
# ==========================================================
# Two tabs: a speech tab wired to translate_audio and a document tab wired to
# translate_document. Components render in the order they are created.
# NOTE(review): block nesting here is reconstructed (each Row is assumed to
# hold only the two widgets that follow it) — confirm against the real layout.
with gr.Blocks(title="SMARTok Demo", theme=gr.themes.Soft()) as app:
    with gr.Tabs():
        # ----- Speech translation tab -----
        with gr.TabItem("ποΈ μμ± λ²μ"):
            gr.Markdown("""
# ποΈ AI μμ± λ²μκΈ°
λ§μ΄ν¬λ‘ λ Ήμνκ±°λ μ€λμ€ νμΌμ μ λ‘λνλ©΄ **μ€μκ° μλ§ + λ²μ + μμ±ν©μ±**κΉμ§ ν λ²μ!
""")
            # Source / target language pickers, side by side.
            with gr.Row():
                src_lang_a = gr.Dropdown(LANGUAGES, value="Korean", label="μ λ ₯ μΈμ΄")
                tgt_lang_a = gr.Dropdown(LANGUAGES, value="English", label="μΆλ ₯ μΈμ΄")
            # type="filepath": the handler receives the audio as a file path.
            audio_in = gr.Audio(
                sources=["microphone", "upload"],
                type="filepath",
                label="μμ± μ λ ₯ (λ Ήμ λλ νμΌ μ λ‘λ)"
            )
            btn_audio = gr.Button("π λ²μνκΈ°")
            # Transcript and translation, side by side.
            with gr.Row():
                stt_text = gr.Textbox(label="π μλ³Έ ν μ€νΈ", lines=5)
                tlt_text = gr.Textbox(label="π λ²μλ ν μ€νΈ", lines=5)
            audio_out = gr.Audio(label="π λ²μλ μμ±", type="filepath", autoplay=True)
            # The three outputs match translate_audio's 3-tuple, in order.
            btn_audio.click(
                translate_audio,
                inputs=[audio_in, src_lang_a, tgt_lang_a],
                outputs=[stt_text, tlt_text, audio_out]
            )
        # ----- Document translation tab -----
        with gr.TabItem("π μλ£ λ²μ"):
            gr.Markdown("""
# π PDF / μ΄λ―Έμ§ λ²μ λ°λͺ¨
κ΅μ‘μλ£Β·λ°νμλ£ λ± **PDF μ΅λ 5μͺ½** λλ μ΄λ―Έμ§ 1μ₯μ μ λ‘λνλ©΄ ν μ€νΈ μΆμΆ ν λ²μν΄μ€λλ€.
""")
            # Source / target language pickers, side by side.
            with gr.Row():
                src_lang_d = gr.Dropdown(LANGUAGES, value="Korean", label="μ λ ₯ μΈμ΄")
                tgt_lang_d = gr.Dropdown(LANGUAGES, value="English", label="μΆλ ₯ μΈμ΄")
            file_in = gr.File(label="PDF / μ΄λ―Έμ§ μ λ‘λ")
            btn_doc = gr.Button("π λ²μνκΈ°")
            original_doc = gr.Textbox(label="π μΆμΆλ μλ¬Έ", lines=15)
            translated_doc = gr.Textbox(label="π λ²μ κ²°κ³Ό", lines=15)
            # The two outputs match translate_document's 2-tuple, in order.
            btn_doc.click(
                translate_document,
                inputs=[file_in, src_lang_d, tgt_lang_d],
                outputs=[original_doc, translated_doc]
            )
# ==========================================================
# Script entry point: start the Gradio server only when run directly.
if __name__ == "__main__":
    print("π μλ² μμ μ€...")
    # Listen on all interfaces at port 7860, with no public share link and
    # with debug mode enabled.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "debug": True,
    }
    app.launch(**launch_options)