# NOTE: Scraped from a Hugging Face Space page; its status banner read "Runtime error".
import asyncio
import logging
import os
import re
from functools import lru_cache
from typing import Dict, Optional

import gradio as gr
import langdetect
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from transformers import Pipeline, pipeline
# --- 1. Initial configuration ---
# Create the directories used for the model cache and for logs.
os.makedirs("./cache", exist_ok=True)
os.makedirs("./logs", exist_ok=True)

# Point the Hugging Face cache at the local ./cache directory.
# NOTE(review): these variables are assigned after `transformers` has already
# been imported at the top of the file; if the libraries read them at import
# time the assignment comes too late -- confirm, and if so set them before
# the third-party imports.
os.environ["HF_HOME"] = "./cache"
os.environ["TRANSFORMERS_CACHE"] = "./cache"

# Environment configuration
DEVICE = -1  # passed as `device=` to pipeline(); always CPU
MAX_TEXT_LENGTH = int(os.getenv("MAX_TEXT_LENGTH", "5000"))

# Configure logging
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Supported source languages and the model used to translate each one.
MODEL_MAP = {
    "th": "Helsinki-NLP/opus-mt-th-en",
    "ja": "Helsinki-NLP/opus-mt-ja-en",
    "zh": "Helsinki-NLP/opus-mt-zh-en",
    "vi": "Helsinki-NLP/opus-mt-vi-en",
}

# Terms that must pass through translation unchanged.
PROTECTED_TERMS = ["2030 Aspirations", "Griffith"]

# Loaded translation pipelines, keyed by source-language code.
# The value type is the Pipeline class; `pipeline` itself is only the factory.
translators: Dict[str, "Pipeline"] = {}
# --- Pydantic models (unchanged) ---
class TranslationRequest(BaseModel):
    """Request payload for a translation call."""

    # Text to translate.
    text: str
    # Optional source-language code. perform_translation only honours it when
    # it is a key of MODEL_MAP; otherwise the language is auto-detected.
    source_lang_override: Optional[str] = None
class TranslationResponse(BaseModel):
    """Result of a translation call."""

    # Translated text (the input text itself when the source is English).
    translated_text: str
    # Language code that was used as the source language.
    source_language: Optional[str] = None
# --- 2. FastAPI application ---
app = FastAPI(title="Translation Service API")
# --- 3. OPTIMIZATION: preload the models at startup ---
# The handler must be registered with the application, otherwise nothing ever
# calls it and `translators` stays empty (every request would then fail in
# get_translator).
@app.on_event("startup")
async def startup_event():
    """Load every model listed in MODEL_MAP into the `translators` cache.

    A model that fails to load is logged and skipped so that the remaining
    languages stay available; the failure surfaces later as an HTTP 500 from
    get_translator for that language only.
    """
    logger.info("Memulai prapemuatan model translasi...")
    loaded = 0
    for lang, model_name in MODEL_MAP.items():
        try:
            logger.info("Memuat model untuk bahasa: %s (%s)", lang, model_name)
            # Building a pipeline is blocking work; run it in a worker thread
            # so the event loop is not stalled while models load.
            translators[lang] = await asyncio.to_thread(
                pipeline, "translation", model=model_name, device=DEVICE
            )
            loaded += 1
            logger.info("Model untuk %s berhasil dimuat.", lang)
        except Exception as e:
            # Best effort: keep loading the other languages.
            logger.exception("Gagal memuat model untuk %s: %s", lang, e)
    # Report the real outcome instead of claiming that everything loaded.
    logger.info(
        "Prapemuatan selesai: %d dari %d model berhasil dimuat.",
        loaded,
        len(MODEL_MAP),
    )
def get_translator(lang: str) -> "Pipeline":
    """Return the preloaded translation pipeline for *lang*.

    Args:
        lang: Source-language code used as the key in `translators`.

    Returns:
        The pipeline object stored for that language.

    Raises:
        HTTPException: status 500 when no pipeline is stored for *lang*.
    """
    translator = translators.get(lang)
    # Test for a missing entry explicitly rather than relying on the
    # truthiness of the pipeline object.
    if translator is None:
        logger.error(
            "Translator untuk bahasa '%s' tidak ditemukan. Mungkin gagal dimuat saat startup.",
            lang,
        )
        raise HTTPException(
            status_code=500,
            detail=f"Model terjemahan untuk '{lang}' tidak tersedia.",
        )
    return translator
# --- Utility functions (mostly unchanged, minor fixes) ---
# Use a larger cache if needed
# NOTE(review): the docstring promises a cache and `lru_cache` is imported but
# was not applied anywhere; the decorator is restored here. The size of 1024
# is a guess -- tune as needed.
@lru_cache(maxsize=1024)
def detect_language(text: str) -> str:
    """Detect the source language of *text*, with results cached.

    Only the first 500 characters are passed to the detector to keep the call
    fast on long inputs.

    Returns:
        "zh" for any detected code starting with "zh", the detected code when
        it is a key of MODEL_MAP, and "en" otherwise (including when
        detection raises).
    """
    try:
        preview_text = text[:500]
        detected_lang = langdetect.detect(preview_text)
        # Collapse regional variants onto the single "zh" model key.
        if detected_lang.startswith("zh"):
            return "zh"
        return detected_lang if detected_lang in MODEL_MAP else "en"
    except Exception as e:
        # Deliberate best effort: fall back to "en" so the caller returns the
        # text unchanged.
        logger.warning("Deteksi bahasa gagal: %s. Mengasumsikan 'en'.", e)
        return "en"
def protect_terms(text: str, protected_terms: list) -> tuple[str, dict]:
    """Replace each protected term in *text* with a placeholder token.

    Matching is case-insensitive and limited to whole terms: a term is not
    matched when it is directly adjacent to an ASCII letter, digit or
    underscore (so "Griffiths" is left alone).

    Args:
        text: Text about to be translated.
        protected_terms: Terms to shield; the index of a term in this list
            determines its placeholder, ``__PROTECTED_<index>__``.

    Returns:
        A ``(text, replacements)`` tuple where *replacements* maps every
        placeholder that was actually inserted to its term as spelled in
        *protected_terms*.
    """
    replacements = {}
    for i, term in enumerate(protected_terms):
        if not term:
            # An empty pattern would match at every position.
            continue
        placeholder = f"__PROTECTED_{i}__"
        # Explicit ASCII lookarounds instead of \b: \b is Unicode-aware, so it
        # sees no boundary between a Latin term and an adjacent CJK character
        # ("Griffith大学") and the term would silently go unprotected.
        pattern = (
            r"(?<![A-Za-z0-9_])" + re.escape(term) + r"(?![A-Za-z0-9_])"
        )
        modified_text, count = re.subn(
            pattern, placeholder, text, flags=re.IGNORECASE
        )
        # Record the placeholder only when something was replaced.
        if count:
            replacements[placeholder] = term
            text = modified_text
    return text, replacements
def restore_terms(text: str, replacements: dict) -> str:
    """Substitute every placeholder in *text* with its protected term.

    Args:
        text: Text that may contain placeholder tokens.
        replacements: Mapping of placeholder -> term, as returned by
            protect_terms.

    Returns:
        *text* with each placeholder occurrence replaced by its term.
    """
    for placeholder, term in replacements.items():
        text = text.replace(placeholder, term)
    return text
# --- 4. OPTIMIZATION: core function and API endpoints are fully asynchronous ---
async def perform_translation(text: str, source_lang_override: Optional[str] = None) -> TranslationResponse:
    """Translate *text* to English.

    Args:
        text: Text to translate.
        source_lang_override: Source-language code to use instead of
            auto-detection. It is ignored unless it is a key of MODEL_MAP.

    Returns:
        A TranslationResponse with the translated text and the source
        language that was used. When the source language resolves to "en"
        the input text is returned unchanged.

    Raises:
        HTTPException: 400 for empty or whitespace-only text, 413 when the
            text is longer than MAX_TEXT_LENGTH, 500 when no model is
            available or the translation itself fails.
    """
    if not text or not text.strip():
        raise HTTPException(status_code=400, detail="Teks input tidak boleh kosong.")
    if len(text) > MAX_TEXT_LENGTH:
        raise HTTPException(
            status_code=413,
            detail=f"Teks terlalu panjang. Panjang maksimal yang diizinkan: {MAX_TEXT_LENGTH}.",
        )
    try:
        # Resolve the source language.
        if source_lang_override and source_lang_override in MODEL_MAP:
            source_lang = source_lang_override
        else:
            source_lang = detect_language(text)

        # English input needs no translation.
        if source_lang == "en":
            return TranslationResponse(translated_text=text, source_language=source_lang)

        translator = get_translator(source_lang)

        # Shield protected terms before handing the text to the model.
        modified_text, replacements = protect_terms(text, PROTECTED_TERMS)

        # The pipeline call is blocking; run it in a worker thread so the
        # event loop keeps serving other requests.
        # NOTE(review): max_length=512 is passed to the pipeline while
        # MAX_TEXT_LENGTH allows 5000 characters by default -- long inputs
        # may come back truncated; confirm.
        def _translate_task():
            return translator(modified_text, max_length=512, num_beams=4)

        result = await asyncio.to_thread(_translate_task)
        translated_text = result[0]["translation_text"]

        # Put the protected terms back.
        final_text = restore_terms(translated_text, replacements)
        return TranslationResponse(translated_text=final_text, source_language=source_lang)
    except HTTPException:
        # Propagate unchanged so the original status code is kept.
        raise
    except Exception as e:
        logger.exception("Terjadi kesalahan saat translasi: %s", e)
        raise HTTPException(status_code=500, detail=f"Proses translasi gagal: {str(e)}") from e
# NOTE(review): no route decorator was present, so this endpoint was never
# reachable. The path "/translate" is an assumption -- confirm it against the
# clients of this API.
@app.post("/translate", response_model=TranslationResponse)
async def translate_api(request: TranslationRequest):
    """API endpoint: translate the text carried by *request*.

    Delegates to perform_translation and returns its TranslationResponse;
    any HTTPException raised there propagates to the client.
    """
    return await perform_translation(request.text, request.source_lang_override)
# NOTE(review): no route decorator was present, so this endpoint was never
# reachable. The path "/health" is an assumption -- confirm.
@app.get("/health")
async def health_check():
    """Report service status and the language codes whose models are loaded."""
    return {"status": "healthy", "loaded_models": list(translators.keys())}
# --- 5. OPTIMIZATION: the Gradio handler is asynchronous ---
async def translate_gradio(text: str, source_lang: str = "auto"):
    """Gradio handler: translate *text* and return values for the two output boxes.

    Args:
        text: Text entered by the user.
        source_lang: Language code chosen in the dropdown; "auto" requests
            automatic detection.

    Returns:
        A ``(translated_text, source_language)`` tuple. Errors are never
        raised to Gradio; they are returned as an ``"Error: ..."`` message
        paired with the label ``"Error"``.
    """
    if not text or not text.strip():
        return "Masukkan teks untuk diterjemahkan.", "N/A"
    try:
        # "auto" means no override, which lets perform_translation detect.
        source_lang_param = source_lang if source_lang != "auto" else None
        result = await perform_translation(text, source_lang_param)
        return result.translated_text, result.source_language or "Unknown"
    except HTTPException as e:
        return f"Error: {e.detail}", "Error"
    except Exception as e:
        return f"Error: {str(e)}", "Error"
# --- 6. OPTIMIZATION: mount Gradio onto FastAPI ---
# The function that builds the Gradio UI is unchanged
def create_gradio_interface():
    """Build and return the Gradio Blocks UI for the translation service.

    Layout: an input column (text box, source-language dropdown, translate
    button) next to an output column (translation result, detected
    language), followed by clickable examples. The button click and the
    text box submit event both call translate_gradio.
    """
    with gr.Blocks(
        title="Multi-Language Translation Service",
        theme=gr.themes.Soft(),
        css=".gradio-container { max-width: 1200px !important; }",
    ) as interface:
        gr.Markdown("""
        # 🌐 Multi-Language Translation Service
        Terjemahkan teks dari **Thai**, **Jepang**, **Mandarin**, atau **Vietnam** ke **Inggris**.
        ✨ Fitur: Deteksi bahasa otomatis • Perlindungan istilah • Model Helsinki-NLP yang cepat.
        """)
        with gr.Row():
            # Left column: input controls.
            with gr.Column(scale=1):
                text_input = gr.Textbox(
                    label="📝 Input Text",
                    placeholder="Enter text to translate...",
                    lines=6,
                    max_lines=10,
                )
                with gr.Row():
                    lang_dropdown = gr.Dropdown(
                        choices=[
                            ("🔍 Auto-detect", "auto"),
                            ("🇹🇭 Thai", "th"),
                            ("🇯🇵 Japanese", "ja"),
                            ("🇨🇳 Chinese", "zh"),
                            ("🇻🇳 Vietnamese", "vi"),
                        ],
                        value="auto",
                        label="Source Language",
                    )
                    translate_btn = gr.Button("🚀 Translate", variant="primary", size="lg")
            # Right column: read-only results.
            with gr.Column(scale=1):
                output_text = gr.Textbox(
                    label="🎯 Translation Result",
                    lines=6,
                    max_lines=10,
                    interactive=False,
                )
                detected_lang = gr.Textbox(
                    label="🔍 Detected Language",
                    interactive=False,
                    max_lines=1,
                )
        gr.Examples(
            examples=[
                ["สวัสดีครับ ยินดีที่ได้รู้จัก การพัฒนา 2030 Aspirations เป็นเป้าหมายสำคัญ", "th"],
                ["こんにちは、はじめまして。Griffith大学での研究が進んでいます。", "ja"],
                ["你好,很高兴认识你。我们正在为2030 Aspirations制定计划。", "zh"],
                ["Xin chào, rất vui được gặp bạn. Griffith là trường đại học tuyệt vời.", "vi"],
            ],
            inputs=[text_input, lang_dropdown],
            outputs=[output_text, detected_lang],
            fn=translate_gradio,  # the async handler is passed directly
            cache_examples=False,
        )
        # Both events call the same async handler.
        translate_btn.click(
            fn=translate_gradio,
            inputs=[text_input, lang_dropdown],
            outputs=[output_text, detected_lang],
        )
        text_input.submit(
            fn=translate_gradio,
            inputs=[text_input, lang_dropdown],
            outputs=[output_text, detected_lang],
        )
    return interface
# Build the Gradio UI
gradio_app = create_gradio_interface()

# Mount the Gradio app onto the FastAPI app at path "/" so that a single
# ASGI application serves both.
app = gr.mount_gradio_app(app, gradio_app, path="/")

# To run:
# save this file as app.py and start it with uvicorn
# > uvicorn app:app --reload --port 7860