File size: 3,543 Bytes
acf8bfe
1fd0997
129257a
acf8bfe
129257a
57cd6dc
fc58506
1fd0997
5dc46ff
 
 
fc58506
acf8bfe
 
 
 
1fd0997
 
 
 
 
 
 
57cd6dc
 
 
1fd0997
 
 
 
acf8bfe
1fd0997
 
 
 
acf8bfe
1fd0997
acf8bfe
fc58506
57cd6dc
 
 
 
 
 
 
 
129257a
 
 
 
57cd6dc
 
 
1fd0997
129257a
 
 
 
fc58506
 
acf8bfe
 
 
129257a
 
 
1fd0997
129257a
 
57cd6dc
 
 
 
 
129257a
1fd0997
 
 
57cd6dc
1fd0997
acf8bfe
129257a
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
from fastapi import FastAPI, HTTPException
from transformers import pipeline
import langdetect
import logging
import os
import opencc  # Untuk konversi Mandarin Sederhana/Tradisional

# Point the Hugging Face caches at a directory inside the container.
# NOTE(review): these are assigned after `transformers` is imported above;
# confirm the library still honours them, otherwise move them before that import.
os.environ.update(
    {
        "HF_HOME": "/app/cache",
        "TRANSFORMERS_CACHE": "/app/cache",
    }
)

# ASGI application object; routes are registered on it below.
app = FastAPI()

# Module-wide logger, emitting INFO and above.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Supported source languages, each mapped to the Hugging Face model that
# translates it into English. Every model follows the same naming scheme and
# both Chinese variants share the single zh -> en model.
MODEL_MAP = {
    lang_tag: f"Helsinki-NLP/opus-mt-{opus_src}-en"
    for lang_tag, opus_src in (
        ("id", "id"),  # Indonesian
        ("th", "th"),  # Thai
        ("fr", "fr"),  # French
        ("es", "es"),  # Spanish
        ("ja", "ja"),  # Japanese
        ("zh-CN", "zh"),  # Simplified Chinese
        ("zh-TW", "zh"),  # Traditional Chinese
        ("vi", "vi"),  # Vietnamese
    )
}

def _load_translators(model_map):
    """Build one translation pipeline per language in *model_map*.

    Languages that point at the same model name share a single pipeline
    object, so a model referenced by several language keys is loaded once
    instead of once per key.

    Args:
        model_map: Mapping of language tag to Hugging Face model name.

    Returns:
        Dict mapping each language tag to its translation pipeline.
    """
    pipelines_by_model = {}
    loaded = {}
    for lang, model_name in model_map.items():
        logger.info(f"Loading model for {lang}...")
        if model_name not in pipelines_by_model:
            pipelines_by_model[model_name] = pipeline("translation", model=model_name)
        loaded[lang] = pipelines_by_model[model_name]
        logger.info(f"Model for {lang} loaded successfully")
    return loaded


# Load every translation pipeline at import time; abort start-up if any fails.
try:
    translators = _load_translators(MODEL_MAP)
except Exception as e:
    logger.error(f"Model initialization failed: {str(e)}")
    # Chain the original error so the root cause stays in the traceback.
    raise RuntimeError(f"Model initialization failed: {str(e)}") from e

# OpenCC converter used to turn Traditional Chinese into Simplified Chinese.
converter = opencc.OpenCC('t2s')

def is_traditional_chinese(text: str) -> bool:
    """Return True if *text* contains Traditional Chinese characters.

    The text is passed through the module-level Traditional-to-Simplified
    OpenCC converter. If the converted text differs from the input, at least
    one character has a distinct Simplified form, so the text is treated as
    Traditional. Text made up only of characters shared by both scripts
    (or an empty string) yields False.

    Args:
        text: The text to inspect.

    Returns:
        True when converting to Simplified Chinese would change the text.
    """
    # A fixed sample of characters cannot cover the script (and characters
    # such as '字' are common to both scripts), so rely on the converter.
    return converter.convert(text) != text

def detect_language(text: str) -> str:
    """Detect the language of *text* and map it to a supported language key.

    Args:
        text: The text whose language should be detected.

    Returns:
        "zh-TW" or "zh-CN" for Chinese text, the detected code when it is a
        key of MODEL_MAP, and "en" otherwise (unsupported language or
        detection failure).

    Note:
        langdetect is documented as non-deterministic unless
        ``DetectorFactory.seed`` is set, so short or ambiguous text may be
        classified differently between calls.
    """
    try:
        lang = langdetect.detect(text)
        # langdetect reports Chinese as "zh-cn" / "zh-tw" rather than "zh",
        # so match on the prefix and normalise to the MODEL_MAP keys.
        normalized = lang.lower()
        if normalized == "zh" or normalized.startswith("zh-"):
            if normalized == "zh-tw" or is_traditional_chinese(text):
                return "zh-TW"
            return "zh-CN"
        # Fall back to English when the detected language has no model.
        return lang if lang in MODEL_MAP else "en"
    except Exception as e:
        # Best effort: detection failure means the text is returned untranslated.
        logger.warning(f"Language detection failed: {str(e)}, defaulting to English")
        return "en"

@app.post("/translate")
async def translate(text: str):
    """Translate *text* into English.

    The source language is auto-detected. Text detected as English, or as a
    language without a model, is returned unchanged. Traditional Chinese is
    converted to Simplified Chinese before being passed to the translator.

    Args:
        text: The text to translate.

    Returns:
        A dict of the form ``{"translated_text": <str>}``.

    Raises:
        HTTPException: 400 if *text* is empty or no translator is registered
            for the detected language; 500 if detection, conversion or
            translation fails unexpectedly.
    """
    if not text:
        raise HTTPException(status_code=400, detail="Text input is required")
    try:
        source_lang = detect_language(text)
        logger.info(f"Detected source language: {source_lang}")

        # Already English, or no model for this language: return as-is.
        if source_lang == "en":
            return {"translated_text": text}

        translator = translators.get(source_lang)
        if translator is None:
            raise HTTPException(status_code=400, detail=f"Translation not supported for language: {source_lang}")

        input_text = text
        if source_lang == "zh-TW":
            # Traditional Chinese is converted to Simplified before translation.
            input_text = converter.convert(text)
            logger.info("Converted Traditional Chinese to Simplified Chinese")

        # NOTE(review): this call is synchronous inside an async handler and
        # presumably blocks the event loop while the model runs; confirm
        # whether that is acceptable for the expected load.
        result = translator(input_text)
        return {"translated_text": result[0]["translation_text"]}
    except HTTPException:
        # Deliberate HTTP errors (e.g. the 400 above) must not be rewritten as 500.
        raise
    except Exception as e:
        logger.error(f"Processing failed: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Processing failed: {str(e)}") from e