File size: 3,500 Bytes
acf8bfe
1fd0997
129257a
acf8bfe
129257a
d434148
fc58506
5dc46ff
 
 
fc58506
acf8bfe
 
 
 
1fd0997
d434148
 
 
 
 
 
 
 
1fd0997
 
 
acf8bfe
1fd0997
 
 
 
acf8bfe
1fd0997
acf8bfe
fc58506
d434148
 
 
 
 
 
 
 
 
129257a
d434148
129257a
 
d434148
 
 
 
129257a
 
 
 
fc58506
 
acf8bfe
 
 
d434148
129257a
 
d434148
129257a
 
d434148
 
 
 
 
 
1fd0997
 
 
d434148
1fd0997
acf8bfe
129257a
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
from fastapi import FastAPI, HTTPException
from transformers import pipeline
import langdetect
import logging
import os
import opencc  # Untuk konversi Mandarin Tradisional ke Sederhana

# Point the Hugging Face / Transformers caches at /app/cache.
# NOTE(review): these are assigned after `transformers` has already been
# imported above - confirm the library still honours them at model-load time;
# otherwise they need to be set before that import.
os.environ["HF_HOME"] = "/app/cache"
os.environ["TRANSFORMERS_CACHE"] = "/app/cache"

# FastAPI application instance that the route decorators attach to.
app = FastAPI()

# Emit INFO-and-above log records and use a logger named after this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Maps each supported source-language code to the Helsinki-NLP OPUS-MT
# checkpoint that translates it into English. The second element of each pair
# is the language tag embedded in the checkpoint name; both Chinese variants
# share the single "zh" checkpoint.
MODEL_MAP = {
    source_code: f"Helsinki-NLP/opus-mt-{checkpoint_tag}-en"
    for source_code, checkpoint_tag in (
        ("id", "id"),  # Indonesian to English
        ("th", "th"),  # Thai to English
        ("fr", "fr"),  # French to English
        ("es", "es"),  # Spanish to English
        ("ja", "ja"),  # Japanese to English
        ("zh-CN", "zh"),  # Simplified Chinese to English
        ("zh-TW", "zh"),  # Traditional Chinese to English
        ("vi", "vi"),  # Vietnamese to English
    )
}

# Eagerly build one translation pipeline per supported language at import
# time, so a missing or broken model aborts startup instead of surfacing on
# the first request.
translators = {}
# Checkpoint name -> loaded pipeline. Languages that share a checkpoint (the
# two Chinese variants) reuse one pipeline instead of loading it twice.
_pipelines_by_model = {}
for lang, model_name in MODEL_MAP.items():
    if model_name not in _pipelines_by_model:
        logger.info(f"Loading model for {lang}...")
        try:
            _pipelines_by_model[model_name] = pipeline("translation", model=model_name)
        except Exception as e:
            # Name the language/checkpoint that failed and chain the original
            # error so the root cause stays visible in the startup traceback.
            logger.error(f"Model initialization failed for {lang} ({model_name}): {e}")
            raise RuntimeError(
                f"Model initialization failed for {lang} ({model_name}): {e}"
            ) from e
    translators[lang] = _pipelines_by_model[model_name]
    logger.info(f"Model for {lang} loaded successfully")

# Initialise the OpenCC converter used to turn Traditional Chinese into Simplified Chinese
converter = opencc.OpenCC('t2s')  # t2s = Traditional to Simplified

# Frequently used characters that exist only in Traditional Chinese, i.e. each
# one has a different Simplified form. Characters written identically in both
# scripts (such as 繁 or 字) must not be listed here, otherwise Simplified
# text is misclassified as Traditional.
_TRADITIONAL_ONLY_CHARS = frozenset(
    "們個這來時說國學會對為開關電話語見謝請讓問題點無發經過還後實現間與種應"
    "體麼當機愛書樣聽認識變邊錢歡買寫讀簡漢華動醫兒覺氣進東車門馬長"
)


def is_traditional_chinese(text: str) -> bool:
    """Return True if *text* contains a Traditional-only Chinese character.

    This is a heuristic based on a fixed set of common characters: text is
    reported as Traditional when at least one of its characters is in that
    set. Empty text, and text made up only of characters shared by both
    scripts, yields False.

    Args:
        text: The text to inspect.

    Returns:
        True if a known Traditional-only character occurs in *text*.
    """
    return not _TRADITIONAL_ONLY_CHARS.isdisjoint(text)

def detect_language(text: str) -> str:
    """Detect the language of *text* and map it onto a ``MODEL_MAP`` key.

    Args:
        text: The text whose language should be detected.

    Returns:
        "zh-TW" or "zh-CN" for Chinese text, the detected code when it is a
        key of ``MODEL_MAP``, and "en" otherwise. "en" is also returned when
        detection itself fails.
    """
    try:
        lang = langdetect.detect(text)
    except Exception as e:
        # Best effort: undetectable input (e.g. no letters) is treated as English.
        logger.warning(f"Language detection failed: {str(e)}, defaulting to English")
        return "en"

    # langdetect reports Chinese as lowercase "zh-cn" / "zh-tw" rather than a
    # bare "zh", so match on the prefix and translate to the MODEL_MAP keys.
    normalized = lang.lower()
    if normalized.startswith("zh"):
        # Trust either signal for Traditional script: converting text that is
        # already Simplified is harmless, while skipping the conversion for
        # Traditional text is not.
        if normalized == "zh-tw" or is_traditional_chinese(text):
            return "zh-TW"
        return "zh-CN"

    # Default to English when the detected language has no model.
    return lang if lang in MODEL_MAP else "en"

@app.post("/translate")
async def translate(text: str):
    """Translate *text* into English, auto-detecting the source language.

    Args:
        text: The text to translate.

    Returns:
        A dict of the form ``{"translated_text": ...}``. Text detected as
        English, or in a language without a model, is returned unchanged.

    Raises:
        HTTPException: 400 if *text* is empty or no translator is loaded for
            the detected language; 500 if conversion or translation fails.
    """
    if not text:
        raise HTTPException(status_code=400, detail="Text input is required")

    source_lang = detect_language(text)
    logger.info(f"Detected source language: {source_lang}")

    # Already English: return the original text.
    if source_lang == "en":
        return {"translated_text": text}

    # Kept outside the try block so this 400 is not caught by the blanket
    # handler below and reported to the client as a 500.
    translator = translators.get(source_lang)
    if translator is None:
        raise HTTPException(status_code=400, detail=f"Translation not supported for language: {source_lang}")

    try:
        input_text = text
        if source_lang == "zh-TW":
            # Traditional Chinese is converted to Simplified before translation.
            input_text = converter.convert(text)
            logger.info("Converted Traditional Chinese to Simplified Chinese")
        result = translator(input_text)
        return {"translated_text": result[0]["translation_text"]}
    except Exception as e:
        logger.error(f"Processing failed: {str(e)}")
        # Chain the cause so the original traceback is preserved in server logs.
        raise HTTPException(status_code=500, detail=f"Processing failed: {str(e)}") from e