Spaces:
Running
Running
File size: 3,500 Bytes
acf8bfe 1fd0997 129257a acf8bfe 129257a d434148 fc58506 5dc46ff fc58506 acf8bfe 1fd0997 d434148 1fd0997 acf8bfe 1fd0997 acf8bfe 1fd0997 acf8bfe fc58506 d434148 129257a d434148 129257a d434148 129257a fc58506 acf8bfe d434148 129257a d434148 129257a d434148 1fd0997 d434148 1fd0997 acf8bfe 129257a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
import logging
import os

# The Hugging Face libraries resolve their cache directories when they are
# first imported, so the cache location must be exported *before*
# `transformers` is imported; assigning it afterwards has no effect.
os.environ["HF_HOME"] = "/app/cache"
os.environ["TRANSFORMERS_CACHE"] = "/app/cache"

import langdetect  # noqa: E402
import opencc  # noqa: E402  (Traditional -> Simplified Chinese conversion)
from fastapi import FastAPI, HTTPException  # noqa: E402
from transformers import pipeline  # noqa: E402

# ASGI application object that the route decorators below attach to.
app = FastAPI()

# Module-level logger; INFO level so model loading progress is visible.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Maps each supported source-language code to the Hugging Face checkpoint
# that translates that language into English. Both Chinese variants point
# at the same zh->en checkpoint.
MODEL_MAP = {
    lang_code: f"Helsinki-NLP/opus-mt-{model_lang}-en"
    for lang_code, model_lang in (
        ("id", "id"),  # Indonesian to English
        ("th", "th"),  # Thai to English
        ("fr", "fr"),  # French to English
        ("es", "es"),  # Spanish to English
        ("ja", "ja"),  # Japanese to English
        ("zh-CN", "zh"),  # Simplified Chinese to English
        ("zh-TW", "zh"),  # Traditional Chinese to English
        ("vi", "vi"),  # Vietnamese to English
    )
}
# Translation pipelines keyed by source-language code, built once at import
# time so requests never pay the model-loading cost.
translators = {}
try:
    # Several language codes can share one checkpoint (both Chinese variants
    # use the same zh->en model), so pipelines are cached by model name to
    # avoid loading the same weights into memory twice.
    _pipelines_by_model = {}
    for lang, model_name in MODEL_MAP.items():
        logger.info(f"Loading model for {lang}...")
        if model_name not in _pipelines_by_model:
            _pipelines_by_model[model_name] = pipeline("translation", model=model_name)
        translators[lang] = _pipelines_by_model[model_name]
        logger.info(f"Model for {lang} loaded successfully")
except Exception as e:
    logger.error(f"Model initialization failed: {str(e)}")
    # Fail fast: the service cannot work without its models. Chain the cause
    # so the original traceback is preserved.
    raise Exception(f"Model initialization failed: {str(e)}") from e
# OpenCC converter that rewrites Traditional Chinese text as Simplified
# Chinese ("t2s" = Traditional to Simplified).
converter = opencc.OpenCC("t2s")
# Common characters whose Traditional form differs from the Simplified form.
# Characters written identically in both scripts (e.g. 繁 and 字) must NOT be
# listed here, otherwise Simplified text is misreported as Traditional.
# The list is a heuristic sample, not an exhaustive table.
_TRADITIONAL_ONLY_CHARS = frozenset(
    "體國學會說這個們來時為對過還進開關東車長門問間電話見"
    "風飛馬魚鳥龍麼樣點裡後發經現實無與萬愛書買賣語讀寫聽"
    "歡謝請媽嗎歲氣漢灣臺應當從邊種幾機業義總覺認識讓難題頭員"
)


def is_traditional_chinese(text: str) -> bool:
    """Return True if *text* contains a Traditional-only Chinese character.

    This is a heuristic: the text is considered Traditional Chinese as soon
    as it contains at least one character from a fixed set of common
    characters that exist only in the Traditional script.

    Args:
        text: The text to inspect.

    Returns:
        True if any Traditional-only character is present, False otherwise
        (including for empty or non-Chinese text).
    """
    return not _TRADITIONAL_ONLY_CHARS.isdisjoint(text)
def detect_language(text: str) -> str:
    """Detect the language of *text* and map it to a supported language code.

    Args:
        text: The text whose language should be detected.

    Returns:
        A key of ``MODEL_MAP`` when the detected language is supported
        ("zh-CN" or "zh-TW" for Chinese), otherwise "en". "en" is also
        returned when detection itself fails, so this function never raises.
    """
    try:
        lang = langdetect.detect(text)
        normalized = lang.lower()
        # langdetect reports Chinese as "zh-cn" / "zh-tw" (lowercase), never
        # plain "zh", so match on the prefix and map to the MODEL_MAP keys.
        if normalized.startswith("zh"):
            if normalized == "zh-tw" or is_traditional_chinese(text):
                return "zh-TW"
            return "zh-CN"
        # Unsupported languages fall back to English (returned untranslated).
        return lang if lang in MODEL_MAP else "en"
    except Exception as e:
        logger.warning(f"Language detection failed: {str(e)}, defaulting to English")
        return "en"
@app.post("/translate")
async def translate(text: str):
    """Translate *text* into English.

    Args:
        text: The text to translate.

    Returns:
        A dict ``{"translated_text": ...}``. Text detected as English (or as
        an unsupported language, which ``detect_language`` reports as "en")
        is returned unchanged.

    Raises:
        HTTPException: 400 if *text* is empty or no translator is available
            for the detected language; 500 if translation fails unexpectedly.
    """
    if not text:
        raise HTTPException(status_code=400, detail="Text input is required")
    try:
        # Detect the source language.
        source_lang = detect_language(text)
        logger.info(f"Detected source language: {source_lang}")

        # Already English: return the original text.
        if source_lang == "en":
            return {"translated_text": text}

        # Traditional Chinese is converted to Simplified before translation.
        input_text = text
        if source_lang == "zh-TW":
            input_text = converter.convert(text)
            logger.info("Converted Traditional Chinese to Simplified Chinese")

        # Translate to English.
        translator = translators.get(source_lang)
        if not translator:
            raise HTTPException(status_code=400, detail=f"Translation not supported for language: {source_lang}")
        result = translator(input_text)
        return {"translated_text": result[0]["translation_text"]}
    except HTTPException:
        # Deliberate client errors raised above must keep their status code
        # instead of being rewrapped as a 500 by the handler below.
        raise
    except Exception as e:
        logger.exception(f"Processing failed: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Processing failed: {str(e)}") from e