Spaces:
Runtime error
Runtime error
File size: 3,543 Bytes
acf8bfe 1fd0997 129257a acf8bfe 129257a 57cd6dc fc58506 1fd0997 5dc46ff fc58506 acf8bfe 1fd0997 57cd6dc 1fd0997 acf8bfe 1fd0997 acf8bfe 1fd0997 acf8bfe fc58506 57cd6dc 129257a 57cd6dc 1fd0997 129257a fc58506 acf8bfe 129257a 1fd0997 129257a 57cd6dc 129257a 1fd0997 57cd6dc 1fd0997 acf8bfe 129257a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
import logging
import os

# Configure the Hugging Face cache directory BEFORE importing `transformers`.
# The Hugging Face libraries read HF_HOME / TRANSFORMERS_CACHE when they are
# imported, so assigning these variables after the import (as this file used
# to do) leaves the default cache location in effect.
os.environ["HF_HOME"] = "/app/cache"
os.environ["TRANSFORMERS_CACHE"] = "/app/cache"

# These imports intentionally follow the environment setup above.
import langdetect  # noqa: E402
import opencc  # noqa: E402  (Simplified/Traditional Chinese conversion)
from fastapi import FastAPI, HTTPException  # noqa: E402
from transformers import pipeline  # noqa: E402

# FastAPI application object that the route decorators below attach to.
app = FastAPI()

# Module-wide logging at INFO level.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Supported source languages, keyed by language code, each mapped to the
# Helsinki-NLP OPUS-MT checkpoint that translates it into English.
# Both Chinese variants point at the same zh->en checkpoint.
MODEL_MAP = {
    "id": "Helsinki-NLP/opus-mt-id-en",  # Indonesian -> English
    "th": "Helsinki-NLP/opus-mt-th-en",  # Thai -> English
    "fr": "Helsinki-NLP/opus-mt-fr-en",  # French -> English
    "es": "Helsinki-NLP/opus-mt-es-en",  # Spanish -> English
    "ja": "Helsinki-NLP/opus-mt-ja-en",  # Japanese -> English
    "zh-CN": "Helsinki-NLP/opus-mt-zh-en",  # Simplified Chinese -> English
    "zh-TW": "Helsinki-NLP/opus-mt-zh-en",  # Traditional Chinese -> English
    "vi": "Helsinki-NLP/opus-mt-vi-en",  # Vietnamese -> English
}
# Build one translation pipeline per supported language at import time.
# Several language codes can share a checkpoint (both Chinese variants use the
# same zh->en model), so pipelines are cached by model name and each distinct
# checkpoint is loaded only once instead of once per language code.
translators = {}
_pipelines_by_model = {}
try:
    for lang, model_name in MODEL_MAP.items():
        if model_name not in _pipelines_by_model:
            logger.info(f"Loading model for {lang}...")
            _pipelines_by_model[model_name] = pipeline("translation", model=model_name)
        translators[lang] = _pipelines_by_model[model_name]
        logger.info(f"Model for {lang} loaded successfully")
except Exception as e:
    # Log with traceback, then abort start-up: the service cannot work
    # without its models. The original error is chained as the cause.
    logger.exception(f"Model initialization failed: {str(e)}")
    raise Exception(f"Model initialization failed: {str(e)}") from e
# Shared OpenCC converter using the "t2s" configuration
# (Traditional Chinese -> Simplified Chinese).
converter = opencc.OpenCC("t2s")
# Common characters whose Traditional form differs from the Simplified form.
# Characters that are written identically in both scripts (for example the
# "繁" and "字" of "繁體字") are deliberately excluded: they say nothing about
# which script a text uses. Built once at import time, not on every call.
_TRADITIONAL_ONLY_CHARS = frozenset(
    "體國學說這們來時會個為對發經過還開關長問"
    "間見點電話語讀書寫買賣車門馬鳥魚龍風飛東"
    "員園圖實應當樣機歡氣漢無現種與號術覺觀讓"
    "認識變愛戰進運邊醫錢難題類麼後灣華萬親聽"
    "習專業嗎兒幾樂歲熱網線義處視記許該請誰課"
    "謝貴軍輕遠選連雖雙雲頭顯飯館驗黨測"
)


def is_traditional_chinese(text: str) -> bool:
    """Return True if *text* contains a Traditional-only Chinese character.

    This is a heuristic: the text is scanned for any character from a fixed
    set of common characters that have a distinct Simplified form. Text made
    only of characters shared by both scripts, empty text, and non-Chinese
    text all return False.
    """
    return any(char in _TRADITIONAL_ONLY_CHARS for char in text)
def detect_language(text: str) -> str:
    """Detect the language of *text* and map it to a supported language key.

    Returns a key of ``MODEL_MAP`` when the detected language is supported,
    otherwise ``"en"``. ``"en"`` is also returned when detection itself
    fails, so callers treat "English", "unsupported" and "undetectable"
    alike.
    """
    try:
        lang = langdetect.detect(text)
    except Exception as e:
        # Best effort: detection failure falls back to English, not an error.
        logger.warning(f"Language detection failed: {str(e)}, defaulting to English")
        return "en"

    # langdetect reports Chinese as "zh-cn" / "zh-tw" (lowercase), never as a
    # bare "zh", while MODEL_MAP uses "zh-CN" / "zh-TW". Normalise any Chinese
    # result here; otherwise it would fall through to the "en" default and
    # Chinese input would come back untranslated.
    normalized = lang.lower()
    if normalized.startswith("zh"):
        if normalized == "zh-tw" or is_traditional_chinese(text):
            return "zh-TW"
        return "zh-CN"

    # Any language without a configured model is treated as English.
    return lang if lang in MODEL_MAP else "en"
@app.post("/translate")
async def translate(text: str):
    """Translate *text* into English.

    The source language is detected automatically. Text detected as English
    (which includes unsupported or undetectable languages) is returned
    unchanged. Traditional Chinese is converted to Simplified Chinese before
    being passed to the translation model.

    Returns:
        A dict of the form ``{"translated_text": <str>}``.

    Raises:
        HTTPException: 400 if *text* is empty or no translator is loaded for
            the detected language; 500 if detection, conversion or
            translation fails unexpectedly.
    """
    if not text:
        raise HTTPException(status_code=400, detail="Text input is required")
    try:
        # Detect the source language.
        source_lang = detect_language(text)
        logger.info(f"Detected source language: {source_lang}")

        # Already English, or an unsupported language: return the input as-is.
        if source_lang == "en":
            return {"translated_text": text}

        # The Chinese model is fed Simplified text, so convert Traditional first.
        input_text = text
        if source_lang == "zh-TW":
            input_text = converter.convert(text)
            logger.info("Converted Traditional Chinese to Simplified Chinese")

        # Translate into English.
        translator = translators.get(source_lang)
        if translator is None:
            raise HTTPException(status_code=400, detail=f"Translation not supported for language: {source_lang}")
        result = translator(input_text)
        return {"translated_text": result[0]["translation_text"]}
    except HTTPException:
        # Deliberate HTTP errors (such as the 400 above) must reach the client
        # unchanged; the generic handler below would turn them into a 500.
        raise
    except Exception as e:
        logger.error(f"Processing failed: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Processing failed: {str(e)}") from e