Translator / app.py
feliksius's picture
Update app.py
129257a verified
raw
history blame
3.02 kB
from fastapi import FastAPI, HTTPException
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
import langdetect
import logging
import os
# Cache directory for Hugging Face downloads (model weights and tokenizer files).
HF_CACHE_DIR = "/app/cache"
os.environ["HF_HOME"] = HF_CACHE_DIR
os.environ["TRANSFORMERS_CACHE"] = HF_CACHE_DIR

app = FastAPI()

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Load the M2M100 model and tokenizer once at import time so that every request
# reuses the same objects. A failure here aborts application start-up.
try:
    logger.info("Loading translation model...")
    model_name = "facebook/m2m100_418M"
    # cache_dir is passed explicitly: the environment variables above are
    # assigned after `transformers` has already been imported, so they may be
    # too late to change the library's default cache location.
    model = M2M100ForConditionalGeneration.from_pretrained(
        model_name, cache_dir=HF_CACHE_DIR
    )
    tokenizer = M2M100Tokenizer.from_pretrained(model_name, cache_dir=HF_CACHE_DIR)
    logger.info("Model loaded successfully")
except Exception as e:
    # logger.exception records the traceback; `from e` keeps the original
    # cause attached to the start-up error.
    logger.exception("Failed to load model: %s", e)
    raise RuntimeError(f"Model initialization failed: {e}") from e
# Maps language codes reported by langdetect to the M2M100 codes this service
# translates from. Anything not listed here is treated as English.
LANGDETECT_TO_M2M100 = {
    "id": "id",  # Indonesian
    "fr": "fr",  # French
    "es": "es",  # Spanish
    "de": "de",  # German
    "ja": "ja",  # Japanese
    # langdetect reports Chinese as "zh-cn" / "zh-tw" rather than plain "zh";
    # without these entries Chinese text would fall through to the English default.
    "zh-cn": "zh",  # Chinese (Simplified)
    "zh-tw": "zh",  # Chinese (Traditional)
    "zh": "zh",  # Chinese (kept from the original table)
    "ru": "ru",  # Russian
    "th": "th",  # Thai
}


def detect_language(text: str) -> str:
    """Detect the language of *text* with langdetect.

    Args:
        text: The text whose language should be detected.

    Returns:
        The M2M100 language code for the detected language, or ``"en"`` when
        the language is not in ``LANGDETECT_TO_M2M100`` or detection fails.
    """
    try:
        lang = langdetect.detect(text)
    except Exception as e:
        # Best effort: a detection failure is logged and treated as English
        # instead of failing the request.
        logger.warning("Language detection failed: %s, defaulting to English", e)
        return "en"
    return LANGDETECT_TO_M2M100.get(lang, "en")
def translate_to_english(text: str, source_lang: str) -> str:
    """Translate *text* from *source_lang* into English using M2M100.

    Args:
        text: The text to translate.
        source_lang: Language code assigned to ``tokenizer.src_lang``.

    Returns:
        The decoded translation of the first generated sequence.

    Raises:
        HTTPException: With status 500 if tokenization, generation or
            decoding fails; the original exception is attached as the cause.
    """
    try:
        # The tokenizer is a shared module-level object; the source language
        # is set on it before each encode.
        tokenizer.src_lang = source_lang
        encoded = tokenizer(text, return_tensors="pt")
        # Forcing the first generated token to the English language id makes
        # the model produce English output.
        generated_tokens = model.generate(
            **encoded, forced_bos_token_id=tokenizer.get_lang_id("en")
        )
        decoded = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
        return decoded[0]
    except Exception as e:
        # Record the traceback and keep the cause chained to the HTTP error.
        logger.exception("Translation failed: %s", e)
        raise HTTPException(status_code=500, detail=f"Translation failed: {e}") from e
@app.post("/translate")
async def translate(text: str):
    """Translate the submitted text into English.

    Args:
        text: The text to translate, declared as a plain ``str`` parameter.

    Returns:
        A dict ``{"translated_text": ...}``. Text detected as English is
        returned unchanged.

    Raises:
        HTTPException: 400 when *text* is empty; 500 when processing fails.
    """
    if not text:
        raise HTTPException(status_code=400, detail="Text input is required")
    try:
        source_lang = detect_language(text)
        logger.info(f"Detected source language: {source_lang}")
        # Already English: nothing to translate, return the input as-is.
        if source_lang == "en":
            return {"translated_text": text}
        translated_text = translate_to_english(text, source_lang)
        return {"translated_text": translated_text}
    except HTTPException:
        # translate_to_english already raises a well-formed HTTP error;
        # re-raise it untouched instead of wrapping it a second time, which
        # would nest the detail message and overwrite its status code.
        raise
    except Exception as e:
        logger.exception("Processing failed: %s", e)
        raise HTTPException(status_code=500, detail=f"Processing failed: {e}") from e