trans / app.py
feliksius's picture
Update app.py
b548d03 verified
import asyncio

import torch
from fastapi import FastAPI, HTTPException
from langdetect import detect, LangDetectException
from pydantic import BaseModel
from transformers import MarianMTModel, MarianTokenizer
# Create the FastAPI application object that the route decorator below attaches to.
app = FastAPI(
    title="Helsinki-NLP Translation API",
)
# Source-language code -> Hugging Face model id. Every entry follows the
# "Helsinki-NLP/opus-mt-<code>-en" naming pattern, so the table is derived
# from the list of language codes (insertion order: th, ja, zh, vi).
MODEL_MAPPING = {
    code: f"Helsinki-NLP/opus-mt-{code}-en"
    for code in ("th", "ja", "zh", "vi")
}
# Load the model and tokenizer for every language in MODEL_MAPPING at import
# time, keyed by language code, and move each model to the selected device.
models = {}
tokenizers = {}
# Use the GPU when torch reports one is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
try:
    for lang, model_name in MODEL_MAPPING.items():
        tokenizers[lang] = MarianTokenizer.from_pretrained(model_name)
        models[lang] = MarianMTModel.from_pretrained(model_name).to(device)
        # Switch to evaluation mode; the models are only used for inference.
        models[lang].eval()
except Exception as e:
    # Raise a specific exception type (still caught by `except Exception`)
    # and chain the original error explicitly so its traceback is preserved.
    raise RuntimeError(f"Gagal memuat model: {e}") from e
# Schema of the JSON response body.
# Comments are used instead of a class docstring so that no extra text is
# introduced into the model definition itself.
class TranslationResponse(BaseModel):
    # All four fields are optional strings that default to None.
    translated_text: str | None = None  # translated (or passed-through) text
    source_lang: str | None = None      # language code of the input text
    message: str | None = None          # optional informational note
    error: str | None = None            # optional error description
# Fungsi terjemahan
def translate_text(text: str, source_lang: str = None):
try:
# Validasi input teks
if not text.strip():
return {"error": "Teks tidak boleh kosong"}, None
# Autodeteksi bahasa jika source_lang tidak diberikan
if not source_lang:
try:
detected_lang = detect(text)
if detected_lang == "en":
return {"translated_text": text, "message": "Teks sudah dalam bahasa Inggris"}, detected_lang
if detected_lang not in MODEL_MAPPING:
return {"error": f"Bahasa terdeteksi '{detected_lang}' tidak didukung. Hanya mendukung: {list(MODEL_MAPPING.keys())}"}, detected_lang
source_lang = detected_lang
except LangDetectException:
return {"error": "Gagal mendeteksi bahasa. Harap masukkan kode bahasa sumber (th, ja, zh, vi)"}, None
else:
if source_lang == "en":
return {"translated_text": text, "message": "Teks sudah dalam bahasa Inggris"}, source_lang
if source_lang not in MODEL_MAPPING:
return {"error": f"Kode bahasa '{source_lang}' tidak didukung. Hanya mendukung: {list(MODEL_MAPPING.keys())}"}, None
# Ambil model dan tokenizer berdasarkan bahasa
tokenizer = tokenizers[source_lang]
model = models[source_lang]
# Encode dan terjemahkan
encoded = tokenizer(text, return_tensors="pt", padding=True, truncation=True).to(device)
generated_tokens = model.generate(**encoded)
translated_text = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
return {"translated_text": translated_text}, source_lang
except Exception as e:
return {"error": f"Terjemahan gagal: {str(e)}"}, None
# API endpoint: GET /translate?text=...&lang=...
# `lang` is optional; when it is omitted translate_text detects the language.
# Responds with the translated text, the source language and an optional
# message, or with HTTP 400 carrying the error text from translate_text.
# Comments are used instead of a docstring so the generated API description
# is left unchanged.
@app.get("/translate", response_model=TranslationResponse)
async def translate(text: str, lang: str | None = None):
    # translate_text is synchronous and runs model inference; calling it
    # directly inside a coroutine would block the event loop for the whole
    # translation, so it is executed in a worker thread instead.
    result, detected_lang = await asyncio.to_thread(translate_text, text, lang)
    if "error" in result:
        raise HTTPException(status_code=400, detail=result["error"])
    return {
        "translated_text": result.get("translated_text"),
        "source_lang": detected_lang,
        "message": result.get("message"),
    }
# Start the server when this file is executed directly (local development).
if __name__ == "__main__":
    from uvicorn import run as serve

    serve(app, host="0.0.0.0", port=8000)