from fastapi import FastAPI, HTTPException
from transformers import MarianMTModel, MarianTokenizer
import torch
from langdetect import detect, LangDetectException
from pydantic import BaseModel
# Initialize FastAPI
app = FastAPI(title="Helsinki-NLP Translation API")
# Translation model for each supported source language
MODEL_MAPPING = {
    "th": "Helsinki-NLP/opus-mt-th-en",
    "ja": "Helsinki-NLP/opus-mt-ja-en",
    "zh": "Helsinki-NLP/opus-mt-zh-en",
    "vi": "Helsinki-NLP/opus-mt-vi-en"
}
# Load the model and tokenizer for each language
models = {}
tokenizers = {}
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
try:
    for lang, model_name in MODEL_MAPPING.items():
        tokenizers[lang] = MarianTokenizer.from_pretrained(model_name)
        models[lang] = MarianMTModel.from_pretrained(model_name).to(device)
        models[lang].eval()
except Exception as e:
    raise RuntimeError(f"Failed to load models: {e}") from e
# Schema for the JSON response
class TranslationResponse(BaseModel):
    translated_text: str | None = None
    source_lang: str | None = None
    message: str | None = None
    error: str | None = None
# Translation helper
def translate_text(text: str, source_lang: str | None = None):
    try:
        # Validate the input text
        if not text.strip():
            return {"error": "Text must not be empty"}, None
        # Auto-detect the language if source_lang is not provided
        if not source_lang:
            try:
                detected_lang = detect(text)
                # langdetect reports Chinese as "zh-cn"/"zh-tw"; keep only the base code
                detected_lang = detected_lang.split("-")[0]
                if detected_lang == "en":
                    return {"translated_text": text, "message": "Text is already in English"}, detected_lang
                if detected_lang not in MODEL_MAPPING:
                    return {"error": f"Detected language '{detected_lang}' is not supported. Supported languages: {list(MODEL_MAPPING.keys())}"}, detected_lang
                source_lang = detected_lang
            except LangDetectException:
                return {"error": "Could not detect the language. Please provide a source language code (th, ja, zh, vi)"}, None
        else:
            if source_lang == "en":
                return {"translated_text": text, "message": "Text is already in English"}, source_lang
            if source_lang not in MODEL_MAPPING:
                return {"error": f"Language code '{source_lang}' is not supported. Supported languages: {list(MODEL_MAPPING.keys())}"}, None
        # Pick the model and tokenizer for the source language
        tokenizer = tokenizers[source_lang]
        model = models[source_lang]
        # Encode the input and generate the translation
        encoded = tokenizer(text, return_tensors="pt", padding=True, truncation=True).to(device)
        with torch.no_grad():
            generated_tokens = model.generate(**encoded)
        translated_text = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
        return {"translated_text": translated_text}, source_lang
    except Exception as e:
        return {"error": f"Translation failed: {e}"}, None
# API endpoint
@app.get("/translate", response_model=TranslationResponse)
async def translate(text: str, lang: str | None = None):
    result, detected_lang = translate_text(text, lang)
    if "error" in result:
        raise HTTPException(status_code=400, detail=result["error"])
    return {
        "translated_text": result.get("translated_text"),
        "source_lang": detected_lang,
        "message": result.get("message")
    }
# Run the application (for local development)
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
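# Example of starting and calling the API locally (illustrative; "main" assumes this file is saved as main.py):
#   uvicorn main:app --host 0.0.0.0 --port 8000
#   curl -G "http://localhost:8000/translate" --data-urlencode "text=こんにちは" --data-urlencode "lang=ja"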