|
from fastapi import FastAPI, HTTPException |
|
from transformers import MarianMTModel, MarianTokenizer |
|
import torch |
|
from langdetect import detect, LangDetectException |
|
from pydantic import BaseModel |
|
|
|
|
|
# ASGI application instance; route handlers in this module are registered on it.
app = FastAPI(
    title="Helsinki-NLP Translation API",
)
|
|
|
|
|
# Source-language code -> Hugging Face model identifier.
# Every entry is an "opus-mt-<source>-en" checkpoint, i.e. it translates INTO English.
MODEL_MAPPING = {
    "th": "Helsinki-NLP/opus-mt-th-en",  # Thai
    "ja": "Helsinki-NLP/opus-mt-ja-en",  # Japanese
    "zh": "Helsinki-NLP/opus-mt-zh-en",  # Chinese
    "vi": "Helsinki-NLP/opus-mt-vi-en",  # Vietnamese
}
|
|
|
|
|
# Loaded models and tokenizers, keyed by the same language codes as MODEL_MAPPING.
models = {}
tokenizers = {}

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Every tokenizer/model pair is loaded eagerly at import time, so a broken or
# missing checkpoint aborts start-up instead of failing on the first request.
try:
    for lang, model_name in MODEL_MAPPING.items():
        tokenizers[lang] = MarianTokenizer.from_pretrained(model_name)
        models[lang] = MarianMTModel.from_pretrained(model_name).to(device)
        # Switch the module to evaluation (inference) mode.
        models[lang].eval()
except Exception as e:
    # Chain the original error explicitly so the root cause stays in the
    # traceback. RuntimeError is still caught by any `except Exception`.
    raise RuntimeError(f"Gagal memuat model: {e}") from e
|
|
|
|
|
class TranslationResponse(BaseModel):
    # Response schema for the /translate endpoint. Every field is optional and
    # defaults to None.

    # English translation (or the unchanged input when it is already English).
    translated_text: str | None = None

    # Source-language code that was supplied by the caller or auto-detected.
    source_lang: str | None = None

    # Optional informational note accompanying the result.
    message: str | None = None

    # Error description.
    error: str | None = None
|
|
|
|
|
def translate_text(text: str, source_lang: str = None): |
|
try: |
|
|
|
if not text.strip(): |
|
return {"error": "Teks tidak boleh kosong"}, None |
|
|
|
|
|
if not source_lang: |
|
try: |
|
detected_lang = detect(text) |
|
if detected_lang == "en": |
|
return {"translated_text": text, "message": "Teks sudah dalam bahasa Inggris"}, detected_lang |
|
if detected_lang not in MODEL_MAPPING: |
|
return {"error": f"Bahasa terdeteksi '{detected_lang}' tidak didukung. Hanya mendukung: {list(MODEL_MAPPING.keys())}"}, detected_lang |
|
source_lang = detected_lang |
|
except LangDetectException: |
|
return {"error": "Gagal mendeteksi bahasa. Harap masukkan kode bahasa sumber (th, ja, zh, vi)"}, None |
|
else: |
|
if source_lang == "en": |
|
return {"translated_text": text, "message": "Teks sudah dalam bahasa Inggris"}, source_lang |
|
if source_lang not in MODEL_MAPPING: |
|
return {"error": f"Kode bahasa '{source_lang}' tidak didukung. Hanya mendukung: {list(MODEL_MAPPING.keys())}"}, None |
|
|
|
|
|
tokenizer = tokenizers[source_lang] |
|
model = models[source_lang] |
|
|
|
|
|
encoded = tokenizer(text, return_tensors="pt", padding=True, truncation=True).to(device) |
|
generated_tokens = model.generate(**encoded) |
|
translated_text = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0] |
|
|
|
return {"translated_text": translated_text}, source_lang |
|
|
|
except Exception as e: |
|
return {"error": f"Terjemahan gagal: {str(e)}"}, None |
|
|
|
|
|
@app.get("/translate", response_model=TranslationResponse)
async def translate(text: str, lang: str | None = None):
    """Translate the ``text`` query parameter into English.

    Args:
        text: Text to translate.
        lang: Optional source-language code; when omitted the language is
            auto-detected by ``translate_text``.

    Returns:
        A dict with ``translated_text``, ``source_lang`` and ``message``,
        serialized through ``TranslationResponse``.

    Raises:
        HTTPException: With status 400 and the error text as ``detail`` when
            ``translate_text`` reports an error.
    """
    # Function-scope import keeps this handler self-contained.
    import asyncio

    # Tokenization and generation are blocking, compute-heavy calls. Running
    # them directly inside this coroutine would stall the event loop (and
    # every other in-flight request) for the duration of the translation, so
    # the work is handed to a worker thread instead.
    result, detected_lang = await asyncio.to_thread(translate_text, text, lang)

    if "error" in result:
        # NOTE(review): internal failures reported by translate_text also end
        # up here as 400; the payload does not distinguish them from bad input.
        raise HTTPException(status_code=400, detail=result["error"])

    return {
        "translated_text": result.get("translated_text"),
        "source_lang": detected_lang,
        "message": result.get("message"),
    }
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: serve the app with uvicorn when this file is run
    # directly. uvicorn is imported here so that merely importing this module
    # does not require it.
    import uvicorn

    # Host 0.0.0.0 listens on all network interfaces.
    server_options = {"host": "0.0.0.0", "port": 8000}
    uvicorn.run(app, **server_options)