File size: 3,557 Bytes
919c43d
b548d03
85186fc
123a391
919c43d
 
 
b548d03
 
 
 
 
 
 
 
 
 
 
 
 
 
85186fc
 
b548d03
 
 
 
85186fc
 
 
b548d03
 
 
 
 
 
 
 
919c43d
85186fc
919c43d
 
 
 
123a391
 
 
 
b548d03
 
 
 
123a391
 
b548d03
123a391
b548d03
 
 
 
85186fc
b548d03
 
 
85186fc
123a391
 
b548d03
85186fc
 
919c43d
85186fc
 
919c43d
123a391
919c43d
 
 
 
 
 
b548d03
 
 
 
 
85186fc
919c43d
123a391
919c43d
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
from fastapi import FastAPI, HTTPException
from transformers import MarianMTModel, MarianTokenizer
import torch
from langdetect import detect, LangDetectException
from pydantic import BaseModel

# Create the FastAPI application object that the route decorators attach to.
app = FastAPI(
    title="Helsinki-NLP Translation API",
)

# Source-language code -> Helsinki-NLP checkpoint that translates that language
# into English. Every checkpoint follows the same naming pattern, e.g.
# "Helsinki-NLP/opus-mt-th-en" for Thai.
MODEL_MAPPING = {
    code: f"Helsinki-NLP/opus-mt-{code}-en"
    for code in ("th", "ja", "zh", "vi")
}

# Load the tokenizer and model for every supported language once, at import
# time, so requests never pay the loading cost.
models = {}
tokenizers = {}
# Use the GPU when torch reports one is available; otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

try:
    for lang, model_name in MODEL_MAPPING.items():
        tokenizers[lang] = MarianTokenizer.from_pretrained(model_name)
        models[lang] = MarianMTModel.from_pretrained(model_name).to(device)
        # The models are only used for inference, so put them in eval mode.
        models[lang].eval()
except Exception as e:
    # Chain explicitly so the traceback presents the loader failure as the
    # direct cause. RuntimeError is still caught by `except Exception`.
    raise RuntimeError(f"Gagal memuat model: {e}") from e

# Schema of the JSON response returned by the /translate endpoint.
# Every field is optional and defaults to None.
class TranslationResponse(BaseModel):
    # The English text (or the unchanged input when it is already English).
    translated_text: str | None = None
    # Language code of the input, as supplied by the caller or auto-detected.
    source_lang: str | None = None
    # Informational note; set when the input was already English.
    message: str | None = None
    # Error description. NOTE(review): the endpoint visible in this file raises
    # HTTPException on errors instead of filling this field.
    error: str | None = None

# Fungsi terjemahan
def translate_text(text: str, source_lang: str = None):
    try:
        # Validasi input teks
        if not text.strip():
            return {"error": "Teks tidak boleh kosong"}, None

        # Autodeteksi bahasa jika source_lang tidak diberikan
        if not source_lang:
            try:
                detected_lang = detect(text)
                if detected_lang == "en":
                    return {"translated_text": text, "message": "Teks sudah dalam bahasa Inggris"}, detected_lang
                if detected_lang not in MODEL_MAPPING:
                    return {"error": f"Bahasa terdeteksi '{detected_lang}' tidak didukung. Hanya mendukung: {list(MODEL_MAPPING.keys())}"}, detected_lang
                source_lang = detected_lang
            except LangDetectException:
                return {"error": "Gagal mendeteksi bahasa. Harap masukkan kode bahasa sumber (th, ja, zh, vi)"}, None
        else:
            if source_lang == "en":
                return {"translated_text": text, "message": "Teks sudah dalam bahasa Inggris"}, source_lang
            if source_lang not in MODEL_MAPPING:
                return {"error": f"Kode bahasa '{source_lang}' tidak didukung. Hanya mendukung: {list(MODEL_MAPPING.keys())}"}, None

        # Ambil model dan tokenizer berdasarkan bahasa
        tokenizer = tokenizers[source_lang]
        model = models[source_lang]

        # Encode dan terjemahkan
        encoded = tokenizer(text, return_tensors="pt", padding=True, truncation=True).to(device)
        generated_tokens = model.generate(**encoded)
        translated_text = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]

        return {"translated_text": translated_text}, source_lang

    except Exception as e:
        return {"error": f"Terjemahan gagal: {str(e)}"}, None

# API endpoint
@app.get("/translate", response_model=TranslationResponse)
async def translate(text: str, lang: str | None = None):
    """Translate the ``text`` query parameter into English.

    Args:
        text: The text to translate.
        lang: Optional source-language code; when omitted the language is
            auto-detected by ``translate_text``.

    Returns:
        A dict with ``translated_text``, ``source_lang`` and ``message``,
        serialized through ``TranslationResponse``.

    Raises:
        HTTPException: Status 400 with the error text whenever
            ``translate_text`` reports an error.
    """
    # Function-scope import keeps this block self-contained (stdlib only).
    import asyncio

    # translate_text runs tokenization and model generation synchronously.
    # Calling it directly inside this coroutine would block the event loop for
    # the whole inference, so run it in a worker thread instead.
    result, detected_lang = await asyncio.to_thread(translate_text, text, lang)
    if "error" in result:
        raise HTTPException(status_code=400, detail=result["error"])
    return {
        "translated_text": result.get("translated_text"),
        "source_lang": detected_lang,
        "message": result.get("message")
    }

# Start a local development server when this file is executed directly.
if __name__ == "__main__":
    import uvicorn

    # Listen on all network interfaces, port 8000.
    server_options = {"host": "0.0.0.0", "port": 8000}
    uvicorn.run(app, **server_options)