from fastapi import FastAPI, HTTPException
from transformers import MarianMTModel, MarianTokenizer
import torch
from langdetect import detect, LangDetectException
from pydantic import BaseModel

# Initialize FastAPI
app = FastAPI(title="Helsinki-NLP Translation API")

# Model name for each supported source language
MODEL_MAPPING = {
    "th": "Helsinki-NLP/opus-mt-th-en",
    "ja": "Helsinki-NLP/opus-mt-ja-en",
    "zh": "Helsinki-NLP/opus-mt-zh-en",
    "vi": "Helsinki-NLP/opus-mt-vi-en"
}

# Load the model and tokenizer for each language
models = {}
tokenizers = {}
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
try:
    for lang, model_name in MODEL_MAPPING.items():
        tokenizers[lang] = MarianTokenizer.from_pretrained(model_name)
        models[lang] = MarianMTModel.from_pretrained(model_name).to(device)
        models[lang].eval()
except Exception as e:
    raise Exception(f"Failed to load models: {str(e)}")

# Schema for the JSON response
class TranslationResponse(BaseModel):
    translated_text: str | None = None
    source_lang: str | None = None
    message: str | None = None
    error: str | None = None

# Translation helper
def translate_text(text: str, source_lang: str | None = None):
    try:
        # Validate the input text
        if not text.strip():
            return {"error": "Text must not be empty"}, None

        # Auto-detect the language if source_lang is not provided
        if not source_lang:
            try:
                detected_lang = detect(text)
                # langdetect reports Chinese as "zh-cn"/"zh-tw"; normalize to "zh"
                if detected_lang.startswith("zh"):
                    detected_lang = "zh"
                if detected_lang == "en":
                    return {"translated_text": text, "message": "Text is already in English"}, detected_lang
                if detected_lang not in MODEL_MAPPING:
                    return {"error": f"Detected language '{detected_lang}' is not supported. Supported languages: {list(MODEL_MAPPING.keys())}"}, detected_lang
                source_lang = detected_lang
            except LangDetectException:
                return {"error": "Language detection failed. Please provide a source language code (th, ja, zh, vi)"}, None
        else:
            if source_lang == "en":
                return {"translated_text": text, "message": "Text is already in English"}, source_lang
            if source_lang not in MODEL_MAPPING:
                return {"error": f"Language code '{source_lang}' is not supported. Supported languages: {list(MODEL_MAPPING.keys())}"}, None

        # Pick the model and tokenizer for the source language
        tokenizer = tokenizers[source_lang]
        model = models[source_lang]

        # Encode the input and generate the translation
        encoded = tokenizer(text, return_tensors="pt", padding=True, truncation=True).to(device)
        with torch.no_grad():
            generated_tokens = model.generate(**encoded)
        translated_text = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
        return {"translated_text": translated_text}, source_lang
    except Exception as e:
        return {"error": f"Translation failed: {str(e)}"}, None

# API endpoint
@app.get("/translate", response_model=TranslationResponse)
async def translate(text: str, lang: str | None = None):
    result, detected_lang = translate_text(text, lang)
    if "error" in result:
        raise HTTPException(status_code=400, detail=result["error"])
    return {
        "translated_text": result.get("translated_text"),
        "source_lang": detected_lang,
        "message": result.get("message")
    }

# Run the application (for local development)
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
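
# --- Example usage (a sketch, not part of the service) ---
# Assumes the server above is running locally on port 8000 and that the
# `requests` package is installed; the query parameters match the /translate
# endpoint defined in this file.
#
#   import requests
#
#   resp = requests.get(
#       "http://localhost:8000/translate",
#       params={"text": "สวัสดีครับ", "lang": "th"},  # omit "lang" to auto-detect
#   )
#   print(resp.json())
#   # e.g. {"translated_text": "...", "source_lang": "th", "message": null, "error": null}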