|
import gradio as gr |
|
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer |
|
import torch |
|
from langdetect import detect, LangDetectException |
|
|
|
|
|
# Hugging Face checkpoint identifier passed to both from_pretrained() calls.
model_name = "facebook/m2m100_418M"

# Load the tokenizer and model once at import time so every request reuses
# them. Any failure aborts start-up, since the app cannot translate without
# the model.
try:
    tokenizer = M2M100Tokenizer.from_pretrained(model_name)
    model = M2M100ForConditionalGeneration.from_pretrained(model_name)
    # Use the GPU when torch reports one, otherwise fall back to the CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    # Inference only: switch the module to evaluation mode.
    model.eval()
except Exception as e:
    # Chain explicitly so the traceback shows the original failure as the
    # direct cause. RuntimeError is a subclass of Exception, so existing
    # `except Exception` handlers still match.
    raise RuntimeError(f"Gagal memuat model: {e}") from e
|
|
|
|
|
def _is_supported_language(lang_code):
    """Return True if the tokenizer can map *lang_code* to a language id.

    Uses ``tokenizer.get_lang_id``, the same lookup that selects the English
    target token in ``translate_text``.

    NOTE(review): assumes ``get_lang_id`` raises ``KeyError`` for unknown
    codes, as in the transformers M2M100 tokenizer implementation -- confirm
    against the installed version.
    """
    try:
        tokenizer.get_lang_id(lang_code)
    except KeyError:
        return False
    return True


def translate_text(text, source_lang=None):
    """Translate *text* into English with the module-level M2M100 model.

    Args:
        text: The text to translate.
        source_lang: Language code of *text*. When falsy (``None`` or ``""``)
            the language is detected with ``langdetect``.

    Returns:
        A 2-tuple ``(result, language)``:

        * on success, the English translation and the source language code
          that was used;
        * when an auto-detected language is unsupported, a message and the
          detected code;
        * in every other failure case (empty text, detection failure,
          unsupported explicit code, unexpected error), a message and
          ``None``.

        The function never raises; unexpected errors are reported as
        ``"Error: ..."`` so the UI can display them.
    """
    # Nothing to translate: answer before touching the detector or the model.
    if not text or not text.strip():
        return "Teks kosong. Harap masukkan teks untuk diterjemahkan.", None

    try:
        if not source_lang:
            try:
                detected_lang = detect(text)
            except LangDetectException:
                return "Gagal mendeteksi bahasa. Harap masukkan kode bahasa sumber.", None

            # langdetect reports regional variants such as "zh-cn"/"zh-tw";
            # keep only the primary subtag so it can match a bare code.
            detected_lang = detected_lang.split("-")[0]
            if not _is_supported_language(detected_lang):
                return f"Bahasa terdeteksi '{detected_lang}' tidak didukung.", detected_lang
            source_lang = detected_lang
        elif not _is_supported_language(source_lang):
            return f"Kode bahasa '{source_lang}' tidak didukung.", None

        # The tokenizer reads src_lang when it encodes the input.
        tokenizer.src_lang = source_lang
        encoded = tokenizer(
            text, return_tensors="pt", padding=True, truncation=True
        ).to(device)

        # Force the decoder to start with the English language token.
        generated_tokens = model.generate(
            **encoded,
            forced_bos_token_id=tokenizer.get_lang_id("en"),
        )
        translated_text = tokenizer.batch_decode(
            generated_tokens, skip_special_tokens=True
        )[0]
        return translated_text, source_lang
    except Exception as e:
        # UI boundary: surface the failure as text instead of crashing the app.
        return f"Error: {e}", None
|
|
|
|
|
# Input widgets, listed in the order of translate_text's parameters
# (text, source_lang). The empty dropdown choice leaves the source language
# unset so translate_text falls back to auto-detection.
_text_input = gr.Textbox(label="Teks untuk Diterjemahkan")
_source_lang_input = gr.Dropdown(
    choices=["id", "fr", "es", "de", "ja", ""],
    label="Bahasa Sumber (kosongkan untuk autodeteksi)",
    value="",
)

# Output widgets, listed in the order of translate_text's returned pair.
_translation_output = gr.Textbox(label="Terjemahan ke Bahasa Inggris")
_detected_lang_output = gr.Textbox(label="Bahasa Sumber Terdeteksi")

# Gradio interface that wires the widgets above to translate_text.
iface = gr.Interface(
    title="M2M100 Translation to English",
    description="Masukkan teks untuk diterjemahkan ke bahasa Inggris. Biarkan bahasa sumber kosong untuk autodeteksi.",
    fn=translate_text,
    inputs=[_text_input, _source_lang_input],
    outputs=[_translation_output, _detected_lang_output],
)


# Start the web UI only when this file is run as a script, not on import.
if __name__ == "__main__":
    iface.launch()