feliksius committed on
Commit
b548d03
·
verified ·
1 Parent(s): 64a78bb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -28
app.py CHANGED
@@ -1,24 +1,41 @@
1
  from fastapi import FastAPI, HTTPException
2
- from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
3
  import torch
4
  from langdetect import detect, LangDetectException
5
  from pydantic import BaseModel
6
 
7
  # Inisialisasi FastAPI
8
- app = FastAPI(title="M2M100 Translation API")
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
- # Load model dan tokenizer
11
- model_name = "facebook/m2m100_418M"
12
  try:
13
- tokenizer = M2M100Tokenizer.from_pretrained(model_name)
14
- model = M2M100ForConditionalGeneration.from_pretrained(model_name)
15
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
16
- model.to(device)
17
- model.eval()
18
  except Exception as e:
19
  raise Exception(f"Gagal memuat model: {str(e)}")
20
 
21
- # Fungsi terjemahan (sama seperti sebelumnya)
 
 
 
 
 
 
 
22
  def translate_text(text: str, source_lang: str = None):
23
  try:
24
  # Validasi input teks
@@ -29,24 +46,26 @@ def translate_text(text: str, source_lang: str = None):
29
  if not source_lang:
30
  try:
31
  detected_lang = detect(text)
32
- if detected_lang not in tokenizer.supported_languages:
33
- return {"error": f"Bahasa terdeteksi '{detected_lang}' tidak didukung"}, detected_lang
 
 
34
  source_lang = detected_lang
35
  except LangDetectException:
36
- return {"error": "Gagal mendeteksi bahasa. Harap masukkan kode bahasa sumber"}, None
37
  else:
38
- if source_lang not in tokenizer.supported_languages:
39
- return {"error": f"Kode bahasa '{source_lang}' tidak didukung"}, None
 
 
40
 
41
- # Set bahasa sumber
42
- tokenizer.src_lang = source_lang
 
43
 
44
  # Encode dan terjemahkan
45
  encoded = tokenizer(text, return_tensors="pt", padding=True, truncation=True).to(device)
46
- generated_tokens = model.generate(
47
- **encoded,
48
- forced_bos_token_id=tokenizer.get_lang_id("en")
49
- )
50
  translated_text = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
51
 
52
  return {"translated_text": translated_text}, source_lang
@@ -54,19 +73,17 @@ def translate_text(text: str, source_lang: str = None):
54
  except Exception as e:
55
  return {"error": f"Terjemahan gagal: {str(e)}"}, None
56
 
57
- # Model untuk respons JSON
58
- class TranslationResponse(BaseModel):
59
- translated_text: str | None = None
60
- source_lang: str | None = None
61
- error: str | None = None
62
-
63
  # Endpoint API
64
  @app.get("/translate", response_model=TranslationResponse)
65
  async def translate(text: str, lang: str | None = None):
66
  result, detected_lang = translate_text(text, lang)
67
  if "error" in result:
68
  raise HTTPException(status_code=400, detail=result["error"])
69
- return {"translated_text": result["translated_text"], "source_lang": detected_lang}
 
 
 
 
70
 
71
  # Jalankan aplikasi (untuk pengembangan lokal)
72
  if __name__ == "__main__":
 
1
  from fastapi import FastAPI, HTTPException
2
+ from transformers import MarianMTModel, MarianTokenizer
3
  import torch
4
  from langdetect import detect, LangDetectException
5
  from pydantic import BaseModel
6
 
7
  # Inisialisasi FastAPI
8
+ app = FastAPI(title="Helsinki-NLP Translation API")
9
+
10
+ # Daftar model untuk setiap bahasa
11
+ MODEL_MAPPING = {
12
+ "th": "Helsinki-NLP/opus-mt-th-en",
13
+ "ja": "Helsinki-NLP/opus-mt-ja-en",
14
+ "zh": "Helsinki-NLP/opus-mt-zh-en",
15
+ "vi": "Helsinki-NLP/opus-mt-vi-en"
16
+ }
17
+
18
+ # Muat model dan tokenizer untuk setiap bahasa
19
+ models = {}
20
+ tokenizers = {}
21
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
22
 
 
 
23
  try:
24
+ for lang, model_name in MODEL_MAPPING.items():
25
+ tokenizers[lang] = MarianTokenizer.from_pretrained(model_name)
26
+ models[lang] = MarianMTModel.from_pretrained(model_name).to(device)
27
+ models[lang].eval()
 
28
  except Exception as e:
29
  raise Exception(f"Gagal memuat model: {str(e)}")
30
 
31
+ # Model untuk respons JSON
32
+ class TranslationResponse(BaseModel):
33
+ translated_text: str | None = None
34
+ source_lang: str | None = None
35
+ message: str | None = None
36
+ error: str | None = None
37
+
38
+ # Fungsi terjemahan
39
  def translate_text(text: str, source_lang: str = None):
40
  try:
41
  # Validasi input teks
 
46
  if not source_lang:
47
  try:
48
  detected_lang = detect(text)
49
+ if detected_lang == "en":
50
+ return {"translated_text": text, "message": "Teks sudah dalam bahasa Inggris"}, detected_lang
51
+ if detected_lang not in MODEL_MAPPING:
52
+ return {"error": f"Bahasa terdeteksi '{detected_lang}' tidak didukung. Hanya mendukung: {list(MODEL_MAPPING.keys())}"}, detected_lang
53
  source_lang = detected_lang
54
  except LangDetectException:
55
+ return {"error": "Gagal mendeteksi bahasa. Harap masukkan kode bahasa sumber (th, ja, zh, vi)"}, None
56
  else:
57
+ if source_lang == "en":
58
+ return {"translated_text": text, "message": "Teks sudah dalam bahasa Inggris"}, source_lang
59
+ if source_lang not in MODEL_MAPPING:
60
+ return {"error": f"Kode bahasa '{source_lang}' tidak didukung. Hanya mendukung: {list(MODEL_MAPPING.keys())}"}, None
61
 
62
+ # Ambil model dan tokenizer berdasarkan bahasa
63
+ tokenizer = tokenizers[source_lang]
64
+ model = models[source_lang]
65
 
66
  # Encode dan terjemahkan
67
  encoded = tokenizer(text, return_tensors="pt", padding=True, truncation=True).to(device)
68
+ generated_tokens = model.generate(**encoded)
 
 
 
69
  translated_text = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
70
 
71
  return {"translated_text": translated_text}, source_lang
 
73
  except Exception as e:
74
  return {"error": f"Terjemahan gagal: {str(e)}"}, None
75
 
 
 
 
 
 
 
76
  # Endpoint API
77
  @app.get("/translate", response_model=TranslationResponse)
78
  async def translate(text: str, lang: str | None = None):
79
  result, detected_lang = translate_text(text, lang)
80
  if "error" in result:
81
  raise HTTPException(status_code=400, detail=result["error"])
82
+ return {
83
+ "translated_text": result.get("translated_text"),
84
+ "source_lang": detected_lang,
85
+ "message": result.get("message")
86
+ }
87
 
88
  # Jalankan aplikasi (untuk pengembangan lokal)
89
  if __name__ == "__main__":