feliksius committed on
Commit
1fd0997
·
verified ·
1 Parent(s): 19dec0c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -40
app.py CHANGED
@@ -1,10 +1,10 @@
1
  from fastapi import FastAPI, HTTPException
2
- from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
3
  import langdetect
4
  import logging
5
  import os
6
 
7
- # Set the cache directory for Hugging Face
8
  os.environ["HF_HOME"] = "/app/cache"
9
  os.environ["TRANSFORMERS_CACHE"] = "/app/cache"
10
 
@@ -13,53 +13,36 @@ app = FastAPI()
13
  logging.basicConfig(level=logging.INFO)
14
  logger = logging.getLogger(__name__)
15
 
 
 
 
 
 
 
 
 
 
 
 
 
# Load the M2M100 translation model and its tokenizer once, at import time.
try:
    logger.info("Loading translation model...")
    model_name = "facebook/m2m100_418M"
    # Model first, then tokenizer, both from the same checkpoint name.
    model = M2M100ForConditionalGeneration.from_pretrained(model_name)
    tokenizer = M2M100Tokenizer.from_pretrained(model_name)
    logger.info("Model loaded successfully")
except Exception as load_error:
    # Log the failure, then abort start-up with a generic exception.
    logger.error(f"Failed to load model: {str(load_error)}")
    raise Exception(f"Model initialization failed: {str(load_error)}")
26
 
def detect_language(text: str) -> str:
    """Detect the language of ``text`` using langdetect.

    Returns the M2M100 language code for the detected language when it is
    one of the supported source languages, otherwise ``"en"``. Detection
    failures are logged and also yield ``"en"``.
    """
    # Source languages this service translates from (codes are shared
    # between langdetect's base codes and M2M100).
    supported = {"id", "fr", "es", "de", "ja", "zh", "ru", "th"}
    try:
        detected = langdetect.detect(text)
    except Exception as e:
        logger.warning(f"Language detection failed: {str(e)}, defaulting to English")
        return "en"
    # langdetect reports regional variants such as "zh-cn" / "zh-tw";
    # reduce to the base code so Chinese is matched against "zh".
    base = detected.split("-", 1)[0]
    # Fall back to English when the language is not supported.
    return base if base in supported else "en"
46
 
def translate_to_english(text: str, source_lang: str) -> str:
    """Translate ``text`` from ``source_lang`` into English using M2M100.

    Raises:
        HTTPException: status 500 when any step of the translation fails.
    """
    try:
        # Tell the tokenizer which language the input is written in.
        tokenizer.src_lang = source_lang
        model_inputs = tokenizer(text, return_tensors="pt")
        # Force the decoder to start with the English language token.
        english_token_id = tokenizer.get_lang_id("en")
        output_ids = model.generate(**model_inputs, forced_bos_token_id=english_token_id)
        decoded = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
        return decoded[0]
    except Exception as err:
        logger.error(f"Translation failed: {str(err)}")
        raise HTTPException(status_code=500, detail=f"Translation failed: {str(err)}")
62
-
63
  @app.post("/translate")
64
  async def translate(text: str):
65
  if not text:
@@ -68,12 +51,15 @@ async def translate(text: str):
68
  # Deteksi bahasa
69
  source_lang = detect_language(text)
70
  logger.info(f"Detected source language: {source_lang}")
71
- # Jika sudah Inggris, kembalikan teks asli
72
  if source_lang == "en":
73
  return {"translated_text": text}
74
  # Terjemahkan ke Inggris
75
- translated_text = translate_to_english(text, source_lang)
76
- return {"translated_text": translated_text}
 
 
 
77
  except Exception as e:
78
  logger.error(f"Processing failed: {str(e)}")
79
  raise HTTPException(status_code=500, detail=f"Processing failed: {str(e)}")
 
1
  from fastapi import FastAPI, HTTPException
2
+ from transformers import pipeline
3
  import langdetect
4
  import logging
5
  import os
6
 
7
+ # Set the cache directory
8
  os.environ["HF_HOME"] = "/app/cache"
9
  os.environ["TRANSFORMERS_CACHE"] = "/app/cache"
10
 
 
13
  logging.basicConfig(level=logging.INFO)
14
  logger = logging.getLogger(__name__)
15
 
# Supported source languages and the model used to translate each into English.
MODEL_MAP = {
    "id": "Helsinki-NLP/opus-mt-id-en",  # Indonesian to English
    "th": "Helsinki-NLP/opus-mt-th-en",  # Thai to English
    "fr": "Helsinki-NLP/opus-mt-fr-en",  # French to English
    "es": "Helsinki-NLP/opus-mt-es-en",  # Spanish to English
    "ja": "Helsinki-NLP/opus-mt-ja-en",  # Japanese to English
    "zh": "Helsinki-NLP/opus-mt-zh-en",  # Chinese to English
}

# One translation pipeline per supported language, created eagerly at import
# time; any failure aborts start-up.
translators = {}
for lang, model_name in MODEL_MAP.items():
    try:
        logger.info(f"Loading model for {lang}...")
        translators[lang] = pipeline("translation", model=model_name)
        logger.info(f"Model for {lang} loaded successfully")
    except Exception as e:
        # logger.exception records the traceback; the message names the
        # model that failed so the start-up log is actionable on its own.
        logger.exception(f"Model initialization failed for {lang} ({model_name}): {str(e)}")
        # Same exception type and message as before, with the cause chained.
        raise Exception(f"Model initialization failed: {str(e)}") from e
 
def detect_language(text: str) -> str:
    """Detect the language of ``text`` using langdetect.

    Returns a key of ``MODEL_MAP`` when the detected language has a
    translation model, otherwise ``"en"``. Detection failures are logged
    and also yield ``"en"``.
    """
    try:
        detected = langdetect.detect(text)
    except Exception as e:
        logger.warning(f"Language detection failed: {str(e)}, defaulting to English")
        return "en"
    # langdetect reports regional variants such as "zh-cn" / "zh-tw";
    # reduce to the base code so Chinese text reaches the "zh" model.
    base = detected.split("-", 1)[0]
    # Default to English when the language is not supported.
    return base if base in MODEL_MAP else "en"
45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  @app.post("/translate")
47
  async def translate(text: str):
48
  if not text:
 
51
  # Deteksi bahasa
52
  source_lang = detect_language(text)
53
  logger.info(f"Detected source language: {source_lang}")
54
+ # Jika sudah Inggris atau bahasa tidak didukung, kembalikan teks asli
55
  if source_lang == "en":
56
  return {"translated_text": text}
57
  # Terjemahkan ke Inggris
58
+ translator = translators.get(source_lang)
59
+ if not translator:
60
+ raise HTTPException(status_code=400, detail=f"Translation not supported for language: {source_lang}")
61
+ result = translator(text)
62
+ return {"translated_text": result[0]["translation_text"]}
63
  except Exception as e:
64
  logger.error(f"Processing failed: {str(e)}")
65
  raise HTTPException(status_code=500, detail=f"Processing failed: {str(e)}")