feliksius committed on
Commit
e8b7c49
·
verified ·
1 Parent(s): 373ff5a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -11
app.py CHANGED
@@ -4,56 +4,115 @@ import langdetect
4
  import logging
5
  import os
6
 
 
 
7
  os.environ["HF_HOME"] = "/app/cache"
8
  os.environ["TRANSFORMERS_CACHE"] = "/app/cache"
9
 
10
  app = FastAPI()
11
 
 
12
  logging.basicConfig(level=logging.INFO)
13
  logger = logging.getLogger(__name__)
14
 
 
15
  MODEL_MAP = {
16
  "id": "Helsinki-NLP/opus-mt-id-en",
17
  "th": "Helsinki-NLP/opus-mt-th-en",
18
  "fr": "Helsinki-NLP/opus-mt-fr-en",
19
  "es": "Helsinki-NLP/opus-mt-es-en",
20
  "ja": "Helsinki-NLP/opus-mt-ja-en",
21
- "zh": "Helsinki-NLP/opus-mt-zh-en", # Satu entri untuk Mandarin (Sederhana dan Tradisional)
 
22
  "vi": "Helsinki-NLP/opus-mt-vi-en",
23
  }
24
 
25
  translators = {}
26
  try:
 
27
  for lang, model_name in MODEL_MAP.items():
28
- logger.info(f"Loading model for {lang}...")
 
 
29
  translators[lang] = pipeline("translation", model=model_name)
30
- logger.info(f"Model for {lang} loaded successfully")
31
  except Exception as e:
 
32
  logger.error(f"Model initialization failed: {str(e)}")
 
33
  raise Exception(f"Model initialization failed: {str(e)}")
34
 
 
 
 
 
 
 
 
35
  def detect_language(text: str) -> str:
36
  try:
37
- lang = langdetect.detect(text)
38
- return lang if lang in MODEL_MAP else "en"
 
 
 
 
 
 
 
 
 
 
39
  except Exception as e:
40
- logger.warning(f"Language detection failed: {str(e)}, defaulting to English")
41
  return "en"
42
 
 
 
 
 
 
43
  @app.post("/translate")
44
  async def translate(text: str):
 
 
 
 
45
  if not text:
46
- raise HTTPException(status_code=400, detail="Text input is required")
 
47
  try:
 
48
  source_lang = detect_language(text)
49
- logger.info(f"Detected source language: {source_lang}")
 
 
50
  if source_lang == "en":
 
51
  return {"translated_text": text}
 
 
52
  translator = translators.get(source_lang)
 
 
53
  if not translator:
54
- raise HTTPException(status_code=400, detail=f"Translation not supported for language: {source_lang}")
 
 
 
 
 
 
 
55
  result = translator(text)
56
- return {"translated_text": result[0]["translation_text"]}
 
 
 
 
 
 
57
  except Exception as e:
58
- logger.error(f"Processing failed: {str(e)}")
 
59
  raise HTTPException(status_code=500, detail=f"Processing failed: {str(e)}")
 
4
  import logging
5
  import os
6
 
# Point the Hugging Face cache at a directory inside the container so that
# downloaded models are cached in a known location in the Space.
# NOTE(review): these variables presumably only take effect if they are set
# before the Hugging Face libraries are imported; not all imports are visible
# here, so confirm the ordering.
os.environ["HF_HOME"] = "/app/cache"
os.environ["TRANSFORMERS_CACHE"] = "/app/cache"

app = FastAPI()

# INFO-level logging so that model-loading and per-request messages show up
# in the Space logs.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Supported source languages, each mapped to its "<language> -> English" model.
MODEL_MAP = {
    "id": "Helsinki-NLP/opus-mt-id-en",
    "th": "Helsinki-NLP/opus-mt-th-en",
    "fr": "Helsinki-NLP/opus-mt-fr-en",
    "es": "Helsinki-NLP/opus-mt-es-en",
    "ja": "Helsinki-NLP/opus-mt-ja-en",
    # Single entry for Chinese; detected regional codes are normalised to "zh".
    "zh": "Helsinki-NLP/opus-mt-zh-en",
    "vi": "Helsinki-NLP/opus-mt-vi-en",
}

# Translation pipelines keyed by source-language code. Every model is loaded
# eagerly when the module is imported, i.e. at application start-up.
translators = {}
try:
    for lang, model_name in MODEL_MAP.items():
        logger.info(f"Loading model for {lang} from {model_name}...")
        # No device is specified: free Hugging Face Spaces usually run on CPU,
        # so the pipeline default is left in place.
        translators[lang] = pipeline("translation", model=model_name)
        logger.info(f"Model for {lang} loaded successfully.")
except Exception as e:
    # The application cannot work without its models, so abort start-up.
    # logger.exception keeps the traceback in the log, and the explicit cause
    # keeps the original error attached to the one that is raised.
    logger.exception(f"Model initialization failed: {str(e)}")
    raise RuntimeError(f"Model initialization failed: {str(e)}") from e
44
 
# ---------------------------------------------------------------------------
# Improved language detection
#
# This is the key part of the fix: the Chinese language codes that
# `langdetect` may report are normalised to the single "zh" key.
# ---------------------------------------------------------------------------
def detect_language(text: str) -> str:
    """Return the ``MODEL_MAP`` key for the language of *text*, or ``"en"``.

    Any detected code starting with ``"zh"`` (e.g. ``"zh-cn"``, ``"zh-tw"``)
    is collapsed to ``"zh"`` so that it matches the key in ``MODEL_MAP``.
    A detected language that is not in ``MODEL_MAP`` is reported as ``"en"``.
    If detection itself fails, a warning is logged and ``"en"`` is returned.
    """
    try:
        detected_lang = langdetect.detect(text)
    except Exception as e:
        # Deliberate best-effort fallback: a detection failure must not fail
        # the request, so the text is treated as English.
        logger.warning(f"Language detection failed for text: '{text[:50]}...' Error: {str(e)}. Defaulting to English.")
        return "en"

    # Log the raw detection result before any normalisation.
    logger.info(f"langdetect detected: '{detected_lang}' for text: '{text[:50]}...'")

    # Regional Chinese variants all map to the single "zh" model.
    if detected_lang.startswith("zh"):
        return "zh"

    # Use the detected language when a model exists for it, otherwise
    # default to English.
    return detected_lang if detected_lang in MODEL_MAP else "en"
69
 
# ---------------------------------------------------------------------------
# Translation API endpoint
# ---------------------------------------------------------------------------
@app.post("/translate")
async def translate(text: str):
    """Translate *text* into English, detecting the source language automatically.

    Returns:
        A dict ``{"translated_text": ...}``. When the detected language is
        English, the input text is returned unchanged.

    Raises:
        HTTPException: 400 when *text* is empty or no translator exists for
            the detected language; 500 when anything else fails during
            processing.
    """
    if not text:
        raise HTTPException(status_code=400, detail="Text input is required.")

    try:
        # Detect the source language of the text.
        source_lang = detect_language(text)
        logger.info(f"Determined source language: '{source_lang}' for translation.")

        # Already English: nothing to translate.
        if source_lang == "en":
            logger.info("Source language is English, returning original text.")
            return {"translated_text": text}

        # Look up the pipeline that was loaded for this language at start-up.
        translator = translators.get(source_lang)

        # Defensive check in case no pipeline is registered for the language.
        if not translator:
            logger.error(f"No translator found for language: '{source_lang}'.")
            raise HTTPException(
                status_code=400,
                detail=f"Translation not supported for language: {source_lang}.",
            )

        logger.info(f"Translating from {source_lang}...")
        # NOTE(review): this is a synchronous call inside an async handler, so
        # it presumably blocks the event loop while the model runs. Confirm
        # whether it should be offloaded to a worker thread.
        result = translator(text)
        translated_text = result[0]["translation_text"]
        logger.info(f"Translation successful. Original: '{text[:50]}...', Translated: '{translated_text[:50]}...'")

        return {"translated_text": translated_text}
    except HTTPException:
        # Re-raise the HTTP errors raised above untouched; a bare ``raise``
        # keeps the original traceback.
        raise
    except Exception as e:
        # Any other failure is logged with its traceback and reported as 500.
        logger.error(f"An unexpected error occurred during processing: {str(e)}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Processing failed: {str(e)}") from e