feliksius committed on
Commit
d94a3aa
·
verified ·
1 Parent(s): b4b1ecb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -34
app.py CHANGED
@@ -3,9 +3,7 @@ from transformers import pipeline
3
  import langdetect
4
  import logging
5
  import os
6
- import opencc # Untuk konversi Mandarin Sederhana/Tradisional
7
 
8
- # Atur direktori cache
9
  os.environ["HF_HOME"] = "/app/cache"
10
  os.environ["TRANSFORMERS_CACHE"] = "/app/cache"
11
 
@@ -14,19 +12,16 @@ app = FastAPI()
14
  logging.basicConfig(level=logging.INFO)
15
  logger = logging.getLogger(__name__)
16
 
17
- # Daftar model untuk bahasa yang didukung
18
  MODEL_MAP = {
19
- "id": "Helsinki-NLP/opus-mt-id-en", # Indonesia ke Inggris
20
- "th": "Helsinki-NLP/opus-mt-th-en", # Thailand ke Inggris
21
- "fr": "Helsinki-NLP/opus-mt-fr-en", # Prancis ke Inggris
22
- "es": "Helsinki-NLP/opus-mt-es-en", # Spanyol ke Inggris
23
- "ja": "Helsinki-NLP/opus-mt-ja-en", # Jepang ke Inggris
24
- "zh-CN": "Helsinki-NLP/opus-mt-zh-en", # Mandarin Sederhana ke Inggris
25
- "zh-TW": "Helsinki-NLP/opus-mt-zh-en", # Mandarin Tradisional ke Inggris
26
- "vi": "Helsinki-NLP/opus-mt-vi-en", # Vietnam ke Inggris
27
  }
28
 
29
- # Inisialisasi pipeline untuk setiap model
30
  translators = {}
31
  try:
32
  for lang, model_name in MODEL_MAP.items():
@@ -37,22 +32,10 @@ except Exception as e:
37
  logger.error(f"Model initialization failed: {str(e)}")
38
  raise Exception(f"Model initialization failed: {str(e)}")
39
 
40
- # Inisialisasi konverter OpenCC untuk Mandarin Tradisional ke Sederhana
41
- converter = opencc.OpenCC('t2s') # Tradisional ke Sederhana
42
-
43
- def is_traditional_chinese(text: str) -> bool:
44
- """Cek apakah teks menggunakan karakter Mandarin Tradisional."""
45
- traditional_chars = set('繁體字') # Contoh karakter Tradisional
46
- return any(char in traditional_chars for char in text)
47
-
48
  def detect_language(text: str) -> str:
49
- """Deteksi bahasa dari teks menggunakan langdetect."""
50
  try:
51
  lang = langdetect.detect(text)
52
- # Jika terdeteksi sebagai Mandarin (zh), cek apakah Tradisional atau Sederhana
53
- if lang == "zh":
54
- return "zh-TW" if is_traditional_chinese(text) else "zh-CN"
55
- return lang if lang in MODEL_MAP else "en" # Default ke Inggris jika tidak didukung
56
  except Exception as e:
57
  logger.warning(f"Language detection failed: {str(e)}, defaulting to English")
58
  return "en"
@@ -62,22 +45,14 @@ async def translate(text: str):
62
  if not text:
63
  raise HTTPException(status_code=400, detail="Text input is required")
64
  try:
65
- # Deteksi bahasa
66
  source_lang = detect_language(text)
67
  logger.info(f"Detected source language: {source_lang}")
68
- # Jika sudah Inggris atau bahasa tidak didukung, kembalikan teks asli
69
  if source_lang == "en":
70
  return {"translated_text": text}
71
- # Jika Mandarin Tradisional, konversi ke Sederhana
72
- input_text = text
73
- if source_lang == "zh-TW":
74
- input_text = converter.convert(text)
75
- logger.info("Converted Traditional Chinese to Simplified Chinese")
76
- # Terjemahkan ke Inggris
77
  translator = translators.get(source_lang)
78
  if not translator:
79
  raise HTTPException(status_code=400, detail=f"Translation not supported for language: {source_lang}")
80
- result = translator(input_text)
81
  return {"translated_text": result[0]["translation_text"]}
82
  except Exception as e:
83
  logger.error(f"Processing failed: {str(e)}")
 
3
  import langdetect
4
  import logging
5
  import os
 
6
 
 
7
  os.environ["HF_HOME"] = "/app/cache"
8
  os.environ["TRANSFORMERS_CACHE"] = "/app/cache"
9
 
 
12
  logging.basicConfig(level=logging.INFO)
13
  logger = logging.getLogger(__name__)
14
 
 
15
  MODEL_MAP = {
16
+ "id": "Helsinki-NLP/opus-mt-id-en",
17
+ "th": "Helsinki-NLP/opus-mt-th-en",
18
+ "fr": "Helsinki-NLP/opus-mt-fr-en",
19
+ "es": "Helsinki-NLP/opus-mt-es-en",
20
+ "ja": "Helsinki-NLP/opus-mt-ja-en",
21
+ "zh": "Helsinki-NLP/opus-mt-zh-en", # Gunakan zh untuk Sederhana dan Tradisional
22
+ "vi": "Helsinki-NLP/opus-mt-vi-en",
 
23
  }
24
 
 
25
  translators = {}
26
  try:
27
  for lang, model_name in MODEL_MAP.items():
 
32
  logger.error(f"Model initialization failed: {str(e)}")
33
  raise Exception(f"Model initialization failed: {str(e)}")
34
 
 
 
 
 
 
 
 
 
35
  def detect_language(text: str) -> str:
 
36
  try:
37
  lang = langdetect.detect(text)
38
+ return lang if lang in MODEL_MAP else "en"
 
 
 
39
  except Exception as e:
40
  logger.warning(f"Language detection failed: {str(e)}, defaulting to English")
41
  return "en"
 
45
  if not text:
46
  raise HTTPException(status_code=400, detail="Text input is required")
47
  try:
 
48
  source_lang = detect_language(text)
49
  logger.info(f"Detected source language: {source_lang}")
 
50
  if source_lang == "en":
51
  return {"translated_text": text}
 
 
 
 
 
 
52
  translator = translators.get(source_lang)
53
  if not translator:
54
  raise HTTPException(status_code=400, detail=f"Translation not supported for language: {source_lang}")
55
+ result = translator(text)
56
  return {"translated_text": result[0]["translation_text"]}
57
  except Exception as e:
58
  logger.error(f"Processing failed: {str(e)}")