feliksius committed on
Commit
a371d81
·
verified ·
1 Parent(s): 0b1a01c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -32
app.py CHANGED
@@ -3,7 +3,6 @@ from transformers import pipeline
3
  import langdetect
4
  import logging
5
  import os
6
- import opencc # Untuk konversi Mandarin Tradisional ke Sederhana
7
 
8
  os.environ["HF_HOME"] = "/app/cache"
9
  os.environ["TRANSFORMERS_CACHE"] = "/app/cache"
@@ -14,14 +13,13 @@ logging.basicConfig(level=logging.INFO)
14
  logger = logging.getLogger(__name__)
15
 
16
  MODEL_MAP = {
17
- "id": "Helsinki-NLP/opus-mt-id-en", # Indonesia ke Inggris
18
- "th": "Helsinki-NLP/opus-mt-th-en", # Thailand ke Inggris
19
- "fr": "Helsinki-NLP/opus-mt-fr-en", # Prancis ke Inggris
20
- "es": "Helsinki-NLP/opus-mt-es-en", # Spanyol ke Inggris
21
- "ja": "Helsinki-NLP/opus-mt-ja-en", # Jepang ke Inggris
22
- "zh-CN": "Helsinki-NLP/opus-mt-zh-en", # Mandarin Sederhana ke Inggris
23
- "zh-TW": "Helsinki-NLP/opus-mt-zh-en", # Mandarin Tradisional ke Inggris
24
- "vi": "Helsinki-NLP/opus-mt-vi-en", # Vietnam ke Inggris
25
  }
26
 
27
  translators = {}
@@ -34,23 +32,10 @@ except Exception as e:
34
  logger.error(f"Model initialization failed: {str(e)}")
35
  raise Exception(f"Model initialization failed: {str(e)}")
36
 
37
- # Inisialisasi konverter OpenCC untuk Tradisional ke Sederhana
38
- converter = opencc.OpenCC('t2s') # t2s = Traditional to Simplified
39
-
40
- def is_traditional_chinese(text: str) -> bool:
41
- """Cek apakah teks menggunakan karakter Mandarin Tradisional."""
42
- # Contoh sederhana: deteksi beberapa karakter Tradisional
43
- traditional_chars = set('繁體字') # Bisa diperluas dengan daftar karakter yang lebih lengkap
44
- return any(char in traditional_chars for char in text)
45
-
46
  def detect_language(text: str) -> str:
47
- """Deteksi bahasa dari teks menggunakan langdetect."""
48
  try:
49
  lang = langdetect.detect(text)
50
- # Jika terdeteksi sebagai Mandarin (zh), cek apakah Tradisional atau Sederhana
51
- if lang == "zh":
52
- return "zh-TW" if is_traditional_chinese(text) else "zh-CN"
53
- return lang if lang in MODEL_MAP else "en" # Default ke Inggris jika tidak didukung
54
  except Exception as e:
55
  logger.warning(f"Language detection failed: {str(e)}, defaulting to English")
56
  return "en"
@@ -60,22 +45,14 @@ async def translate(text: str):
60
  if not text:
61
  raise HTTPException(status_code=400, detail="Text input is required")
62
  try:
63
- # Deteksi bahasa
64
  source_lang = detect_language(text)
65
  logger.info(f"Detected source language: {source_lang}")
66
- # Jika sudah Inggris, kembalikan teks asli
67
  if source_lang == "en":
68
  return {"translated_text": text}
69
- # Jika Mandarin Tradisional, konversi ke Sederhana
70
- input_text = text
71
- if source_lang == "zh-TW":
72
- input_text = converter.convert(text)
73
- logger.info("Converted Traditional Chinese to Simplified Chinese")
74
- # Terjemahkan ke Inggris
75
  translator = translators.get(source_lang)
76
  if not translator:
77
  raise HTTPException(status_code=400, detail=f"Translation not supported for language: {source_lang}")
78
- result = translator(input_text)
79
  return {"translated_text": result[0]["translation_text"]}
80
  except Exception as e:
81
  logger.error(f"Processing failed: {str(e)}")
 
3
  import langdetect
4
  import logging
5
  import os
 
6
 
7
  os.environ["HF_HOME"] = "/app/cache"
8
  os.environ["TRANSFORMERS_CACHE"] = "/app/cache"
 
13
  logger = logging.getLogger(__name__)
14
 
15
  MODEL_MAP = {
16
+ "id": "Helsinki-NLP/opus-mt-id-en",
17
+ "th": "Helsinki-NLP/opus-mt-th-en",
18
+ "fr": "Helsinki-NLP/opus-mt-fr-en",
19
+ "es": "Helsinki-NLP/opus-mt-es-en",
20
+ "ja": "Helsinki-NLP/opus-mt-ja-en",
21
+ "zh": "Helsinki-NLP/opus-mt-zh-en", # Single entry for Mandarin (Simplified and Traditional)
22
+ "vi": "Helsinki-NLP/opus-mt-vi-en",
 
23
  }
24
 
25
  translators = {}
 
32
  logger.error(f"Model initialization failed: {str(e)}")
33
  raise Exception(f"Model initialization failed: {str(e)}")
34
 
 
 
 
 
 
 
 
 
 
35
  def detect_language(text: str) -> str:
 
36
  try:
37
  lang = langdetect.detect(text)
38
+ return lang if lang in MODEL_MAP else "en"
 
 
 
39
  except Exception as e:
40
  logger.warning(f"Language detection failed: {str(e)}, defaulting to English")
41
  return "en"
 
45
  if not text:
46
  raise HTTPException(status_code=400, detail="Text input is required")
47
  try:
 
48
  source_lang = detect_language(text)
49
  logger.info(f"Detected source language: {source_lang}")
 
50
  if source_lang == "en":
51
  return {"translated_text": text}
 
 
 
 
 
 
52
  translator = translators.get(source_lang)
53
  if not translator:
54
  raise HTTPException(status_code=400, detail=f"Translation not supported for language: {source_lang}")
55
+ result = translator(text)
56
  return {"translated_text": result[0]["translation_text"]}
57
  except Exception as e:
58
  logger.error(f"Processing failed: {str(e)}")