feliksius committed on
Commit
6ddc4c2
·
verified ·
1 Parent(s): db6cecb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -62
app.py CHANGED
@@ -7,20 +7,20 @@ import logging
7
  import os
8
  from typing import Optional, Dict
9
  import re
10
- from functools import lru_cache
11
  import asyncio
 
12
 
13
- # --- 1. Konfigurasi Awal (Tetap Sama) ---
14
- # Create necessary directories
15
  os.makedirs("./cache", exist_ok=True)
16
  os.makedirs("./logs", exist_ok=True)
17
 
18
- # Set environment variables for Hugging Face cache
19
  os.environ["HF_HOME"] = "./cache"
20
  os.environ["TRANSFORMERS_CACHE"] = "./cache"
21
 
22
  # Environment configuration
23
- DEVICE = -1 # Selalu CPU untuk efisiensi di banyak environment
24
  MAX_TEXT_LENGTH = int(os.getenv("MAX_TEXT_LENGTH", "5000"))
25
 
26
  # Configure logging
@@ -44,7 +44,7 @@ PROTECTED_TERMS = ["2030 Aspirations", "Griffith"]
44
  # Cache untuk translator (pipeline)
45
  translators: Dict[str, pipeline] = {}
46
 
47
- # --- Pydantic Models (Tetap Sama) ---
48
  class TranslationRequest(BaseModel):
49
  text: str
50
  source_lang_override: Optional[str] = None
@@ -53,13 +53,10 @@ class TranslationResponse(BaseModel):
53
  translated_text: str
54
  source_language: Optional[str] = None
55
 
56
- # --- 2. Inisialisasi Aplikasi FastAPI ---
57
- app = FastAPI(title="Translation Service API")
58
-
59
- # --- 3. OPTIMASI: Prapemuatan Model saat Startup ---
60
- @app.on_event("startup")
61
- async def startup_event():
62
- """Memuat semua model translasi saat aplikasi dimulai."""
63
  logger.info("Memulai prapemuatan model translasi...")
64
  for lang, model_name in MODEL_MAP.items():
65
  try:
@@ -69,7 +66,12 @@ async def startup_event():
69
  except Exception as e:
70
  logger.error(f"Gagal memuat model untuk {lang}: {str(e)}")
71
  logger.info("Semua model telah dimuat.")
 
 
 
 
72
 
 
73
  def get_translator(lang: str) -> pipeline:
74
  """Mengambil translator yang sudah dimuat dari cache."""
75
  translator = translators.get(lang)
@@ -78,12 +80,10 @@ def get_translator(lang: str) -> pipeline:
78
  raise HTTPException(status_code=500, detail=f"Model terjemahan untuk '{lang}' tidak tersedia.")
79
  return translator
80
 
81
- # --- Fungsi Utility (Hampir Sama, Sedikit Perbaikan) ---
82
- @lru_cache(maxsize=128) # Cache lebih besar jika perlu
83
  def detect_language(text: str) -> str:
84
  """Deteksi bahasa dengan cache."""
85
  try:
86
- # Potong teks untuk deteksi yang lebih cepat jika teks sangat panjang
87
  preview_text = text[:500]
88
  detected_lang = langdetect.detect(preview_text)
89
  if detected_lang.startswith('zh'):
@@ -94,100 +94,77 @@ def detect_language(text: str) -> str:
94
  return "en"
95
 
96
  def protect_terms(text: str, protected_terms: list) -> tuple[str, dict]:
97
- """Mengganti istilah yang dilindungi dengan placeholder."""
98
  replacements = {}
99
  for i, term in enumerate(protected_terms):
100
  placeholder = f"__PROTECTED_{i}__"
101
- # Gunakan word boundary (\b) untuk memastikan hanya kata utuh yang diganti
102
  modified_text = re.sub(r'\b' + re.escape(term) + r'\b', placeholder, text, flags=re.IGNORECASE)
103
- # Hanya tambahkan ke replacement jika ada perubahan
104
  if modified_text != text:
105
- replacements[placeholder] = term
106
- text = modified_text
107
  return text, replacements
108
 
109
  def restore_terms(text: str, replacements: dict) -> str:
110
- """Mengembalikan istilah yang dilindungi."""
111
  for placeholder, term in replacements.items():
112
  text = text.replace(placeholder, term)
113
  return text
114
 
115
- # --- 4. OPTIMASI: Fungsi Inti dan Endpoint API menjadi Full Asynchronous ---
116
  async def perform_translation(text: str, source_lang_override: Optional[str] = None) -> TranslationResponse:
117
- """Fungsi inti translasi yang sepenuhnya async."""
118
  if not text or not text.strip():
119
  raise HTTPException(status_code=400, detail="Teks input tidak boleh kosong.")
120
-
121
  if len(text) > MAX_TEXT_LENGTH:
122
  raise HTTPException(
123
  status_code=413,
124
  detail=f"Teks terlalu panjang. Panjang maksimal yang diizinkan: {MAX_TEXT_LENGTH}."
125
  )
126
-
127
  try:
128
- # Tentukan bahasa sumber
129
  if source_lang_override and source_lang_override in MODEL_MAP:
130
  source_lang = source_lang_override
131
  else:
132
  source_lang = detect_language(text)
133
 
134
- # Jika bahasa sumber adalah Inggris, kembalikan teks asli
135
  if source_lang == "en":
136
  return TranslationResponse(translated_text=text, source_language=source_lang)
137
 
138
- # Ambil translator
139
  translator = get_translator(source_lang)
140
-
141
- # Lindungi istilah sebelum translasi
142
  modified_text, replacements = protect_terms(text, PROTECTED_TERMS)
143
 
144
- # --- OPTIMASI KUNCI: Jalankan model di thread terpisah ---
145
- # Ini mencegah pipeline yang berat memblokir event loop utama
146
  def _translate_task():
147
  return translator(modified_text, max_length=512, num_beams=4)
148
 
149
  result = await asyncio.to_thread(_translate_task)
150
  translated_text = result[0]["translation_text"]
151
-
152
- # Kembalikan istilah yang dilindungi
153
  final_text = restore_terms(translated_text, replacements)
154
 
155
  return TranslationResponse(translated_text=final_text, source_language=source_lang)
156
-
157
  except HTTPException as e:
158
- raise e # Re-raise HTTPException agar status code-nya benar
159
  except Exception as e:
160
  logger.error(f"Terjadi kesalahan saat translasi: {str(e)}")
161
  raise HTTPException(status_code=500, detail=f"Proses translasi gagal: {str(e)}")
162
 
163
  @app.post("/translate", response_model=TranslationResponse)
164
  async def translate_api(request: TranslationRequest):
165
- """Endpoint API untuk translasi."""
166
  return await perform_translation(request.text, request.source_lang_override)
167
 
168
  @app.get("/health")
169
  async def health_check():
170
  return {"status": "healthy", "loaded_models": list(translators.keys())}
171
 
172
-
173
- # --- 5. OPTIMASI: Handler Gradio menjadi Asynchronous ---
174
  async def translate_gradio(text: str, source_lang: str = "auto"):
175
- """Wrapper Gradio yang sekarang async dan lebih efisien."""
176
  if not text or not text.strip():
177
  return "Masukkan teks untuk diterjemahkan.", "N/A"
178
-
179
  try:
180
  source_lang_param = source_lang if source_lang != "auto" else None
181
  result = await perform_translation(text, source_lang_param)
182
  return result.translated_text, result.source_language or "Unknown"
183
-
184
  except HTTPException as e:
185
  return f"Error: {e.detail}", "Error"
186
  except Exception as e:
187
  return f"Error: {str(e)}", "Error"
188
 
189
- # --- 6. OPTIMASI: Mount Gradio ke FastAPI ---
190
- # Fungsi untuk membuat UI Gradio tetap sama
191
  def create_gradio_interface():
192
  with gr.Blocks(
193
  title="Multi-Language Translation Service",
@@ -199,7 +176,6 @@ def create_gradio_interface():
199
  Terjemahkan teks dari **Thai**, **Jepang**, **Mandarin**, atau **Vietnam** ke **Inggris**.
200
  ✨ Fitur: Deteksi bahasa otomatis • Perlindungan istilah • Model Helsinki-NLP yang cepat.
201
  """)
202
-
203
  with gr.Row():
204
  with gr.Column(scale=1):
205
  text_input = gr.Textbox(label="📝 Input Text", placeholder="Enter text to translate...", lines=6, max_lines=10)
@@ -212,11 +188,9 @@ def create_gradio_interface():
212
  value="auto", label="Source Language"
213
  )
214
  translate_btn = gr.Button("🚀 Translate", variant="primary", size="lg")
215
-
216
  with gr.Column(scale=1):
217
  output_text = gr.Textbox(label="🎯 Translation Result", lines=6, max_lines=10, interactive=False)
218
  detected_lang = gr.Textbox(label="🔍 Detected Language", interactive=False, max_lines=1)
219
-
220
  gr.Examples(
221
  examples=[
222
  ["สวัสดีครับ ยินดีที่ได้รู้จัก การพัฒนา 2030 Aspirations เป็นเป้าหมายสำคัญ", "th"],
@@ -226,24 +200,13 @@ def create_gradio_interface():
226
  ],
227
  inputs=[text_input, lang_dropdown],
228
  outputs=[output_text, detected_lang],
229
- fn=translate_gradio, # Sekarang memanggil fungsi async secara langsung
230
  cache_examples=False
231
  )
232
-
233
- # Event handlers sekarang bisa langsung memanggil fungsi async
234
  translate_btn.click(fn=translate_gradio, inputs=[text_input, lang_dropdown], outputs=[output_text, detected_lang])
235
  text_input.submit(fn=translate_gradio, inputs=[text_input, lang_dropdown], outputs=[output_text, detected_lang])
236
-
237
  return interface
238
 
239
- # Buat UI Gradio
240
  gradio_app = create_gradio_interface()
241
-
242
- # Mount Gradio app ke FastAPI di path "/"
243
- # Ini adalah cara yang benar untuk mengintegrasikan keduanya
244
- app = gr.mount_gradio_app(app, gradio_app, path="/")
245
-
246
-
247
- # Untuk menjalankan:
248
- # Simpan file ini sebagai app.py dan jalankan dengan uvicorn
249
- # > uvicorn app:app --reload --port 7860
 
7
  import os
8
  from typing import Optional, Dict
9
  import re
10
+ from functools import lru_cache, partial
11
  import asyncio
12
+ from contextlib import asynccontextmanager
13
 
# --- 1. Initial configuration ---

# Create the cache and log directories if they do not exist yet.
os.makedirs("./cache", exist_ok=True)
os.makedirs("./logs", exist_ok=True)

# Set the environment variables for the Hugging Face cache.
os.environ["HF_HOME"] = "./cache"
os.environ["TRANSFORMERS_CACHE"] = "./cache"

# Environment configuration
DEVICE = -1  # Always CPU, for compatibility
# Maximum accepted input length in characters; overridable via the environment.
MAX_TEXT_LENGTH = int(os.getenv("MAX_TEXT_LENGTH", "5000"))
25
 
26
  # Configure logging
 
# Cache of loaded translators (pipelines), keyed by language code.
# NOTE(review): `pipeline` looks like a factory function rather than a class;
# if so, the pipeline class would be the precise value type here -- confirm.
translators: Dict[str, pipeline] = {}
46
 
47
+ # --- Pydantic Models ---
48
  class TranslationRequest(BaseModel):
49
  text: str
50
  source_lang_override: Optional[str] = None
 
53
  translated_text: str
54
  source_language: Optional[str] = None
55
 
56
+ # --- Lifespan Event Handler ---
57
+ @asynccontextmanager
58
+ async def lifespan(app: FastAPI):
59
+ """Handler lifecycle aplikasi menggunakan lifespan"""
 
 
 
60
  logger.info("Memulai prapemuatan model translasi...")
61
  for lang, model_name in MODEL_MAP.items():
62
  try:
 
66
  except Exception as e:
67
  logger.error(f"Gagal memuat model untuk {lang}: {str(e)}")
68
  logger.info("Semua model telah dimuat.")
69
+ yield # Aplikasi berjalan di sini
70
+
# --- FastAPI application, wired to the lifespan handler ---
app = FastAPI(title="Translation Service API", lifespan=lifespan)
73
 
74
+ # --- Fungsi Utility ---
75
  def get_translator(lang: str) -> pipeline:
76
  """Mengambil translator yang sudah dimuat dari cache."""
77
  translator = translators.get(lang)
 
80
  raise HTTPException(status_code=500, detail=f"Model terjemahan untuk '{lang}' tidak tersedia.")
81
  return translator
82
 
83
+ @lru_cache(maxsize=128)
 
84
  def detect_language(text: str) -> str:
85
  """Deteksi bahasa dengan cache."""
86
  try:
 
87
  preview_text = text[:500]
88
  detected_lang = langdetect.detect(preview_text)
89
  if detected_lang.startswith('zh'):
 
94
  return "en"
95
 
def protect_terms(text: str, protected_terms: list) -> tuple[str, dict]:
    """Replace protected terms with placeholders before translation.

    Each term is matched case-insensitively and swapped for
    ``__PROTECTED_<index>__``, where ``<index>`` is the term's position in
    ``protected_terms``.

    Args:
        text: The text to scan.
        protected_terms: Terms that must survive translation unchanged.

    Returns:
        A ``(text, replacements)`` pair: the text with placeholders inserted,
        and a mapping of each placeholder that was actually used to its term.
    """
    replacements = {}
    for index, term in enumerate(protected_terms):
        if not term:
            # An empty term would match at every position; nothing to protect.
            continue
        placeholder = f"__PROTECTED_{index}__"
        # Do not use \b here: it is a Unicode word boundary, and CJK/Thai
        # characters count as word characters, so a term written directly
        # next to them (no spaces) would never match. Only refuse a match
        # when the term is glued to an ASCII letter, digit or underscore.
        pattern = r"(?<![A-Za-z0-9_])" + re.escape(term) + r"(?![A-Za-z0-9_])"
        text, hits = re.subn(pattern, placeholder, text, flags=re.IGNORECASE)
        if hits:
            replacements[placeholder] = term
    return text, replacements
105
 
def restore_terms(text: str, replacements: dict) -> str:
    """Put protected terms back in place of their placeholders.

    Args:
        text: Text that may contain placeholder strings.
        replacements: Mapping of placeholder string to the term it stands for.

    Returns:
        ``text`` with every occurrence of each placeholder replaced by its
        term. Placeholders that do not occur in ``text`` are ignored.
    """
    for placeholder, term in replacements.items():
        text = text.replace(placeholder, term)
    return text
110
 
111
+ # --- Fungsi Inti dan Endpoint API ---
async def perform_translation(text: str, source_lang_override: Optional[str] = None) -> TranslationResponse:
    """Translate ``text`` and report which source language was used.

    Args:
        text: The text to translate. Must be non-blank and at most
            ``MAX_TEXT_LENGTH`` characters long.
        source_lang_override: Source language to use instead of detection.
            Only honoured when it is a key of ``MODEL_MAP``; any other value
            falls back to ``detect_language``.

    Returns:
        A ``TranslationResponse``. When the source language resolves to
        ``"en"`` the input text is returned unchanged.

    Raises:
        HTTPException: 400 for blank input, 413 for over-long input, 500 for
            any unexpected failure while translating. An ``HTTPException``
            raised by a helper is propagated unchanged.
    """
    if not text or not text.strip():
        raise HTTPException(status_code=400, detail="Teks input tidak boleh kosong.")

    if len(text) > MAX_TEXT_LENGTH:
        raise HTTPException(
            status_code=413,
            detail=f"Teks terlalu panjang. Panjang maksimal yang diizinkan: {MAX_TEXT_LENGTH}.",
        )

    try:
        if source_lang_override and source_lang_override in MODEL_MAP:
            source_lang = source_lang_override
        else:
            source_lang = detect_language(text)

        if source_lang == "en":
            return TranslationResponse(translated_text=text, source_language=source_lang)

        translator = get_translator(source_lang)
        modified_text, replacements = protect_terms(text, PROTECTED_TERMS)

        # Run the translator in a worker thread so the event loop stays free
        # while the model call is in progress.
        result = await asyncio.to_thread(
            translator, modified_text, max_length=512, num_beams=4
        )
        translated_text = result[0]["translation_text"]
        final_text = restore_terms(translated_text, replacements)

        return TranslationResponse(translated_text=final_text, source_language=source_lang)
    except HTTPException:
        # Keep the original status code and traceback.
        raise
    except Exception as e:
        # logger.exception records the traceback along with the message.
        logger.exception("Terjadi kesalahan saat translasi: %s", e)
        raise HTTPException(status_code=500, detail=f"Proses translasi gagal: {str(e)}") from e
145
 
@app.post("/translate", response_model=TranslationResponse)
async def translate_api(request: TranslationRequest):
    """Handle POST /translate by delegating to ``perform_translation``.

    Passes the request's ``text`` and ``source_lang_override`` through and
    returns the resulting ``TranslationResponse``.
    """
    return await perform_translation(request.text, request.source_lang_override)
149
 
@app.get("/health")
async def health_check():
    """Handle GET /health: report status and the keys of the translator cache."""
    return {"status": "healthy", "loaded_models": list(translators.keys())}
153
 
154
+ # --- Handler Gradio Async ---
 
async def translate_gradio(text: str, source_lang: str = "auto"):
    """Gradio handler: translate ``text`` and return values for two outputs.

    Args:
        text: Text entered by the user.
        source_lang: Selected source language; ``"auto"`` means no override
            is passed to ``perform_translation``.

    Returns:
        A ``(translated_text, source_language)`` pair of strings. Failures
        are never raised; they are reported as ``("Error: ...", "Error")``,
        and blank input yields a prompt message with ``"N/A"``.
    """
    if not text or not text.strip():
        return "Masukkan teks untuk diterjemahkan.", "N/A"

    try:
        source_lang_param = source_lang if source_lang != "auto" else None
        result = await perform_translation(text, source_lang_param)
        return result.translated_text, result.source_language or "Unknown"
    except HTTPException as e:
        # Show the API-level error detail rather than the exception repr.
        return f"Error: {e.detail}", "Error"
    except Exception as e:
        return f"Error: {str(e)}", "Error"
166
 
167
+ # --- UI Gradio ---
 
168
  def create_gradio_interface():
169
  with gr.Blocks(
170
  title="Multi-Language Translation Service",
 
176
  Terjemahkan teks dari **Thai**, **Jepang**, **Mandarin**, atau **Vietnam** ke **Inggris**.
177
  ✨ Fitur: Deteksi bahasa otomatis • Perlindungan istilah • Model Helsinki-NLP yang cepat.
178
  """)
 
179
  with gr.Row():
180
  with gr.Column(scale=1):
181
  text_input = gr.Textbox(label="📝 Input Text", placeholder="Enter text to translate...", lines=6, max_lines=10)
 
188
  value="auto", label="Source Language"
189
  )
190
  translate_btn = gr.Button("🚀 Translate", variant="primary", size="lg")
 
191
  with gr.Column(scale=1):
192
  output_text = gr.Textbox(label="🎯 Translation Result", lines=6, max_lines=10, interactive=False)
193
  detected_lang = gr.Textbox(label="🔍 Detected Language", interactive=False, max_lines=1)
 
194
  gr.Examples(
195
  examples=[
196
  ["สวัสดีครับ ยินดีที่ได้รู้จัก การพัฒนา 2030 Aspirations เป็นเป้าหมายสำคัญ", "th"],
 
200
  ],
201
  inputs=[text_input, lang_dropdown],
202
  outputs=[output_text, detected_lang],
203
+ fn=partial(asyncio.run, translate_gradio), # Agar bisa dipakai di contoh
204
  cache_examples=False
205
  )
 
 
206
  translate_btn.click(fn=translate_gradio, inputs=[text_input, lang_dropdown], outputs=[output_text, detected_lang])
207
  text_input.submit(fn=translate_gradio, inputs=[text_input, lang_dropdown], outputs=[output_text, detected_lang])
 
208
  return interface
209
 
# Build the Gradio UI and mount it onto the FastAPI app at path "/".
gradio_app = create_gradio_interface()
app = gr.mount_gradio_app(app, gradio_app, path="/")