Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -7,20 +7,20 @@ import logging
|
|
7 |
import os
|
8 |
from typing import Optional, Dict
|
9 |
import re
|
10 |
-
from functools import lru_cache
|
11 |
import asyncio
|
|
|
12 |
|
13 |
-
# --- 1. Konfigurasi Awal
|
14 |
-
# Create necessary directories
|
15 |
os.makedirs("./cache", exist_ok=True)
|
16 |
os.makedirs("./logs", exist_ok=True)
|
17 |
|
18 |
-
# Set environment variables
|
19 |
os.environ["HF_HOME"] = "./cache"
|
20 |
os.environ["TRANSFORMERS_CACHE"] = "./cache"
|
21 |
|
22 |
# Environment configuration
|
23 |
-
DEVICE = -1 # Selalu CPU untuk
|
24 |
MAX_TEXT_LENGTH = int(os.getenv("MAX_TEXT_LENGTH", "5000"))
|
25 |
|
26 |
# Configure logging
|
@@ -44,7 +44,7 @@ PROTECTED_TERMS = ["2030 Aspirations", "Griffith"]
|
|
44 |
# Cache untuk translator (pipeline)
|
45 |
translators: Dict[str, pipeline] = {}
|
46 |
|
47 |
-
# --- Pydantic Models
|
48 |
class TranslationRequest(BaseModel):
|
49 |
text: str
|
50 |
source_lang_override: Optional[str] = None
|
@@ -53,13 +53,10 @@ class TranslationResponse(BaseModel):
|
|
53 |
translated_text: str
|
54 |
source_language: Optional[str] = None
|
55 |
|
56 |
-
# ---
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
@app.on_event("startup")
|
61 |
-
async def startup_event():
|
62 |
-
"""Memuat semua model translasi saat aplikasi dimulai."""
|
63 |
logger.info("Memulai prapemuatan model translasi...")
|
64 |
for lang, model_name in MODEL_MAP.items():
|
65 |
try:
|
@@ -69,7 +66,12 @@ async def startup_event():
|
|
69 |
except Exception as e:
|
70 |
logger.error(f"Gagal memuat model untuk {lang}: {str(e)}")
|
71 |
logger.info("Semua model telah dimuat.")
|
|
|
|
|
|
|
|
|
72 |
|
|
|
73 |
def get_translator(lang: str) -> pipeline:
|
74 |
"""Mengambil translator yang sudah dimuat dari cache."""
|
75 |
translator = translators.get(lang)
|
@@ -78,12 +80,10 @@ def get_translator(lang: str) -> pipeline:
|
|
78 |
raise HTTPException(status_code=500, detail=f"Model terjemahan untuk '{lang}' tidak tersedia.")
|
79 |
return translator
|
80 |
|
81 |
-
|
82 |
-
@lru_cache(maxsize=128) # Cache lebih besar jika perlu
|
83 |
def detect_language(text: str) -> str:
|
84 |
"""Deteksi bahasa dengan cache."""
|
85 |
try:
|
86 |
-
# Potong teks untuk deteksi yang lebih cepat jika teks sangat panjang
|
87 |
preview_text = text[:500]
|
88 |
detected_lang = langdetect.detect(preview_text)
|
89 |
if detected_lang.startswith('zh'):
|
@@ -94,100 +94,77 @@ def detect_language(text: str) -> str:
|
|
94 |
return "en"
|
95 |
|
96 |
def protect_terms(text: str, protected_terms: list) -> tuple[str, dict]:
|
97 |
-
"""Mengganti istilah yang dilindungi dengan placeholder."""
|
98 |
replacements = {}
|
99 |
for i, term in enumerate(protected_terms):
|
100 |
placeholder = f"__PROTECTED_{i}__"
|
101 |
-
# Gunakan word boundary (\b) untuk memastikan hanya kata utuh yang diganti
|
102 |
modified_text = re.sub(r'\b' + re.escape(term) + r'\b', placeholder, text, flags=re.IGNORECASE)
|
103 |
-
# Hanya tambahkan ke replacement jika ada perubahan
|
104 |
if modified_text != text:
|
105 |
-
|
106 |
-
|
107 |
return text, replacements
|
108 |
|
109 |
def restore_terms(text: str, replacements: dict) -> str:
|
110 |
-
"""Mengembalikan istilah yang dilindungi."""
|
111 |
for placeholder, term in replacements.items():
|
112 |
text = text.replace(placeholder, term)
|
113 |
return text
|
114 |
|
115 |
-
# ---
|
116 |
async def perform_translation(text: str, source_lang_override: Optional[str] = None) -> TranslationResponse:
|
117 |
-
"""Fungsi inti translasi yang sepenuhnya async."""
|
118 |
if not text or not text.strip():
|
119 |
raise HTTPException(status_code=400, detail="Teks input tidak boleh kosong.")
|
120 |
-
|
121 |
if len(text) > MAX_TEXT_LENGTH:
|
122 |
raise HTTPException(
|
123 |
status_code=413,
|
124 |
detail=f"Teks terlalu panjang. Panjang maksimal yang diizinkan: {MAX_TEXT_LENGTH}."
|
125 |
)
|
126 |
-
|
127 |
try:
|
128 |
-
# Tentukan bahasa sumber
|
129 |
if source_lang_override and source_lang_override in MODEL_MAP:
|
130 |
source_lang = source_lang_override
|
131 |
else:
|
132 |
source_lang = detect_language(text)
|
133 |
|
134 |
-
# Jika bahasa sumber adalah Inggris, kembalikan teks asli
|
135 |
if source_lang == "en":
|
136 |
return TranslationResponse(translated_text=text, source_language=source_lang)
|
137 |
|
138 |
-
# Ambil translator
|
139 |
translator = get_translator(source_lang)
|
140 |
-
|
141 |
-
# Lindungi istilah sebelum translasi
|
142 |
modified_text, replacements = protect_terms(text, PROTECTED_TERMS)
|
143 |
|
144 |
-
# --- OPTIMASI KUNCI: Jalankan model di thread terpisah ---
|
145 |
-
# Ini mencegah pipeline yang berat memblokir event loop utama
|
146 |
def _translate_task():
|
147 |
return translator(modified_text, max_length=512, num_beams=4)
|
148 |
|
149 |
result = await asyncio.to_thread(_translate_task)
|
150 |
translated_text = result[0]["translation_text"]
|
151 |
-
|
152 |
-
# Kembalikan istilah yang dilindungi
|
153 |
final_text = restore_terms(translated_text, replacements)
|
154 |
|
155 |
return TranslationResponse(translated_text=final_text, source_language=source_lang)
|
156 |
-
|
157 |
except HTTPException as e:
|
158 |
-
raise e
|
159 |
except Exception as e:
|
160 |
logger.error(f"Terjadi kesalahan saat translasi: {str(e)}")
|
161 |
raise HTTPException(status_code=500, detail=f"Proses translasi gagal: {str(e)}")
|
162 |
|
163 |
@app.post("/translate", response_model=TranslationResponse)
|
164 |
async def translate_api(request: TranslationRequest):
|
165 |
-
"""Endpoint API untuk translasi."""
|
166 |
return await perform_translation(request.text, request.source_lang_override)
|
167 |
|
168 |
@app.get("/health")
|
169 |
async def health_check():
|
170 |
return {"status": "healthy", "loaded_models": list(translators.keys())}
|
171 |
|
172 |
-
|
173 |
-
# --- 5. OPTIMASI: Handler Gradio menjadi Asynchronous ---
|
174 |
async def translate_gradio(text: str, source_lang: str = "auto"):
|
175 |
-
"""Wrapper Gradio yang sekarang async dan lebih efisien."""
|
176 |
if not text or not text.strip():
|
177 |
return "Masukkan teks untuk diterjemahkan.", "N/A"
|
178 |
-
|
179 |
try:
|
180 |
source_lang_param = source_lang if source_lang != "auto" else None
|
181 |
result = await perform_translation(text, source_lang_param)
|
182 |
return result.translated_text, result.source_language or "Unknown"
|
183 |
-
|
184 |
except HTTPException as e:
|
185 |
return f"Error: {e.detail}", "Error"
|
186 |
except Exception as e:
|
187 |
return f"Error: {str(e)}", "Error"
|
188 |
|
189 |
-
# ---
|
190 |
-
# Fungsi untuk membuat UI Gradio tetap sama
|
191 |
def create_gradio_interface():
|
192 |
with gr.Blocks(
|
193 |
title="Multi-Language Translation Service",
|
@@ -199,7 +176,6 @@ def create_gradio_interface():
|
|
199 |
Terjemahkan teks dari **Thai**, **Jepang**, **Mandarin**, atau **Vietnam** ke **Inggris**.
|
200 |
✨ Fitur: Deteksi bahasa otomatis • Perlindungan istilah • Model Helsinki-NLP yang cepat.
|
201 |
""")
|
202 |
-
|
203 |
with gr.Row():
|
204 |
with gr.Column(scale=1):
|
205 |
text_input = gr.Textbox(label="📝 Input Text", placeholder="Enter text to translate...", lines=6, max_lines=10)
|
@@ -212,11 +188,9 @@ def create_gradio_interface():
|
|
212 |
value="auto", label="Source Language"
|
213 |
)
|
214 |
translate_btn = gr.Button("🚀 Translate", variant="primary", size="lg")
|
215 |
-
|
216 |
with gr.Column(scale=1):
|
217 |
output_text = gr.Textbox(label="🎯 Translation Result", lines=6, max_lines=10, interactive=False)
|
218 |
detected_lang = gr.Textbox(label="🔍 Detected Language", interactive=False, max_lines=1)
|
219 |
-
|
220 |
gr.Examples(
|
221 |
examples=[
|
222 |
["สวัสดีครับ ยินดีที่ได้รู้จัก การพัฒนา 2030 Aspirations เป็นเป้าหมายสำคัญ", "th"],
|
@@ -226,24 +200,13 @@ def create_gradio_interface():
|
|
226 |
],
|
227 |
inputs=[text_input, lang_dropdown],
|
228 |
outputs=[output_text, detected_lang],
|
229 |
-
fn=translate_gradio,
|
230 |
cache_examples=False
|
231 |
)
|
232 |
-
|
233 |
-
# Event handlers sekarang bisa langsung memanggil fungsi async
|
234 |
translate_btn.click(fn=translate_gradio, inputs=[text_input, lang_dropdown], outputs=[output_text, detected_lang])
|
235 |
text_input.submit(fn=translate_gradio, inputs=[text_input, lang_dropdown], outputs=[output_text, detected_lang])
|
236 |
-
|
237 |
return interface
|
238 |
|
239 |
-
#
|
240 |
gradio_app = create_gradio_interface()
|
241 |
-
|
242 |
-
# Mount Gradio app ke FastAPI di path "/"
|
243 |
-
# Ini adalah cara yang benar untuk mengintegrasikan keduanya
|
244 |
-
app = gr.mount_gradio_app(app, gradio_app, path="/")
|
245 |
-
|
246 |
-
|
247 |
-
# Untuk menjalankan:
|
248 |
-
# Simpan file ini sebagai app.py dan jalankan dengan uvicorn
|
249 |
-
# > uvicorn app:app --reload --port 7860
|
|
|
7 |
import os
|
8 |
from typing import Optional, Dict
|
9 |
import re
|
10 |
+
from functools import lru_cache, partial
|
11 |
import asyncio
|
12 |
+
from contextlib import asynccontextmanager
|
13 |
|
14 |
+
# --- 1. Konfigurasi Awal ---
|
|
|
15 |
os.makedirs("./cache", exist_ok=True)
|
16 |
os.makedirs("./logs", exist_ok=True)
|
17 |
|
18 |
+
# Set environment variables untuk Hugging Face cache
|
19 |
os.environ["HF_HOME"] = "./cache"
|
20 |
os.environ["TRANSFORMERS_CACHE"] = "./cache"
|
21 |
|
22 |
# Environment configuration
|
23 |
+
DEVICE = -1 # Selalu CPU untuk kompatibilitas
|
24 |
MAX_TEXT_LENGTH = int(os.getenv("MAX_TEXT_LENGTH", "5000"))
|
25 |
|
26 |
# Configure logging
|
|
|
44 |
# Cache untuk translator (pipeline)
|
45 |
translators: Dict[str, pipeline] = {}
|
46 |
|
47 |
+
# --- Pydantic Models ---
|
48 |
class TranslationRequest(BaseModel):
|
49 |
text: str
|
50 |
source_lang_override: Optional[str] = None
|
|
|
53 |
translated_text: str
|
54 |
source_language: Optional[str] = None
|
55 |
|
56 |
+
# --- Lifespan Event Handler ---
|
57 |
+
@asynccontextmanager
|
58 |
+
async def lifespan(app: FastAPI):
|
59 |
+
"""Handler lifecycle aplikasi menggunakan lifespan"""
|
|
|
|
|
|
|
60 |
logger.info("Memulai prapemuatan model translasi...")
|
61 |
for lang, model_name in MODEL_MAP.items():
|
62 |
try:
|
|
|
66 |
except Exception as e:
|
67 |
logger.error(f"Gagal memuat model untuk {lang}: {str(e)}")
|
68 |
logger.info("Semua model telah dimuat.")
|
69 |
+
yield # Aplikasi berjalan di sini
|
70 |
+
|
71 |
+
# --- Inisialisasi Aplikasi FastAPI dengan Lifespan ---
|
72 |
+
app = FastAPI(title="Translation Service API", lifespan=lifespan)
|
73 |
|
74 |
+
# --- Fungsi Utility ---
|
75 |
def get_translator(lang: str) -> pipeline:
|
76 |
"""Mengambil translator yang sudah dimuat dari cache."""
|
77 |
translator = translators.get(lang)
|
|
|
80 |
raise HTTPException(status_code=500, detail=f"Model terjemahan untuk '{lang}' tidak tersedia.")
|
81 |
return translator
|
82 |
|
83 |
+
@lru_cache(maxsize=128)
|
|
|
84 |
def detect_language(text: str) -> str:
|
85 |
"""Deteksi bahasa dengan cache."""
|
86 |
try:
|
|
|
87 |
preview_text = text[:500]
|
88 |
detected_lang = langdetect.detect(preview_text)
|
89 |
if detected_lang.startswith('zh'):
|
|
|
94 |
return "en"
|
95 |
|
96 |
def protect_terms(text: str, protected_terms: list) -> tuple[str, dict]:
    """Swap protected terms for opaque placeholders before translation.

    Each term is matched case-insensitively and only as a whole term (never
    inside a longer word). Every occurrence is replaced by
    ``__PROTECTED_<i>__``, where ``<i>`` is the term's index in
    ``protected_terms``.

    Args:
        text: Source text that is about to be translated.
        protected_terms: Terms that must come out of translation unchanged.

    Returns:
        A ``(text, replacements)`` pair: the text with placeholders
        substituted, and a dict mapping each placeholder that was actually
        inserted to the term (spelled as in ``protected_terms``) it stands for.
    """
    replacements = {}
    for index, term in enumerate(protected_terms):
        if not term:
            # An empty pattern would match at every boundary in the text and
            # flood it with placeholders.
            continue
        placeholder = f"__PROTECTED_{index}__"
        # Lookarounds rather than \b: identical for terms that begin and end
        # with a word character, but also correct for terms whose edge is
        # punctuation (e.g. "C++"), where \b can never match next to a space.
        pattern = r"(?<!\w)" + re.escape(term) + r"(?!\w)"
        text, hits = re.subn(pattern, placeholder, text, flags=re.IGNORECASE)
        if hits:
            # Only record placeholders that really occur in the text.
            replacements[placeholder] = term
    return text, replacements
|
105 |
|
106 |
def restore_terms(text: str, replacements: dict) -> str:
    """Put protected terms back in place of their placeholders.

    Args:
        text: Translated text that may still contain placeholders.
        replacements: Mapping of placeholder -> original term, as produced by
            ``protect_terms``.

    Returns:
        The text with every occurrence of each placeholder replaced by its
        term. Placeholders are matched case-insensitively so that a
        placeholder whose letter case was altered on the way through the
        translation model is still restored.
    """
    for placeholder, term in replacements.items():
        if not placeholder:
            # Nothing sensible to search for; an empty pattern matches everywhere.
            continue
        # A callable replacement inserts the term literally, so backslashes or
        # group references inside a term are never interpreted by ``re``.
        text = re.sub(
            re.escape(placeholder),
            lambda _match, literal=term: literal,
            text,
            flags=re.IGNORECASE,
        )
    return text
|
110 |
|
111 |
+
# --- Fungsi Inti dan Endpoint API ---
|
112 |
async def perform_translation(text: str, source_lang_override: Optional[str] = None) -> TranslationResponse:
    """Translate ``text`` with the pipeline registered for its source language.

    Args:
        text: Text to translate. Must be non-blank and at most
            ``MAX_TEXT_LENGTH`` characters long.
        source_lang_override: Optional source-language code. It is used only
            when it is a key of ``MODEL_MAP``; any other value falls back to
            ``detect_language``.

    Returns:
        A ``TranslationResponse`` carrying the translated text and the source
        language that was used. When the source language resolves to ``"en"``
        the input text is returned unchanged.

    Raises:
        HTTPException: 400 for blank input, 413 for over-long input, and 500
            when translation fails for any other reason. An ``HTTPException``
            raised by a helper called here is passed through untouched.
    """
    if not text or not text.strip():
        raise HTTPException(status_code=400, detail="Teks input tidak boleh kosong.")

    if len(text) > MAX_TEXT_LENGTH:
        raise HTTPException(
            status_code=413,
            detail=f"Teks terlalu panjang. Panjang maksimal yang diizinkan: {MAX_TEXT_LENGTH}."
        )

    try:
        # Resolve the source language: a recognised override wins, otherwise detect.
        if source_lang_override and source_lang_override in MODEL_MAP:
            source_lang = source_lang_override
        else:
            source_lang = detect_language(text)

        # English input is returned as-is; no model is invoked.
        if source_lang == "en":
            return TranslationResponse(translated_text=text, source_language=source_lang)

        translator = get_translator(source_lang)

        # Hide protected terms behind placeholders so the model cannot rewrite them.
        modified_text, replacements = protect_terms(text, PROTECTED_TERMS)

        # The pipeline call is blocking; run it in a worker thread so the
        # event loop stays free to serve other requests.
        result = await asyncio.to_thread(translator, modified_text, max_length=512, num_beams=4)
        translated_text = result[0]["translation_text"]

        final_text = restore_terms(translated_text, replacements)

        return TranslationResponse(translated_text=final_text, source_language=source_lang)

    except HTTPException:
        # Already a well-formed HTTP error; re-raise without touching the traceback.
        raise
    except Exception as e:
        # logger.exception records the traceback alongside the message.
        logger.exception(f"Terjadi kesalahan saat translasi: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Proses translasi gagal: {str(e)}") from e
|
145 |
|
146 |
@app.post("/translate", response_model=TranslationResponse)
async def translate_api(request: TranslationRequest):
    """POST /translate: run the request's text through ``perform_translation``."""
    body_text = request.text
    language_hint = request.source_lang_override
    response = await perform_translation(body_text, language_hint)
    return response
|
149 |
|
150 |
@app.get("/health")
async def health_check():
    """GET /health: report a healthy status plus the keys of the loaded translators."""
    loaded_languages = list(translators)
    return {"status": "healthy", "loaded_models": loaded_languages}
|
153 |
|
154 |
+
# --- Handler Gradio Async ---
|
|
|
155 |
async def translate_gradio(text: str, source_lang: str = "auto"):
    """Gradio callback around ``perform_translation``.

    Returns a ``(message, language)`` pair of strings for the two output
    boxes. Errors are never raised to the UI; they are rendered as an
    ``"Error: ..."`` message with ``"Error"`` as the language.
    """
    has_content = bool(text) and bool(text.strip())
    if not has_content:
        return "Masukkan teks untuk diterjemahkan.", "N/A"

    try:
        # "auto" means: no override, let the service detect the language.
        override = None if source_lang == "auto" else source_lang
        outcome = await perform_translation(text, override)
        language_label = outcome.source_language or "Unknown"
        return outcome.translated_text, language_label
    except HTTPException as http_error:
        return f"Error: {http_error.detail}", "Error"
    except Exception as unexpected:
        return f"Error: {str(unexpected)}", "Error"
|
166 |
|
167 |
+
# --- UI Gradio ---
|
|
|
168 |
def create_gradio_interface():
|
169 |
with gr.Blocks(
|
170 |
title="Multi-Language Translation Service",
|
|
|
176 |
Terjemahkan teks dari **Thai**, **Jepang**, **Mandarin**, atau **Vietnam** ke **Inggris**.
|
177 |
✨ Fitur: Deteksi bahasa otomatis • Perlindungan istilah • Model Helsinki-NLP yang cepat.
|
178 |
""")
|
|
|
179 |
with gr.Row():
|
180 |
with gr.Column(scale=1):
|
181 |
text_input = gr.Textbox(label="📝 Input Text", placeholder="Enter text to translate...", lines=6, max_lines=10)
|
|
|
188 |
value="auto", label="Source Language"
|
189 |
)
|
190 |
translate_btn = gr.Button("🚀 Translate", variant="primary", size="lg")
|
|
|
191 |
with gr.Column(scale=1):
|
192 |
output_text = gr.Textbox(label="🎯 Translation Result", lines=6, max_lines=10, interactive=False)
|
193 |
detected_lang = gr.Textbox(label="🔍 Detected Language", interactive=False, max_lines=1)
|
|
|
194 |
gr.Examples(
|
195 |
examples=[
|
196 |
["สวัสดีครับ ยินดีที่ได้รู้จัก การพัฒนา 2030 Aspirations เป็นเป้าหมายสำคัญ", "th"],
|
|
|
200 |
],
|
201 |
inputs=[text_input, lang_dropdown],
|
202 |
outputs=[output_text, detected_lang],
|
203 |
+
fn=partial(asyncio.run, translate_gradio), # Agar bisa dipakai di contoh
|
204 |
cache_examples=False
|
205 |
)
|
|
|
|
|
206 |
translate_btn.click(fn=translate_gradio, inputs=[text_input, lang_dropdown], outputs=[output_text, detected_lang])
|
207 |
text_input.submit(fn=translate_gradio, inputs=[text_input, lang_dropdown], outputs=[output_text, detected_lang])
|
|
|
208 |
return interface
|
209 |
|
210 |
+
# Mount Gradio ke FastAPI
|
211 |
gradio_app = create_gradio_interface()
|
212 |
+
app = gr.mount_gradio_app(app, gradio_app, path="/")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|