amine_dubs committed
Commit 3a2e47e · 1 Parent(s): ec41997

added prompt

Browse files: backend/main.py (+41 -8)
backend/main.py
CHANGED
@@ -178,6 +178,23 @@ def translate_text(text, source_lang, target_lang):
         return use_fallback_translation(text, source_lang, target_lang)
 
     try:
+        # Get full language name for better prompt context
+        source_lang_name = LANGUAGE_MAP.get(source_lang, source_lang)
+
+        # Create a culturally-aware prompt with focus on Arabic eloquence (Balagha)
+        if target_lang == "ar":
+            prompt = f"""Translate the following {source_lang_name} text into Modern Standard Arabic (Fusha).
+Focus on conveying the meaning elegantly using proper Balagha (Arabic eloquence).
+Adapt any cultural references or idioms appropriately rather than translating literally.
+Ensure the translation reads naturally to a native Arabic speaker.
+
+Text to translate:
+{text}"""
+            print("Using culturally-aware prompt for Arabic translation with Balagha focus")
+        else:
+            # For non-Arabic target languages, use standard approach
+            prompt = text
+
         # Prepare input with explicit instruction format for better results with NLLB
         src_lang_code = f"{source_lang}_Latn" if source_lang != "ar" else f"{source_lang}_Arab"
         tgt_lang_code = f"{target_lang}_Latn" if target_lang != "ar" else f"{target_lang}_Arab"
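The prompt built in this hunk depends on a LANGUAGE_MAP lookup defined elsewhere in backend/main.py and not shown in this diff. A minimal sketch of the same construction, assuming a simple code-to-name mapping (both the mapping contents and the build_prompt helper below are illustrative, not part of the commit):

# Illustrative stand-in: LANGUAGE_MAP lives elsewhere in backend/main.py and its
# real contents are not shown in this diff.
LANGUAGE_MAP = {"en": "English", "fr": "French", "es": "Spanish", "ar": "Arabic"}

def build_prompt(text: str, source_lang: str, target_lang: str) -> str:
    """Hypothetical helper mirroring the prompt construction added above."""
    source_lang_name = LANGUAGE_MAP.get(source_lang, source_lang)
    if target_lang != "ar":
        # Non-Arabic targets keep the raw text, as in the diff's else branch.
        return text
    return (
        f"Translate the following {source_lang_name} text into Modern Standard Arabic (Fusha).\n"
        "Focus on conveying the meaning elegantly using proper Balagha (Arabic eloquence).\n"
        "Adapt any cultural references or idioms appropriately rather than translating literally.\n"
        "Ensure the translation reads naturally to a native Arabic speaker.\n"
        "\n"
        "Text to translate:\n"
        f"{text}"
    )

print(build_prompt("Good morning, friends.", "en", "ar"))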
@@ -186,10 +203,10 @@ def translate_text(text, source_lang, target_lang):
         with concurrent.futures.ThreadPoolExecutor() as executor:
             future = executor.submit(
                 lambda: translator(
-                    text,
+                    prompt,  # Using our enhanced prompt instead of raw text
                     src_lang=src_lang_code,
                     tgt_lang=tgt_lang_code,
-                    max_length=
+                    max_length=768  # Increased max_length to accommodate longer prompt
                 )[0]["translation_text"]
             )
 
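The lambda above relies on the output shape of the translation pipeline: a list with one dict per input, each carrying a "translation_text" key, which is why the call ends in [0]["translation_text"]. A tiny stand-in (fake_translator is purely illustrative, not the real pipeline) shows that shape:

def fake_translator(text, src_lang=None, tgt_lang=None, max_length=None):
    # Mimics the return shape the code above indexes into.
    return [{"translation_text": f"({src_lang} -> {tgt_lang}) {text}"}]

translated = fake_translator(
    "Good morning", src_lang="en_Latn", tgt_lang="ar_Arab", max_length=768
)[0]["translation_text"]
print(translated)  # (en_Latn -> ar_Arab) Good morning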
@@ -197,6 +214,10 @@ def translate_text(text, source_lang, target_lang):
         # Set a reasonable timeout (15 seconds instead of 10)
         result = future.result(timeout=15)
 
+        # Post-process the result for Arabic cultural adaptation
+        if target_lang == "ar":
+            result = culturally_adapt_arabic(result)
+
         return result
     except concurrent.futures.TimeoutError:
         print(f"Model inference timed out after 15 seconds, falling back to online translation")
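The surrounding code runs inference on a worker thread and gives up after 15 seconds. A self-contained sketch of that timeout-with-fallback pattern, with slow_translate and fallback_translate standing in for the real model call and the online fallback used in backend/main.py:

import concurrent.futures
import time

def slow_translate(text: str) -> str:
    time.sleep(3)  # simulate model inference that overruns the time budget
    return f"[model] {text}"

def fallback_translate(text: str) -> str:
    return f"[fallback] {text}"

def translate_with_timeout(text: str, timeout: float = 15.0) -> str:
    with concurrent.futures.ThreadPoolExecutor() as executor:
        future = executor.submit(slow_translate, text)
        try:
            return future.result(timeout=timeout)
        except concurrent.futures.TimeoutError:
            # Note: the executor's shutdown still waits for the worker thread to
            # finish before the `with` block exits, so the fallback value is
            # returned only after the slow call completes.
            return fallback_translate(text)

print(translate_with_timeout("good morning", timeout=1.0))  # prints "[fallback] good morning"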
@@ -213,6 +234,24 @@ def translate_text(text, source_lang, target_lang):
         traceback.print_exc()
         return use_fallback_translation(text, source_lang, target_lang)
 
+def culturally_adapt_arabic(text: str) -> str:
+    """Apply post-processing rules to enhance Arabic translation with cultural sensitivity."""
+    # Replace Latin punctuation with Arabic ones
+    text = text.replace('?', '؟').replace(';', '؛').replace(',', '،')
+
+    # If the text starts with common translation artifacts like "Translation:" or the prompt instructions, remove them
+    common_prefixes = [
+        "الترجمة:", "ترجمة:", "النص المترجم:",
+        "Translation:", "Arabic translation:"
+    ]
+    for prefix in common_prefixes:
+        if text.startswith(prefix):
+            text = text[len(prefix):].strip()
+
+    # Additional cultural adaptations can be added here
+
+    return text
+
 # --- Function to check model status and trigger re-initialization if needed ---
 def check_and_reinitialize_model():
     """Check if model needs to be reinitialized and do so if necessary"""
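A quick usage sketch of the function added above (the sample string is invented for illustration; it assumes culturally_adapt_arabic is in scope):

sample = "Translation: كيف حالك, يا صديقي?"
print(culturally_adapt_arabic(sample))
# Expected: "كيف حالك، يا صديقي؟" — the English prefix is stripped and the Latin
# comma and question mark become their Arabic counterparts.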
@@ -288,12 +327,6 @@ def use_fallback_translation(text, source_lang, target_lang):
     # Final fallback - return original text with error message
     return f"[Translation failed] {text}"
 
-def culturally_adapt_arabic(text: str) -> str:
-    """Apply post-processing rules to enhance Arabic translation with cultural sensitivity."""
-    # Replace any Latin punctuation with Arabic ones
-    text = text.replace('?', '؟').replace(';', '؛').replace(',', '،')
-    return text
-
 # --- Helper Functions ---
 async def extract_text_from_file(file: UploadFile) -> str:
     """Extracts text content from uploaded files without writing to disk."""