amine_dubs committed on
Commit
3a2e47e
·
1 Parent(s): ec41997

added prompt

Browse files
Files changed (1) hide show
  1. backend/main.py +41 -8
backend/main.py CHANGED
@@ -178,6 +178,23 @@ def translate_text(text, source_lang, target_lang):
178
  return use_fallback_translation(text, source_lang, target_lang)
179
 
180
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181
  # Prepare input with explicit instruction format for better results with NLLB
182
  src_lang_code = f"{source_lang}_Latn" if source_lang != "ar" else f"{source_lang}_Arab"
183
  tgt_lang_code = f"{target_lang}_Latn" if target_lang != "ar" else f"{target_lang}_Arab"
@@ -186,10 +203,10 @@ def translate_text(text, source_lang, target_lang):
186
  with concurrent.futures.ThreadPoolExecutor() as executor:
187
  future = executor.submit(
188
  lambda: translator(
189
- text,
190
  src_lang=src_lang_code,
191
  tgt_lang=tgt_lang_code,
192
- max_length=512
193
  )[0]["translation_text"]
194
  )
195
 
@@ -197,6 +214,10 @@ def translate_text(text, source_lang, target_lang):
197
  # Set a reasonable timeout (15 seconds instead of 10)
198
  result = future.result(timeout=15)
199
 
 
 
 
 
200
  return result
201
  except concurrent.futures.TimeoutError:
202
  print(f"Model inference timed out after 15 seconds, falling back to online translation")
@@ -213,6 +234,24 @@ def translate_text(text, source_lang, target_lang):
213
  traceback.print_exc()
214
  return use_fallback_translation(text, source_lang, target_lang)
215
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
  # --- Function to check model status and trigger re-initialization if needed ---
217
  def check_and_reinitialize_model():
218
  """Check if model needs to be reinitialized and do so if necessary"""
@@ -288,12 +327,6 @@ def use_fallback_translation(text, source_lang, target_lang):
288
  # Final fallback - return original text with error message
289
  return f"[Translation failed] {text}"
290
 
291
- def culturally_adapt_arabic(text: str) -> str:
292
- """Apply post-processing rules to enhance Arabic translation with cultural sensitivity."""
293
- # Replace any Latin punctuation with Arabic ones
294
- text = text.replace('?', '؟').replace(';', '؛').replace(',', '،')
295
- return text
296
-
297
  # --- Helper Functions ---
298
  async def extract_text_from_file(file: UploadFile) -> str:
299
  """Extracts text content from uploaded files without writing to disk."""
 
178
  return use_fallback_translation(text, source_lang, target_lang)
179
 
180
  try:
181
+ # Get full language name for better prompt context
182
+ source_lang_name = LANGUAGE_MAP.get(source_lang, source_lang)
183
+
184
+ # Create a culturally-aware prompt with focus on Arabic eloquence (Balagha)
185
+ if target_lang == "ar":
186
+ prompt = f"""Translate the following {source_lang_name} text into Modern Standard Arabic (Fusha).
187
+ Focus on conveying the meaning elegantly using proper Balagha (Arabic eloquence).
188
+ Adapt any cultural references or idioms appropriately rather than translating literally.
189
+ Ensure the translation reads naturally to a native Arabic speaker.
190
+
191
+ Text to translate:
192
+ {text}"""
193
+ print("Using culturally-aware prompt for Arabic translation with Balagha focus")
194
+ else:
195
+ # For non-Arabic target languages, use standard approach
196
+ prompt = text
197
+
198
  # Prepare input with explicit instruction format for better results with NLLB
199
  src_lang_code = f"{source_lang}_Latn" if source_lang != "ar" else f"{source_lang}_Arab"
200
  tgt_lang_code = f"{target_lang}_Latn" if target_lang != "ar" else f"{target_lang}_Arab"
 
203
  with concurrent.futures.ThreadPoolExecutor() as executor:
204
  future = executor.submit(
205
  lambda: translator(
206
+ prompt, # Using our enhanced prompt instead of raw text
207
  src_lang=src_lang_code,
208
  tgt_lang=tgt_lang_code,
209
+ max_length=768 # Increased max_length to accommodate longer prompt
210
  )[0]["translation_text"]
211
  )
212
 
 
214
  # Set a reasonable timeout (15 seconds instead of 10)
215
  result = future.result(timeout=15)
216
 
217
+ # Post-process the result for Arabic cultural adaptation
218
+ if target_lang == "ar":
219
+ result = culturally_adapt_arabic(result)
220
+
221
  return result
222
  except concurrent.futures.TimeoutError:
223
  print(f"Model inference timed out after 15 seconds, falling back to online translation")
 
234
  traceback.print_exc()
235
  return use_fallback_translation(text, source_lang, target_lang)
236
 
237
# Latin punctuation marks mapped to their Arabic-script counterparts.
_ARABIC_PUNCTUATION = str.maketrans({"?": "؟", ";": "؛", ",": "،"})

# Labels that sometimes lead the translated output and are not part of the
# translation itself.
_TRANSLATION_ARTIFACT_PREFIXES = (
    "الترجمة:", "ترجمة:", "النص المترجم:",
    "Translation:", "Arabic translation:",
)


def culturally_adapt_arabic(text: str) -> str:
    """Post-process an Arabic translation produced by the model.

    Two clean-up steps are applied:

    1. Latin ``?``, ``;`` and ``,`` are replaced with the Arabic marks
       ``؟``, ``؛`` and ``،``.
    2. Leading artifact labels such as ``"Translation:"`` or ``"الترجمة:"``
       are removed, together with the whitespace around the remaining text.

    Args:
        text: The raw translated string.

    Returns:
        The cleaned string. When no artifact label is present, the text is
        returned with its surrounding whitespace untouched (only the
        punctuation is converted).
    """
    if not text:
        return text

    # Single pass over the string instead of three chained replace() calls.
    text = text.translate(_ARABIC_PUNCTUATION)

    # Detect labels even when the output begins with spaces or a newline,
    # and keep stripping so stacked labels ("Translation: الترجمة: ...") are
    # removed regardless of the order they appear in.
    candidate = text.lstrip()
    while candidate.startswith(_TRANSLATION_ARTIFACT_PREFIXES):
        for prefix in _TRANSLATION_ARTIFACT_PREFIXES:
            if candidate.startswith(prefix):
                candidate = candidate[len(prefix):].strip()
                text = candidate
                break

    # Additional cultural adaptations can be added here

    return text
254
+
255
  # --- Function to check model status and trigger re-initialization if needed ---
256
  def check_and_reinitialize_model():
257
  """Check if model needs to be reinitialized and do so if necessary"""
 
327
  # Final fallback - return original text with error message
328
  return f"[Translation failed] {text}"
329
 
 
 
 
 
 
 
330
  # --- Helper Functions ---
331
  async def extract_text_from_file(file: UploadFile) -> str:
332
  """Extracts text content from uploaded files without writing to disk."""