bakrianoo committed on
Commit
06c3d9b
·
1 Parent(s): f039cae

set custom Arabic extended language option

Browse files
Files changed (2) hide show
  1. app.py +1 -0
  2. utils/llm_prompts.py +20 -0
app.py CHANGED
@@ -8,6 +8,7 @@ import json_repair
8
  # Define language options for translation
9
  LANGUAGES = {
10
  "Arabic": "ar",
 
11
  "English": "en",
12
  "Spanish": "es",
13
  "French": "fr",
 
8
  # Define language options for translation
9
  LANGUAGES = {
10
  "Arabic": "ar",
11
+ "Arabic-Extended": "ar-x-extended",
12
  "English": "en",
13
  "Spanish": "es",
14
  "French": "fr",
utils/llm_prompts.py CHANGED
@@ -42,6 +42,26 @@ def get_translate_prompt(article_title, artice_summary, content_format, original
42
  "7. Maintain the same paragraph structure and information hierarchy\n"
43
  )
44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  # Add user preference prompt if provided
46
  if preference_prompt and preference_prompt.strip():
47
  translate_prompt += (
 
42
  "7. Maintain the same paragraph structure and information hierarchy\n"
43
  )
44
 
45
+ # Add special instructions for Arabic-Extended
46
+ if target_lang in ["ar-x-extended", "Arabic-Extended"]:
47
+ translate_prompt += (
48
+ "\n# Arabic-Extended Alphabet Guidelines\n"
49
+ "When translating to Arabic-Extended, use the extended Arabic alphabet ONLY for entity names "
50
+ "(people, places, brands, foreign terms) that contain sounds not in standard Arabic. Use these special characters:\n\n"
51
+ "- ڤ (V): Use for 'v' sound in foreign names instead of ف\n"
52
+ "- پ (P): Use for 'p' sound in foreign names instead of ب\n"
53
+ "- چ (Ch): Use for 'ch' sound in foreign names instead of تش\n"
54
+ "- گ (G): Use for 'g' sound in foreign names instead of ج/غ/ك\n"
55
+ "- ژ (Zh): Use for 'zh/j' sound in foreign names instead of ز/ج\n"
56
+ "- ڠ (ng): Use for 'ng' sound in foreign names instead of نج/نغ\n\n"
57
+ "Examples:\n"
58
+ "- 'Vancouver' → 'ڤانكوڤر' (using ڤ for V)\n"
59
+ "- 'Pakistan' → 'پاكستان' (using پ for P)\n"
60
+ "- 'Chicago' → 'چيكاغو' (using چ for Ch)\n"
61
+ "- 'Google' → 'گوگل' (using گ for G)\n\n"
62
+ "Important: Use these extended characters ONLY for entity names. Use standard Arabic for all other content.\n"
63
+ )
64
+
65
  # Add user preference prompt if provided
66
  if preference_prompt and preference_prompt.strip():
67
  translate_prompt += (