File size: 2,418 Bytes
58bde27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# Supported languages: two-letter language code -> English display name.
# Insertion order is kept as-is, since callers may iterate over this mapping.
LANGUAGES = dict(
    el="Greek",
    en="English",
    es="Spanish",
    fr="French",
    de="German",
    it="Italian",
)


# English language name -> "<3-letter language>_<4-letter script>" code.
# NOTE(review): the codes look like FLORES-200 / NLLB-style tags -- confirm
# against whichever model or tokenizer consumes them.
LANG_LEX_2_CODE = dict(
    (
        ("English", "eng_Latn"),
        ("French", "fra_Latn"),
        ("Spanish", "spa_Latn"),
        ("Italian", "ita_Latn"),
        ("German", "deu_Latn"),
        ("Greek", "ell_Grek"),
        ("Chinese", "zho_Hans"),
        ("Japanese", "jpn_Jpan"),
        ("Russian", "rus_Cyrl"),
        ("Arabic", "arb_Arab"),
        ("Portuguese", "por_Latn"),
        ("Dutch", "nld_Latn"),
        ("Turkish", "tur_Latn"),
        ("Hindi", "hin_Deva"),
        ("Korean", "kor_Hang"),
        ("Vietnamese", "vie_Latn"),
        ("Thai", "tha_Thai"),
        ("Polish", "pol_Latn"),
        ("Swedish", "swe_Latn"),
        ("Finnish", "fin_Latn"),
        ("Danish", "dan_Latn"),
        ("Norwegian", "nob_Latn"),
        ("Czech", "ces_Latn"),
        ("Hungarian", "hun_Latn"),
        ("Romanian", "ron_Latn"),
        ("Hebrew", "heb_Hebr"),
        ("Ukrainian", "ukr_Cyrl"),
        ("Bulgarian", "bul_Cyrl"),
        ("Indonesian", "ind_Latn"),
        ("Malay", "zsm_Latn"),
        ("Tamil", "tam_Taml"),
        ("Telugu", "tel_Telu"),
        ("Urdu", "urd_Arab"),
    )
)


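# NOTE: SUMMARIZATION_PREFIXES is currently disabled (kept commented out below).
# It maps two-letter language codes to a summarization prompt prefix.
# SUMMARIZATION_PREFIXES = {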
#     "en": "summarize: ",  # English
#     "fr": "résume: ",  # French
#     "es": "resume: ",  # Spanish
#     "it": "riassumi: ",  # Italian
#     "de": "fasse zusammen: ",  # German
#     "el": "σύνοψη: ",  # Greek
#     "zh": "总结: ",  # Chinese (Simplified)
#     "ja": "要約: ",  # Japanese
#     "ru": "резюме: ",  # Russian
#     "ar": "لخص: ",  # Arabic
#     "pt": "resuma: ",  # Portuguese
#     "nl": "vat samen: ",  # Dutch
#     "tr": "özetle: ",  # Turkish
#     "hi": "सारांश: ",  # Hindi
#     "ko": "요약: ",  # Korean
#     "vi": "tóm tắt: ",  # Vietnamese
#     "th": "สรุป: ",  # Thai
#     "pl": "podsumuj: ",  # Polish
#     "sv": "sammanfatta: ",  # Swedish
#     "fi": "tiivistä: ",  # Finnish
#     "da": "opsummer: ",  # Danish
#     "no": "oppsummer: ",  # Norwegian
#     "cs": "shrnutí: ",  # Czech
#     "hu": "összefoglalás: ",  # Hungarian
#     "ro": "rezumă: ",  # Romanian
#     "he": "לסכם: ",  # Hebrew
#     "uk": "резюме: ",  # Ukrainian
#     "bg": "резюме: ",  # Bulgarian
#     "id": "ringkasan: ",  # Indonesian
#     "ms": "ringkasan: ",  # Malay
#     "ta": "சுருக்கம்: ",  # Tamil
#     "te": "సారాంశం: ",  # Telugu
#     "ur": "خلاصہ: ",  # Urdu
#     # Add more languages as needed
# }