import requests


# Base endpoint URL for each supported translation backend, keyed by an
# upper-case provider name. Some entries contain `{version}` / `{endpoint}`
# placeholders that must be substituted before the URL is usable
# (presumably via str.format by the consuming translator class).
BASE_URLS = {
    "GOOGLE_TRANSLATE": "https://translate.google.com/m",
    "PONS": "https://en.pons.com/translate/",
    "YANDEX": "https://translate.yandex.net/api/{version}/tr.json/{endpoint}",
    "LINGUEE": "https://www.linguee.com/",
    # NOTE(review): this is the only plain-http endpoint in the table;
    # consider switching to https once confirmed the service supports it.
    "MYMEMORY": "http://api.mymemory.translated.net/get",
    "QCRI": "https://mt.qcri.org/api/v1/{endpoint}?",
    "DEEPL": "https://api.deepl.com/{version}/",
    "DEEPL_FREE": "https://api-free.deepl.com/v2/",
    "MICROSOFT_TRANSLATE": "https://api.cognitive.microsofttranslator.com/translate?api-version=3.0",
    "PAPAGO": "https://papago.naver.com/",
    "PAPAGO_API": "https://openapi.naver.com/v1/papago/n2mt",
}

# Language code -> lowercase English language name for the Google backend.
# Codes are kept exactly as written (note the mixed-case 'zh-CN' / 'zh-TW').
GOOGLE_CODES_TO_LANGUAGES = {
    'af': 'afrikaans',
    'sq': 'albanian',
    'am': 'amharic',
    'ar': 'arabic',
    'hy': 'armenian',
    'az': 'azerbaijani',
    'eu': 'basque',
    'be': 'belarusian',
    'bn': 'bengali',
    'bs': 'bosnian',
    'bg': 'bulgarian',
    'ca': 'catalan',
    'ceb': 'cebuano',
    'ny': 'chichewa',
    'zh-CN': 'chinese (simplified)',
    'zh-TW': 'chinese (traditional)',
    'co': 'corsican',
    'hr': 'croatian',
    'cs': 'czech',
    'da': 'danish',
    'nl': 'dutch',
    'en': 'english',
    'eo': 'esperanto',
    'et': 'estonian',
    'tl': 'filipino',
    'fi': 'finnish',
    'fr': 'french',
    'fy': 'frisian',
    'gl': 'galician',
    'ka': 'georgian',
    'de': 'german',
    'el': 'greek',
    'gu': 'gujarati',
    'ht': 'haitian creole',
    'ha': 'hausa',
    'haw': 'hawaiian',
    'iw': 'hebrew',
    'hi': 'hindi',
    'hmn': 'hmong',
    'hu': 'hungarian',
    'is': 'icelandic',
    'ig': 'igbo',
    'id': 'indonesian',
    'ga': 'irish',
    'it': 'italian',
    'ja': 'japanese',
    'jw': 'javanese',
    'kn': 'kannada',
    'kk': 'kazakh',
    'km': 'khmer',
    'rw': 'kinyarwanda',
    'ko': 'korean',
    'ku': 'kurdish',
    'ky': 'kyrgyz',
    'lo': 'lao',
    'la': 'latin',
    'lv': 'latvian',
    'lt': 'lithuanian',
    'lb': 'luxembourgish',
    'mk': 'macedonian',
    'mg': 'malagasy',
    'ms': 'malay',
    'ml': 'malayalam',
    'mt': 'maltese',
    'mi': 'maori',
    'mr': 'marathi',
    'mn': 'mongolian',
    'my': 'myanmar',
    'ne': 'nepali',
    'no': 'norwegian',
    'or': 'odia',
    'ps': 'pashto',
    'fa': 'persian',
    'pl': 'polish',
    'pt': 'portuguese',
    'pa': 'punjabi',
    'ro': 'romanian',
    'ru': 'russian',
    'sm': 'samoan',
    'gd': 'scots gaelic',
    'sr': 'serbian',
    'st': 'sesotho',
    'sn': 'shona',
    'sd': 'sindhi',
    'si': 'sinhala',
    'sk': 'slovak',
    'sl': 'slovenian',
    'so': 'somali',
    'es': 'spanish',
    'su': 'sundanese',
    'sw': 'swahili',
    'sv': 'swedish',
    'tg': 'tajik',
    'ta': 'tamil',
    'tt': 'tatar',
    'te': 'telugu',
    'th': 'thai',
    'tr': 'turkish',
    'tk': 'turkmen',
    'uk': 'ukrainian',
    'ur': 'urdu',
    'ug': 'uyghur',
    'uz': 'uzbek',
    'vi': 'vietnamese',
    'cy': 'welsh',
    'xh': 'xhosa',
    'yi': 'yiddish',
    'yo': 'yoruba',
    'zu': 'zulu',
}

# Reverse lookup (language name -> code), derived from the table above so the
# two mappings cannot drift apart. Every name is unique, so no entry is lost.
GOOGLE_LANGUAGES_TO_CODES = {
    name: code for code, name in GOOGLE_CODES_TO_LANGUAGES.items()
}


# Alternative names for some Google language names: each key is a primary
# language name and each value is a list of additional names for it.
GOOGLE_LANGUAGES_SECONDARY_NAMES = {
    'myanmar': ['burmese'],
    'odia': ['oriya'],
    'kurdish': ['kurmanji'],
}

# Standard English sample sentence (the name suggests it is the reference
# input used by tests).
test_text_standard = 'Hello world.'

# Sample strings keyed by lowercase language name.
# NOTE(review): presumably the expected translations of `test_text_standard`
# into each language -- confirm against the tests that consume this table.
# There is no "english" entry, and the "filipino" value has no trailing period.
TRANSLATED_RESULTS = {
    "afrikaans": "Hello Wêreld.",
    "albanian": "Përshendetje Botë.",
    "amharic": "ሰላም ልዑል.",
    "arabic": "مرحبا بالعالم.",
    "armenian": "Բարեւ աշխարհ.",
    "azerbaijani": "Salam dünya.",
    "basque": "Kaixo Mundua.",
    "belarusian": "Прывітанне Сусвет.",
    "bengali": "ওহে বিশ্ব.",
    "bosnian": "Zdravo svijete.",
    "bulgarian": "Здравей свят.",
    "catalan": "Hola món.",
    "cebuano": "Kumusta kalibutan.",
    "chichewa": "Moni Dziko Lapansi.",
    "chinese (simplified)": "你好,世界。",
    "chinese (traditional)": "你好,世界。",
    "corsican": "Bonghjornu mondu.",
    "croatian": "Pozdrav svijete.",
    "czech": "Ahoj světe.",
    "danish": "Hej Verden.",
    "dutch": "Hallo Wereld.",
    "esperanto": "Saluton mondo.",
    "estonian": "Tere, Maailm.",
    "filipino": "Kamusta mundo",
    "finnish": "Hei maailma.",
    "french": "Bonjour le monde.",
    "frisian": "Hallo wrâld.",
    "galician": "Ola mundo.",
    "georgian": "Გამარჯობა მსოფლიო.",
    "german": "Hallo Welt.",
    "greek": "Γειά σου Κόσμε.",
    "gujarati": "હેલો વર્લ્ડ.",
    "haitian creole": "Bonjou mond.",
    "hausa": "Sannu Duniya.",
    "hawaiian": "Aloha honua.",
    "hebrew": "שלום עולם.",
    "hindi": "नमस्ते दुनिया।",
    "hmong": "Nyob zoo ntiaj teb.",
    "hungarian": "Helló Világ.",
    "icelandic": "Halló heimur.",
    "igbo": "Ndewo Ụwa.",
    "indonesian": "Halo Dunia.",
    "irish": "Dia duit ar domhan.",
    "italian": "Ciao mondo.",
    "japanese": "こんにちは世界。",
    "javanese": "Halo jagad.",
    "kannada": "ಹಲೋ ವಿಶ್ವ.",
    "kazakh": "Сәлем Әлем.",
    "khmer": "សួស្តីពិភពលោក។",
    "kinyarwanda": "Mwaramutse isi.",
    "korean": "안녕하세요 세계입니다.",
    "kurdish": "Hello cîhanê.",
    "kyrgyz": "Салам дүйнө.",
    "lao": "ສະບາຍດີຊາວໂລກ.",
    "latin": "Salve mundi.",
    "latvian": "Sveika pasaule.",
    "lithuanian": "Labas pasauli.",
    "luxembourgish": "Moien Welt.",
    "macedonian": "Здраво свету.",
    "malagasy": "Hello World.",
    "malay": "Hai dunia.",
    "malayalam": "ഹലോ വേൾഡ്.",
    "maltese": "Hello dinja.",
    "maori": "Kia ora te ao.",
    "marathi": "नमस्कार जग.",
    "mongolian": "Сайн уу ертөнц.",
    "myanmar": "မင်္ဂလာပါကမ္ဘာလောက။",
    "nepali": "नमस्कार संसार।",
    "norwegian": "Hei Verden.",
    "odia": "ନମସ୍କାର ବିଶ୍ୱବାସି।",
    "pashto": "سلام نړی.",
    "persian": "سلام دنیا.",
    "polish": "Witaj świecie.",
    "portuguese": "Olá Mundo.",
    "punjabi": "ਸਤਿ ਸ੍ਰੀ ਅਕਾਲ ਦੁਨਿਆ.",
    "romanian": "Salut Lume.",
    "russian": "Привет, мир.",
    "samoan": "Talofa lalolagi.",
    "scots gaelic": "Hàlo a Shaoghail.",
    "serbian": "Здраво Свете.",
    "sesotho": "Lefatše Lumela.",
    "shona": "Mhoro nyika.",
    "sindhi": "هيلو دنيا.",
    "sinhala": "හෙලෝ වර්ල්ඩ්.",
    "slovak": "Ahoj svet.",
    "slovenian": "Pozdravljen, svet.",
    "somali": "Salaamu calaykum.",
    "spanish": "Hola Mundo.",
    "sundanese": "Halo Dunya.",
    "swahili": "Salamu, Dunia.",
    "swedish": "Hej världen.",
    "tajik": "Салом Ҷаҳон.",
    "tamil": "வணக்கம் உலகம்.",
    "tatar": "Сәлам, Дөнья.",
    "telugu": "హలో వరల్డ్.",
    "thai": "สวัสดีชาวโลก.",
    "turkish": "Selam Dünya.",
    "turkmen": "Salam dünýä.",
    "ukrainian": "Привіт Світ.",
    "urdu": "سلام دنیا۔",
    "uyghur": "ياخشىمۇسىز دۇنيا.",
    "uzbek": "Salom Dunyo.",
    "vietnamese": "Chào thế giới.",
    "welsh": "Helo Byd.",
    "xhosa": "Molo Lizwe.",
    "yiddish": "העלא וועלט.",
    "yoruba": "Mo ki O Ile Aiye.",
    "zulu": "Sawubona Mhlaba.",
}


# Language code -> lowercase language name for the PONS backend.
# Note the lowercase 'zh-cn' key, unlike the mixed-case 'zh-CN' used in the
# Google table.
PONS_CODES_TO_LANGUAGES = {
    'ar': 'arabic',
    'bg': 'bulgarian',
    'zh-cn': 'chinese',
    'cs': 'czech',
    'da': 'danish',
    'nl': 'dutch',
    'en': 'english',
    'fr': 'french',
    'de': 'german',
    'el': 'greek',
    'hu': 'hungarian',
    'it': 'italian',
    'la': 'latin',
    'no': 'norwegian',
    'pl': 'polish',
    'pt': 'portuguese',
    'ru': 'russian',
    'sl': 'slovenian',
    'es': 'spanish',
    'sv': 'swedish',
    'tr': 'turkish',
    'elv': 'elvish',
}

# Reverse lookup (language name -> code), derived from the table above.
PONS_LANGUAGES_TO_CODES = {
    name: code for code, name in PONS_CODES_TO_LANGUAGES.items()
}

# Lowercase language name -> language code for the Linguee backend.
# Some names differ from the other tables in this module (e.g. "slovakian"
# and "laotian" here versus "slovak" and "lao" in the Google table).
LINGUEE_LANGUAGES_TO_CODES = {
    "maltese": "mt",
    "english": "en",
    "german": "de",
    "bulgarian": "bg",
    "polish": "pl",
    "portuguese": "pt",
    "hungarian": "hu",
    "romanian": "ro",
    "russian": "ru",
    "dutch": "nl",
    "slovakian": "sk",
    "greek": "el",
    "slovenian": "sl",
    "danish": "da",
    "italian": "it",
    "spanish": "es",
    "finnish": "fi",
    "chinese": "zh",
    "french": "fr",
    "czech": "cs",
    "laotian": "lo",
    "swedish": "sv",
    "latvian": "lv",
    "estonian": "et",
    "japanese": "ja",
}

# Reverse lookup (code -> language name), derived from the table above.
LINGUEE_CODE_TO_LANGUAGE = {
    code: name for name, code in LINGUEE_LANGUAGES_TO_CODES.items()
}


# The Microsoft language table is fetched from the Translator "languages"
# endpoint. NOTE: this performs a network request when the module is imported,
# so importing this module requires connectivity to the endpoint.
microsoft_languages_api_url = "https://api.cognitive.microsofttranslator.com/languages?api-version=3.0&scope=translation"

# A timeout keeps an unreachable endpoint from hanging the import forever, and
# raise_for_status() surfaces HTTP errors directly instead of failing later
# with a less helpful JSON/KeyError.
microsoft_languages_response = requests.get(microsoft_languages_api_url, timeout=10)
microsoft_languages_response.raise_for_status()

# The response JSON is indexed by 'translation'; each entry under it is keyed
# by language code and provides at least a 'name' field (as used below).
translation_dict = microsoft_languages_response.json()['translation']

# NOTE: despite its name, this maps lowercase language *name* -> language
# *code*. The name is kept unchanged for backward compatibility.
MICROSOFT_CODES_TO_LANGUAGES = {
    details['name'].lower(): code for code, details in translation_dict.items()
}

# Lowercase language name -> language code for the DeepL backend.
DEEPL_LANGUAGE_TO_CODE = {
    "bulgarian": "bg",
    "czech": "cs",
    "danish": "da",
    "german": "de",
    "greek": "el",
    "english": "en",
    "spanish": "es",
    "estonian": "et",
    "finnish": "fi",
    "french": "fr",
    "hungarian": "hu",
    "italian": "it",
    "japanese": "ja",
    "lithuanian": "lt",
    "latvian": "lv",
    "dutch": "nl",
    "polish": "pl",
    "portuguese": "pt",
    "romanian": "ro",
    "russian": "ru",
    "slovak": "sk",
    "slovenian": "sl",
    "swedish": "sv",
    "chinese": "zh",
}

# Reverse lookup (code -> language name), derived from the table above.
DEEPL_CODE_TO_LANGUAGE = {
    code: name for name, code in DEEPL_LANGUAGE_TO_CODE.items()
}

# Language code -> capitalized language name for the Papago backend.
PAPAGO_CODE_TO_LANGUAGE = {
    'ko': 'Korean',
    'en': 'English',
    'ja': 'Japanese',
    'zh-CN': 'Chinese',
    'zh-TW': 'Chinese traditional',
    'es': 'Spanish',
    'fr': 'French',
    'vi': 'Vietnamese',
    'th': 'Thai',
    # NOTE(review): 'Indonesia' (not 'Indonesian') is kept as-is because it is
    # a runtime lookup key; confirm with callers before renaming.
    'id': 'Indonesia',
}

# Reverse lookup (language name -> code).
# The previous comprehension unpacked the items as `for v, k in ...` while
# still emitting `v: k`, which reproduced the code -> name mapping unchanged
# instead of inverting it. Unpacking as (code, name) and emitting name -> code
# gives the mapping the constant's name promises.
PAPAGO_LANGUAGE_TO_CODE = {
    name: code for code, name in PAPAGO_CODE_TO_LANGUAGE.items()
}

# Language code -> capitalized language name for the QCRI backend.
QCRI_CODE_TO_LANGUAGE = {
    'ar': 'Arabic',
    'en': 'English',
    'es': 'Spanish',
}

# Reverse lookup (language name -> code), derived from the table above.
QCRI_LANGUAGE_TO_CODE = {
    name: code for code, name in QCRI_CODE_TO_LANGUAGE.items()
}