Merge pull request #95 from kuspia/master
Browse files- deep_translator/constants.py +18 -8
- deep_translator/google_trans.py +28 -7
- tests/test_data.py +112 -0
- tests/test_google_trans.py +160 -9
deep_translator/constants.py
CHANGED
@@ -29,9 +29,8 @@ GOOGLE_CODES_TO_LANGUAGES = {
|
|
29 |
'ca': 'catalan',
|
30 |
'ceb': 'cebuano',
|
31 |
'ny': 'chichewa',
|
32 |
-
'zh': 'chinese',
|
33 |
-
'zh-
|
34 |
-
'zh-tw': 'chinese (traditional)',
|
35 |
'co': 'corsican',
|
36 |
'hr': 'croatian',
|
37 |
'cs': 'czech',
|
@@ -66,8 +65,9 @@ GOOGLE_CODES_TO_LANGUAGES = {
|
|
66 |
'kn': 'kannada',
|
67 |
'kk': 'kazakh',
|
68 |
'km': 'khmer',
|
|
|
69 |
'ko': 'korean',
|
70 |
-
'ku': 'kurdish
|
71 |
'ky': 'kyrgyz',
|
72 |
'lo': 'lao',
|
73 |
'la': 'latin',
|
@@ -82,9 +82,10 @@ GOOGLE_CODES_TO_LANGUAGES = {
|
|
82 |
'mi': 'maori',
|
83 |
'mr': 'marathi',
|
84 |
'mn': 'mongolian',
|
85 |
-
'my': 'myanmar
|
86 |
'ne': 'nepali',
|
87 |
'no': 'norwegian',
|
|
|
88 |
'ps': 'pashto',
|
89 |
'fa': 'persian',
|
90 |
'pl': 'polish',
|
@@ -108,11 +109,14 @@ GOOGLE_CODES_TO_LANGUAGES = {
|
|
108 |
'sv': 'swedish',
|
109 |
'tg': 'tajik',
|
110 |
'ta': 'tamil',
|
|
|
111 |
'te': 'telugu',
|
112 |
'th': 'thai',
|
113 |
'tr': 'turkish',
|
|
|
114 |
'uk': 'ukrainian',
|
115 |
'ur': 'urdu',
|
|
|
116 |
'uz': 'uzbek',
|
117 |
'vi': 'vietnamese',
|
118 |
'cy': 'welsh',
|
@@ -120,12 +124,18 @@ GOOGLE_CODES_TO_LANGUAGES = {
|
|
120 |
'yi': 'yiddish',
|
121 |
'yo': 'yoruba',
|
122 |
'zu': 'zulu',
|
123 |
-
'fil': 'Filipino',
|
124 |
-
'he': 'Hebrew'
|
125 |
}
|
126 |
|
127 |
GOOGLE_LANGUAGES_TO_CODES = {v: k for k, v in GOOGLE_CODES_TO_LANGUAGES.items()}
|
128 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
129 |
PONS_CODES_TO_LANGUAGES = {
|
130 |
'ar': 'arabic',
|
131 |
'bg': 'bulgarian',
|
@@ -247,4 +257,4 @@ QCRI_CODE_TO_LANGUAGE = {
|
|
247 |
|
248 |
QCRI_LANGUAGE_TO_CODE = {
|
249 |
v: k for k, v in QCRI_CODE_TO_LANGUAGE.items()
|
250 |
-
}
|
|
|
29 |
'ca': 'catalan',
|
30 |
'ceb': 'cebuano',
|
31 |
'ny': 'chichewa',
|
32 |
+
'zh-CN': 'chinese (simplified)',
|
33 |
+
'zh-TW': 'chinese (traditional)',
|
|
|
34 |
'co': 'corsican',
|
35 |
'hr': 'croatian',
|
36 |
'cs': 'czech',
|
|
|
65 |
'kn': 'kannada',
|
66 |
'kk': 'kazakh',
|
67 |
'km': 'khmer',
|
68 |
+
'rw': 'kinyarwanda',
|
69 |
'ko': 'korean',
|
70 |
+
'ku': 'kurdish',
|
71 |
'ky': 'kyrgyz',
|
72 |
'lo': 'lao',
|
73 |
'la': 'latin',
|
|
|
82 |
'mi': 'maori',
|
83 |
'mr': 'marathi',
|
84 |
'mn': 'mongolian',
|
85 |
+
'my': 'myanmar',
|
86 |
'ne': 'nepali',
|
87 |
'no': 'norwegian',
|
88 |
+
'or': 'odia',
|
89 |
'ps': 'pashto',
|
90 |
'fa': 'persian',
|
91 |
'pl': 'polish',
|
|
|
109 |
'sv': 'swedish',
|
110 |
'tg': 'tajik',
|
111 |
'ta': 'tamil',
|
112 |
+
'tt': 'tatar',
|
113 |
'te': 'telugu',
|
114 |
'th': 'thai',
|
115 |
'tr': 'turkish',
|
116 |
+
'tk': 'turkmen',
|
117 |
'uk': 'ukrainian',
|
118 |
'ur': 'urdu',
|
119 |
+
'ug': 'uyghur',
|
120 |
'uz': 'uzbek',
|
121 |
'vi': 'vietnamese',
|
122 |
'cy': 'welsh',
|
|
|
124 |
'yi': 'yiddish',
|
125 |
'yo': 'yoruba',
|
126 |
'zu': 'zulu',
|
|
|
|
|
127 |
}
|
128 |
|
129 |
GOOGLE_LANGUAGES_TO_CODES = {v: k for k, v in GOOGLE_CODES_TO_LANGUAGES.items()}
|
130 |
|
131 |
+
# This dictionary maps the primary name of a language to a list of its secondary names (if any)
|
132 |
+
GOOGLE_LANGUAGES_SECONDARY_NAMES = {
|
133 |
+
'myanmar': ['burmese'],
|
134 |
+
'odia': ['oriya'],
|
135 |
+
'kurdish': ['kurmanji']
|
136 |
+
}
|
137 |
+
|
138 |
+
|
139 |
PONS_CODES_TO_LANGUAGES = {
|
140 |
'ar': 'arabic',
|
141 |
'bg': 'bulgarian',
|
|
|
257 |
|
258 |
QCRI_LANGUAGE_TO_CODE = {
|
259 |
v: k for k, v in QCRI_CODE_TO_LANGUAGE.items()
|
260 |
+
}
|
deep_translator/google_trans.py
CHANGED
@@ -2,7 +2,7 @@
|
|
2 |
google translator API
|
3 |
"""
|
4 |
|
5 |
-
from .constants import BASE_URLS, GOOGLE_LANGUAGES_TO_CODES
|
6 |
from .exceptions import TooManyRequests, LanguageNotSupportedException, TranslationNotFound, NotValidPayload, RequestError
|
7 |
from .parent import BaseTranslator
|
8 |
from bs4 import BeautifulSoup
|
@@ -27,8 +27,19 @@ class GoogleTranslator(BaseTranslator):
|
|
27 |
self.__base_url = BASE_URLS.get("GOOGLE_TRANSLATE")
|
28 |
self.proxies = proxies
|
29 |
|
30 |
-
|
31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
|
33 |
super(GoogleTranslator, self).__init__(base_url=self.__base_url,
|
34 |
source=self._source,
|
@@ -51,6 +62,17 @@ class GoogleTranslator(BaseTranslator):
|
|
51 |
"""
|
52 |
return GoogleTranslator.supported_languages if not as_dict else GoogleTranslator._languages
|
53 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
def _map_language_to_code(self, *languages):
|
55 |
"""
|
56 |
map language to its corresponding code (abbreviation) if the language was passed by its full name by the user
|
@@ -63,7 +85,7 @@ class GoogleTranslator(BaseTranslator):
|
|
63 |
elif language in self._languages.keys():
|
64 |
yield self._languages[language]
|
65 |
else:
|
66 |
-
|
67 |
|
68 |
def is_language_supported(self, *languages):
|
69 |
"""
|
@@ -74,7 +96,8 @@ class GoogleTranslator(BaseTranslator):
|
|
74 |
for lang in languages:
|
75 |
if lang != 'auto' and lang not in self._languages.keys():
|
76 |
if lang != 'auto' and lang not in self._languages.values():
|
77 |
-
|
|
|
78 |
return True
|
79 |
|
80 |
def translate(self, text, **kwargs):
|
@@ -181,8 +204,6 @@ class GoogleTranslator(BaseTranslator):
|
|
181 |
|
182 |
return arr
|
183 |
|
184 |
-
|
185 |
-
|
186 |
if __name__ == '__main__':
|
187 |
translator = GoogleTranslator(source='ru', target='uk')
|
188 |
t = translator.translate("Я разработчик") # => "I am a developer"
|
|
|
2 |
google translator API
|
3 |
"""
|
4 |
|
5 |
+
from .constants import BASE_URLS, GOOGLE_LANGUAGES_TO_CODES, GOOGLE_LANGUAGES_SECONDARY_NAMES
|
6 |
from .exceptions import TooManyRequests, LanguageNotSupportedException, TranslationNotFound, NotValidPayload, RequestError
|
7 |
from .parent import BaseTranslator
|
8 |
from bs4 import BeautifulSoup
|
|
|
27 |
self.__base_url = BASE_URLS.get("GOOGLE_TRANSLATE")
|
28 |
self.proxies = proxies
|
29 |
|
30 |
+
# code snippet that converts the language name to lower-case but skips the lower-case conversion for abbreviations
|
31 |
+
# since abbreviations like zh-CN would result in an error if converted to lower-case
|
32 |
+
#######################################
|
33 |
+
source_lower = source
|
34 |
+
target_lower = target
|
35 |
+
if not source in self._languages.values():
|
36 |
+
source_lower=source.lower()
|
37 |
+
if not target in self._languages.values():
|
38 |
+
target_lower=target.lower()
|
39 |
+
#######################################
|
40 |
+
|
41 |
+
if self.is_language_supported(source_lower, target_lower):
|
42 |
+
self._source, self._target = self._map_language_to_code(source_lower, target_lower)
|
43 |
|
44 |
super(GoogleTranslator, self).__init__(base_url=self.__base_url,
|
45 |
source=self._source,
|
|
|
62 |
"""
|
63 |
return GoogleTranslator.supported_languages if not as_dict else GoogleTranslator._languages
|
64 |
|
65 |
+
def is_secondary(self, lang):
    """
    Resolve a secondary (alternative) language name to its primary name.

    @param lang: language name to look up
    @return: the primary language name whose secondary-name list contains
        lang, otherwise False
    """
    candidates = (
        primary
        for primary, aliases in GOOGLE_LANGUAGES_SECONDARY_NAMES.items()
        if lang in aliases
    )
    # The first hit in dictionary order wins; False means "not a secondary name".
    return next(candidates, False)
|
75 |
+
|
76 |
def _map_language_to_code(self, *languages):
|
77 |
"""
|
78 |
map language to its corresponding code (abbreviation) if the language was passed by its full name by the user
|
|
|
85 |
elif language in self._languages.keys():
|
86 |
yield self._languages[language]
|
87 |
else:
|
88 |
+
yield self._languages[self.is_secondary(language)]
|
89 |
|
90 |
def is_language_supported(self, *languages):
|
91 |
"""
|
|
|
96 |
for lang in languages:
|
97 |
if lang != 'auto' and lang not in self._languages.keys():
|
98 |
if lang != 'auto' and lang not in self._languages.values():
|
99 |
+
if self.is_secondary(lang) == False:
|
100 |
+
raise LanguageNotSupportedException(lang)
|
101 |
return True
|
102 |
|
103 |
def translate(self, text, **kwargs):
|
|
|
204 |
|
205 |
return arr
|
206 |
|
|
|
|
|
207 |
if __name__ == '__main__':
|
208 |
translator = GoogleTranslator(source='ru', target='uk')
|
209 |
t = translator.translate("Я разработчик") # => "I am a developer"
|
tests/test_data.py
ADDED
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
test_text_standard = 'Hello world.'
|
2 |
+
|
3 |
+
TRANSLATED_RESULTS = {
|
4 |
+
"afrikaans": "Hello Wêreld.",
|
5 |
+
"albanian": "Përshendetje Botë.",
|
6 |
+
"amharic": "ሰላም ልዑል.",
|
7 |
+
"arabic": "مرحبا بالعالم.",
|
8 |
+
"armenian": "Բարեւ աշխարհ.",
|
9 |
+
"azerbaijani": "Salam dünya.",
|
10 |
+
"basque": "Kaixo Mundua.",
|
11 |
+
"belarusian": "Прывітанне Сусвет.",
|
12 |
+
"bengali": "ওহে বিশ্ব.",
|
13 |
+
"bosnian": "Zdravo svijete.",
|
14 |
+
"bulgarian": "Здравей свят.",
|
15 |
+
"catalan": "Hola món.",
|
16 |
+
"cebuano": "Kumusta kalibutan.",
|
17 |
+
"chichewa": "Moni Dziko Lapansi.",
|
18 |
+
"chinese (simplified)": "你好,世界。",
|
19 |
+
"chinese (traditional)": "你好,世界。",
|
20 |
+
"corsican": "Bonghjornu mondu.",
|
21 |
+
"croatian": "Pozdrav svijete.",
|
22 |
+
"czech": "Ahoj světe.",
|
23 |
+
"danish": "Hej Verden.",
|
24 |
+
"dutch": "Hallo Wereld.",
|
25 |
+
"esperanto": "Saluton mondo.",
|
26 |
+
"estonian": "Tere, Maailm.",
|
27 |
+
"filipino": "Kamusta mundo",
|
28 |
+
"finnish": "Hei maailma.",
|
29 |
+
"french": "Bonjour le monde.",
|
30 |
+
"frisian": "Hallo wrâld.",
|
31 |
+
"galician": "Ola mundo.",
|
32 |
+
"georgian": "Გამარჯობა მსოფლიო.",
|
33 |
+
"german": "Hallo Welt.",
|
34 |
+
"greek": "Γειά σου Κόσμε.",
|
35 |
+
"gujarati": "હેલો વર્લ્ડ.",
|
36 |
+
"haitian creole": "Bonjou mond.",
|
37 |
+
"hausa": "Sannu Duniya.",
|
38 |
+
"hawaiian": "Aloha honua.",
|
39 |
+
"hebrew": "שלום עולם.",
|
40 |
+
"hindi": "नमस्ते दुनिया।",
|
41 |
+
"hmong": "Nyob zoo ntiaj teb.",
|
42 |
+
"hungarian": "Helló Világ.",
|
43 |
+
"icelandic": "Halló heimur.",
|
44 |
+
"igbo": "Ndewo Ụwa.",
|
45 |
+
"indonesian": "Halo Dunia.",
|
46 |
+
"irish": "Dia duit ar domhan.",
|
47 |
+
"italian": "Ciao mondo.",
|
48 |
+
"japanese": "こんにちは世界。",
|
49 |
+
"javanese": "Halo jagad.",
|
50 |
+
"kannada": "ಹಲೋ ವಿಶ್ವ.",
|
51 |
+
"kazakh": "Сәлем Әлем.",
|
52 |
+
"khmer": "សួស្តីពិភពលោក។",
|
53 |
+
"kinyarwanda": "Mwaramutse isi.",
|
54 |
+
"korean": "안녕하세요 세계입니다.",
|
55 |
+
"kurdish": "Hello cîhanê.",
|
56 |
+
"kyrgyz": "Салам дүйнө.",
|
57 |
+
"lao": "ສະບາຍດີຊາວໂລກ.",
|
58 |
+
"latin": "Salve mundi.",
|
59 |
+
"latvian": "Sveika pasaule.",
|
60 |
+
"lithuanian": "Labas pasauli.",
|
61 |
+
"luxembourgish": "Moien Welt.",
|
62 |
+
"macedonian": "Здраво свету.",
|
63 |
+
"malagasy": "Hello World.",
|
64 |
+
"malay": "Hai dunia.",
|
65 |
+
"malayalam": "ഹലോ വേൾഡ്.",
|
66 |
+
"maltese": "Hello dinja.",
|
67 |
+
"maori": "Kia ora te ao.",
|
68 |
+
"marathi": "नमस्कार जग.",
|
69 |
+
"mongolian": "Сайн уу ертөнц.",
|
70 |
+
"myanmar": "မင်္ဂလာပါကမ္ဘာလောက။",
|
71 |
+
"nepali": "नमस्कार संसार।",
|
72 |
+
"norwegian": "Hei Verden.",
|
73 |
+
"odia": "ନମସ୍କାର ବିଶ୍ୱବାସି।",
|
74 |
+
"pashto": "سلام نړی.",
|
75 |
+
"persian": "سلام دنیا.",
|
76 |
+
"polish": "Witaj świecie.",
|
77 |
+
"portuguese": "Olá Mundo.",
|
78 |
+
"punjabi": "ਸਤਿ ਸ੍ਰੀ ਅਕਾਲ ਦੁਨਿਆ.",
|
79 |
+
"romanian": "Salut Lume.",
|
80 |
+
"russian": "Привет, мир.",
|
81 |
+
"samoan": "Talofa lalolagi.",
|
82 |
+
"scots gaelic": "Hàlo a Shaoghail.",
|
83 |
+
"serbian": "Здраво Свете.",
|
84 |
+
"sesotho": "Lefatše Lumela.",
|
85 |
+
"shona": "Mhoro nyika.",
|
86 |
+
"sindhi": "هيلو دنيا.",
|
87 |
+
"sinhala": "හෙලෝ වර්ල්ඩ්.",
|
88 |
+
"slovak": "Ahoj svet.",
|
89 |
+
"slovenian": "Pozdravljen, svet.",
|
90 |
+
"somali": "Salaamu calaykum.",
|
91 |
+
"spanish": "Hola Mundo.",
|
92 |
+
"sundanese": "Halo Dunya.",
|
93 |
+
"swahili": "Salamu, Dunia.",
|
94 |
+
"swedish": "Hej världen.",
|
95 |
+
"tajik": "Салом Ҷаҳон.",
|
96 |
+
"tamil": "வணக்கம் உலகம்.",
|
97 |
+
"tatar": "Сәлам, Дөнья.",
|
98 |
+
"telugu": "హలో వరల్డ్.",
|
99 |
+
"thai": "สวัสดีชาวโลก.",
|
100 |
+
"turkish": "Selam Dünya.",
|
101 |
+
"turkmen": "Salam dünýä.",
|
102 |
+
"ukrainian": "Привіт Світ.",
|
103 |
+
"urdu": "سلام دنیا۔",
|
104 |
+
"uyghur": "ياخشىمۇسىز دۇنيا.",
|
105 |
+
"uzbek": "Salom Dunyo.",
|
106 |
+
"vietnamese": "Chào thế giới.",
|
107 |
+
"welsh": "Helo Byd.",
|
108 |
+
"xhosa": "Molo Lizwe.",
|
109 |
+
"yiddish": "העלא וועלט.",
|
110 |
+
"yoruba": "Mo ki O Ile Aiye.",
|
111 |
+
"zulu": "Sawubona Mhlaba."
|
112 |
+
}
|
tests/test_google_trans.py
CHANGED
@@ -4,23 +4,182 @@
|
|
4 |
|
5 |
import pytest
|
6 |
from deep_translator import exceptions, GoogleTranslator
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
|
9 |
@pytest.fixture
|
10 |
def google_translator():
|
11 |
"""Sample pytest fixture.
|
12 |
-
|
13 |
See more at: http://doc.pytest.org/en/latest/fixture.html
|
14 |
"""
|
15 |
return GoogleTranslator(target='en')
|
16 |
|
17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
def test_content(google_translator):
|
19 |
"""Sample pytest test function with the pytest fixture as an argument."""
|
20 |
# from bs4 import BeautifulSoup
|
21 |
# assert 'GitHub' in BeautifulSoup(response.content).title.string
|
22 |
assert google_translator.translate(text='좋은') == "good"
|
23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
|
25 |
def test_inputs():
|
26 |
with pytest.raises(exceptions.LanguageNotSupportedException):
|
@@ -29,13 +188,6 @@ def test_inputs():
|
|
29 |
with pytest.raises(exceptions.LanguageNotSupportedException):
|
30 |
GoogleTranslator(source="auto", target="nothing")
|
31 |
|
32 |
-
# test abbreviations and languages
|
33 |
-
g1 = GoogleTranslator("en", "fr")
|
34 |
-
g2 = GoogleTranslator("english", "french")
|
35 |
-
assert g1._source == g2._source
|
36 |
-
assert g1._target == g2._target
|
37 |
-
|
38 |
-
|
39 |
def test_payload(google_translator):
|
40 |
|
41 |
with pytest.raises(exceptions.NotValidPayload):
|
@@ -59,6 +211,5 @@ def test_payload(google_translator):
|
|
59 |
#for _ in range(1):
|
60 |
#assert google_translator.translate(text='좋은') == "good"
|
61 |
|
62 |
-
|
63 |
def test_one_character_words():
|
64 |
assert GoogleTranslator(source='es', target='en').translate('o') == 'or'
|
|
|
4 |
|
5 |
import pytest
|
6 |
from deep_translator import exceptions, GoogleTranslator
|
7 |
+
from deep_translator.constants import GOOGLE_CODES_TO_LANGUAGES
|
8 |
+
#from test_data import test_text_standard, TRANSLATED_RESULTS
|
9 |
+
import random
|
10 |
+
|
11 |
+
test_text_standard = 'Hello world.'
|
12 |
+
|
13 |
+
TRANSLATED_RESULTS = {
|
14 |
+
"afrikaans": "Hello Wêreld.",
|
15 |
+
"albanian": "Përshendetje Botë.",
|
16 |
+
"amharic": "ሰላም ልዑል.",
|
17 |
+
"arabic": "مرحبا بالعالم.",
|
18 |
+
"armenian": "Բարեւ աշխարհ.",
|
19 |
+
"azerbaijani": "Salam dünya.",
|
20 |
+
"basque": "Kaixo Mundua.",
|
21 |
+
"belarusian": "Прывітанне Сусвет.",
|
22 |
+
"bengali": "ওহে বিশ্ব.",
|
23 |
+
"bosnian": "Zdravo svijete.",
|
24 |
+
"bulgarian": "Здравей свят.",
|
25 |
+
"catalan": "Hola món.",
|
26 |
+
"cebuano": "Kumusta kalibutan.",
|
27 |
+
"chichewa": "Moni Dziko Lapansi.",
|
28 |
+
"chinese (simplified)": "你好,世界。",
|
29 |
+
"chinese (traditional)": "你好,世界。",
|
30 |
+
"corsican": "Bonghjornu mondu.",
|
31 |
+
"croatian": "Pozdrav svijete.",
|
32 |
+
"czech": "Ahoj světe.",
|
33 |
+
"danish": "Hej Verden.",
|
34 |
+
"dutch": "Hallo Wereld.",
|
35 |
+
"esperanto": "Saluton mondo.",
|
36 |
+
"estonian": "Tere, Maailm.",
|
37 |
+
"filipino": "Kamusta mundo",
|
38 |
+
"finnish": "Hei maailma.",
|
39 |
+
"french": "Bonjour le monde.",
|
40 |
+
"frisian": "Hallo wrâld.",
|
41 |
+
"galician": "Ola mundo.",
|
42 |
+
"georgian": "Გამარჯობა მსოფლიო.",
|
43 |
+
"german": "Hallo Welt.",
|
44 |
+
"greek": "Γειά σου Κόσμε.",
|
45 |
+
"gujarati": "હેલો વર્લ્ડ.",
|
46 |
+
"haitian creole": "Bonjou mond.",
|
47 |
+
"hausa": "Sannu Duniya.",
|
48 |
+
"hawaiian": "Aloha honua.",
|
49 |
+
"hebrew": "שלום עולם.",
|
50 |
+
"hindi": "नमस्ते दुनिया।",
|
51 |
+
"hmong": "Nyob zoo ntiaj teb.",
|
52 |
+
"hungarian": "Helló Világ.",
|
53 |
+
"icelandic": "Halló heimur.",
|
54 |
+
"igbo": "Ndewo Ụwa.",
|
55 |
+
"indonesian": "Halo Dunia.",
|
56 |
+
"irish": "Dia duit ar domhan.",
|
57 |
+
"italian": "Ciao mondo.",
|
58 |
+
"japanese": "こんにちは世界。",
|
59 |
+
"javanese": "Halo jagad.",
|
60 |
+
"kannada": "ಹಲೋ ವಿಶ್ವ.",
|
61 |
+
"kazakh": "Сәлем Әлем.",
|
62 |
+
"khmer": "សួស្តីពិភពលោក។",
|
63 |
+
"kinyarwanda": "Mwaramutse isi.",
|
64 |
+
"korean": "안녕하세요 세계입니다.",
|
65 |
+
"kurdish": "Hello cîhanê.",
|
66 |
+
"kyrgyz": "Салам дүйнө.",
|
67 |
+
"lao": "ສະບາຍດີຊາວໂລກ.",
|
68 |
+
"latin": "Salve mundi.",
|
69 |
+
"latvian": "Sveika pasaule.",
|
70 |
+
"lithuanian": "Labas pasauli.",
|
71 |
+
"luxembourgish": "Moien Welt.",
|
72 |
+
"macedonian": "Здраво свету.",
|
73 |
+
"malagasy": "Hello World.",
|
74 |
+
"malay": "Hai dunia.",
|
75 |
+
"malayalam": "ഹലോ വേൾഡ്.",
|
76 |
+
"maltese": "Hello dinja.",
|
77 |
+
"maori": "Kia ora te ao.",
|
78 |
+
"marathi": "नमस्कार जग.",
|
79 |
+
"mongolian": "Сайн уу ертөнц.",
|
80 |
+
"myanmar": "မင်္ဂလာပါကမ္ဘာလောက။",
|
81 |
+
"nepali": "नमस्कार संसार।",
|
82 |
+
"norwegian": "Hei Verden.",
|
83 |
+
"odia": "ନମସ୍କାର ବିଶ୍ୱବାସି।",
|
84 |
+
"pashto": "سلام نړی.",
|
85 |
+
"persian": "سلام دنیا.",
|
86 |
+
"polish": "Witaj świecie.",
|
87 |
+
"portuguese": "Olá Mundo.",
|
88 |
+
"punjabi": "ਸਤਿ ਸ੍ਰੀ ਅਕਾਲ ਦੁਨਿਆ.",
|
89 |
+
"romanian": "Salut Lume.",
|
90 |
+
"russian": "Привет, мир.",
|
91 |
+
"samoan": "Talofa lalolagi.",
|
92 |
+
"scots gaelic": "Hàlo a Shaoghail.",
|
93 |
+
"serbian": "Здраво Свете.",
|
94 |
+
"sesotho": "Lefatše Lumela.",
|
95 |
+
"shona": "Mhoro nyika.",
|
96 |
+
"sindhi": "هيلو دنيا.",
|
97 |
+
"sinhala": "හෙලෝ වර්ල්ඩ්.",
|
98 |
+
"slovak": "Ahoj svet.",
|
99 |
+
"slovenian": "Pozdravljen, svet.",
|
100 |
+
"somali": "Salaamu calaykum.",
|
101 |
+
"spanish": "Hola Mundo.",
|
102 |
+
"sundanese": "Halo Dunya.",
|
103 |
+
"swahili": "Salamu, Dunia.",
|
104 |
+
"swedish": "Hej världen.",
|
105 |
+
"tajik": "Салом Ҷаҳон.",
|
106 |
+
"tamil": "வணக்கம் உலகம்.",
|
107 |
+
"tatar": "Сәлам, Дөнья.",
|
108 |
+
"telugu": "హలో వరల్డ్.",
|
109 |
+
"thai": "สวัสดีชาวโลก.",
|
110 |
+
"turkish": "Selam Dünya.",
|
111 |
+
"turkmen": "Salam dünýä.",
|
112 |
+
"ukrainian": "Привіт Світ.",
|
113 |
+
"urdu": "سلام دنیا۔",
|
114 |
+
"uyghur": "ياخشىمۇسىز دۇنيا.",
|
115 |
+
"uzbek": "Salom Dunyo.",
|
116 |
+
"vietnamese": "Chào thế giới.",
|
117 |
+
"welsh": "Helo Byd.",
|
118 |
+
"xhosa": "Molo Lizwe.",
|
119 |
+
"yiddish": "העלא וועלט.",
|
120 |
+
"yoruba": "Mo ki O Ile Aiye.",
|
121 |
+
"zulu": "Sawubona Mhlaba."
|
122 |
+
}
|
123 |
|
124 |
|
125 |
@pytest.fixture
|
126 |
def google_translator():
|
127 |
"""Sample pytest fixture.
|
|
|
128 |
See more at: http://doc.pytest.org/en/latest/fixture.html
|
129 |
"""
|
130 |
return GoogleTranslator(target='en')
|
131 |
|
132 |
|
133 |
+
def case_sensitivity_checks():
    """Check that a target language name is accepted in any letter case.

    Builds every upper/lower-case spelling of the language name, picks a
    random handful of them, and asserts that translating a fixed English
    text with each spelling as the target gives the expected Czech text.

    Raises:
        AssertionError: if any sampled spelling yields a different translation.
    """
    import itertools

    test_lang = 'Czech'
    test_text = 'Hi, the sky is dark while the moon is white. Hurrah!! Denver is a city name in Colorado.'
    translated_text = 'Ahoj, obloha je tmavá, zatímco měsíc je bílý. Hurá!! Denver je název města v Coloradu.'

    # Pair each letter with its lower- and upper-case form so the product
    # enumerates all 2^n spellings, including those starting in lower-case.
    letter_choices = [(letter.lower(), letter.upper()) for letter in test_lang]
    test_cases = [''.join(spelling) for spelling in itertools.product(*letter_choices)]

    # Only a random subset is exercised; the full list has 2^n entries.
    random_cases = 5
    for case in random.sample(test_cases, random_cases):
        assert GoogleTranslator(source='en', target=case).translate(test_text) == translated_text
|
154 |
+
|
155 |
+
def multiple_names_lang_checks():
    """Check that secondary language names, in mixed case, work as targets.

    Each entry is (target name, English input, expected translation).
    """
    expectations = (
        ('burMeSe', "Hello", 'မင်္ဂလာပါ'),
        ('Oriya', "What's up?", 'କଣ ଚାଲିଛି?'),
        ('kurManJi', "Nice is dice.", 'Xweş xweş e.'),
    )
    for alias, phrase, expected in expectations:
        assert GoogleTranslator(source='en', target=alias).translate(phrase) == expected
|
159 |
+
|
160 |
+
def test_random_tranlations_cases_multiple_names():
    """Translate the standard text into a random sample of languages.

    Picks a few language names from TRANSLATED_RESULTS and asserts that
    translating test_text_standard from English into each one matches the
    recorded translation.
    """
    random_sample_size = 5
    # random.sample() needs a sequence. A dict key view is set-like, which was
    # deprecated as a population in Python 3.9 and raises TypeError from 3.11.
    random_lang_names = random.sample(list(TRANSLATED_RESULTS), random_sample_size)
    for lang in random_lang_names:
        translation = TRANSLATED_RESULTS[lang]
        assert GoogleTranslator(source='en', target=lang).translate(test_text_standard) == translation
|
167 |
+
|
168 |
+
case_sensitivity_checks()
|
169 |
+
multiple_names_lang_checks()
|
170 |
+
|
171 |
def test_content(google_translator):
|
172 |
"""Sample pytest test function with the pytest fixture as an argument."""
|
173 |
# from bs4 import BeautifulSoup
|
174 |
# assert 'GitHub' in BeautifulSoup(response.content).title.string
|
175 |
assert google_translator.translate(text='좋은') == "good"
|
176 |
|
177 |
+
def test_abbreviations_and_languages_mapping():
    """Check that a language code and its full name resolve to the same source.

    For every entry in GOOGLE_CODES_TO_LANGUAGES except 'en', a translator
    built from the code and one built from the name must share `_source`.
    """
    for code, name in GOOGLE_CODES_TO_LANGUAGES.items():
        # NOTE(review): 'en' is skipped, presumably because it would equal the
        # default target language -- confirm against GoogleTranslator.__init__.
        if code == 'en':
            continue
        by_code = GoogleTranslator(code)
        by_name = GoogleTranslator(name)
        assert by_code._source == by_name._source
|
183 |
|
184 |
def test_inputs():
|
185 |
with pytest.raises(exceptions.LanguageNotSupportedException):
|
|
|
188 |
with pytest.raises(exceptions.LanguageNotSupportedException):
|
189 |
GoogleTranslator(source="auto", target="nothing")
|
190 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
191 |
def test_payload(google_translator):
|
192 |
|
193 |
with pytest.raises(exceptions.NotValidPayload):
|
|
|
211 |
#for _ in range(1):
|
212 |
#assert google_translator.translate(text='좋은') == "good"
|
213 |
|
|
|
214 |
def test_one_character_words():
|
215 |
assert GoogleTranslator(source='es', target='en').translate('o') == 'or'
|