nidhal baccouri
commited on
Commit
·
b1f5dde
1
Parent(s):
f1c6875
added api url
Browse files- deep_translator/constants.py +18 -1
- deep_translator/papago.py +177 -0
deep_translator/constants.py
CHANGED
@@ -9,7 +9,9 @@ BASE_URLS = {
|
|
9 |
"QCRI": "https://mt.qcri.org/api/v1/{endpoint}?",
|
10 |
"DEEPL": "https://api.deepl.com/{version}/",
|
11 |
"DEEPL_FREE": "https://api-free.deepl.com",
|
12 |
-
"MICROSOFT_TRANSLATE": "https://api.cognitive.microsofttranslator.com/translate?api-version=3.0"
|
|
|
|
|
13 |
}
|
14 |
|
15 |
GOOGLE_CODES_TO_LANGUAGES = {
|
@@ -221,3 +223,18 @@ DEEPL_LANGUAGE_TO_CODE = {
|
|
221 |
}
|
222 |
|
223 |
# Reverse lookup built by swapping the keys and values of DEEPL_LANGUAGE_TO_CODE.
DEEPL_CODE_TO_LANGUAGE = {v: k for k, v in DEEPL_LANGUAGE_TO_CODE.items()}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
"QCRI": "https://mt.qcri.org/api/v1/{endpoint}?",
|
10 |
"DEEPL": "https://api.deepl.com/{version}/",
|
11 |
"DEEPL_FREE": "https://api-free.deepl.com",
|
12 |
+
"MICROSOFT_TRANSLATE": "https://api.cognitive.microsofttranslator.com/translate?api-version=3.0",
|
13 |
+
"PAPAGO": "https://papago.naver.com/",
|
14 |
+
"PAPAGO_API": "https://openapi.naver.com/v1/papago/n2mt"
|
15 |
}
|
16 |
|
17 |
GOOGLE_CODES_TO_LANGUAGES = {
|
|
|
223 |
}
|
224 |
|
225 |
# Reverse lookup built by swapping the keys and values of DEEPL_LANGUAGE_TO_CODE.
DEEPL_CODE_TO_LANGUAGE = {v: k for k, v in DEEPL_LANGUAGE_TO_CODE.items()}
|
226 |
+
|
227 |
+
# Languages handled by the Papago translator, keyed by language code.
PAPAGO_CODE_TO_LANGUAGE = {
    'ko': 'Korean',
    'en': 'English',
    'ja': 'Japanese',
    'zh-CN': 'Chinese',
    'zh-TW': 'Chinese traditional',
    'es': 'Spanish',
    'fr': 'French',
    'vi': 'Vietnamese',
    'th': 'Thai',
    # NOTE(review): probably meant 'Indonesian'; left unchanged because the
    # name doubles as a lookup key in PAPAGO_LANGUAGE_TO_CODE.
    'id': 'Indonesia',
}

# Reverse lookup: language name -> language code.
# The loop variables must be unpacked as (code, name) and then swapped;
# unpacking them in the same order as they are emitted merely copies the dict.
PAPAGO_LANGUAGE_TO_CODE = {
    name: code for code, name in PAPAGO_CODE_TO_LANGUAGE.items()
}
|
deep_translator/papago.py
ADDED
@@ -0,0 +1,177 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
papago translator API
|
3 |
+
"""
|
4 |
+
|
5 |
+
from deep_translator.constants import BASE_URLS, PAPAGO_LANGUAGE_TO_CODE
|
6 |
+
from deep_translator.exceptions import TooManyRequests, LanguageNotSupportedException, TranslationNotFound, NotValidPayload, RequestError
|
7 |
+
from deep_translator.parent import BaseTranslator
|
8 |
+
from bs4 import BeautifulSoup
|
9 |
+
import requests
|
10 |
+
from time import sleep
|
11 |
+
import warnings
|
12 |
+
import logging
|
13 |
+
|
14 |
+
|
15 |
+
class PapagoTranslator(BaseTranslator):
    """
    class that wraps functions, which use the Naver Papago service under the hood to translate text(s)

    NOTE(review): BASE_URLS["PAPAGO_API"] is the Naver open API endpoint (n2mt). That endpoint
    presumably expects an authenticated POST and answers with JSON, so the GET request plus HTML
    scraping done in `translate` may never find a translation -- confirm against the Papago API docs.
    """
    # expected to map a language name to its code
    _languages = PAPAGO_LANGUAGE_TO_CODE
    supported_languages = list(_languages.keys())

    def __init__(self, source="auto", target="en", proxies=None, **kwargs):
        """
        @param source: source language to translate from (name or code, case-insensitive)
        @param target: target language to translate to (name or code, case-insensitive)
        @param proxies: proxies argument handed to requests.get on every request
        @param kwargs: additional keyword arguments forwarded to BaseTranslator
        @raise LanguageNotSupportedException: if source or target is not a known language
        """
        self.__base_url = BASE_URLS.get("PAPAGO_API")
        self.proxies = proxies

        if self.is_language_supported(source, target):
            # The lookup is case-insensitive on its own. Lower-casing here (as was done
            # before) rejected mixed-case entries such as "zh-CN" or "Korean" although
            # the support check had just accepted them.
            self._source, self._target = self._map_language_to_code(source, target)

        super(PapagoTranslator, self).__init__(base_url=self.__base_url,
                                               source=self._source,
                                               target=self._target,
                                               element_tag='div',
                                               element_query={"id": "txtTarget"},
                                               payload_key='st',  # key of text in the url
                                               tk=self._target,
                                               sk=self._source,
                                               **kwargs)

    @staticmethod
    def get_supported_languages(as_dict=False):
        """
        return the supported languages by the papago translator
        @param as_dict: if True, the languages will be returned as a dictionary mapping languages to their abbreviations
        @return: list or dict
        """
        return PapagoTranslator.supported_languages if not as_dict else PapagoTranslator._languages

    def _lookup_code(self, language):
        """
        resolve a language given by name or by code, ignoring letter case
        @param language: language name, language code or 'auto'
        @return: the matching value of the language table, 'auto', or None if nothing matches
        """
        if not isinstance(language, str):
            return None

        wanted = language.lower()
        if wanted == 'auto':
            return 'auto'

        for name, code in self._languages.items():
            if wanted in (name.lower(), code.lower()):
                return code
        return None

    def _map_language_to_code(self, *languages):
        """
        map language to its corresponding code (abbreviation) if the language was passed by its full name by the user
        @param languages: list of languages
        @return: generator over the mapped values; raises an exception while iterating if a language is not supported
        """
        for language in languages:
            code = self._lookup_code(language)
            if code is None:
                raise LanguageNotSupportedException(language)
            yield code

    def is_language_supported(self, *languages):
        """
        check if the language is supported by the translator
        @param languages: list of languages
        @return: bool or raise an Exception
        """
        for lang in languages:
            if self._lookup_code(lang) is None:
                raise LanguageNotSupportedException(lang)
        return True

    def translate(self, text, **kwargs):
        """
        function that uses papago to translate a text
        @param text: desired text to translate
        @return: str: translated text
        @raise TooManyRequests: if the server answers with status 429
        @raise RequestError: if the server answers with any other status than 200
        @raise TranslationNotFound: if the expected element is missing from the response
        """
        # _validate_payload, payload_key, _url_params, _element_tag and _element_query
        # come from BaseTranslator (presumably set up by its __init__ -- not visible here).
        if self._validate_payload(text):
            text = text.strip()

            if self.payload_key:
                self._url_params[self.payload_key] = text

            response = requests.get(self.__base_url,
                                    params=self._url_params,
                                    proxies=self.proxies)
            logging.debug("papago request url: %s", response.url)

            if response.status_code == 429:
                raise TooManyRequests()

            if response.status_code != 200:
                raise RequestError()

            soup = BeautifulSoup(response.text, 'html.parser')
            element = soup.find(self._element_tag, self._element_query)
            if not element:
                raise TranslationNotFound(text)

            return element.get_text(strip=True)

    def translate_file(self, path, **kwargs):
        """
        translate directly from file
        @param path: path to the target file
        @type path: str
        @param kwargs: additional args
        @return: str
        """
        # errors from reading the file or from translate() propagate unchanged
        with open(path) as f:
            text = f.read().strip()
        return self.translate(text)

    def translate_sentences(self, sentences=None, **kwargs):
        """
        translate many sentences, one request per sentence. Deprecated: use translate_batch instead.

        @param sentences: list of sentences to translate
        @return: list of all translated sentences
        @raise NotValidPayload: if sentences is empty or None
        """
        warnings.warn("deprecated. Use the translate_batch function instead", DeprecationWarning, stacklevel=2)
        logging.warning("deprecated. Use the translate_batch function instead")
        if not sentences:
            raise NotValidPayload(sentences)

        return [self.translate(text=sentence) for sentence in sentences]

    def translate_batch(self, batch=None):
        """
        translate a list of texts
        @param batch: list of texts you want to translate
        @return: list of translations
        """
        if not batch:
            raise Exception("Enter your text list that you want to translate")

        print("Please wait.. This may take a couple of seconds because deep_translator sleeps "
              "for two seconds after each request in order to not spam the papago server.")
        arr = []
        for i, text in enumerate(batch):

            translated = self.translate(text)
            arr.append(translated)
            print("sentence number ", i+1, " has been translated successfully")
            # throttle the requests sent to the server
            sleep(2)

        return arr
|
171 |
+
|
172 |
+
|
173 |
+
|
174 |
+
if __name__ == '__main__':
    # Manual smoke test. Korean is used as the target because German ("de")
    # is not part of the Papago language table and is rejected by the constructor.
    t = PapagoTranslator(source="en", target="ko").translate("cute")
    print(t)
|
177 |
+
|