Nidhal Baccouri committed on
Commit
71647e5
·
unverified ·
2 Parent(s): f1c6875 714079e

Merge pull request #57 from nidhaloff/feature/papago

Browse files
deep_translator/constants.py CHANGED
@@ -9,7 +9,9 @@ BASE_URLS = {
9
  "QCRI": "https://mt.qcri.org/api/v1/{endpoint}?",
10
  "DEEPL": "https://api.deepl.com/{version}/",
11
  "DEEPL_FREE": "https://api-free.deepl.com",
12
- "MICROSOFT_TRANSLATE": "https://api.cognitive.microsofttranslator.com/translate?api-version=3.0"
 
 
13
  }
14
 
15
  GOOGLE_CODES_TO_LANGUAGES = {
@@ -221,3 +223,18 @@ DEEPL_LANGUAGE_TO_CODE = {
221
  }
222
 
223
  DEEPL_CODE_TO_LANGUAGE = {v: k for k, v in DEEPL_LANGUAGE_TO_CODE.items()}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  "QCRI": "https://mt.qcri.org/api/v1/{endpoint}?",
10
  "DEEPL": "https://api.deepl.com/{version}/",
11
  "DEEPL_FREE": "https://api-free.deepl.com",
12
+ "MICROSOFT_TRANSLATE": "https://api.cognitive.microsofttranslator.com/translate?api-version=3.0",
13
+ "PAPAGO": "https://papago.naver.com/",
14
+ "PAPAGO_API": "https://openapi.naver.com/v1/papago/n2mt"
15
  }
16
 
17
  GOOGLE_CODES_TO_LANGUAGES = {
 
223
  }
224
 
225
  DEEPL_CODE_TO_LANGUAGE = {v: k for k, v in DEEPL_LANGUAGE_TO_CODE.items()}
226
+
227
# Languages handled by the Papago translator, keyed by language code.
PAPAGO_CODE_TO_LANGUAGE = {
    'ko': 'Korean',
    'en': 'English',
    'ja': 'Japanese',
    'zh-CN': 'Chinese',
    'zh-TW': 'Chinese traditional',
    'es': 'Spanish',
    'fr': 'French',
    'vi': 'Vietnamese',
    'th': 'Thai',
    'id': 'Indonesian'
}

# Inverse lookup (language name -> code). The loop variables must be unpacked
# as (code, name) and emitted swapped; unpacking them as `v, k` and emitting
# `v: k` only produced a copy of the mapping above instead of its inverse.
PAPAGO_LANGUAGE_TO_CODE = {name: code for code, name in PAPAGO_CODE_TO_LANGUAGE.items()}
deep_translator/papago.py ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ papago translator API
3
+ """
4
+ import json
5
+
6
+ from deep_translator.constants import BASE_URLS, PAPAGO_LANGUAGE_TO_CODE
7
+ from deep_translator.exceptions import TooManyRequests, LanguageNotSupportedException, TranslationNotFound, NotValidPayload, RequestError
8
+ from deep_translator.parent import BaseTranslator
9
+ from bs4 import BeautifulSoup
10
+ import requests
11
+ from time import sleep
12
+ import warnings
13
+ import logging
14
+
15
+
16
class PapagoTranslator(object):
    """
    class that wraps functions, which use the papago translation API under the hood to translate text(s)
    """
    # NOTE(review): the lookups below read this as a language-name -> code mapping, as its name states
    _languages = PAPAGO_LANGUAGE_TO_CODE
    supported_languages = list(_languages.keys())

    def __init__(self, client_id, secret_key, source="auto", target="en"):
        """
        @param client_id: value sent in the X-Naver-Client-Id request header
        @param secret_key: value sent in the X-Naver-Client-Secret request header
        @param source: source language to translate from (name, code or 'auto')
        @param target: target language to translate to (name, code or 'auto')
        """
        self.__base_url = BASE_URLS.get("PAPAGO_API")
        self.client_id = client_id
        self.secret_key = secret_key
        if self.is_language_supported(source, target):
            # The languages are handed over unchanged: lowercasing them first
            # made mixed-case entries such as 'zh-CN' pass the validation above
            # and then fail in the mapping step. Case is ignored in the lookup.
            self._source, self._target = self._map_language_to_code(source, target)

    @staticmethod
    def get_supported_languages(as_dict=False):
        """
        return the supported languages by the papago translator
        @param as_dict: if True, the languages will be returned as a dictionary mapping languages to their abbreviations
        @return: list or dict
        """
        return PapagoTranslator.supported_languages if not as_dict else PapagoTranslator._languages

    def _lookup_code(self, language):
        """
        find the code of a language given either by its name or by its code, ignoring letter case
        @param language: language name, language code or 'auto'
        @return: the matching code, 'auto', or None if the language is unknown
        """
        if not isinstance(language, str):
            return None
        wanted = language.lower()
        if wanted == 'auto':
            return 'auto'
        for name, code in self._languages.items():
            if wanted == name.lower() or wanted == code.lower():
                # always answer with the stored spelling of the code
                return code
        return None

    def _map_language_to_code(self, *languages):
        """
        map language to its corresponding code (abbreviation) if the language was passed by its full name by the user
        @param languages: list of languages
        @return: generator over the mapped values; raises an exception if a language is not supported
        """
        for language in languages:
            code = self._lookup_code(language)
            if code is None:
                raise LanguageNotSupportedException(language)
            yield code

    def is_language_supported(self, *languages):
        """
        check if the language is supported by the translator
        @param languages: list of languages
        @return: True, or raise an Exception for the first unsupported language
        """
        for lang in languages:
            if self._lookup_code(lang) is None:
                raise LanguageNotSupportedException(lang)
        return True

    def translate(self, text, **kwargs):
        """
        function that posts a text to the papago API and returns its translation
        @param text: desired text to translate
        @return: str: translated text
        """
        payload = {
            "source": self._source,
            "target": self._target,
            "text": text
        }
        headers = {
            'X-Naver-Client-Id': self.client_id,
            'X-Naver-Client-Secret': self.secret_key,
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
        }
        response = requests.post(self.__base_url, headers=headers, data=payload)
        if response.status_code != 200:
            raise Exception(f'Translation error! -> status code: {response.status_code}')

        # the translation is read from message -> result -> translatedText
        res_body = json.loads(response.text)
        if "message" not in res_body:
            raise TranslationNotFound(text)

        result = res_body.get("message").get("result", None)
        if not result:
            raise TranslationNotFound(text)
        return result.get("translatedText")

    def translate_file(self, path, **kwargs):
        """
        translate directly from file
        @param path: path to the target file
        @type path: str
        @param kwargs: additional args
        @return: str
        """
        # errors from opening the file or from translate() propagate unchanged
        with open(path) as f:
            text = f.read().strip()
        return self.translate(text)

    def translate_sentences(self, sentences=None, **kwargs):
        """
        translate many sentences one after another. Deprecated, use translate_batch instead.

        @param sentences: list of sentences to translate
        @return: list of all translated sentences
        """
        warnings.warn("deprecated. Use the translate_batch function instead", DeprecationWarning, stacklevel=2)
        logging.warning("deprecated. Use the translate_batch function instead")
        if not sentences:
            raise NotValidPayload(sentences)

        return [self.translate(text=sentence) for sentence in sentences]

    def translate_batch(self, batch=None):
        """
        translate a list of texts
        @param batch: list of texts you want to translate
        @return: list of translations
        """
        if not batch:
            raise Exception("Enter your text list that you want to translate")
        return [self.translate(text) for text in batch]
154
+
155
+