= committed on
Commit
58568a7
·
1 Parent(s): 198fa9e

made parent abstract and added pons support

Browse files
deep_translator/constants.py CHANGED
@@ -4,7 +4,7 @@ BASE_URLS = {
4
  "PONS": "https://en.pons.com/translate/"
5
  }
6
 
7
- CODES_TO_LANGUAGES = {
8
  'af': 'afrikaans',
9
  'sq': 'albanian',
10
  'am': 'amharic',
@@ -113,4 +113,32 @@ CODES_TO_LANGUAGES = {
113
  'he': 'Hebrew'
114
  }
115
 
116
- LANGUAGES_TO_CODES = {v: k for k, v in CODES_TO_LANGUAGES.items()}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  "PONS": "https://en.pons.com/translate/"
5
  }
6
 
7
+ GOOGLE_CODES_TO_LANGUAGES = {
8
  'af': 'afrikaans',
9
  'sq': 'albanian',
10
  'am': 'amharic',
 
113
  'he': 'Hebrew'
114
  }
115
 
116
+ GOOGLE_LANGUAGES_TO_CODES = {v: k for k, v in GOOGLE_CODES_TO_LANGUAGES.items()}
117
+
118
+
119
+ PONS_CODES_TO_LANGUAGES = {
120
+ 'ar': 'arabic',
121
+ 'bg': 'bulgarian',
122
+ 'zh-cn': 'chinese',
123
+ 'cs': 'czech',
124
+ 'da': 'danish',
125
+ 'nl': 'dutch',
126
+ 'en': 'english',
127
+ 'fr': 'french',
128
+ 'de': 'german',
129
+ 'el': 'greek',
130
+ 'hu': 'hungarian',
131
+ 'it': 'italian',
132
+ 'la': 'latin',
133
+ 'no': 'norwegian',
134
+ 'pl': 'polish',
135
+ 'pt': 'portuguese',
136
+ 'ru': 'russian',
137
+ 'sl': 'slovenian',
138
+ 'es': 'spanish',
139
+ 'sv': 'swedish',
140
+ 'tr': 'turkish',
141
+ 'elv': 'elvish'
142
+ }
143
+
144
+ PONS_LANGUAGES_TO_CODES = {v: k for k, v in PONS_CODES_TO_LANGUAGES.items()}
deep_translator/deep_translator.py CHANGED
@@ -1,129 +1,7 @@
1
  """Main module."""
2
 
 
 
3
 
4
- from bs4 import BeautifulSoup
5
- import requests
6
- from models import BaseTranslator
7
- from constants import BASE_URLS, LANGUAGES_TO_CODES, CODES_TO_LANGUAGES
8
- from exceptions import LanguageNotSupportedException, NotValidPayload, ElementNotFoundInGetRequest, NotValidLength
9
- from parent import ParentTranslator
10
- import string
11
 
12
-
13
- class GoogleTranslator(ParentTranslator):
14
- """
15
- class that uses google translate to translate texts
16
- """
17
- def __init__(self, source="auto", target="en"):
18
- """
19
- @param source: source language to translate from
20
- @param target: target language to translate to
21
- """
22
- self.__base_url = BASE_URLS.get("GOOGLE_TRANSLATE")
23
-
24
- if self._validate_languages([source.lower(), target.lower()]):
25
- self._source = self._map_language_to_code(source.lower())
26
- self._target = self._map_language_to_code(target.lower())
27
-
28
- super(GoogleTranslator, self).__init__(base_url=self.__base_url,
29
- source=self._source,
30
- target=self._target,
31
- element_tag='div',
32
- element_query={"class": "t0"},
33
- hl=self._target,
34
- sl=self._source,
35
- q=None)
36
-
37
- def _map_language_to_code(self, language):
38
- """
39
-
40
- @param language: type of language
41
- @return: mapped value of the language or raise an exception if the language is not supported
42
- """
43
- if language in LANGUAGES_TO_CODES.values() or language == 'auto':
44
- return language
45
- elif language in LANGUAGES_TO_CODES.keys():
46
- return LANGUAGES_TO_CODES[language]
47
- else:
48
- raise LanguageNotSupportedException(language)
49
-
50
- def translate(self, payload, payload_tag='q', **kwargs):
51
- return super().translate(payload, payload_tag)
52
-
53
-
54
- class PonsTranslator(ParentTranslator):
55
- """
56
- class that uses PONS translator to translate words
57
- """
58
- def __init__(self, source="french", target="english"):
59
- """
60
- @param source: source language to translate from
61
- @param target: target language to translate to
62
- """
63
- self.__base_url = BASE_URLS.get("PONS")
64
-
65
- if self._validate_languages([source.lower(), target.lower()]):
66
- self._source = self._map_language_to_code(source.lower())
67
- self._target = self._map_language_to_code(target.lower())
68
-
69
- super().__init__(base_url=self.__base_url,
70
- source=self._source,
71
- target=self._target,
72
- element_tag='div',
73
- element_query={"class": "target"}
74
- )
75
-
76
- def _map_language_to_code(self, language):
77
- """
78
-
79
- @param language: type of language
80
- @return: mapped value of the language or raise an exception if the language is not supported
81
- """
82
- if language in LANGUAGES_TO_CODES.values():
83
- return CODES_TO_LANGUAGES[language]
84
- elif language in LANGUAGES_TO_CODES.keys():
85
- return language
86
- else:
87
- raise LanguageNotSupportedException(language)
88
-
89
- def _validate_languages(self, languages):
90
- """
91
-
92
- @param languages: languages to validate
93
- @return: True or raise an exception
94
- """
95
- for lang in languages:
96
- if lang not in LANGUAGES_TO_CODES.keys():
97
- if lang not in LANGUAGES_TO_CODES.values():
98
- raise LanguageNotSupportedException(lang)
99
- return True
100
-
101
- def translate(self, payload, payload_tag=None, **kwargs):
102
- from requests.utils import quote
103
- url = "{}{}-{}/{}".format(self.__base_url, self._source, self._target, quote(payload))
104
- response = requests.get(url)
105
- soup = BeautifulSoup(response.text, 'html.parser')
106
- elements = soup.findAll(self._element_tag, self._element_query)
107
- # elements = soup.body.find_all('a')
108
- eof = []
109
- for el in elements:
110
- temp = ''
111
- for e in el.findAll('a'):
112
- if e.parent.name == 'div':
113
- if e and "/translate/{}-{}/".format(self._target, self._source) in e.get('href'):
114
- temp += e.get_text() + ' '
115
- eof.append(temp)
116
-
117
- return [word for word in eof if word and len(word) > 1]
118
-
119
-
120
- if __name__ == '__main__':
121
- # res = GoogleTranslator(source='auto', target='french').translate(payload="A paragraph is a series of related sentences developing a central idea, called the topic. Try to think about paragraphs in terms of thematic unity: a paragraph is a sentence or a group of sentences that supports one central, unified idea. Paragraphs add one idea at a time to your broader argument.")
122
- # res = GoogleTranslator(source='auto', target='french').translate_text(path='../examples/test.txt')
123
- # res = GoogleTranslator(source='auto', target='french').translate_sentences([
124
- # "this is good",
125
- # "das Wetter ist schön",
126
- # "un verme verde in un bicchiere verde"
127
- # ])
128
- res = PonsTranslator(source="english", target="arabic").translate(payload='good')
129
- print(res)
 
1
  """Main module."""
2
 
3
+ from .google_trans import GoogleTranslator
4
+ from .pons import PonsTranslator
5
 
 
 
 
 
 
 
 
6
 
7
+ __all__ = [GoogleTranslator, PonsTranslator]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
deep_translator/google_trans.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from abc import ABC
2
+
3
+ from constants import BASE_URLS, GOOGLE_LANGUAGES_TO_CODES
4
+ from exceptions import LanguageNotSupportedException, ElementNotFoundInGetRequest, NotValidPayload, NotValidLength
5
+ from parent import BaseTranslator
6
+ from bs4 import BeautifulSoup
7
+ import requests
8
+
9
+
10
+ class GoogleTranslator(BaseTranslator, ABC):
11
+ """
12
+ class that uses google translate to translate texts
13
+ """
14
+
15
+ def __init__(self, source="auto", target="en"):
16
+ """
17
+ @param source: source language to translate from
18
+ @param target: target language to translate to
19
+ """
20
+ self.__base_url = BASE_URLS.get("GOOGLE_TRANSLATE")
21
+
22
+ if self.is_language_supported(source, target):
23
+ self._source, self._target = self._map_language_to_code(source.lower(), target.lower())
24
+
25
+ super(GoogleTranslator, self).__init__(base_url=self.__base_url,
26
+ source=self._source,
27
+ target=self._target,
28
+ element_tag='div',
29
+ element_query={"class": "t0"},
30
+ payload_key='q', # key of payload in the url
31
+ hl=self._target,
32
+ sl=self._source)
33
+
34
+ def _map_language_to_code(self, *languages, **kwargs):
35
+ """
36
+
37
+ @param language: type of language
38
+ @return: mapped value of the language or raise an exception if the language is not supported
39
+ """
40
+ for language in languages:
41
+ if language in GOOGLE_LANGUAGES_TO_CODES.values() or language == 'auto':
42
+ yield language
43
+ elif language in GOOGLE_LANGUAGES_TO_CODES.keys():
44
+ yield GOOGLE_LANGUAGES_TO_CODES[language]
45
+ else:
46
+ raise LanguageNotSupportedException(language)
47
+
48
+ def is_language_supported(self, *languages, **kwargs):
49
+ for lang in languages:
50
+ if lang != 'auto' and lang not in GOOGLE_LANGUAGES_TO_CODES.keys():
51
+ if lang != 'auto' and lang not in GOOGLE_LANGUAGES_TO_CODES.values():
52
+ raise LanguageNotSupportedException(lang)
53
+ return True
54
+
55
+ def translate(self, payload, **kwargs):
56
+ """
57
+ main function that uses google translate to translate a text
58
+ @param payload: desired text to translate
59
+ @return: str: translated text
60
+ """
61
+
62
+ if not self.validate_payload(payload):
63
+ raise NotValidPayload(payload)
64
+
65
+ if not self._check_length(payload):
66
+ raise NotValidLength(payload)
67
+
68
+ try:
69
+ payload = payload.strip()
70
+
71
+ if self.payload_key:
72
+ self._url_params[self.payload_key] = payload
73
+
74
+ res = requests.get(self.__base_url, params=self._url_params)
75
+ soup = BeautifulSoup(res.text, 'html.parser')
76
+ element = soup.find(self._element_tag, self._element_query)
77
+ if not element:
78
+ raise ElementNotFoundInGetRequest(element)
79
+
80
+ return element.get_text(strip=True)
81
+
82
+ except Exception as e:
83
+ raise Exception(str(e.args))
84
+
85
+
86
+ if __name__ == '__main__':
87
+ res = GoogleTranslator(source="auto", target="de").translate(payload='this is a good day')
88
+ print(res)
deep_translator/models.py DELETED
@@ -1,17 +0,0 @@
1
- from abc import ABC, abstractmethod
2
-
3
-
4
- class BaseTranslator(ABC):
5
- def __init__(self):
6
- super(BaseTranslator, self).__init__()
7
-
8
- @abstractmethod
9
- def _validate_payload(self, payload):
10
- pass
11
-
12
- @abstractmethod
13
- def translate(self, payload, payload_tag):
14
- pass
15
-
16
-
17
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
deep_translator/parent.py CHANGED
@@ -1,14 +1,10 @@
1
  """Main module."""
2
 
 
 
3
 
4
- from bs4 import BeautifulSoup
5
- import requests
6
- from .models import BaseTranslator
7
- from .constants import LANGUAGES_TO_CODES
8
- from .exceptions import LanguageNotSupportedException, NotValidPayload, ElementNotFoundInGetRequest, NotValidLength
9
 
10
-
11
- class ParentTranslator(BaseTranslator):
12
  """
13
  class that serve as a parent translator class for other different translators
14
  """
@@ -16,6 +12,7 @@ class ParentTranslator(BaseTranslator):
16
  base_url=None,
17
  source="auto",
18
  target="en",
 
19
  element_tag=None,
20
  element_query=None,
21
  **url_params):
@@ -29,9 +26,10 @@ class ParentTranslator(BaseTranslator):
29
  self._url_params = url_params
30
  self._element_tag = element_tag
31
  self._element_query = element_query
32
- super(ParentTranslator, self).__init__()
 
33
 
34
- def _validate_payload(self, payload):
35
  """
36
  validate the payload text to translate
37
  @param payload: text to translate
@@ -45,49 +43,9 @@ class ParentTranslator(BaseTranslator):
45
  def _check_length(self, payload, min_chars=0, max_chars=5000):
46
  return True if min_chars < len(payload) < max_chars else False
47
 
48
- def _validate_languages(self, languages):
49
- """
50
-
51
- @param languages: languages to validate
52
- @return: True or raise an exception
53
- """
54
- for lang in languages:
55
- if lang != 'auto' and lang not in LANGUAGES_TO_CODES.keys():
56
- if lang != 'auto' and lang not in LANGUAGES_TO_CODES.values():
57
- raise LanguageNotSupportedException(lang)
58
- return True
59
-
60
- def translate(self, payload, payload_tag, **kwargs):
61
- """
62
- main function that uses google translate to translate a text
63
- @param payload: desired text to translate
64
- @param payload_tag: tag of the payload in the url parameters
65
- @return: str: translated text
66
- """
67
-
68
- if not self._validate_payload(payload):
69
- raise NotValidPayload(payload)
70
-
71
- if not self._check_length(payload):
72
- raise NotValidLength(payload)
73
-
74
- try:
75
- payload = payload.strip()
76
-
77
- if payload_tag in self._url_params.keys():
78
- self._url_params[payload_tag] = payload
79
-
80
- res = requests.get(self.__base_url, params=self._url_params)
81
- soup = BeautifulSoup(res.text, 'html.parser')
82
- element = soup.find(self._element_tag, self._element_query)
83
- if not element:
84
- raise ElementNotFoundInGetRequest(element)
85
-
86
- return element.get_text(strip=True)
87
-
88
- except Exception as e:
89
- print(e.args)
90
- raise
91
 
92
  def translate_file(self, path, **kwargs):
93
  try:
 
1
  """Main module."""
2
 
3
+ from exceptions import NotValidPayload
4
+ from abc import ABC, abstractmethod
5
 
 
 
 
 
 
6
 
7
+ class BaseTranslator(ABC):
 
8
  """
9
  class that serve as a parent translator class for other different translators
10
  """
 
12
  base_url=None,
13
  source="auto",
14
  target="en",
15
+ payload_key=None,
16
  element_tag=None,
17
  element_query=None,
18
  **url_params):
 
26
  self._url_params = url_params
27
  self._element_tag = element_tag
28
  self._element_query = element_query
29
+ self.payload_key = payload_key
30
+ super(BaseTranslator, self).__init__()
31
 
32
+ def validate_payload(self, payload):
33
  """
34
  validate the payload text to translate
35
  @param payload: text to translate
 
43
  def _check_length(self, payload, min_chars=0, max_chars=5000):
44
  return True if min_chars < len(payload) < max_chars else False
45
 
46
+ @abstractmethod
47
+ def translate(self, payload, **kwargs):
48
+ pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
  def translate_file(self, path, **kwargs):
51
  try:
deep_translator/pons.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from abc import ABC
2
+
3
+ from bs4 import BeautifulSoup
4
+ import requests
5
+ from constants import BASE_URLS, PONS_LANGUAGES_TO_CODES, PONS_CODES_TO_LANGUAGES
6
+ from exceptions import LanguageNotSupportedException, ElementNotFoundInGetRequest
7
+ from parent import BaseTranslator
8
+ from requests.utils import quote
9
+
10
+
11
+ class PonsTranslator(BaseTranslator):
12
+ """
13
+ class that uses PONS translator to translate words
14
+ """
15
+ def __init__(self, source="french", target="english"):
16
+ """
17
+ @param source: source language to translate from
18
+ @param target: target language to translate to
19
+ """
20
+ self.__base_url = BASE_URLS.get("PONS")
21
+
22
+ if self.is_language_supported(source, target, translator='pons'):
23
+ self._source, self._target = self._map_language_to_code(source, target)
24
+
25
+ super().__init__(base_url=self.__base_url,
26
+ source=self._source,
27
+ target=self._target,
28
+ payload_key=None,
29
+ element_tag='div',
30
+ element_query={"class": "target"}
31
+ )
32
+
33
+ def _map_language_to_code(self, *languages, **kwargs):
34
+ """
35
+
36
+ @param language: type of language
37
+ @return: mapped value of the language or raise an exception if the language is not supported
38
+ """
39
+ for language in languages:
40
+ if language in PONS_LANGUAGES_TO_CODES.values() or language == 'auto':
41
+ yield PONS_CODES_TO_LANGUAGES[language]
42
+ elif language in PONS_LANGUAGES_TO_CODES.keys():
43
+ yield language
44
+ else:
45
+ raise LanguageNotSupportedException(language)
46
+
47
+ def is_language_supported(self, *languages, **kwargs):
48
+ for lang in languages:
49
+ if lang not in PONS_LANGUAGES_TO_CODES.keys():
50
+ if lang not in PONS_LANGUAGES_TO_CODES.values():
51
+ raise LanguageNotSupportedException(lang)
52
+ return True
53
+
54
+ def translate(self, payload, **kwargs):
55
+
56
+ url = "{}{}-{}/{}".format(self.__base_url, self._source, self._target, quote(payload))
57
+ response = requests.get(url)
58
+ soup = BeautifulSoup(response.text, 'html.parser')
59
+ elements = soup.findAll(self._element_tag, self._element_query)
60
+ if not elements:
61
+ raise ElementNotFoundInGetRequest(elements)
62
+
63
+ eof = []
64
+ for el in elements:
65
+ temp = ''
66
+ for e in el.findAll('a'):
67
+ if e.parent.name == 'div':
68
+ if e and "/translate/{}-{}/".format(self._target, self._source) in e.get('href'):
69
+ temp += e.get_text() + ' '
70
+ eof.append(temp)
71
+
72
+ return [word for word in eof if word and len(word) > 1]
73
+
74
+
75
+ if __name__ == '__main__':
76
+ # res = GoogleTranslator(source='auto', target='french').translate(payload="A paragraph is a series of related sentences developing a central idea, called the topic. Try to think about paragraphs in terms of thematic unity: a paragraph is a sentence or a group of sentences that supports one central, unified idea. Paragraphs add one idea at a time to your broader argument.")
77
+ # res = GoogleTranslator(source='auto', target='french').translate_text(path='../examples/test.txt')
78
+ # res = GoogleTranslator(source='auto', target='french').translate_sentences([
79
+ # "this is good",
80
+ # "das Wetter ist schön",
81
+ # "un verme verde in un bicchiere verde"
82
+ # ])
83
+ res = PonsTranslator(source="english", target="arabic").translate(payload='good')
84
+ print(res)