Spaces:
Runtime error
Runtime error
| # coding:utf-8 | |
| # author LuShan | |
| # version : 1.1.9 | |
| import json, requests, random, re | |
| from urllib.parse import quote | |
| import urllib3 | |
| import logging | |
| from trans_constant import LANGUAGES, DEFAULT_SERVICE_URLS | |
# Library-style logging: attach a NullHandler so importing this module never
# emits "no handler" warnings; the application decides where records go.
log = logging.getLogger(__name__)
log.addHandler(logging.NullHandler())

# Requests in this module are sent with ``verify=False``; silence the
# InsecureRequestWarning urllib3 would otherwise print for each of them.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Captures everything after "translate.google." in a service URL.  The dots
# are escaped so they only match a literal '.', and the pattern is a raw
# string so the backslashes reach the regex engine untouched.
_SERVICE_URL_RE = re.compile(r'translate\.google\.(.*)')

# Domain suffixes ("com", "co.uk", ...) accepted by google_translator.
# Entries of DEFAULT_SERVICE_URLS that do not look like a translate.google.*
# host are skipped instead of crashing the import with an AttributeError.
URLS_SUFFIX = [
    match.group(1)
    for match in (_SERVICE_URL_RE.search(url.strip()) for url in DEFAULT_SERVICE_URLS)
    if match is not None
]

# Suffix used when the caller passes one that is not in URLS_SUFFIX.
URL_SUFFIX_DEFAULT = 'com'
class google_new_transError(Exception):
    """Exception that uses context to present a meaningful error message.

    :param msg: Explicit message. When given (and truthy) it is used verbatim.
    :param tts: Keyword-only. The client object that issued the failed request.
        When ``msg`` is not given and ``tts`` is not ``None``, the message is
        inferred from ``tts`` and ``response`` via :meth:`infer_msg`.
    :param response: Keyword-only. The HTTP response object, or ``None`` when
        no response was received.

    The resolved message is available as ``self.msg`` (``None`` when neither
    ``msg`` nor ``tts`` was supplied) and is also passed to ``Exception``.
    """

    def __init__(self, msg=None, **kwargs):
        self.tts = kwargs.pop('tts', None)
        self.rsp = kwargs.pop('response', None)
        if msg:
            self.msg = msg
        elif self.tts is not None:
            self.msg = self.infer_msg(self.tts, self.rsp)
        else:
            self.msg = None
        super().__init__(self.msg)

    def infer_msg(self, tts, rsp=None):
        """Build a "<premise>. Probable cause: <cause>" message.

        :param tts: The client object that issued the request. Its optional
            ``lang_check`` and ``lang`` attributes are consulted for HTTP 200
            responses; objects that do not define them are handled gracefully.
        :param rsp: Response object exposing ``status_code`` and ``reason``,
            or ``None`` when the request never produced a response.
        :returns: The human-readable message string.
        """
        if rsp is None:
            # No response object at all: the request itself failed.
            return "{}. Probable cause: {}".format("Failed to connect", "timeout")

        status = rsp.status_code
        reason = rsp.reason
        premise = "{:d} ({}) from TTS API".format(status, reason)

        cause = "Unknown"
        if status == 403:
            cause = "Bad token or upstream API changes"
        elif status == 200 and not getattr(tts, 'lang_check', True):
            # Only clients that expose ``lang_check`` can hit this branch;
            # getattr() keeps clients without the attribute from raising
            # AttributeError while an error message is being built.
            cause = "No audio stream in response. Unsupported language '%s'" % getattr(tts, 'lang', None)
        elif status >= 500:
            cause = "Upstream API error. Try again later."
        return "{}. Probable cause: {}".format(premise, cause)
class google_translator:
    """Client for the Google Translate web ``batchexecute`` endpoint.

    Language codes passed to :meth:`translate` are checked against
    ``LANGUAGES``; codes that are not listed there are replaced by ``'auto'``.

    :param url_suffix: Domain suffix of the ``translate.google.<suffix>`` host
        to talk to (for example ``'com'``). Values that are not in
        ``URLS_SUFFIX`` fall back to ``URL_SUFFIX_DEFAULT``.
    :type url_suffix: :class:`str`
    :param timeout: Timeout passed to every request.
    :type timeout: number or a pair of numbers
    :param proxies: Proxies used for every request, e.g.
        ``{'http': 'http://171.112.169.47:19934/',
        'https': 'https://171.112.169.47:19934/'}``. Anything that is not a
        ``dict`` is replaced by ``{}`` when the first request is sent.
    :type proxies: :class:`dict` or ``None``
    """

    # Identifier of the RPC that is sent and then looked for in the response.
    _RPC_ID = "MkEWBc"
    # Texts of this many characters or more are rejected without a request.
    _MAX_TEXT_LENGTH = 5000
    # Sentinel: "this response line did not contain a usable translation".
    # A sentinel is needed because ``None`` can be a legitimate parse result.
    _NO_RESULT = object()

    def __init__(self, url_suffix="com", timeout=5, proxies=None):
        self.proxies = proxies
        if url_suffix not in URLS_SUFFIX:
            self.url_suffix = URL_SUFFIX_DEFAULT
        else:
            self.url_suffix = url_suffix
        url_base = "https://translate.google.{}".format(self.url_suffix)
        self.url = url_base + "/_/TranslateWebserverUi/data/batchexecute"
        self.timeout = timeout

    def _package_rpc(self, text, lang_src='auto', lang_tgt='auto'):
        """Return the URL-encoded ``f.req=...&`` form body for one RPC call.

        :param text: Source text; surrounding whitespace is stripped.
        :param lang_src: Source language code placed in the RPC parameters.
        :param lang_tgt: Target language code placed in the RPC parameters.
        """
        parameter = [[text.strip(), lang_src, lang_tgt, True], [1]]
        # The parameter list travels as a JSON string nested inside the
        # JSON-encoded RPC envelope, hence the two json.dumps() calls.
        escaped_parameter = json.dumps(parameter, separators=(',', ':'))
        rpc = [[[self._RPC_ID, escaped_parameter, None, "generic"]]]
        escaped_rpc = json.dumps(rpc, separators=(',', ':'))
        return "f.req={}&".format(quote(escaped_rpc))

    @staticmethod
    def _normalize_lang(code):
        """Return ``code`` if it is a key of ``LANGUAGES``, else ``'auto'``."""
        try:
            LANGUAGES[code]
        except (KeyError, TypeError):
            # TypeError covers unhashable values, which the original bare
            # ``except`` also mapped to 'auto'.
            return 'auto'
        return code

    def _headers(self):
        """Return the HTTP headers sent with every request."""
        return {
            "Referer": "http://translate.google.{}/".format(self.url_suffix),
            "User-Agent":
                "Mozilla/5.0 (Windows NT 10.0; WOW64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/47.0.2526.106 Safari/537.36",
            "Content-Type": "application/x-www-form-urlencoded;charset=utf-8"
        }

    def _send(self, freq):
        """POST ``freq`` to ``self.url`` and return the ``requests`` response.

        Exceptions raised by ``requests`` are not handled here; the public
        methods translate them into the errors their callers expect.
        """
        if not isinstance(self.proxies, dict):
            self.proxies = {}
        request = requests.Request(method='POST',
                                   url=self.url,
                                   data=freq,
                                   headers=self._headers())
        with requests.Session() as session:
            session.proxies = self.proxies
            # NOTE(review): verify=False disables TLS certificate checks.
            # Kept as-is because changing it could break existing proxy
            # setups, but it should be revisited.
            return session.send(request=request.prepare(),
                                verify=False,
                                timeout=self.timeout)

    @staticmethod
    def _load_rpc_payload(decoded_line):
        """Decode one response line that mentions the RPC id.

        The line is a JSON envelope whose ``[0][2]`` element is itself a JSON
        string holding the actual payload.  The line is first parsed as-is
        (what ``translate`` always did); if that fails it is retried with a
        closing ``]`` appended (what ``detect`` always did), so both shapes
        are accepted by both methods.

        :raises json.JSONDecodeError: if neither form is valid JSON.
        """
        try:
            envelope = json.loads(decoded_line)
        except json.JSONDecodeError:
            envelope = json.loads(decoded_line + ']')
        return json.loads(envelope[0][2])

    @classmethod
    def _extract_translation(cls, payload, pronounce):
        """Pull the translation out of a decoded payload.

        :returns: the value ``translate`` should return, or ``_NO_RESULT``
            when ``payload[1][0]`` has neither one nor two entries.
        """
        candidates = payload[1][0]
        if len(candidates) == 1:
            entry = candidates[0]
            if len(entry) <= 5:
                # Short entry ("only url" in the original code): the first
                # field is returned unchanged and no pronunciation is given.
                result = entry[0]
                return [result, None, None] if pronounce else result
            # Each sentence is followed by a single space, so the joined text
            # keeps the trailing space callers have always received.
            result = ''.join(part[0].strip() + ' ' for part in entry[5])
        elif len(candidates) == 2:
            # Two entries: return the first field of each as a list.
            result = [candidate[0] for candidate in candidates]
        else:
            return cls._NO_RESULT
        if not pronounce:
            return result
        return [result, payload[0][0], candidates[0][1]]

    def translate(self, text, lang_tgt='auto', lang_src='auto', pronounce=False):
        """Translate ``text``.

        :param text: Source text (converted with ``str()``).
        :param lang_tgt: Target language code; values not in ``LANGUAGES``
            become ``'auto'``.
        :param lang_src: Source language code; values not in ``LANGUAGES``
            become ``'auto'``.
        :param pronounce: When truthy, return
            ``[translation, source_pronunciation, target_pronunciation]``
            instead of just the translation.
        :returns: The translation (a ``str``, or a ``list`` when the service
            returns two alternatives); ``""`` for empty input; a warning
            string when the text has 5000 characters or more; ``None`` when
            the response contained no usable line.
        :raises requests.exceptions.ConnectTimeout: re-raised unchanged.
        :raises google_new_transError: for HTTP errors and any other
            ``requests`` failure.
        """
        lang_src = self._normalize_lang(lang_src)
        # The original reset ``lang_src`` here as well, which discarded a
        # valid source language whenever the target was not in LANGUAGES.
        lang_tgt = self._normalize_lang(lang_tgt)
        text = str(text)
        if len(text) >= self._MAX_TEXT_LENGTH:
            return "Warning: Can only detect less than 5000 characters"
        if len(text) == 0:
            return ""
        r = None
        try:
            r = self._send(self._package_rpc(text, lang_src, lang_tgt))
            for raw_line in r.iter_lines(chunk_size=1024):
                decoded_line = raw_line.decode('utf-8')
                if self._RPC_ID not in decoded_line:
                    continue
                # Parsing errors (JSON, index, type) propagate unchanged.
                result = self._extract_translation(
                    self._load_rpc_payload(decoded_line), pronounce)
                if result is not self._NO_RESULT:
                    return result
            r.raise_for_status()
        except requests.exceptions.ConnectTimeout:
            raise
        except requests.exceptions.HTTPError as err:
            # Request successful, bad response
            raise google_new_transError(tts=self, response=r) from err
        except requests.exceptions.RequestException as err:
            # Request failed
            raise google_new_transError(tts=self) from err

    def detect(self, text):
        """Detect the language of ``text``.

        :param text: Text to inspect (converted with ``str()``).
        :returns: ``[code, LANGUAGES[code.lower()]]``; ``""`` for empty input;
            ``None`` when the text has 5000 characters or more (a debug
            record is logged) or when the response had no usable line.
        :raises google_new_transError: for HTTP errors, any other
            ``requests`` failure, or a response line that cannot be parsed.
        :raises KeyError: if the detected code is not a key of ``LANGUAGES``.
        """
        text = str(text)
        if len(text) >= self._MAX_TEXT_LENGTH:
            log.debug("Warning: Can only detect less than 5000 characters")
            return None
        if len(text) == 0:
            return ""
        r = None
        try:
            r = self._send(self._package_rpc(text))
            for raw_line in r.iter_lines(chunk_size=1024):
                decoded_line = raw_line.decode('utf-8')
                if self._RPC_ID not in decoded_line:
                    continue
                try:
                    detect_lang = self._load_rpc_payload(decoded_line)[0][2]
                except Exception as err:
                    # Still an ``Exception`` for existing callers, but now
                    # with a message and the original error chained.
                    raise google_new_transError(
                        "Failed to parse the language detection response") from err
                return [detect_lang, LANGUAGES[detect_lang.lower()]]
            r.raise_for_status()
        except requests.exceptions.HTTPError as err:
            # Request successful, bad response
            log.debug(str(err))
            raise google_new_transError(tts=self, response=r) from err
        except requests.exceptions.RequestException as err:
            # Request failed
            log.debug(str(err))
            raise google_new_transError(tts=self) from err