Spaces:

next-social
/

9fo912

Runtime error

9fo912 / trans_google.py

pengdaqian

fix more

49226f5 over 2 years ago

10.9 kB

	# coding:utf-8
	# author LuShan
	# version : 1.1.9
	import json, requests, random, re
	from urllib.parse import quote
	import urllib3
	import logging
	from trans_constant import LANGUAGES, DEFAULT_SERVICE_URLS

	# Library-style logger: the NullHandler keeps this module silent unless the
	# host application configures logging itself.
	log = logging.getLogger(__name__)
	log.addHandler(logging.NullHandler())

	# Requests in this module are sent with verify=False, which would otherwise
	# emit an InsecureRequestWarning on every call.
	urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

	# Domain suffixes ("com", "co.uk", ...) extracted from the known service
	# hosts. The dots are escaped so they only match a literal "." instead of
	# any character.
	_SERVICE_HOST_PATTERN = re.compile(r'translate\.google\.(.*)')
	URLS_SUFFIX = [
	    _SERVICE_HOST_PATTERN.search(url.strip()).group(1)
	    for url in DEFAULT_SERVICE_URLS
	]
	# Suffix used when the caller asks for one that is not in URLS_SUFFIX.
	URL_SUFFIX_DEFAULT = 'com'


	class google_new_transError(Exception):
	    """Exception that uses context to present a meaningful error message.

	    :param msg: Explicit message. When given (and truthy) it is used as-is.
	    :param tts: Keyword-only. The client object that issued the failing
	        request. When ``msg`` is not given, its presence triggers message
	        inference via :meth:`infer_msg`.
	    :param response: Keyword-only. The HTTP response object, if one was
	        received. It must expose ``status_code`` and ``reason``.

	    The resolved message is stored in ``self.msg`` (``None`` when neither
	    ``msg`` nor ``tts`` is supplied) and passed to ``Exception.__init__``.
	    """

	    def __init__(self, msg=None, **kwargs):
	        self.tts = kwargs.pop('tts', None)
	        self.rsp = kwargs.pop('response', None)
	        if msg:
	            self.msg = msg
	        elif self.tts is not None:
	            self.msg = self.infer_msg(self.tts, self.rsp)
	        else:
	            self.msg = None
	        super(google_new_transError, self).__init__(self.msg)

	    def infer_msg(self, tts, rsp=None):
	        """Build a human-readable message from the client and the response.

	        :param tts: The client object involved in the failure.
	        :param rsp: The HTTP response, or ``None`` when no response was
	            received at all.
	        :return: A string of the form ``"<premise>. Probable cause: <cause>"``.
	        """
	        if rsp is None:
	            # No response object at all: the request never completed.
	            return "{}. Probable cause: {}".format("Failed to connect", "timeout")

	        status = rsp.status_code
	        reason = rsp.reason
	        premise = "{:d} ({}) from TTS API".format(status, reason)

	        cause = "Unknown"
	        if status == 403:
	            cause = "Bad token or upstream API changes"
	        elif status == 200 and not getattr(tts, 'lang_check', True):
	            # The client object is not guaranteed to define ``lang_check`` or
	            # ``lang``; read them defensively so that building the error
	            # message can never raise AttributeError itself.
	            cause = "No audio stream in response. Unsupported language '%s'" % getattr(tts, 'lang', None)
	        elif status >= 500:
	            cause = "Upstream API error. Try again later."

	        return "{}. Probable cause: {}".format(premise, cause)


	class google_translator:
	    """Small client for the Google Translate web ``batchexecute`` endpoint.

	    :param url_suffix: Domain suffix of the ``translate.google.<suffix>`` host
	        to use. A value that is not listed in ``URLS_SUFFIX`` falls back to
	        ``URL_SUFFIX_DEFAULT``.
	    :type url_suffix: :class:`str`

	    :param timeout: Timeout passed to every HTTP request.
	    :type timeout: number, or a pair of numbers

	    :param proxies: Proxy mapping applied to every request. Anything that is
	        not a ``dict`` is treated as "no proxies".
	    :type proxies: :class:`dict`, e.g. ``{'http': '...', 'https': '...'}``

	    Language codes accepted by :meth:`translate` are the keys of
	    ``LANGUAGES``; an unknown code falls back to ``'auto'``.
	    """

	    # RPC identifier: sent with every request and used to recognise the line
	    # of the reply that carries the answer.
	    _RPC_ID = "MkEWBc"
	    # Marker returned by _parse_translation() when a reply line has a shape
	    # this client does not understand (distinct from any real result).
	    _UNRECOGNISED = object()

	    def __init__(self, url_suffix="com", timeout=5, proxies=None):
	        self.proxies = proxies
	        if url_suffix not in URLS_SUFFIX:
	            self.url_suffix = URL_SUFFIX_DEFAULT
	        else:
	            self.url_suffix = url_suffix
	        url_base = "https://translate.google.{}".format(self.url_suffix)
	        self.url = url_base + "/_/TranslateWebserverUi/data/batchexecute"
	        self.timeout = timeout

	    def _package_rpc(self, text, lang_src='auto', lang_tgt='auto'):
	        """Return the URL-encoded ``f.req=...&`` form body for one request."""
	        parameter = [[text.strip(), lang_src, lang_tgt, True], [1]]
	        escaped_parameter = json.dumps(parameter, separators=(',', ':'))
	        rpc = [[[self._RPC_ID, escaped_parameter, None, "generic"]]]
	        escaped_rpc = json.dumps(rpc, separators=(',', ':'))
	        return "f.req={}&".format(quote(escaped_rpc))

	    @staticmethod
	    def _language_or_auto(code):
	        """Return ``code`` if it is a key of ``LANGUAGES``, else ``'auto'``."""
	        try:
	            LANGUAGES[code]
	        except (KeyError, TypeError):
	            # TypeError covers unhashable values passed by mistake.
	            return 'auto'
	        return code

	    def _headers(self):
	        """Return the HTTP headers sent with every request."""
	        return {
	            "Referer": "http://translate.google.{}/".format(self.url_suffix),
	            "User-Agent":
	                "Mozilla/5.0 (Windows NT 10.0; WOW64) "
	                "AppleWebKit/537.36 (KHTML, like Gecko) "
	                "Chrome/47.0.2526.106 Safari/537.36",
	            "Content-Type": "application/x-www-form-urlencoded;charset=utf-8"
	        }

	    def _post(self, freq):
	        """POST the form body ``freq`` to ``self.url`` and return the response."""
	        if not isinstance(self.proxies, dict):
	            self.proxies = {}
	        request = requests.Request(method='POST',
	                                   url=self.url,
	                                   data=freq,
	                                   headers=self._headers())
	        with requests.Session() as s:
	            s.proxies = self.proxies
	            # NOTE(review): verify=False disables TLS certificate checks. It
	            # is kept to preserve existing behaviour; consider enabling it.
	            return s.send(request=request.prepare(),
	                          verify=False,
	                          timeout=self.timeout)

	    @classmethod
	    def _rpc_lines(cls, r):
	        """Yield the decoded reply lines that mention the RPC identifier."""
	        for line in r.iter_lines(chunk_size=1024):
	            decoded_line = line.decode('utf-8')
	            if cls._RPC_ID in decoded_line:
	                yield decoded_line

	    @staticmethod
	    def _load_rpc_payload(decoded_line):
	        """Decode one reply line and return its nested JSON payload.

	        The line is normally a complete JSON array. A line whose closing
	        bracket is missing is also accepted by retrying with ``']'``
	        appended, so that both request methods agree on one format.
	        """
	        try:
	            envelope = json.loads(decoded_line)
	        except ValueError:  # json.JSONDecodeError subclasses ValueError
	            envelope = json.loads(decoded_line + ']')
	        return json.loads(envelope[0][2])

	    @classmethod
	    def _parse_translation(cls, decoded_line, pronounce):
	        """Extract the translation carried by one reply line.

	        :return: The translation (see :meth:`translate` for the shapes), or
	            ``_UNRECOGNISED`` when the payload has neither one nor two
	            candidate entries.
	        """
	        payload = cls._load_rpc_payload(decoded_line)
	        candidates = payload[1][0]

	        if len(candidates) == 1:
	            first = candidates[0]
	            if len(first) <= 5:
	                # Short entry: the text is returned as-is and no
	                # pronunciation is available.
	                only = first[0]
	                return [only, None, None] if pronounce else only
	            # Each sentence keeps its historical trailing space.
	            translate_text = "".join(
	                sentence[0].strip() + ' ' for sentence in first[5]
	            )
	            if pronounce:
	                return [translate_text, payload[0][0], payload[1][0][0][1]]
	            return translate_text

	        if len(candidates) == 2:
	            sentences = [candidate[0] for candidate in candidates]
	            if pronounce:
	                return [sentences, payload[0][0], payload[1][0][0][1]]
	            return sentences

	        return cls._UNRECOGNISED

	    @classmethod
	    def _parse_detection(cls, decoded_line):
	        """Return the language code carried by one reply line."""
	        return cls._load_rpc_payload(decoded_line)[0][2]

	    def translate(self, text, lang_tgt='auto', lang_src='auto', pronounce=False):
	        """Translate ``text`` from ``lang_src`` into ``lang_tgt``.

	        :param text: Text to translate; converted with ``str()``.
	        :param lang_tgt: Target language code (a key of ``LANGUAGES``).
	        :param lang_src: Source language code (a key of ``LANGUAGES``).
	        :param pronounce: When truthy, return
	            ``[translation, pronounce_src, pronounce_tgt]`` instead of the
	            bare translation.
	        :return: The translation (a string, or a list of alternatives when
	            the service returns two candidates); ``""`` for empty input; a
	            warning string when the text has 5000 characters or more; or
	            ``None`` when no usable line is found in a successful reply.
	        :raises requests.exceptions.ConnectTimeout: re-raised unchanged.
	        :raises google_new_transError: on an HTTP error status or any other
	            request failure.
	        """
	        lang_src = self._language_or_auto(lang_src)
	        # An unknown target falls back to 'auto' and leaves the source
	        # language untouched.
	        lang_tgt = self._language_or_auto(lang_tgt)

	        text = str(text)
	        if len(text) >= 5000:
	            return "Warning: Can only detect less than 5000 characters"
	        if not text:
	            return ""

	        r = None
	        try:
	            r = self._post(self._package_rpc(text, lang_src, lang_tgt))
	            for decoded_line in self._rpc_lines(r):
	                result = self._parse_translation(decoded_line, pronounce)
	                if result is not self._UNRECOGNISED:
	                    return result
	            r.raise_for_status()
	        except requests.exceptions.ConnectTimeout:
	            raise
	        except requests.exceptions.HTTPError as e:
	            # Request successful, bad response
	            raise google_new_transError(tts=self, response=r) from e
	        except requests.exceptions.RequestException as e:
	            # Request failed
	            raise google_new_transError(tts=self) from e
	        return None

	    def detect(self, text):
	        """Detect the language of ``text``.

	        :param text: Text to analyse; converted with ``str()``.
	        :return: ``[code, LANGUAGES[code.lower()]]``; ``""`` for empty input;
	            ``None`` when the text has 5000 characters or more, or when no
	            usable line is found in a successful reply.
	        :raises google_new_transError: on an HTTP error status or any other
	            request failure.
	        :raises Exception: parsing errors (``ValueError``, ``IndexError``,
	            ``TypeError``, ``KeyError``, ...) propagate with their own type.
	        """
	        text = str(text)
	        if len(text) >= 5000:
	            log.debug("Warning: Can only detect less than 5000 characters")
	            return None
	        if not text:
	            return ""

	        r = None
	        try:
	            r = self._post(self._package_rpc(text))
	            decoded_line = next(self._rpc_lines(r), None)
	            if decoded_line is not None:
	                detect_lang = self._parse_detection(decoded_line)
	                return [detect_lang, LANGUAGES[detect_lang.lower()]]
	            r.raise_for_status()
	        except requests.exceptions.HTTPError as e:
	            # Request successful, bad response
	            log.debug(str(e))
	            raise google_new_transError(tts=self, response=r) from e
	        except requests.exceptions.RequestException as e:
	            # Request failed
	            log.debug(str(e))
	            raise google_new_transError(tts=self) from e
	        return None