Yaron Koresh committed on
Commit
43afd3e
·
verified ·
1 Parent(s): c9c4c93

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +108 -15
app.py CHANGED
@@ -3,7 +3,6 @@ from bs4 import BeautifulSoup
3
  from abc import ABC, abstractmethod
4
  from pathlib import Path
5
  from typing import List, Optional, Union
6
- from langdetect import detect as get_language
7
  from collections import namedtuple
8
  from inspect import signature
9
  import os
@@ -659,6 +658,7 @@ def all_pipes(pos,neg,artist,song):
659
 
660
  return imgs
661
 
 
662
  language_codes = {
663
  "afrikaans": "af",
664
  "albanian": "sq",
@@ -963,28 +963,121 @@ class BaseTranslator(ABC):
963
  translated = self.translate(text, **kwargs)
964
  arr.append(translated)
965
  return arr
966
-
967
- def translate(txt,to_lang="en",from_lang=False):
968
- log(f'CALL translate')
969
- if not from_lang:
970
- from_lang = get_language(txt)
971
- if(from_lang == to_lang):
972
- log(f'RET translate with txt as {txt}')
973
- return txt
974
 
 
 
 
 
975
 
976
- translator_endpoint = "https://api.cognitive.microsofttranslator.com/translate?api-version=3.0"
977
-
978
- translator = Translator(from_lang=from_lang,to_lang=to_lang)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
979
  translation = ""
980
- if len(txt) > 490:
981
  words = txt.split()
982
  while len(words) > 0:
983
  chunk = ""
984
- while len(words) > 0 and len(chunk) < 490:
985
  chunk = chunk + " " + words[0]
986
  words = words[1:]
987
- if len(chunk) > 490:
988
  _words = chunk.split()
989
  words = [_words[-1], *words]
990
  chunk = " ".join(_words[:-1])
 
3
  from abc import ABC, abstractmethod
4
  from pathlib import Path
5
  from typing import List, Optional, Union
 
6
  from collections import namedtuple
7
  from inspect import signature
8
  import os
 
658
 
659
  return imgs
660
 
661
+ google_translate_endpoint = "https://translate.google.com/m"
662
  language_codes = {
663
  "afrikaans": "af",
664
  "albanian": "sq",
 
963
  translated = self.translate(text, **kwargs)
964
  arr.append(translated)
965
  return arr
 
 
 
 
 
 
 
 
966
 
967
class GoogleTranslator(BaseTranslator):
    """
    Translator that uses Google Translate under the hood to translate text(s).

    The translation is read out of the HTML returned by
    ``google_translate_endpoint``: first from a ``div`` matching the primary
    query (``class="t0"``), then from the fallback query
    (``class="result-container"``) when the primary one matches nothing.
    """

    def __init__(
        self,
        source: str = "auto",
        target: str = "en",
        proxies: Optional[dict] = None,
        **kwargs
    ):
        """
        @param source: source language to translate from
        @param target: target language to translate to
        @param proxies: optional proxies mapping handed to ``requests.get``
        @param kwargs: forwarded unchanged to ``BaseTranslator.__init__``
        """
        self.proxies = proxies
        super().__init__(
            base_url=google_translate_endpoint,
            source=source,
            target=target,
            element_tag="div",
            element_query={"class": "t0"},
            payload_key="q",  # key of text in the url
            **kwargs
        )

        # Fallback selector tried when the primary element query finds nothing.
        self._alt_element_query = {"class": "result-container"}

    def translate(self, text: str, **kwargs) -> str:
        """
        function to translate a text
        @param text: desired text to translate
        @param kwargs: accepted for interface compatibility; not used here
        @return: str: translated text
        @raise TooManyRequests: the endpoint answered with HTTP 429
        @raise RequestError: ``request_failed`` rejected the status code
        @raise TranslationNotFound: neither element query matched the page
        """
        # NOTE(review): if is_input_valid() can return a falsy value instead
        # of raising, this method still falls through and returns None, as it
        # did before -- confirm against the helper's definition.
        if is_input_valid(text, max_chars=1000):
            text = text.strip()
            if self._same_source_target() or is_empty(text):
                return text
            self._url_params["tl"] = self._target
            self._url_params["sl"] = self._source

            if self.payload_key:
                self._url_params[self.payload_key] = text

            response = requests.get(
                self._base_url, params=self._url_params, proxies=self.proxies
            )
            # Release the connection on every path, including the error
            # paths below (previously close() was skipped when raising).
            try:
                if response.status_code == 429:
                    raise TooManyRequests()

                if request_failed(status_code=response.status_code):
                    raise RequestError()

                soup = BeautifulSoup(response.text, "html.parser")
            finally:
                response.close()

            element = soup.find(self._element_tag, self._element_query)
            if not element:
                element = soup.find(self._element_tag, self._alt_element_query)
                if not element:
                    raise TranslationNotFound(text)

            translated = element.get_text(strip=True)
            if translated != text:
                return translated

            # The page echoed the input back unchanged.
            if not any(ch.isalnum() for ch in text):
                # Nothing alphanumeric to translate (e.g. only punctuation):
                # return the text itself rather than falling off the end of
                # the function and returning None.
                return text

            self._url_params["tl"] = self._target
            if "hl" not in self._url_params:
                return text
            # Retry once without the "hl" parameter; the recursion stops
            # because the key is gone on the next pass.
            del self._url_params["hl"]
            return self.translate(text)

    def translate_file(self, path: str, **kwargs) -> str:
        """
        translate directly from file (delegates to ``self._translate_file``)
        @param path: path to the target file
        @type path: str
        @param kwargs: additional args
        @return: str
        """
        return self._translate_file(path, **kwargs)

    def translate_batch(self, batch: List[str], **kwargs) -> List[str]:
        """
        translate a list of texts (delegates to ``self._translate_batch``)
        @param batch: list of texts you want to translate
        @param kwargs: additional args
        @return: list of translations
        """
        return self._translate_batch(batch, **kwargs)
1068
+
1069
+ def translate(txt,to_lang="en",from_lang="auto"):
1070
+ log(f'CALL translate')
1071
+ translator = GoogleTranslator(from_lang=from_lang,to_lang=to_lang)
1072
  translation = ""
1073
+ if len(txt) > 1000:
1074
  words = txt.split()
1075
  while len(words) > 0:
1076
  chunk = ""
1077
+ while len(words) > 0 and len(chunk) < 1000:
1078
  chunk = chunk + " " + words[0]
1079
  words = words[1:]
1080
+ if len(chunk) > 1000:
1081
  _words = chunk.split()
1082
  words = [_words[-1], *words]
1083
  chunk = " ".join(_words[:-1])