awacke1 committed on
Commit
125c22a
·
1 Parent(s): 6bb2035

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -1
app.py CHANGED
@@ -38,6 +38,12 @@ def get_table_download_link(file_path):
38
  href = f'<a href="data:file/htm;base64,{b64}" target="_blank" download="{os.path.basename(file_path)}">{os.path.basename(file_path)}</a>'
39
  return href
40
 
 
 
 
 
 
 
41
  def read_file_content(file):
42
  if file.type == "application/json":
43
  content = json.load(file)
@@ -45,10 +51,15 @@ def read_file_content(file):
45
  elif file.type == "text/html":
46
  content = BeautifulSoup(file, "html.parser")
47
  return content.text
48
- elif file.type == "application/xml" or file.type == "text/xml":
49
  tree = ElementTree.parse(file)
50
  root = tree.getroot()
51
  return ElementTree.tostring(root, encoding='unicode')
 
 
 
 
 
52
  elif file.type == "text/plain":
53
  return file.getvalue().decode()
54
  else:
 
38
  href = f'<a href="data:file/htm;base64,{b64}" target="_blank" download="{os.path.basename(file_path)}">{os.path.basename(file_path)}</a>'
39
  return href
40
 
41
def CompressXML(xml_text):
    """Reduce serialized XML to its purely alphabetic words.

    The text is split on whitespace and only tokens made up entirely of
    ASCII letters (A-Z, a-z) are kept; the survivors are joined with single
    spaces.

    Note that a token touching markup or punctuation (for example
    ``<p>Hello`` or ``world.``) contains non-letter characters and is
    therefore dropped as a whole.

    Args:
        xml_text: The XML document as a string.

    Returns:
        A string of the retained words separated by single spaces; empty
        when no token qualifies.
    """
    # str.split() never yields empty or whitespace-containing tokens, so an
    # ASCII + alphabetic check is exactly equivalent to matching
    # [A-Za-z]+ and needs no regex module in scope.
    return ' '.join(
        token for token in xml_text.split()
        if token.isascii() and token.isalpha()
    )
46
+
47
  def read_file_content(file):
48
  if file.type == "application/json":
49
  content = json.load(file)
 
51
  elif file.type == "text/html":
52
  content = BeautifulSoup(file, "html.parser")
53
  return content.text
54
+ elif file.type == "application/xmlold" or file.type == "text/xmlold":
55
  tree = ElementTree.parse(file)
56
  root = tree.getroot()
57
  return ElementTree.tostring(root, encoding='unicode')
58
+ elif file.type == "application/xml" or file.type == "text/xml":
59
+ tree = ElementTree.parse(file)
60
+ root = tree.getroot()
61
+ xml_text = ElementTree.tostring(root, encoding='unicode')
62
+ return CompressXML(xml_text)
63
  elif file.type == "text/plain":
64
  return file.getvalue().decode()
65
  else: