Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -4,7 +4,8 @@ import os
|
|
| 4 |
import base64
|
| 5 |
import glob
|
| 6 |
import json
|
| 7 |
-
|
|
|
|
| 8 |
from datetime import datetime
|
| 9 |
from dotenv import load_dotenv
|
| 10 |
from openai import ChatCompletion
|
|
@@ -38,6 +39,19 @@ def get_table_download_link(file_path):
|
|
| 38 |
href = f'<a href="data:file/htm;base64,{b64}" target="_blank" download="{os.path.basename(file_path)}">{os.path.basename(file_path)}</a>'
|
| 39 |
return href
|
| 40 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
def read_file_content(file):
|
| 42 |
if file.type == "application/json":
|
| 43 |
content = json.load(file)
|
|
@@ -45,10 +59,15 @@ def read_file_content(file):
|
|
| 45 |
elif file.type == "text/html":
|
| 46 |
content = BeautifulSoup(file, "html.parser")
|
| 47 |
return content.text
|
| 48 |
-
elif file.type == "application/
|
| 49 |
tree = ElementTree.parse(file)
|
| 50 |
root = tree.getroot()
|
| 51 |
return ElementTree.tostring(root, encoding='unicode')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
elif file.type == "text/plain":
|
| 53 |
return file.getvalue().decode()
|
| 54 |
else:
|
|
|
|
| 4 |
import base64
|
| 5 |
import glob
|
| 6 |
import json
|
| 7 |
+
import re
|
| 8 |
+
from xml.etree import ElementTree as ET
|
| 9 |
from datetime import datetime
|
| 10 |
from dotenv import load_dotenv
|
| 11 |
from openai import ChatCompletion
|
|
|
|
| 39 |
href = f'<a href="data:file/htm;base64,{b64}" target="_blank" download="{os.path.basename(file_path)}">{os.path.basename(file_path)}</a>'
|
| 40 |
return href
|
| 41 |
|
| 42 |
+
def CompressXML_Old(xml_text):
    """Reduce *xml_text* to its purely alphabetic, whitespace-separated tokens.

    The text is split on whitespace; a token survives only when every
    character in it is an ASCII letter (the pattern also permits spaces,
    but split tokens never contain any). Surviving tokens are re-joined
    with single spaces, so markup, digits and punctuation-bearing tokens
    are all dropped.

    Args:
        xml_text: The raw text to filter.

    Returns:
        A single space-separated string of the tokens that passed the filter.
    """
    # A match object is always truthy and a failed match is None, so the
    # bound ``fullmatch`` method works directly as a filter predicate.
    is_letters_only = re.compile(r'[A-Za-z ]*').fullmatch
    return ' '.join(filter(is_letters_only, xml_text.split()))
|
| 47 |
+
|
| 48 |
+
def CompressXML(xml_text):
    """Return *xml_text* re-serialized with its XML comments removed.

    The previous implementation could never succeed:
    ``isinstance(elem.tag, ET.Comment)`` raises ``TypeError`` because
    ``ET.Comment`` is a factory function rather than a class, and
    ``Element.getparent()`` exists only in lxml, not in the standard
    library's ``xml.etree.ElementTree``.

    No explicit removal pass is needed: the default parser used by
    ``ET.fromstring`` does not insert comment nodes into the tree, so
    parsing and serializing again already yields comment-free output.

    Args:
        xml_text: A string containing one well-formed XML document.

    Returns:
        The document serialized as a unicode string, without comments.

    Raises:
        xml.etree.ElementTree.ParseError: If *xml_text* is empty or is not
            well-formed XML.
    """
    # Comments are dropped at parse time; the tree holds elements only.
    root = ET.fromstring(xml_text)
    return ET.tostring(root, encoding='unicode')
|
| 54 |
+
|
| 55 |
def read_file_content(file):
|
| 56 |
if file.type == "application/json":
|
| 57 |
content = json.load(file)
|
|
|
|
| 59 |
elif file.type == "text/html":
|
| 60 |
content = BeautifulSoup(file, "html.parser")
|
| 61 |
return content.text
|
| 62 |
+
elif file.type == "application/xmlold" or file.type == "text/xmlold":
|
| 63 |
tree = ElementTree.parse(file)
|
| 64 |
root = tree.getroot()
|
| 65 |
return ElementTree.tostring(root, encoding='unicode')
|
| 66 |
+
elif file.type == "application/xml" or file.type == "text/xml":
|
| 67 |
+
tree = ElementTree.parse(file)
|
| 68 |
+
root = tree.getroot()
|
| 69 |
+
xml_text = ElementTree.tostring(root, encoding='unicode')
|
| 70 |
+
return CompressXML(xml_text)
|
| 71 |
elif file.type == "text/plain":
|
| 72 |
return file.getvalue().decode()
|
| 73 |
else:
|