Spaces:
Running
Running
Update app2.py
Browse files
app2.py
CHANGED
@@ -17,7 +17,10 @@ from bs4 import BeautifulSoup
|
|
17 |
from fake_useragent import UserAgent
|
18 |
from cleantext import clean
|
19 |
import qrcode
|
20 |
-
|
|
|
|
|
|
|
21 |
|
22 |
# Setup logging with detailed configuration
|
23 |
logging.basicConfig(
|
@@ -42,13 +45,12 @@ class URLProcessor:
|
|
42 |
'Connection': 'keep-alive',
|
43 |
'Upgrade-Insecure-Requests': '1'
|
44 |
})
|
45 |
-
|
46 |
def advanced_text_cleaning(self, text: str) -> str:
|
47 |
"""Robust text cleaning with version compatibility"""
|
48 |
try:
|
49 |
cleaned_text = clean(
|
50 |
text,
|
51 |
-
fix_unicode=True,
|
52 |
to_ascii=True,
|
53 |
lower=True,
|
54 |
no_line_breaks=True,
|
@@ -208,7 +210,7 @@ class FileProcessor:
|
|
208 |
for extracted_file in os.listdir(extraction_directory):
|
209 |
extracted_file_path = os.path.join(extraction_directory, extracted_file)
|
210 |
process_file(extracted_file_path)
|
211 |
-
|
212 |
def _process_single_file(self, file) -> List[Dict]:
|
213 |
try:
|
214 |
file_stat = os.stat(file.name)
|
@@ -247,13 +249,6 @@ def _process_single_file(self, file) -> List[Dict]:
|
|
247 |
|
248 |
def generate_qr_code(json_data):
|
249 |
"""Generate a QR code from JSON data."""
|
250 |
-
# Limit the size of json_data to avoid exceeding QR code version limits
|
251 |
-
max_length = 2953 # Maximum length for version 40 (the highest version)
|
252 |
-
|
253 |
-
if len(json_data) > max_length:
|
254 |
-
logger.warning("JSON data is too large for QR code generation. Truncating data.")
|
255 |
-
json_data = json_data[:max_length] # Truncate the data
|
256 |
-
|
257 |
qr = qrcode.make(json_data)
|
258 |
qr_path = "output/qr_code.png"
|
259 |
qr.save(qr_path)
|
|
|
17 |
from fake_useragent import UserAgent
|
18 |
from cleantext import clean
|
19 |
import qrcode
|
20 |
+
if sys.version_info >= (3, 6):
|
21 |
+
import zipfile
|
22 |
+
else:
|
23 |
+
import zipfile36 as zipfile
|
24 |
|
25 |
# Setup logging with detailed configuration
|
26 |
logging.basicConfig(
|
|
|
45 |
'Connection': 'keep-alive',
|
46 |
'Upgrade-Insecure-Requests': '1'
|
47 |
})
|
48 |
+
|
49 |
def advanced_text_cleaning(self, text: str) -> str:
|
50 |
"""Robust text cleaning with version compatibility"""
|
51 |
try:
|
52 |
cleaned_text = clean(
|
53 |
text,
|
|
|
54 |
to_ascii=True,
|
55 |
lower=True,
|
56 |
no_line_breaks=True,
|
|
|
210 |
for extracted_file in os.listdir(extraction_directory):
|
211 |
extracted_file_path = os.path.join(extraction_directory, extracted_file)
|
212 |
process_file(extracted_file_path)
|
213 |
+
|
214 |
def _process_single_file(self, file) -> List[Dict]:
|
215 |
try:
|
216 |
file_stat = os.stat(file.name)
|
|
|
249 |
|
250 |
def generate_qr_code(json_data):
|
251 |
"""Generate a QR code from JSON data."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
252 |
qr = qrcode.make(json_data)
|
253 |
qr_path = "output/qr_code.png"
|
254 |
qr.save(qr_path)
|