Spaces:
Running
Running
Update app2.py
Browse files
app2.py
CHANGED
@@ -6,7 +6,7 @@ import mimetypes
|
|
6 |
import time
|
7 |
import io
|
8 |
from selenium import webdriver
|
9 |
-
from
|
10 |
import concurrent.futures
|
11 |
import string
|
12 |
import zipfile
|
@@ -48,7 +48,7 @@ class URLProcessor:
|
|
48 |
self.session = requests.Session()
|
49 |
self.timeout = 10 # seconds
|
50 |
self.session.headers.update({
|
51 |
-
'User-Agent': UserAgent().random,
|
52 |
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
|
53 |
'Accept-Language': 'en-US,en;q=0.5',
|
54 |
'Accept-Encoding': 'gzip, deflate, br',
|
@@ -76,7 +76,7 @@ class URLProcessor:
|
|
76 |
return cleaned_text
|
77 |
except Exception as e:
|
78 |
logger.warning(f"Text cleaning error: {e}. Using fallback method.")
|
79 |
-
text = re.sub(r'[\x00-\x1F\x7F-\x9F]', '', text) # Remove control characters
|
80 |
text = text.encode('ascii', 'ignore').decode('ascii') # Remove non-ASCII characters
|
81 |
text = re.sub(r'\s+', ' ', text) # Normalize whitespace
|
82 |
return text.strip()
|
@@ -229,7 +229,7 @@ class FileProcessor:
|
|
229 |
"source": "file",
|
230 |
"filename": filename,
|
231 |
"content": content,
|
232 |
-
"timestamp": datetime
|
233 |
})
|
234 |
except Exception as e:
|
235 |
logger.error(f"Error reading file {filename}: {str(e)}")
|
@@ -346,7 +346,7 @@ def generate_qr_code(data: Union[str, Dict], combined: bool = True) -> List[str]
|
|
346 |
img.save(str(output_path))
|
347 |
return [str(output_path)]
|
348 |
|
349 |
-
|
350 |
except Exception as e:
|
351 |
logger.error(f"QR generation error: {e}")
|
352 |
return []
|
@@ -355,7 +355,7 @@ def decode_qr_code(image_path: str) -> str:
|
|
355 |
"""Decode QR code from an image file"""
|
356 |
try:
|
357 |
img = Image.open(image_path)
|
358 |
-
decoded_objects = decode
|
359 |
if decoded_objects:
|
360 |
return decoded_objects[0].data.decode('utf-8')
|
361 |
raise ValueError("Unable to decode QR code")
|
|
|
6 |
import time
|
7 |
import io
|
8 |
from selenium import webdriver
|
9 |
+
from selenium.webdriver.common.by
|
10 |
import concurrent.futures
|
11 |
import string
|
12 |
import zipfile
|
|
|
48 |
self.session = requests.Session()
|
49 |
self.timeout = 10 # seconds
|
50 |
self.session.headers.update({
|
51 |
+
'User -Agent': UserAgent().random,
|
52 |
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
|
53 |
'Accept-Language': 'en-US,en;q=0.5',
|
54 |
'Accept-Encoding': 'gzip, deflate, br',
|
|
|
76 |
return cleaned_text
|
77 |
except Exception as e:
|
78 |
logger.warning(f"Text cleaning error: {e}. Using fallback method.")
|
79 |
+
text = re.sub(r'[\x00 -\x1F\x7F-\x9F]', '', text) # Remove control characters
|
80 |
text = text.encode('ascii', 'ignore').decode('ascii') # Remove non-ASCII characters
|
81 |
text = re.sub(r'\s+', ' ', text) # Normalize whitespace
|
82 |
return text.strip()
|
|
|
229 |
"source": "file",
|
230 |
"filename": filename,
|
231 |
"content": content,
|
232 |
+
"timestamp": datetime.now ().isoformat()
|
233 |
})
|
234 |
except Exception as e:
|
235 |
logger.error(f"Error reading file {filename}: {str(e)}")
|
|
|
346 |
img.save(str(output_path))
|
347 |
return [str(output_path)]
|
348 |
|
349 |
+
return []
|
350 |
except Exception as e:
|
351 |
logger.error(f"QR generation error: {e}")
|
352 |
return []
|
|
|
355 |
"""Decode QR code from an image file"""
|
356 |
try:
|
357 |
img = Image.open(image_path)
|
358 |
+
decoded_objects = decode(qr_img)
|
359 |
if decoded_objects:
|
360 |
return decoded_objects[0].data.decode('utf-8')
|
361 |
raise ValueError("Unable to decode QR code")
|