Spaces:
Running
Running
Update app2.py
Browse files
app2.py
CHANGED
@@ -48,7 +48,7 @@ class URLProcessor:
|
|
48 |
self.session = requests.Session()
|
49 |
self.timeout = 10 # seconds
|
50 |
self.session.headers.update({
|
51 |
-
'User
|
52 |
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
|
53 |
'Accept-Language': 'en-US,en;q=0.5',
|
54 |
'Accept-Encoding': 'gzip, deflate, br',
|
@@ -76,7 +76,7 @@ class URLProcessor:
|
|
76 |
return cleaned_text
|
77 |
except Exception as e:
|
78 |
logger.warning(f"Text cleaning error: {e}. Using fallback method.")
|
79 |
-
text = re.sub(r'[\x00
|
80 |
text = text.encode('ascii', 'ignore').decode('ascii') # Remove non-ASCII characters
|
81 |
text = re.sub(r'\s+', ' ', text) # Normalize whitespace
|
82 |
return text.strip()
|
@@ -229,7 +229,7 @@ class FileProcessor:
|
|
229 |
"source": "file",
|
230 |
"filename": filename,
|
231 |
"content": content,
|
232 |
-
"timestamp": datetime.now
|
233 |
})
|
234 |
except Exception as e:
|
235 |
logger.error(f"Error reading file {filename}: {str(e)}")
|
@@ -355,8 +355,8 @@ def decode_qr_code(image_path: str) -> str:
|
|
355 |
"""Decode QR code from an image file"""
|
356 |
try:
|
357 |
img = Image.open(image_path)
|
358 |
-
decoded_objects = decode(img)
|
359 |
-
|
360 |
return decoded_objects[0].data.decode('utf-8')
|
361 |
raise ValueError("Unable to decode QR code")
|
362 |
except Exception as e:
|
@@ -451,4 +451,4 @@ def main():
|
|
451 |
)
|
452 |
|
453 |
if __name__ == "__main__":
|
454 |
-
main()
|
|
|
48 |
self.session = requests.Session()
|
49 |
self.timeout = 10 # seconds
|
50 |
self.session.headers.update({
|
51 |
+
'User-Agent': UserAgent().random, # Fixed: removed space in User-Agent
|
52 |
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
|
53 |
'Accept-Language': 'en-US,en;q=0.5',
|
54 |
'Accept-Encoding': 'gzip, deflate, br',
|
|
|
76 |
return cleaned_text
|
77 |
except Exception as e:
|
78 |
logger.warning(f"Text cleaning error: {e}. Using fallback method.")
|
79 |
+
text = re.sub(r'[\x00-\x1F\x7F-\x9F]', '', text) # Remove control characters
|
80 |
text = text.encode('ascii', 'ignore').decode('ascii') # Remove non-ASCII characters
|
81 |
text = re.sub(r'\s+', ' ', text) # Normalize whitespace
|
82 |
return text.strip()
|
|
|
229 |
"source": "file",
|
230 |
"filename": filename,
|
231 |
"content": content,
|
232 |
+
"timestamp": datetime.now().isoformat()
|
233 |
})
|
234 |
except Exception as e:
|
235 |
logger.error(f"Error reading file {filename}: {str(e)}")
|
|
|
355 |
"""Decode QR code from an image file"""
|
356 |
try:
|
357 |
img = Image.open(image_path)
|
358 |
+
decoded_objects = decode(img)
|
359 |
+
if decoded_objects:
|
360 |
return decoded_objects[0].data.decode('utf-8')
|
361 |
raise ValueError("Unable to decode QR code")
|
362 |
except Exception as e:
|
|
|
451 |
)
|
452 |
|
453 |
if __name__ == "__main__":
|
454 |
+
main()
|