# Residue from the web file viewer this source was copied from (not part of the module):
# Spaces:
# Running
# Running
# File size: 4,615 Bytes
# 2a08e80 |
# (viewer line-number gutter: 1 ... 164) |
"""
Advanced URL & Text Processing Suite
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

A sophisticated, enterprise-grade toolkit for URL processing, file manipulation, and QR operations.
Designed with performance, scalability, and reliability in mind.

Key Features:

- Intelligent URL Processing with adaptive rate limiting
- Universal File Processing with smart content extraction
- Advanced QR Code operations with custom styling
- Modern, responsive UI with real-time feedback
- Enterprise-grade security and performance

Example:

    >>> from urld import URLProcessor, FileProcessor, QRProcessor
    >>> url_proc = URLProcessor(request_delay=1.0)
    >>> result = url_proc.process_urls(['https://example.com'])
    >>> print(result)
"""
import logging
import sys
from pathlib import Path
from typing import Any, Dict, List, Literal, Optional, Union
# Package metadata. ``__version__`` is also listed in ``__all__`` and is
# written to the log in the startup message further down in this module.
__version__ = "1.0.0"
__author__ = "Advanced URL Processing Team"
__license__ = "MIT"
__copyright__ = "Copyright 2024 Advanced URL Processing Team"
# Configure advanced logging
# Records are sent to stdout and, when it can be opened, to ``urld.log`` in the
# current working directory. Opening that file may fail (for example when the
# working directory is read-only); such a failure must not make the package
# unimportable, so it is remembered here and reported once logging is set up.
# Note: ``logging.basicConfig`` leaves the root logger untouched when it
# already has handlers, so an application that configured logging earlier wins.
_log_handlers = [logging.StreamHandler(sys.stdout)]
_file_log_error = None
try:
    _log_handlers.append(logging.FileHandler('urld.log'))
except OSError as exc:
    # Keep a reference: ``exc`` itself is unbound when the except block ends.
    _file_log_error = exc
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s.%(msecs)03d [%(levelname)s] %(name)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    handlers=_log_handlers,
)
logger = logging.getLogger(__name__)
if _file_log_error is not None:
    logger.warning("File logging to 'urld.log' is disabled: %s", _file_log_error)
# Import core components
# These are package-relative imports, so this module has to be loaded as part
# of its package. An ImportError raised by any of them is logged through the
# module logger and then re-raised unchanged, so a missing component still
# aborts the import of this module.
try:
    from .url_processor import URLProcessor
    from .file_processor import FileProcessor
    from .qr_processor import QRProcessor
    from .interface import create_interface
except ImportError as e:
    logger.error(f"Failed to import core component: {e}")
    raise
# Type definitions for enhanced type checking
# Mapping from string keys to arbitrary values.
ProcessingResult = Dict[str, Any]
# List of strings (presumably URLs, going by the alias name).
URLList = List[str]
# Exactly one of these three string literals; what each mode does is not
# defined in this module.
ProcessingMode = Literal['basic', 'interactive', 'deep']
class ProcessingError(Exception):
    """Exception type for processing failures that can carry extra context.

    Args:
        message: Text forwarded to ``Exception.__init__``; it is what
            ``str(error)`` returns.
        details: Optional dictionary with additional information. It is
            stored as ``self.details``; when omitted, ``None`` or empty, a
            fresh empty dictionary is stored instead.
    """

    def __init__(self, message: str, details: Optional[Dict] = None):
        super().__init__(message)
        if details:
            # A non-empty mapping is kept by reference, not copied.
            self.details = details
        else:
            self.details = {}
# Version compatibility check
# Older interpreters only get a warning here; the import is not aborted by
# this check.
if sys.version_info[:2] < (3, 8):
    logger.warning("Python 3.8+ is recommended for optimal performance")
# Feature detection
def _try_import(module_name: str) -> Optional[Any]:
    """Import *module_name* and return the module, or ``None`` if loading fails.

    Optional dependencies can fail to load with more than ``ImportError``
    (for example ``OSError`` when a native library cannot be loaded). Any
    such failure just means "feature unavailable", so every ``Exception`` is
    caught and recorded at debug level instead of propagating.
    """
    import importlib

    try:
        return importlib.import_module(module_name)
    except Exception as exc:
        logging.getLogger(__name__).debug(
            "Optional module %r is unavailable: %s", module_name, exc
        )
        return None


def check_features() -> Dict[str, bool]:
    """Check availability of optional features.

    Each feature is probed by actually importing its backing module:

    - ``'selenium'``: the ``selenium`` module imports.
    - ``'gpu_acceleration'``: ``torch`` imports and
      ``torch.cuda.is_available()`` is truthy.
    - ``'ocr_support'``: the ``pytesseract`` module imports.
    - ``'advanced_qr'``: the ``qrcode`` module imports.

    Returns:
        A dict with exactly those four keys, in that order, mapped to
        ``bool`` values. The function never raises because a probe failed.
    """
    features = {
        'selenium': _try_import('selenium') is not None,
        'gpu_acceleration': False,
        'ocr_support': _try_import('pytesseract') is not None,
        'advanced_qr': _try_import('qrcode') is not None,
    }
    torch = _try_import('torch')
    if torch is not None:
        try:
            features['gpu_acceleration'] = bool(torch.cuda.is_available())
        except Exception as exc:
            # A failing CUDA query is treated as "no GPU", not as a fatal error.
            logging.getLogger(__name__).debug("CUDA availability check failed: %s", exc)
    return features
# Initialize feature detection
# Evaluated once, when this module is imported; maps feature name -> bool.
AVAILABLE_FEATURES = check_features()
# Export public interface
# Names exposed by a star-import of this module.
__all__ = [
    'URLProcessor',
    'FileProcessor',
    'QRProcessor',
    'create_interface',
    'ProcessingError',
    'ProcessingResult',
    'URLList',
    'ProcessingMode',
    'AVAILABLE_FEATURES',
    '__version__',
]
# Startup information
# Logs the package version, then the comma-separated names of the features
# whose detection result is truthy (an empty list if none were detected).
logger.info(f"Advanced URL Processing Suite v{__version__}")
logger.info(f"Available features: {', '.join(k for k, v in AVAILABLE_FEATURES.items() if v)}")
def get_config_path() -> Path:
    """Return the path ``<home>/.urld/config.json`` for the current user.

    Only the path is computed; nothing is created on disk.
    """
    return Path.home().joinpath('.urld', 'config.json')
def initialize() -> None:
    """Create the configuration directory and a default config file if missing.

    The location comes from ``get_config_path()``. The parent directory is
    created (with parents) when absent, and a default JSON configuration is
    written only when the file does not exist yet, so an existing
    configuration is never overwritten. Both creations are logged at INFO.

    Raises:
        OSError: If the directory or the file cannot be created.
    """
    import json

    config_path = get_config_path()
    config_dir = config_path.parent
    if not config_dir.exists():
        # exist_ok=True: another process may create the directory between the
        # check above and this call; that must not fail with FileExistsError.
        config_dir.mkdir(parents=True, exist_ok=True)
        logger.info(f"Created configuration directory: {config_path.parent}")
    if not config_path.exists():
        default_config = {
            'url_processor': {
                'request_delay': 1.0,
                'timeout': 30,
                'max_retries': 3,
                'respect_robots': True,
            },
            'file_processor': {
                # 2 * 1024**3 = 2147483648 (presumably bytes, i.e. 2 GiB; confirm with the consumer)
                'max_file_size': 2 * 1024 * 1024 * 1024,
                'supported_formats': ['pdf', 'docx', 'xlsx', 'zip', 'tar.gz'],
            },
            'qr_processor': {
                'error_correction': 'H',
                'box_size': 10,
                'border': 4,
            },
        }
        # Explicit encoding so the file does not depend on the locale default.
        with open(config_path, 'w', encoding='utf-8') as f:
            json.dump(default_config, f, indent=4)
        logger.info(f"Created default configuration file: {config_path}")
# Run initialization
# Executed at import time: makes sure the configuration directory and the
# default configuration file exist before the package is used.
initialize()