""" Validation utilities for security and input validation """ import os import re from typing import Optional from urllib.parse import urlparse class ValidationError(Exception): """Custom exception for validation errors.""" pass class Validators: """Collection of validation functions for security and input validation.""" # Regex patterns for validation - allow numbers, letters, hyphens, underscores, dots HUGGINGFACE_MODEL_PATTERN = re.compile(r'^[a-zA-Z0-9_\-\.]+/[a-zA-Z0-9_\-\.]+$') SAFE_FILENAME_PATTERN = re.compile(r'^[a-zA-Z0-9_\-\.]+$') @staticmethod def validate_model_path(model_path: str) -> bool: """ Validate that a custom model path is safe and follows expected patterns. Args: model_path: The model path to validate Returns: bool: True if valid, False otherwise Raises: ValidationError: If the model path is invalid """ if not model_path or not isinstance(model_path, str): raise ValidationError("Model path cannot be empty") # Trim whitespace model_path = model_path.strip() # Check for dangerous characters (excluding single forward slash for HuggingFace format) dangerous_chars = ['..', '\\', '|', ';', '&', '$', '`', '<', '>'] if any(char in model_path for char in dangerous_chars): raise ValidationError("Model path contains invalid characters") # Check for multiple slashes or leading/trailing slashes if '//' in model_path or model_path.startswith('/') or model_path.endswith('/'): raise ValidationError("Model path contains invalid characters") # Check if it looks like a HuggingFace model path (user/model format) if not Validators.HUGGINGFACE_MODEL_PATTERN.match(model_path): raise ValidationError("Model path must follow the format 'organization/model-name'") # Check length limits if len(model_path) > 200: raise ValidationError("Model path is too long") return True @staticmethod def validate_filename(filename: str) -> bool: """ Validate that a filename is safe for upload. Args: filename: The filename to validate Returns: bool: True if valid, False otherwise Raises: ValidationError: If the filename is invalid """ if not filename or not isinstance(filename, str): raise ValidationError("Filename cannot be empty") # Check for dangerous characters and patterns dangerous_patterns = ['..', '/', '\\', '|', ';', '&', '$', '`', '<', '>'] if any(pattern in filename for pattern in dangerous_patterns): raise ValidationError("Filename contains invalid characters") # Check if filename starts with a dot (hidden files) if filename.startswith('.'): raise ValidationError("Hidden files are not allowed") # Check length if len(filename) > 255: raise ValidationError("Filename is too long") return True @staticmethod def validate_file_extension(filename: str, allowed_extensions: set) -> bool: """ Validate that a file has an allowed extension. Args: filename: The filename to check allowed_extensions: Set of allowed extensions (e.g., {'.txt', '.py'}) Returns: bool: True if valid, False otherwise Raises: ValidationError: If the extension is not allowed """ if not filename: raise ValidationError("Filename cannot be empty") _, ext = os.path.splitext(filename.lower()) if ext not in allowed_extensions: allowed_list = ', '.join(sorted(allowed_extensions)) raise ValidationError(f"File type '{ext}' not allowed. Allowed types: {allowed_list}") return True @staticmethod def validate_file_size(file_size: int, max_size: int) -> bool: """ Validate that a file size is within limits. Args: file_size: Size of the file in bytes max_size: Maximum allowed size in bytes Returns: bool: True if valid, False otherwise Raises: ValidationError: If the file is too large """ if file_size > max_size: max_mb = max_size / (1024 * 1024) current_mb = file_size / (1024 * 1024) raise ValidationError(f"File too large: {current_mb:.1f}MB (max: {max_mb:.1f}MB)") return True @staticmethod def validate_text_input(text: str, max_length: int = 1000000) -> bool: """ Validate text input for processing. Args: text: The text to validate max_length: Maximum allowed length Returns: bool: True if valid, False otherwise Raises: ValidationError: If the text is invalid """ if not isinstance(text, str): raise ValidationError("Text input must be a string") if len(text) > max_length: raise ValidationError(f"Text too long: {len(text)} characters (max: {max_length})") return True @staticmethod def sanitize_model_path(model_path: str) -> str: """ Sanitize a model path by removing potentially dangerous elements. Args: model_path: The model path to sanitize Returns: str: Sanitized model path """ if not model_path: return "" # Remove whitespace sanitized = model_path.strip() # Remove any path traversal attempts sanitized = sanitized.replace('..', '') sanitized = sanitized.replace('/', '') sanitized = sanitized.replace('\\', '') return sanitized @staticmethod def is_safe_path(path: str, base_path: str) -> bool: """ Check if a path is safe and within the expected base directory. Args: path: The path to check base_path: The base directory that the path should be within Returns: bool: True if the path is safe, False otherwise """ try: # Resolve both paths to absolute paths abs_path = os.path.abspath(path) abs_base = os.path.abspath(base_path) # Check if the path is within the base directory return abs_path.startswith(abs_base) except (OSError, ValueError): return False # Global instance validators = Validators()