Spaces:
Running
Running
File size: 7,266 Bytes
d66ab65 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 |
"""
Validation utilities for security and input validation
"""
import os
import re
from typing import Optional
from urllib.parse import urlparse
class ValidationError(Exception):
    """Raised when a value fails one of the validation checks in this module.

    The human-readable reason is passed as the exception message.
    """
class Validators:
    """Collection of validation functions for security and input validation.

    All methods are static. Every ``validate_*`` method returns ``True`` when
    the input is acceptable and raises ``ValidationError`` otherwise; none of
    them returns ``False``.
    """

    # Regex patterns for validation - allow numbers, letters, hyphens, underscores, dots
    HUGGINGFACE_MODEL_PATTERN = re.compile(r'^[a-zA-Z0-9_\-\.]+/[a-zA-Z0-9_\-\.]+$')
    # NOTE(review): not referenced by any method in this class; kept because
    # it is a public attribute that other modules may read.
    SAFE_FILENAME_PATTERN = re.compile(r'^[a-zA-Z0-9_\-\.]+$')

    # Substrings that are never allowed in a model path. A single forward
    # slash is deliberately absent: it separates organization and model name.
    _UNSAFE_MODEL_PATH_TOKENS = ('..', '\\', '|', ';', '&', '$', '`', '<', '>')
    # Substrings that are never allowed in an uploaded filename.
    _UNSAFE_FILENAME_TOKENS = ('..', '/', '\\', '|', ';', '&', '$', '`', '<', '>')

    @staticmethod
    def validate_model_path(model_path: str) -> bool:
        """
        Validate that a custom model path is safe and follows expected patterns.

        Surrounding whitespace is ignored for the purpose of the checks.

        Args:
            model_path: The model path to validate

        Returns:
            bool: Always True when the path is valid

        Raises:
            ValidationError: If the model path is empty, not a string,
                contains a forbidden substring, has leading/trailing/doubled
                slashes, does not match 'organization/model-name', or is
                longer than 200 characters
        """
        if not model_path or not isinstance(model_path, str):
            raise ValidationError("Model path cannot be empty")

        # Trim whitespace
        model_path = model_path.strip()

        # Check for dangerous characters (excluding single forward slash for HuggingFace format)
        if any(token in model_path for token in Validators._UNSAFE_MODEL_PATH_TOKENS):
            raise ValidationError("Model path contains invalid characters")

        # Check for multiple slashes or leading/trailing slashes
        if '//' in model_path or model_path.startswith('/') or model_path.endswith('/'):
            raise ValidationError("Model path contains invalid characters")

        # Check if it looks like a HuggingFace model path (user/model format).
        # fullmatch is used so that nothing may follow the matched text
        # ('$' on its own would tolerate a trailing newline).
        if not Validators.HUGGINGFACE_MODEL_PATTERN.fullmatch(model_path):
            raise ValidationError("Model path must follow the format 'organization/model-name'")

        # Check length limits
        if len(model_path) > 200:
            raise ValidationError("Model path is too long")

        return True

    @staticmethod
    def validate_filename(filename: str) -> bool:
        """
        Validate that a filename is safe for upload.

        Args:
            filename: The filename to validate

        Returns:
            bool: Always True when the filename is valid

        Raises:
            ValidationError: If the filename is empty, not a string, contains
                a forbidden substring or control character, starts with a
                dot, or is longer than 255 characters
        """
        if not filename or not isinstance(filename, str):
            raise ValidationError("Filename cannot be empty")

        # Check for dangerous characters and patterns
        if any(token in filename for token in Validators._UNSAFE_FILENAME_TOKENS):
            raise ValidationError("Filename contains invalid characters")

        # Reject NUL and other ASCII control characters (including newlines
        # and DEL): an embedded NUL makes open() fail, and line breaks allow
        # log/header injection when the name is echoed back.
        if any(ord(ch) < 32 or ord(ch) == 127 for ch in filename):
            raise ValidationError("Filename contains invalid characters")

        # Check if filename starts with a dot (hidden files)
        if filename.startswith('.'):
            raise ValidationError("Hidden files are not allowed")

        # Check length
        if len(filename) > 255:
            raise ValidationError("Filename is too long")

        return True

    @staticmethod
    def validate_file_extension(filename: str, allowed_extensions: set) -> bool:
        """
        Validate that a file has an allowed extension.

        The extension is taken from the lower-cased filename, so entries in
        ``allowed_extensions`` must be lower-case and include the leading dot.

        Args:
            filename: The filename to check
            allowed_extensions: Set of allowed extensions (e.g., {'.txt', '.py'})

        Returns:
            bool: Always True when the extension is allowed

        Raises:
            ValidationError: If the filename is empty or its extension is not
                in ``allowed_extensions``
        """
        if not filename:
            raise ValidationError("Filename cannot be empty")

        _, ext = os.path.splitext(filename.lower())
        if ext not in allowed_extensions:
            allowed_list = ', '.join(sorted(allowed_extensions))
            raise ValidationError(f"File type '{ext}' not allowed. Allowed types: {allowed_list}")

        return True

    @staticmethod
    def validate_file_size(file_size: int, max_size: int) -> bool:
        """
        Validate that a file size is within limits.

        Args:
            file_size: Size of the file in bytes
            max_size: Maximum allowed size in bytes (inclusive)

        Returns:
            bool: Always True when the size is within the limit

        Raises:
            ValidationError: If the file is too large
        """
        if file_size > max_size:
            # Report both figures in MB so the message is readable.
            max_mb = max_size / (1024 * 1024)
            current_mb = file_size / (1024 * 1024)
            raise ValidationError(f"File too large: {current_mb:.1f}MB (max: {max_mb:.1f}MB)")

        return True

    @staticmethod
    def validate_text_input(text: str, max_length: int = 1000000) -> bool:
        """
        Validate text input for processing.

        Args:
            text: The text to validate
            max_length: Maximum allowed length in characters (inclusive)

        Returns:
            bool: Always True when the text is valid

        Raises:
            ValidationError: If the text is not a string or is too long
        """
        if not isinstance(text, str):
            raise ValidationError("Text input must be a string")

        if len(text) > max_length:
            raise ValidationError(f"Text too long: {len(text)} characters (max: {max_length})")

        return True

    @staticmethod
    def sanitize_model_path(model_path: str) -> str:
        """
        Sanitize a model path by removing potentially dangerous elements.

        Surrounding whitespace, every forward slash and backslash, and every
        '..' sequence are removed. Note that this also removes the slash of an
        'organization/model-name' path.

        Args:
            model_path: The model path to sanitize

        Returns:
            str: Sanitized model path; an empty string for empty input
        """
        if not model_path:
            return ""

        # Remove whitespace
        sanitized = model_path.strip()

        # Remove separators BEFORE the traversal marker: doing it the other
        # way round lets an input such as './.' collapse into '..'.
        sanitized = sanitized.replace('/', '')
        sanitized = sanitized.replace('\\', '')

        # Remove any path traversal attempts; loop so no '..' can survive.
        while '..' in sanitized:
            sanitized = sanitized.replace('..', '')

        return sanitized

    @staticmethod
    def is_safe_path(path: str, base_path: str) -> bool:
        """
        Check if a path is safe and within the expected base directory.

        Both paths are made absolute with symbolic links resolved, then
        compared component-wise, so a sibling such as '/base_evil' is not
        mistaken for something inside '/base'. The base directory itself
        counts as being within the base.

        Args:
            path: The path to check
            base_path: The base directory that the path should be within

        Returns:
            bool: True if the path is safe, False otherwise (including when
                either argument cannot be interpreted as a path)
        """
        try:
            # Resolve both paths to absolute, symlink-free, case-normalised form
            abs_path = os.path.normcase(os.path.realpath(path))
            abs_base = os.path.normcase(os.path.realpath(base_path))

            # Check if the path is within the base directory. commonpath works
            # on whole path components, unlike a raw string prefix test; it
            # raises ValueError e.g. for paths on different Windows drives.
            return os.path.commonpath([abs_path, abs_base]) == abs_base
        except (OSError, ValueError, TypeError):
            return False
# Global instance. Every method visible on Validators is a @staticmethod,
# so this object carries no state of its own.
validators = Validators()