File size: 7,266 Bytes
d66ab65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
"""

Validation utilities for security and input validation

"""
import os
import re
from typing import Optional
from urllib.parse import urlparse


class ValidationError(Exception):
    """Exception type signalling that an input failed a validation check."""


class Validators:
    """Collection of stateless validation helpers for untrusted input.

    Every ``validate_*`` method returns ``True`` on success and raises
    ``ValidationError`` on failure; none of them ever returns ``False``.
    """

    # Allow-list patterns: letters, digits, hyphens, underscores and dots only.
    HUGGINGFACE_MODEL_PATTERN = re.compile(r'^[a-zA-Z0-9_\-\.]+/[a-zA-Z0-9_\-\.]+$')
    SAFE_FILENAME_PATTERN = re.compile(r'^[a-zA-Z0-9_\-\.]+$')

    # Substrings that must never appear in a model path (path traversal and
    # shell metacharacters). A single '/' is allowed there for 'org/model'.
    _DANGEROUS_PATH_TOKENS = ('..', '\\', '|', ';', '&', '$', '`', '<', '>')
    # Filenames additionally must not contain any path separator at all.
    _DANGEROUS_FILENAME_TOKENS = _DANGEROUS_PATH_TOKENS + ('/',)

    @staticmethod
    def validate_model_path(model_path: str) -> bool:
        """Validate that a model path has the form ``organization/model-name``.

        Surrounding whitespace is ignored before the checks are applied.

        Args:
            model_path: The model path to validate.

        Returns:
            bool: Always ``True`` when the path is valid.

        Raises:
            ValidationError: If the path is empty, not a string, contains
                dangerous characters, has misplaced slashes, does not match
                the ``organization/model-name`` format, or is longer than
                200 characters.
        """
        if not isinstance(model_path, str) or not model_path:
            raise ValidationError("Model path cannot be empty")

        candidate = model_path.strip()
        if not candidate:
            # Whitespace-only input is effectively empty; say so instead of
            # reporting a confusing format error.
            raise ValidationError("Model path cannot be empty")

        if any(token in candidate for token in Validators._DANGEROUS_PATH_TOKENS):
            raise ValidationError("Model path contains invalid characters")

        # Doubled, leading or trailing slashes indicate a filesystem-style path.
        if '//' in candidate or candidate.startswith('/') or candidate.endswith('/'):
            raise ValidationError("Model path contains invalid characters")

        if not Validators.HUGGINGFACE_MODEL_PATTERN.match(candidate):
            raise ValidationError("Model path must follow the format 'organization/model-name'")

        # A bare '.' segment (e.g. './model') satisfies the character
        # allow-list but denotes a relative filesystem path, not a repo id.
        if '.' in candidate.split('/'):
            raise ValidationError("Model path must follow the format 'organization/model-name'")

        if len(candidate) > 200:
            raise ValidationError("Model path is too long")

        return True

    @staticmethod
    def validate_filename(filename: str) -> bool:
        """Validate that a filename is safe to accept as an upload name.

        Args:
            filename: The bare filename (no directory part) to validate.

        Returns:
            bool: Always ``True`` when the filename is valid.

        Raises:
            ValidationError: If the filename is empty, not a string, contains
                path separators, traversal sequences, shell metacharacters or
                control characters, starts with a dot, or is longer than
                255 characters.
        """
        if not isinstance(filename, str) or not filename:
            raise ValidationError("Filename cannot be empty")

        if any(token in filename for token in Validators._DANGEROUS_FILENAME_TOKENS):
            raise ValidationError("Filename contains invalid characters")

        # NUL and other control characters (including newlines) are never
        # legitimate in an upload name and break filesystem and log handling.
        if any(ord(ch) < 32 or ord(ch) == 127 for ch in filename):
            raise ValidationError("Filename contains invalid characters")

        # A leading dot would create a hidden file.
        if filename.startswith('.'):
            raise ValidationError("Hidden files are not allowed")

        if len(filename) > 255:
            raise ValidationError("Filename is too long")

        return True

    @staticmethod
    def validate_file_extension(filename: str, allowed_extensions: set) -> bool:
        """Validate that a file has one of the allowed extensions.

        The comparison is case-insensitive on both the filename and the
        allowed extensions.

        Args:
            filename: The filename to check.
            allowed_extensions: Allowed extensions including the leading dot
                (e.g., ``{'.txt', '.py'}``).

        Returns:
            bool: Always ``True`` when the extension is allowed.

        Raises:
            ValidationError: If the filename is empty or its extension is not
                in ``allowed_extensions``.
        """
        if not filename:
            raise ValidationError("Filename cannot be empty")

        _, ext = os.path.splitext(filename.lower())
        # The filename is lower-cased, so normalise the allow-list the same
        # way; otherwise an entry such as '.TXT' could never match.
        permitted = {allowed.lower() for allowed in allowed_extensions}
        if ext not in permitted:
            allowed_list = ', '.join(sorted(allowed_extensions))
            raise ValidationError(f"File type '{ext}' not allowed. Allowed types: {allowed_list}")

        return True

    @staticmethod
    def validate_file_size(file_size: int, max_size: int) -> bool:
        """Validate that a file size does not exceed a limit.

        Args:
            file_size: Size of the file in bytes.
            max_size: Maximum allowed size in bytes (inclusive).

        Returns:
            bool: Always ``True`` when the size is within the limit.

        Raises:
            ValidationError: If ``file_size`` is greater than ``max_size``.
        """
        if file_size > max_size:
            bytes_per_mb = 1024 * 1024
            max_mb = max_size / bytes_per_mb
            current_mb = file_size / bytes_per_mb
            raise ValidationError(f"File too large: {current_mb:.1f}MB (max: {max_mb:.1f}MB)")

        return True

    @staticmethod
    def validate_text_input(text: str, max_length: int = 1000000) -> bool:
        """Validate text input before processing.

        Args:
            text: The text to validate. An empty string is accepted.
            max_length: Maximum allowed length in characters (inclusive).

        Returns:
            bool: Always ``True`` when the text is valid.

        Raises:
            ValidationError: If ``text`` is not a string or is longer than
                ``max_length``.
        """
        if not isinstance(text, str):
            raise ValidationError("Text input must be a string")

        if len(text) > max_length:
            raise ValidationError(f"Text too long: {len(text)} characters (max: {max_length})")

        return True

    @staticmethod
    def sanitize_model_path(model_path: str) -> str:
        """Strip whitespace, path separators and ``..`` sequences from a path.

        Note that every ``/`` is removed, so ``'org/model'`` becomes
        ``'orgmodel'``.

        Args:
            model_path: The model path to sanitize.

        Returns:
            str: The sanitized string, or ``""`` for empty input. The result
            never contains ``/``, a backslash or ``..``.
        """
        if not model_path:
            return ""

        sanitized = model_path.strip()

        # Remove separators BEFORE the traversal sequences: doing it the other
        # way round lets inputs such as './.' collapse into '..'.
        sanitized = sanitized.replace('/', '').replace('\\', '')
        while '..' in sanitized:
            sanitized = sanitized.replace('..', '')

        return sanitized

    @staticmethod
    def is_safe_path(path: str, base_path: str) -> bool:
        """Check whether a path lies inside (or equals) a base directory.

        Both paths are made absolute and normalised, so ``..`` components are
        collapsed before the comparison. Symbolic links are NOT resolved.

        Args:
            path: The path to check.
            base_path: The directory that ``path`` must be within.

        Returns:
            bool: ``True`` if ``path`` is ``base_path`` or a descendant of
            it, ``False`` otherwise (including when either argument cannot be
            interpreted as a path).
        """
        try:
            abs_path = os.path.normcase(os.path.abspath(path))
            abs_base = os.path.normcase(os.path.abspath(base_path))

            # Compare whole path components. A plain string-prefix test would
            # wrongly accept siblings such as '/base_evil' for base '/base'.
            return os.path.commonpath([abs_path, abs_base]) == abs_base
        except (OSError, ValueError, TypeError):
            # ValueError: e.g. paths on different drives; TypeError: non-path
            # arguments such as None.
            return False


# Shared module-level instance. Every Validators method is a @staticmethod,
# so calling through this object or through the class itself is equivalent.
validators = Validators()