import os
from dataclasses import dataclass, field
from typing import List

@dataclass
class Config:
    """Centralized configuration for Tokenizer Pro application."""
    
    # Flask settings
    SECRET_KEY: str = os.getenv('SECRET_KEY', 'tokenizer-pro-secret-key-change-in-production')
    DEBUG: bool = os.getenv('DEBUG', 'False').lower() in ('true', '1', 'yes')
    
    # File upload settings
    MAX_CONTENT_LENGTH: int = int(os.getenv('MAX_CONTENT_LENGTH', 25 * 1024 * 1024))  # 25MB
    UPLOAD_FOLDER: str = os.getenv('UPLOAD_FOLDER', '/tmp/tokenizer_uploads')
    ALLOWED_EXTENSIONS: List[str] = field(default_factory=lambda: ['txt', 'md', 'py', 'js', 'html', 'css', 'json', 'xml', 'csv'])
    
    # Tokenizer caching settings
    CACHE_SIZE: int = int(os.getenv('CACHE_SIZE', 10))
    CACHE_EXPIRATION: int = int(os.getenv('CACHE_EXPIRATION', 3600))  # 1 hour in seconds
    
    # Display limits
    MAX_DISPLAY_TOKENS: int = int(os.getenv('MAX_DISPLAY_TOKENS', 50000))
    PREVIEW_CHAR_LIMIT: int = int(os.getenv('PREVIEW_CHAR_LIMIT', 8096))
    
    # Performance settings
    CHUNK_SIZE: int = int(os.getenv('CHUNK_SIZE', 1024 * 1024))  # 1MB chunks for file processing
    
    # Security settings
    VALIDATE_MODEL_PATHS: bool = os.getenv('VALIDATE_MODEL_PATHS', 'True').lower() in ('true', '1', 'yes')
    ALLOWED_MODEL_PREFIXES: List[str] = field(default_factory=lambda: [
        'microsoft/', 'google/', 'meta-llama/', 'mistralai/', 'openai-community/',
        'Qwen/', 'THUDM/', 'deepseek-ai/', 'unsloth/', 'google-bert/', 'bartar/'
    ])
    
    # HuggingFace settings
    HF_HOME: str = os.getenv('HF_HOME', '/tmp/huggingface')
    HF_CACHE_DIR: str = os.getenv('HF_CACHE_DIR', '/tmp/huggingface/cache')
    
    # Logging settings
    LOG_LEVEL: str = os.getenv('LOG_LEVEL', 'INFO')
    LOG_FILE: str = os.getenv('LOG_FILE', 'tokenizer_pro.log')
    LOG_MAX_BYTES: int = int(os.getenv('LOG_MAX_BYTES', 10 * 1024 * 1024))  # 10MB
    LOG_BACKUP_COUNT: int = int(os.getenv('LOG_BACKUP_COUNT', 3))

# Environment-specific configs redeclare overridden fields with type annotations
# so that dataclass instantiation picks up the overrides (and __post_init__ runs).
@dataclass
class DevelopmentConfig(Config):
    """Development configuration with debug enabled."""
    DEBUG: bool = True
    SECRET_KEY: str = 'dev-secret-key'

@dataclass
class ProductionConfig(Config):
    """Production configuration with enhanced security."""
    DEBUG: bool = False
    SECRET_KEY: str = os.getenv('SECRET_KEY', '')

    def __post_init__(self):
        # Fail fast on instantiation if no real secret key was provided.
        if not self.SECRET_KEY:
            raise ValueError("SECRET_KEY must be set in production environment")

@dataclass
class TestingConfig(Config):
    """Testing configuration."""
    TESTING: bool = True
    DEBUG: bool = True
    UPLOAD_FOLDER: str = '/tmp/test_uploads'
    CACHE_SIZE: int = 2
    MAX_DISPLAY_TOKENS: int = 100
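
# Illustrative helper (a sketch, not part of the original module): map an
# environment name to an instantiated config. Instantiating the dataclass is
# what triggers ProductionConfig.__post_init__ validation; the resulting object
# can then be handed to Flask via app.config.from_object(...). The 'APP_ENV'
# variable name and the get_config() helper are assumptions for illustration.
_CONFIG_BY_NAME = {
    'development': DevelopmentConfig,
    'production': ProductionConfig,
    'testing': TestingConfig,
}

def get_config(env_name: str = os.getenv('APP_ENV', 'development')) -> Config:
    """Return an instantiated config object for the given environment name."""
    try:
        return _CONFIG_BY_NAME[env_name]()
    except KeyError:
        raise ValueError(f"Unknown environment: {env_name!r}") from None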