# tokenizers / config.py
# Hosting-page header captured with the file (not part of the module):
#   bartar's picture | Upload 26 files | d66ab65 verified
import os
from dataclasses import dataclass, field
from typing import List, Optional
_TRUTHY_STRINGS = ('true', '1', 'yes')


def _env_flag(name, fallback):
    """Return True when env var *name* (or *fallback* if unset) is a truthy string."""
    return os.getenv(name, fallback).lower() in _TRUTHY_STRINGS


def _env_int(name, fallback):
    """Return env var *name* (or *fallback* if unset) converted with ``int()``."""
    return int(os.getenv(name, fallback))


def _default_allowed_extensions():
    """Build a fresh list of upload extensions for each Config instance."""
    return ['txt', 'md', 'py', 'js', 'html', 'css', 'json', 'xml', 'csv']


def _default_model_prefixes():
    """Build a fresh list of accepted model-path prefixes for each Config instance."""
    return [
        'microsoft/', 'google/', 'meta-llama/', 'mistralai/', 'openai-community/',
        'Qwen/', 'THUDM/', 'deepseek-ai/', 'unsloth/', 'google-bert/', 'bartar/',
    ]


@dataclass
class Config:
    """Base settings object for the Tokenizer Pro application.

    Most defaults are looked up in the process environment, falling back to
    the literal shown next to each field. The lookups run once, when this
    class body is executed, so later changes to the environment do not alter
    the defaults. Any field can still be overridden per instance through the
    generated ``__init__`` keyword arguments.
    """

    # --- Flask ---
    SECRET_KEY: str = os.getenv(
        'SECRET_KEY', 'tokenizer-pro-secret-key-change-in-production'
    )
    DEBUG: bool = _env_flag('DEBUG', 'False')

    # --- File uploads ---
    MAX_CONTENT_LENGTH: int = _env_int('MAX_CONTENT_LENGTH', 25 * 1024 * 1024)  # 25MB
    UPLOAD_FOLDER: str = os.getenv('UPLOAD_FOLDER', '/tmp/tokenizer_uploads')
    ALLOWED_EXTENSIONS: List[str] = field(default_factory=_default_allowed_extensions)

    # --- Tokenizer cache ---
    CACHE_SIZE: int = _env_int('CACHE_SIZE', 10)
    CACHE_EXPIRATION: int = _env_int('CACHE_EXPIRATION', 3600)  # seconds (1 hour)

    # --- Display limits ---
    MAX_DISPLAY_TOKENS: int = _env_int('MAX_DISPLAY_TOKENS', 50000)
    PREVIEW_CHAR_LIMIT: int = _env_int('PREVIEW_CHAR_LIMIT', 8096)

    # --- Performance ---
    CHUNK_SIZE: int = _env_int('CHUNK_SIZE', 1024 * 1024)  # 1MB chunks for file processing

    # --- Security ---
    VALIDATE_MODEL_PATHS: bool = _env_flag('VALIDATE_MODEL_PATHS', 'True')
    ALLOWED_MODEL_PREFIXES: List[str] = field(default_factory=_default_model_prefixes)

    # --- HuggingFace ---
    HF_HOME: str = os.getenv('HF_HOME', '/tmp/huggingface')
    HF_CACHE_DIR: str = os.getenv('HF_CACHE_DIR', '/tmp/huggingface/cache')

    # --- Logging ---
    LOG_LEVEL: str = os.getenv('LOG_LEVEL', 'INFO')
    LOG_FILE: str = os.getenv('LOG_FILE', 'tokenizer_pro.log')
    LOG_MAX_BYTES: int = _env_int('LOG_MAX_BYTES', 10 * 1024 * 1024)  # 10MB
    LOG_BACKUP_COUNT: int = _env_int('LOG_BACKUP_COUNT', 3)
@dataclass
class DevelopmentConfig(Config):
    """Development configuration with debug enabled.

    The overrides are declared as annotated fields on a ``@dataclass``
    subclass so that they replace the inherited field defaults. A bare class
    attribute on an undecorated subclass is shadowed by the instance
    attribute that the inherited ``__init__`` assigns from the base default,
    which made ``DevelopmentConfig().DEBUG`` ignore the value set here.
    Class-level access (``DevelopmentConfig.DEBUG``) is unchanged.
    """

    DEBUG: bool = True
    SECRET_KEY: str = 'dev-secret-key'
@dataclass
class ProductionConfig(Config):
    """Production configuration with enhanced security.

    The class is decorated with ``@dataclass`` and its overrides are
    annotated so that (a) instances actually receive these defaults instead
    of the base-class ones and (b) the generated ``__init__`` invokes
    ``__post_init__``. Without the decorator the inherited ``__init__`` never
    calls ``__post_init__`` and assigns the base default secret key, so the
    check below could not fire.

    Raises:
        ValueError: when an instance is created and ``SECRET_KEY`` is empty
            or unset (neither passed to the constructor nor present in the
            environment when this class was defined).
    """

    DEBUG: bool = False
    # Read once at class definition; None (no fallback key) when the variable is unset.
    SECRET_KEY: Optional[str] = os.getenv('SECRET_KEY', None)

    def __post_init__(self):
        """Refuse to build a production config without a secret key."""
        if not self.SECRET_KEY:
            raise ValueError("SECRET_KEY must be set in production environment")
@dataclass
class TestingConfig(Config):
    """Testing configuration.

    Declared as a ``@dataclass`` with annotated overrides so the values below
    become the field defaults of instances. As bare class attributes on an
    undecorated subclass they were shadowed by the base defaults that the
    inherited ``__init__`` assigns to every instance. Class-level access
    (e.g. ``TestingConfig.CACHE_SIZE``) is unchanged.
    """

    # Not defined on the base class; added here as an extra field (appended
    # after the inherited ones, so positional construction is unaffected).
    TESTING: bool = True
    DEBUG: bool = True
    UPLOAD_FOLDER: str = '/tmp/test_uploads'
    CACHE_SIZE: int = 2
    MAX_DISPLAY_TOKENS: int = 100