Spaces:
				
			
			
	
			
			
		Paused
		
	
	
	
			
			
	
	
	
	
		
		
		Paused
		
	File size: 4,128 Bytes
			
			| 03c0888 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 | from enum import Enum
class CacheMode(Enum):
    """
    Defines the caching behavior for web crawling operations.
    
    Modes:
    - ENABLED: Normal caching behavior (read and write)
    - DISABLED: No caching at all
    - READ_ONLY: Only read from cache, don't write
    - WRITE_ONLY: Only write to cache, don't read
    - BYPASS: Bypass cache for this operation
    """
    ENABLED = "enabled"
    DISABLED = "disabled"
    READ_ONLY = "read_only"
    WRITE_ONLY = "write_only"
    BYPASS = "bypass"
class CacheContext:
    """
    Encapsulates cache-related decisions and URL handling.
    
    This class centralizes all cache-related logic and URL type checking,
    making the caching behavior more predictable and maintainable.
    
    Attributes:
        url (str): The URL being processed.
        cache_mode (CacheMode): The cache mode for the current operation.
        always_bypass (bool): If True, bypasses caching for this operation.
        is_cacheable (bool): True if the URL is cacheable, False otherwise.
        is_web_url (bool): True if the URL is a web URL, False otherwise.
        is_local_file (bool): True if the URL is a local file, False otherwise.
        is_raw_html (bool): True if the URL is raw HTML, False otherwise.
        _url_display (str): The display name for the URL (web, local file, or raw HTML).
    """
    def __init__(self, url: str, cache_mode: CacheMode, always_bypass: bool = False):
        """
        Initializes the CacheContext with the provided URL and cache mode.
        
        Args:
            url (str): The URL being processed.
            cache_mode (CacheMode): The cache mode for the current operation.
            always_bypass (bool): If True, bypasses caching for this operation.
        """
        self.url = url
        self.cache_mode = cache_mode
        self.always_bypass = always_bypass
        self.is_cacheable = url.startswith(('http://', 'https://', 'file://'))
        self.is_web_url = url.startswith(('http://', 'https://'))
        self.is_local_file = url.startswith("file://")
        self.is_raw_html = url.startswith("raw:")
        self._url_display = url if not self.is_raw_html else "Raw HTML"
    
    def should_read(self) -> bool:
        """
        Determines if cache should be read based on context.
        
        How it works:
        1. If always_bypass is True or is_cacheable is False, return False.
        2. If cache_mode is ENABLED or READ_ONLY, return True.
        
        Returns:
            bool: True if cache should be read, False otherwise.
        """
        if self.always_bypass or not self.is_cacheable:
            return False
        return self.cache_mode in [CacheMode.ENABLED, CacheMode.READ_ONLY]
    
    def should_write(self) -> bool:
        """
        Determines if cache should be written based on context.
        
        How it works:
        1. If always_bypass is True or is_cacheable is False, return False.
        2. If cache_mode is ENABLED or WRITE_ONLY, return True.
        
        Returns:
            bool: True if cache should be written, False otherwise.
        """
        if self.always_bypass or not self.is_cacheable:
            return False
        return self.cache_mode in [CacheMode.ENABLED, CacheMode.WRITE_ONLY]
    
    @property
    def display_url(self) -> str:
        """Returns the URL in display format."""
        return self._url_display
def _legacy_to_cache_mode(
    disable_cache: bool = False,
    bypass_cache: bool = False,
    no_cache_read: bool = False,
    no_cache_write: bool = False
) -> CacheMode:
    """
    Converts legacy cache parameters to the new CacheMode enum.
    
    This is an internal function to help transition from the old boolean flags
    to the new CacheMode system.
    """
    if disable_cache:
        return CacheMode.DISABLED
    if bypass_cache:
        return CacheMode.BYPASS
    if no_cache_read and no_cache_write:
        return CacheMode.DISABLED
    if no_cache_read:
        return CacheMode.WRITE_ONLY
    if no_cache_write:
        return CacheMode.READ_ONLY
    return CacheMode.ENABLED
 | 
