# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# You may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import shutil
import tempfile
import zipfile
import logging

from diskcache import Cache

# Resolve the cache directory: CACHE_DIR wins, then HF_HOME, then /tmp.
cache_directory = os.environ.get("CACHE_DIR", os.getenv("HF_HOME", "/tmp"))
cache = Cache(cache_directory)

# Print cache statistics at import time. This is best-effort: a failure to
# read the statistics is reported but must not prevent the module from loading.
try:
    item_count = len(cache)
    size_bytes = cache.volume()
    print(f"[Cache] Loaded: {item_count} items, ~{size_bytes} bytes")
except Exception as e:
    print(f"[Cache] Unable to retrieve statistics: {e}")


def _zip_directory(source_dir, archive_path):
    """Write every file under *source_dir* into a new ZIP at *archive_path*.

    Entries are stored with paths relative to *source_dir*. The archive file
    itself is skipped if it happens to live inside *source_dir*.
    """
    # With the default configuration both the cache directory and the system
    # temp directory are /tmp, so the archive being written can sit inside the
    # tree being walked. Adding it to itself would corrupt (or endlessly grow)
    # the archive, so it is excluded by real path.
    archive_real = os.path.realpath(archive_path)

    with zipfile.ZipFile(
        archive_path, 'w', zipfile.ZIP_DEFLATED, compresslevel=9
    ) as zipf:
        for root, _, files in os.walk(source_dir):
            for file in files:
                file_path = os.path.join(root, file)
                if os.path.realpath(file_path) == archive_real:
                    continue
                arcname = os.path.relpath(file_path, source_dir)
                zipf.write(file_path, arcname)


def create_cache_zip():
    """Archive the cache directory into a ZIP file in the system temp directory.

    Before archiving, a second cache handle is opened and closed and the
    ``tmp`` subdirectory of the cache directory (if present) is deleted.

    Returns:
        A ``(archive_path, error_message)`` tuple. On success
        ``archive_path`` is the path of the created ZIP and
        ``error_message`` is ``None``. On failure ``archive_path`` is
        ``None`` and ``error_message`` describes the problem. Exceptions
        raised while archiving are logged and reported through the tuple
        rather than propagated.
    """
    temp_dir = tempfile.gettempdir()
    archive_path = os.path.join(temp_dir, "cache_archive.zip")

    if not os.path.isdir(cache_directory):
        logging.error("[Cache] Directory does not exist: %s", cache_directory)
        return None, f"Cache directory not found: {cache_directory}"

    logging.info("[Cache] Initiating checkpoint before archiving...")
    try:
        # Open and close a short-lived handle with the intent of flushing the
        # SQLite WAL to the main database file.
        # NOTE(review): the module-level `cache` is still open at this point;
        # confirm that closing a second handle actually checkpoints the WAL.
        with Cache(cache_directory) as temp_cache:
            temp_cache.close()

        # Remove unnecessary temp cache files so they are not archived.
        tmp_subdir = os.path.join(cache_directory, 'tmp')
        if os.path.isdir(tmp_subdir):
            logging.info(
                "[Cache] Removing temporary subdirectory: %s", tmp_subdir
            )
            shutil.rmtree(tmp_subdir)

        logging.info("[Cache] Creating archive at: %s", archive_path)
        _zip_directory(cache_directory, archive_path)

        logging.info("[Cache] Archive created successfully.")
        return archive_path, None

    except Exception as e:
        # Top-level boundary for this operation: log the traceback and report
        # the failure to the caller through the return value.
        logging.exception("[Cache] Failed to create archive")
        return None, f"Error creating cache archive: {e}"