# docappointemet / cache.py
# NOTE(review): the three lines below are Hugging Face file-viewer chrome that
# was pasted into the source; kept as comments so the module is valid Python.
# mgbam's picture
# Update cache.py
# 8a19371 verified
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# You may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import shutil
import tempfile
import zipfile
import logging
from diskcache import Cache
# Resolve the cache location: CACHE_DIR takes precedence, then HF_HOME,
# finally the /tmp fallback.
cache_directory = os.getenv("CACHE_DIR", os.environ.get("HF_HOME", "/tmp"))
cache = Cache(cache_directory)


def _report_cache_stats():
    """Best-effort startup report of cache item count and on-disk size.

    Failures (e.g. an unreadable cache volume) are reported but never fatal.
    """
    try:
        item_count = len(cache)
        size_bytes = cache.volume()
        print(f"[Cache] Loaded: {item_count} items, ~{size_bytes} bytes")
    except Exception as e:
        print(f"[Cache] Unable to retrieve statistics: {e}")


_report_cache_stats()
def create_cache_zip():
    """
    Archive the entire cache directory into a ZIP file.

    Opens and closes a fresh ``Cache`` handle first so diskcache flushes its
    SQLite WAL, removes the cache's transient ``tmp`` subdirectory, then zips
    everything that remains under ``cache_directory``.

    Returns:
        tuple: ``(archive_path, None)`` on success, or ``(None, error_message)``
        when the cache directory is missing or archiving fails.
    """
    temp_dir = tempfile.gettempdir()
    archive_path = os.path.join(temp_dir, "cache_archive.zip")

    if not os.path.isdir(cache_directory):
        logging.error("[Cache] Directory does not exist: %s", cache_directory)
        return None, f"Cache directory not found: {cache_directory}"

    logging.info("[Cache] Initiating checkpoint before archiving...")
    try:
        # Open/close a fresh handle so diskcache checkpoints its SQLite WAL
        # and the on-disk files are in a consistent state.
        with Cache(cache_directory) as temp_cache:
            temp_cache.close()

        # Drop diskcache's transient tmp/ subdirectory: it only holds
        # in-flight writes that should not be part of the archive.
        tmp_subdir = os.path.join(cache_directory, 'tmp')
        if os.path.isdir(tmp_subdir):
            logging.info("[Cache] Removing temporary subdirectory: %s", tmp_subdir)
            shutil.rmtree(tmp_subdir)

        logging.info("[Cache] Creating archive at: %s", archive_path)
        abs_archive = os.path.abspath(archive_path)
        with zipfile.ZipFile(archive_path, 'w', zipfile.ZIP_DEFLATED, compresslevel=9) as zipf:
            for root, _, files in os.walk(cache_directory):
                for file in files:
                    file_path = os.path.join(root, file)
                    # BUG FIX: when cache_directory is /tmp (the default), the
                    # archive lives inside the tree being walked -- skip it so
                    # the zip never tries to include a partial copy of itself.
                    if os.path.abspath(file_path) == abs_archive:
                        continue
                    arcname = os.path.relpath(file_path, cache_directory)
                    zipf.write(file_path, arcname)

        logging.info("[Cache] Archive created successfully.")
        return archive_path, None
    except Exception as e:
        logging.exception("[Cache] Failed to create archive")
        return None, f"Error creating cache archive: {e}"