# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# You may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import shutil
import tempfile
import zipfile
import logging
from diskcache import Cache

# Resolve cache directory from environment or default to HF_HOME or /tmp
cache_directory = os.environ.get("CACHE_DIR", os.getenv("HF_HOME", "/tmp"))
cache = Cache(cache_directory)

# Print cache statistics
try:
    item_count = len(cache)
    size_bytes = cache.volume()
    print(f"[Cache] Loaded: {item_count} items, ~{size_bytes} bytes")
except Exception as e:
    print(f"[Cache] Unable to retrieve statistics: {e}")

def create_cache_zip():
    """
    Archives the cache directory into a ZIP file, ensuring it is safe and consistent.
    Returns the archive path and an error message (if any).
    """
    temp_dir = tempfile.gettempdir()
    archive_path = os.path.join(temp_dir, "cache_archive.zip")

    if not os.path.isdir(cache_directory):
        logging.error(f"[Cache] Directory does not exist: {cache_directory}")
        return None, f"Cache directory not found: {cache_directory}"

    logging.info("[Cache] Initiating checkpoint before archiving...")
    try:
        # Close the module-level cache connection so SQLite checkpoints the WAL
        # into the main database file; diskcache reopens the connection lazily
        # on the next access, so this is safe to do before archiving.
        cache.close()

        # Remove unnecessary temp cache files
        tmp_subdir = os.path.join(cache_directory, 'tmp')
        if os.path.isdir(tmp_subdir):
            logging.info(f"[Cache] Removing temporary subdirectory: {tmp_subdir}")
            shutil.rmtree(tmp_subdir)

        logging.info(f"[Cache] Creating archive at: {archive_path}")
        with zipfile.ZipFile(archive_path, 'w', zipfile.ZIP_DEFLATED, compresslevel=9) as zipf:
            for root, _, files in os.walk(cache_directory):
                for file in files:
                    file_path = os.path.join(root, file)
                    # Skip the archive itself in case the cache directory is the
                    # same temp directory the ZIP is written to (e.g. /tmp).
                    if os.path.abspath(file_path) == os.path.abspath(archive_path):
                        continue
                    arcname = os.path.relpath(file_path, cache_directory)
                    zipf.write(file_path, arcname)

        logging.info("[Cache] Archive created successfully.")
        return archive_path, None

    except Exception as e:
        logging.exception("[Cache] Failed to create archive")
        return None, f"Error creating cache archive: {e}"