mgbam committed on
Commit
8a19371
·
verified ·
1 Parent(s): f8d688b

Update cache.py

Browse files
Files changed (1) hide show
  1. cache.py +32 -28
cache.py CHANGED
@@ -1,7 +1,7 @@
1
  # Copyright 2025 Google LLC
2
  #
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
  # You may obtain a copy of the License at
6
  #
7
  # http://www.apache.org/licenses/LICENSE-2.0
@@ -12,56 +12,60 @@
12
  # See the License for the specific language governing permissions and
13
  # limitations under the License.
14
 
15
- from diskcache import Cache
16
  import os
17
  import shutil
18
  import tempfile
19
  import zipfile
20
  import logging
 
21
 
22
# Open (or create) the on-disk cache at the configured location.
cache = Cache(os.environ.get("CACHE_DIR", "/cache"))

# Best-effort startup report of how much the cache currently holds;
# a failure here must never prevent the module from importing.
try:
    item_count, size_bytes = len(cache), cache.volume()
    print(f"Cache loaded: {item_count} items, approx {size_bytes} bytes")
except Exception as e:
    print(f"Could not retrieve cache statistics: {e}")
30
 
31
def create_cache_zip():
    """
    Build a ZIP archive of the configured cache directory.

    A throwaway diskcache connection is opened and closed first so SQLite
    checkpoints its WAL into the main database file, then diskcache's
    scratch ``tmp`` subdirectory is dropped before archiving.

    Returns:
        tuple: ``(archive_path, None)`` on success,
        ``(None, error_message)`` on failure.
    """
    archive_path = os.path.join(tempfile.gettempdir(), "cache_archive") + ".zip"
    cache_directory = os.environ.get("CACHE_DIR", "/cache")

    # Guard clause: nothing to archive if the directory is missing.
    if not os.path.isdir(cache_directory):
        logging.error(f"Cache directory not found at {cache_directory}")
        return None, f"Cache directory not found on server: {cache_directory}"

    logging.info("Forcing a cache checkpoint for safe backup...")
    try:
        # Opening and immediately closing a connection forces SQLite to
        # merge the .wal file into the main .db file, leaving consistent
        # on-disk files to archive.
        with Cache(cache_directory) as temp_cache:
            temp_cache.close()

        # diskcache's scratch area is not needed for a restore.
        tmp_path = os.path.join(cache_directory, 'tmp')
        if os.path.isdir(tmp_path):
            logging.info(f"Removing temporary cache directory: {tmp_path}")
            shutil.rmtree(tmp_path)

        logging.info(f"Checkpoint complete. Creating zip archive of {cache_directory} to {archive_path}")
        with zipfile.ZipFile(archive_path, 'w', zipfile.ZIP_DEFLATED, compresslevel=9) as zipf:
            for root, _, filenames in os.walk(cache_directory):
                for name in filenames:
                    full_path = os.path.join(root, name)
                    zipf.write(full_path, os.path.relpath(full_path, cache_directory))
        logging.info("Zip archive created successfully.")
        return archive_path, None
    except Exception as e:
        logging.error(f"Error creating zip archive of cache directory: {e}", exc_info=True)
        return None, f"Error creating zip archive: {e}"
 
1
  # Copyright 2025 Google LLC
2
  #
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
  # You may obtain a copy of the License at
6
  #
7
  # http://www.apache.org/licenses/LICENSE-2.0
 
12
  # See the License for the specific language governing permissions and
13
  # limitations under the License.
14
 
 
15
  import os
16
  import shutil
17
  import tempfile
18
  import zipfile
19
  import logging
20
+ from diskcache import Cache
21
 
22
# Cache location: an explicit CACHE_DIR wins, then HF_HOME, then /tmp.
_fallback_dir = os.getenv("HF_HOME", "/tmp")
cache_directory = os.environ.get("CACHE_DIR", _fallback_dir)
cache = Cache(cache_directory)

# Best-effort startup statistics; stats gathering must never break import.
try:
    item_count = len(cache)
    size_bytes = cache.volume()
    print(f"[Cache] Loaded: {item_count} items, ~{size_bytes} bytes")
except Exception as e:
    print(f"[Cache] Unable to retrieve statistics: {e}")
33
 
34
def create_cache_zip():
    """
    Archive the cache directory into a ZIP file in the system temp dir.

    Forces a diskcache/SQLite checkpoint first so the on-disk files are
    consistent, prunes diskcache's transient ``tmp`` subdirectory, then
    zips everything under the module-level ``cache_directory``.

    Returns:
        tuple: ``(archive_path, None)`` on success,
        ``(None, error_message)`` on failure.
    """
    temp_dir = tempfile.gettempdir()
    archive_path = os.path.join(temp_dir, "cache_archive.zip")

    if not os.path.isdir(cache_directory):
        logging.error(f"[Cache] Directory does not exist: {cache_directory}")
        return None, f"Cache directory not found: {cache_directory}"

    logging.info("[Cache] Initiating checkpoint before archiving...")
    try:
        # Opening (and closing) a connection forces SQLite to merge the WAL
        # into the main .db file, so the files we zip are self-consistent.
        with Cache(cache_directory) as temp_cache:
            temp_cache.close()

        # Remove diskcache's scratch area; it is not needed for a restore.
        tmp_subdir = os.path.join(cache_directory, 'tmp')
        if os.path.isdir(tmp_subdir):
            logging.info(f"[Cache] Removing temporary subdirectory: {tmp_subdir}")
            shutil.rmtree(tmp_subdir)

        logging.info(f"[Cache] Creating archive at: {archive_path}")
        with zipfile.ZipFile(archive_path, 'w', zipfile.ZIP_DEFLATED, compresslevel=9) as zipf:
            for root, _, files in os.walk(cache_directory):
                for file in files:
                    file_path = os.path.join(root, file)
                    # BUG FIX: cache_directory defaults to "/tmp", which is
                    # also where the archive is written — without this guard
                    # the half-written zip gets swept into itself.
                    if os.path.abspath(file_path) == os.path.abspath(archive_path):
                        continue
                    arcname = os.path.relpath(file_path, cache_directory)
                    zipf.write(file_path, arcname)

        logging.info("[Cache] Archive created successfully.")
        return archive_path, None

    except Exception as e:
        logging.exception("[Cache] Failed to create archive")
        return None, f"Error creating cache archive: {e}"