Spaces:
Sleeping
Sleeping
yonnel
committed on
Commit
·
cd5a102
1
Parent(s):
8331c23
Fix checkpoint permissions error in build_index.py
Browse files
- Use system temp directory instead of app/data/checkpoints
- Handle PermissionError gracefully with warnings instead of crashes
- Allow script to continue even if checkpoints can't be saved
- Fixes: PermissionError: [Errno 13] Permission denied: 'app/data/checkpoints'
- app/build_index.py +28 -16
app/build_index.py
CHANGED
@@ -28,34 +28,46 @@ except ImportError:
|
|
28 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
29 |
logger = logging.getLogger(__name__)
|
30 |
|
31 |
-
# Checkpoint file paths
|
32 |
-
|
|
|
33 |
MOVIE_DATA_CHECKPOINT = f"{CHECKPOINT_DIR}/movie_data.pkl"
|
34 |
EMBEDDINGS_CHECKPOINT = f"{CHECKPOINT_DIR}/embeddings_progress.pkl"
|
35 |
METADATA_CHECKPOINT = f"{CHECKPOINT_DIR}/metadata_progress.pkl"
|
36 |
|
37 |
def save_checkpoint(data, filepath: str):
|
38 |
-
"""Save checkpoint data to file"""
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
|
|
|
|
|
|
|
|
|
|
43 |
|
44 |
def load_checkpoint(filepath: str):
|
45 |
"""Load checkpoint data from file"""
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
|
|
|
|
|
|
51 |
return None
|
52 |
|
53 |
def cleanup_checkpoints():
|
54 |
"""Remove checkpoint files after successful completion"""
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
|
|
|
|
|
|
59 |
|
60 |
class TMDBClient:
|
61 |
"""Client for TMDB API with retry and backoff"""
|
|
|
28 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
29 |
logger = logging.getLogger(__name__)
|
30 |
|
31 |
+
# Checkpoint file paths. The directory comes from the CHECKPOINT_DIR
# environment variable and falls back to the system temp directory, which is
# writable even when the application directory is read-only.
import tempfile

# `or` instead of a .get() default: a variable that is set but empty would
# otherwise yield '' and place the checkpoints at the filesystem root
# ("/movie_data.pkl").
CHECKPOINT_DIR = os.environ.get('CHECKPOINT_DIR') or tempfile.gettempdir()
MOVIE_DATA_CHECKPOINT = os.path.join(CHECKPOINT_DIR, "movie_data.pkl")
EMBEDDINGS_CHECKPOINT = os.path.join(CHECKPOINT_DIR, "embeddings_progress.pkl")
METADATA_CHECKPOINT = os.path.join(CHECKPOINT_DIR, "metadata_progress.pkl")
|
37 |
|
38 |
def save_checkpoint(data, filepath: str):
    """Save checkpoint data to file on a best-effort basis.

    Pickles ``data`` to ``filepath``. Any failure is logged as a warning and
    never raised, so an unwritable checkpoint location cannot abort the run.

    Args:
        data: Any picklable object.
        filepath: Destination path; its parent directory is created if it
            does not exist yet.
    """
    # A bare filename has an empty dirname and os.makedirs('') raises, so the
    # parent is only created when there is one.
    parent_dir = os.path.dirname(filepath)
    # The data is written to a sibling file and then moved into place, so a
    # failed or interrupted dump never truncates the previous good checkpoint.
    tmp_path = f"{filepath}.tmp"
    try:
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        with open(tmp_path, 'wb') as f:
            pickle.dump(data, f)
        os.replace(tmp_path, filepath)
        logger.info(f"Checkpoint saved: {filepath}")
    except PermissionError:
        logger.warning(f"Cannot save checkpoint due to permissions: {filepath}")
    except Exception as e:
        logger.warning(f"Failed to save checkpoint {filepath}: {e}")
    finally:
        # After a successful os.replace the temp file no longer exists; after
        # a failure this drops the partial file. Either way errors are ignored.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
|
49 |
|
50 |
def load_checkpoint(filepath: str):
    """Load checkpoint data from file.

    Args:
        filepath: Path of a checkpoint file written with ``pickle``.

    Returns:
        The unpickled object, or ``None`` when the file does not exist or
        cannot be read or unpickled (the latter is logged as a warning).
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only point this at checkpoint locations that other users cannot write
    # to; a shared temp directory with a predictable file name is not one.
    try:
        # Open directly instead of checking os.path.exists first: the file can
        # disappear between the check and the open.
        with open(filepath, 'rb') as f:
            data = pickle.load(f)
    except FileNotFoundError:
        # No checkpoint yet is the normal first-run case, not a failure.
        return None
    except Exception as e:
        logger.warning(f"Failed to load checkpoint {filepath}: {e}")
        return None
    logger.info(f"Checkpoint loaded: {filepath}")
    return data
|
61 |
|
62 |
def cleanup_checkpoints():
    """Remove checkpoint files after successful completion.

    Deletes only the three checkpoint files rather than the whole checkpoint
    directory, so stale checkpoints are also cleared when they live in the
    shared system temp directory and unrelated files are never touched.
    Failures are logged as warnings and never raised.
    """
    checkpoint_files = (
        MOVIE_DATA_CHECKPOINT,
        EMBEDDINGS_CHECKPOINT,
        METADATA_CHECKPOINT,
    )
    failed = False
    for path in checkpoint_files:
        try:
            os.remove(path)
        except FileNotFoundError:
            # Never written or already removed: nothing to clean up.
            pass
        except Exception as e:
            failed = True
            logger.warning(f"Failed to cleanup checkpoints: {e}")

    # A dedicated checkpoint directory is removed too, but only once it is
    # empty (os.rmdir refuses otherwise). The system temp directory is left
    # alone; paths are normalised so equivalent spellings compare equal.
    if os.path.abspath(CHECKPOINT_DIR) != os.path.abspath(tempfile.gettempdir()):
        try:
            os.rmdir(CHECKPOINT_DIR)
        except OSError:
            pass

    if not failed:
        logger.info("Checkpoint files cleaned up")
|
71 |
|
72 |
class TMDBClient:
|
73 |
"""Client for TMDB API with retry and backoff"""
|