# Whatever-this-is / lmmvibes / utils / persistent_storage.py
# Lisa Dunlap
# Add persistent storage support for Hugging Face Spaces:
#   - Enhanced app.py with automatic persistent storage detection
#   - Added comprehensive persistent storage utilities
#   - Added documentation and examples
#   - Automatic HF_HOME and cache configuration for /data directory
# f850bde
# raw
# history blame
# 7.59 kB
"""
Utilities for persistent storage in Hugging Face Spaces.
This module provides utilities for managing persistent storage in Hugging Face Spaces,
including data directories, cache management, and file operations.
"""
import os
import shutil
from pathlib import Path
from typing import Optional, Union
import tempfile
def get_persistent_data_dir() -> Optional[Path]:
    """Return the application's data directory on persistent storage.

    Persistent storage is treated as available when ``/data`` is an existing
    directory. In that case ``/data/app_data`` is created if it is missing
    and returned.

    Returns:
        ``/data/app_data`` as a Path when ``/data`` exists, otherwise None.
    """
    # No persistent volume mounted: signal that to the caller.
    if not os.path.isdir("/data"):
        return None

    app_data = Path("/data/app_data")
    app_data.mkdir(exist_ok=True)
    return app_data
def get_cache_dir() -> Path:
    """Get the appropriate cache directory (persistent if available, temp otherwise).

    When ``/data`` exists the cache lives at ``/data/.cache``; otherwise it
    falls back to ``<system temp dir>/app_cache``. The directory is created
    on both branches so callers can write into the returned path directly.

    Returns:
        Path to the cache directory.
    """
    if os.path.isdir("/data"):
        cache_dir = Path("/data/.cache")
        cache_dir.mkdir(exist_ok=True)
        return cache_dir

    # Fallback to temp directory.
    cache_dir = Path(tempfile.gettempdir()) / "app_cache"
    try:
        cache_dir.mkdir(parents=True, exist_ok=True)
    except OSError:
        # Best-effort: this branch never raised before, so keep returning the
        # path and let the caller's first write surface any real problem.
        pass
    return cache_dir
def get_hf_home_dir() -> Path:
    """Return the directory to use as the Hugging Face home for model caching.

    When ``/data`` exists, ``/data/.huggingface`` is created if missing and
    returned. Otherwise the default ``~/.cache/huggingface`` location is
    returned without being created.

    Returns:
        Path to the HF home directory.
    """
    if not os.path.isdir("/data"):
        # Local development: use the default location.
        return Path.home() / ".cache" / "huggingface"

    persistent_home = Path("/data/.huggingface")
    persistent_home.mkdir(exist_ok=True)
    return persistent_home
def save_uploaded_file(uploaded_file, filename: str) -> Optional[Path]:
    """Save an uploaded file to persistent storage.

    Args:
        uploaded_file: Either a path (``str`` or ``os.PathLike``) to the file
            to copy, or an object whose ``name`` attribute holds that path
            (e.g. a Gradio uploaded file object).
        filename: Name to save the file as, relative to the persistent data
            directory. May contain subdirectories, which are created.

    Returns:
        Path to the saved file if successful. None if persistent storage is
        unavailable, ``uploaded_file`` is falsy, or ``filename`` would resolve
        to a location outside the persistent data directory.
    """
    persistent_dir = get_persistent_data_dir()
    if persistent_dir is None or not uploaded_file:
        return None

    # Path-like inputs must be checked first: a pathlib.Path also has a
    # ``name`` attribute, but it is only the final component, not the path.
    if isinstance(uploaded_file, (str, os.PathLike)):
        source = os.fspath(uploaded_file)
    elif hasattr(uploaded_file, "name"):
        source = uploaded_file.name
    else:
        source = uploaded_file

    save_path = persistent_dir / filename
    # Refuse absolute names and ".." components that escape the storage root.
    if not save_path.resolve().is_relative_to(persistent_dir.resolve()):
        return None

    save_path.parent.mkdir(parents=True, exist_ok=True)
    # Copy the uploaded file to persistent storage.
    shutil.copy2(source, save_path)
    return save_path
def save_data_to_persistent(data: bytes, filename: str, subdirectory: str = "") -> Optional[Path]:
    """Save binary data to persistent storage.

    Args:
        data: Binary data to save.
        filename: Name to save the file as.
        subdirectory: Optional subdirectory within persistent storage. May be
            nested (e.g. ``"runs/2024"``); missing levels are created.

    Returns:
        Path to the saved file if successful, None if persistent storage is
        not available.
    """
    persistent_dir = get_persistent_data_dir()
    if persistent_dir is None:
        return None

    save_dir = persistent_dir / subdirectory if subdirectory else persistent_dir
    save_path = save_dir / filename

    # One mkdir with parents=True covers nested subdirectories as well as
    # directory components inside ``filename``.
    save_path.parent.mkdir(parents=True, exist_ok=True)
    with open(save_path, "wb") as f:
        f.write(data)
    return save_path
def load_data_from_persistent(filename: str, subdirectory: str = "") -> Optional[bytes]:
    """Load binary data from persistent storage.

    Args:
        filename: Name of the file to load.
        subdirectory: Optional subdirectory within persistent storage.

    Returns:
        The file's contents if it exists and is a regular file. None if
        persistent storage is unavailable, the path does not exist, or the
        path is not a file (for example a directory).
    """
    persistent_dir = get_persistent_data_dir()
    if persistent_dir is None:
        return None

    if subdirectory:
        load_path = persistent_dir / subdirectory / filename
    else:
        load_path = persistent_dir / filename

    # is_file() rather than exists(): a directory at this path must yield
    # None, not an error from trying to open it.
    if not load_path.is_file():
        return None
    try:
        return load_path.read_bytes()
    except FileNotFoundError:
        # The file was removed between the check and the read.
        return None
def list_persistent_files(subdirectory: str = "", pattern: str = "*") -> list[Path]:
    """Return the entries in persistent storage that match a glob pattern.

    Args:
        subdirectory: Optional subdirectory within persistent storage to
            search instead of the storage root.
        pattern: Glob pattern applied inside the searched directory
            (e.g., "*.json", "data_*").

    Returns:
        List of Path objects matching the pattern; an empty list when
        persistent storage is unavailable or the searched directory does not
        exist.
    """
    root = get_persistent_data_dir()
    if not root:
        return []

    target = root / subdirectory if subdirectory else root
    if not target.exists():
        return []
    return list(target.glob(pattern))
def delete_persistent_file(filename: str, subdirectory: str = "") -> bool:
    """Delete a file from persistent storage.

    Args:
        filename: Name of the file to delete.
        subdirectory: Optional subdirectory within persistent storage.

    Returns:
        True if the file was deleted. False if persistent storage is
        unavailable, the path does not exist, or the path is not a file
        (directories are never removed).
    """
    persistent_dir = get_persistent_data_dir()
    if persistent_dir is None:
        return False

    if subdirectory:
        file_path = persistent_dir / subdirectory / filename
    else:
        file_path = persistent_dir / filename

    # is_file() rather than exists(): unlink() on a directory raises, whereas
    # the documented contract for "not deleted" is to return False.
    if not file_path.is_file():
        return False
    try:
        file_path.unlink()
    except FileNotFoundError:
        # The file was removed between the check and the unlink.
        return False
    return True
def is_persistent_storage_available() -> bool:
    """Report whether persistent storage is available.

    Returns:
        True if ``/data`` is an existing directory, False otherwise.
    """
    persistent_root = Path("/data")
    return persistent_root.is_dir()
def get_persistent_results_dir() -> Optional[Path]:
    """Return the ``results`` directory inside persistent storage.

    The directory is created under the persistent data directory if it does
    not exist yet.

    Returns:
        Path to the persistent results directory if persistent storage is
        available, None otherwise.
    """
    base = get_persistent_data_dir()
    if not base:
        return None

    target = base / "results"
    target.mkdir(exist_ok=True)
    return target
def get_persistent_logs_dir() -> Optional[Path]:
    """Return the ``logs`` directory inside persistent storage.

    The directory is created under the persistent data directory if it does
    not exist yet.

    Returns:
        Path to the persistent logs directory if persistent storage is
        available, None otherwise.
    """
    base = get_persistent_data_dir()
    if not base:
        return None

    target = base / "logs"
    target.mkdir(exist_ok=True)
    return target
def get_storage_info() -> dict:
    """Summarize the storage locations in use and, if possible, disk usage.

    Returns:
        Dictionary with the keys ``persistent_available``, ``data_dir``
        (None without persistent storage), ``cache_dir``, ``hf_home`` and
        ``storage_paths``. ``storage_paths`` holds ``total_gb``, ``used_gb``
        and ``free_gb`` for the persistent data directory, or stays empty
        when persistent storage is unavailable or disk usage cannot be read.
    """
    has_persistent = is_persistent_storage_available()
    summary = {
        "persistent_available": has_persistent,
        "data_dir": None,
        "cache_dir": str(get_cache_dir()),
        "hf_home": str(get_hf_home_dir()),
        "storage_paths": {},
    }
    if not has_persistent:
        return summary

    root = get_persistent_data_dir()
    summary["data_dir"] = str(root)

    # Check available space; a failure here leaves "storage_paths" empty.
    try:
        usage = shutil.disk_usage(root)
    except OSError:
        return summary

    bytes_per_gb = 1024**3
    summary["storage_paths"] = {
        "total_gb": round(usage.total / bytes_per_gb, 2),
        "used_gb": round(usage.used / bytes_per_gb, 2),
        "free_gb": round(usage.free / bytes_per_gb, 2),
    }
    return summary