Spaces:
Running
Running
Lisa Dunlap
Add persistent storage support for Hugging Face Spaces - Enhanced app.py with automatic persistent storage detection - Added comprehensive persistent storage utilities - Added documentation and examples - Automatic HF_HOME and cache configuration for /data directory
f850bde
""" | |
Utilities for persistent storage in Hugging Face Spaces.

This module provides utilities for managing persistent storage in Hugging Face Spaces,
including data directories, cache management, and file operations.
""" | |
import os | |
import shutil | |
from pathlib import Path | |
from typing import Optional, Union | |
import tempfile | |
def get_persistent_data_dir() -> Optional[Path]:
    """Return the application data directory on persistent storage.

    Persistent storage is treated as present when ``/data`` is an existing
    directory (the mount point used by Hugging Face Spaces). In that case
    ``/data/app_data`` is created if it does not exist yet and returned.

    Returns:
        Path to ``/data/app_data`` when ``/data`` exists, None otherwise.
    """
    # Guard clause: without the /data mount there is nothing to hand out.
    if not os.path.isdir("/data"):
        return None

    app_data = Path("/data/app_data")
    app_data.mkdir(exist_ok=True)
    return app_data
def get_cache_dir() -> Path:
    """Get the cache directory (persistent if available, temp otherwise).

    When ``/data`` exists the cache lives in ``/data/.cache``; otherwise it
    falls back to ``<system temp dir>/app_cache``. In both cases the
    directory is created before it is returned, so callers can write into
    it immediately.

    Returns:
        Path to an existing cache directory.
    """
    if os.path.isdir("/data"):
        cache_dir = Path("/data/.cache")
    else:
        # Fallback to temp directory for local development.
        cache_dir = Path(tempfile.gettempdir()) / "app_cache"

    # Previously only the persistent branch created its directory; the
    # fallback path was returned without existing on disk.
    cache_dir.mkdir(parents=True, exist_ok=True)
    return cache_dir
def get_hf_home_dir() -> Path:
    """Return the Hugging Face home directory used for model caching.

    When ``/data`` exists, ``/data/.huggingface`` is created if needed and
    returned. Otherwise the default location ``~/.cache/huggingface`` is
    returned as-is (it is not created here).

    Returns:
        Path to the HF home directory.
    """
    # Local development: report the default location without touching disk.
    if not os.path.isdir("/data"):
        return Path.home() / ".cache" / "huggingface"

    hf_dir = Path("/data/.huggingface")
    hf_dir.mkdir(exist_ok=True)
    return hf_dir
def save_uploaded_file(uploaded_file, filename: str) -> Optional[Path]:
    """Save an uploaded file to persistent storage.

    Args:
        uploaded_file: Either a filesystem path (``str`` or ``os.PathLike``)
            or an object whose ``name`` attribute holds the path of the
            uploaded file (e.g. a Gradio file object).
        filename: Name to save the file as, relative to the persistent data
            directory. May contain sub-directories, but must not resolve to
            a location outside the persistent data directory.

    Returns:
        Path to saved file if successful, None if persistent storage is
        unavailable, ``uploaded_file`` is falsy, or ``filename`` would escape
        the persistent data directory.
    """
    persistent_dir = get_persistent_data_dir()
    if not persistent_dir or not uploaded_file:
        return None

    save_path = persistent_dir / filename

    # Refuse names such as "../x" or absolute paths that would write outside
    # the persistent data directory.
    try:
        save_path.resolve().relative_to(persistent_dir.resolve())
    except ValueError:
        return None

    # Path-like inputs must be checked first: pathlib.Path also has a
    # ``name`` attribute, but it is only the final path component, not the
    # location of the file.
    if isinstance(uploaded_file, (str, os.PathLike)):
        source = uploaded_file
    elif hasattr(uploaded_file, 'name'):
        # Gradio file object
        source = uploaded_file.name
    else:
        source = uploaded_file

    save_path.parent.mkdir(parents=True, exist_ok=True)
    # Copy the uploaded file (with metadata) to persistent storage.
    shutil.copy2(source, save_path)
    return save_path
def save_data_to_persistent(data: bytes, filename: str, subdirectory: str = "") -> Optional[Path]:
    """Save binary data to persistent storage.

    Args:
        data: Binary data to save
        filename: Name to save the file as
        subdirectory: Optional subdirectory within persistent storage; may be
            nested (e.g. ``"runs/2024"``).

    Returns:
        Path to saved file if successful, None when persistent storage is
        not available.
    """
    persistent_dir = get_persistent_data_dir()
    if not persistent_dir:
        return None

    save_dir = persistent_dir / subdirectory if subdirectory else persistent_dir
    save_path = save_dir / filename

    # A single mkdir with parents=True creates both the (possibly nested)
    # subdirectory and any directories contained in ``filename``. The former
    # extra ``mkdir(exist_ok=True)`` on the subdirectory raised
    # FileNotFoundError for nested subdirectories.
    save_path.parent.mkdir(parents=True, exist_ok=True)
    save_path.write_bytes(data)
    return save_path
def load_data_from_persistent(filename: str, subdirectory: str = "") -> Optional[bytes]:
    """Load binary data from persistent storage.

    Args:
        filename: Name of the file to load
        subdirectory: Optional subdirectory within persistent storage

    Returns:
        Binary data if successful, None when persistent storage is not
        available or the path does not refer to a regular file.
    """
    persistent_dir = get_persistent_data_dir()
    if not persistent_dir:
        return None

    load_path = persistent_dir / subdirectory / filename if subdirectory else persistent_dir / filename

    # ``exists()`` is also true for directories, which cannot be read as
    # bytes; only regular files are loadable.
    if not load_path.is_file():
        return None
    try:
        return load_path.read_bytes()
    except FileNotFoundError:
        # The file was removed between the check and the read.
        return None
def list_persistent_files(subdirectory: str = "", pattern: str = "*") -> list[Path]:
    """List entries in persistent storage that match a glob pattern.

    Args:
        subdirectory: Optional subdirectory within persistent storage
        pattern: Glob pattern to match (e.g., "*.json", "data_*")

    Returns:
        List of Path objects produced by globbing the chosen directory, or an
        empty list when persistent storage or the directory is missing.
    """
    base_dir = get_persistent_data_dir()
    if not base_dir:
        return []

    target_dir = base_dir / subdirectory if subdirectory else base_dir
    if not target_dir.exists():
        return []

    return [*target_dir.glob(pattern)]
def delete_persistent_file(filename: str, subdirectory: str = "") -> bool:
    """Delete a file from persistent storage.

    Args:
        filename: Name of the file to delete
        subdirectory: Optional subdirectory within persistent storage

    Returns:
        True if the file was removed, False when persistent storage is not
        available or the path does not refer to a regular file.
    """
    persistent_dir = get_persistent_data_dir()
    if not persistent_dir:
        return False

    file_path = persistent_dir / subdirectory / filename if subdirectory else persistent_dir / filename

    # ``exists()`` would also accept directories, on which ``unlink()``
    # raises; only regular files are deletable here.
    if not file_path.is_file():
        return False
    try:
        file_path.unlink()
    except FileNotFoundError:
        # Someone else removed the file between the check and the unlink.
        return False
    return True
def is_persistent_storage_available() -> bool:
    """Report whether persistent storage is present.

    Returns:
        True when ``/data`` is an existing directory, False otherwise.
    """
    return Path("/data").is_dir()
def get_persistent_results_dir() -> Optional[Path]:
    """Return the ``results`` directory inside persistent storage.

    The directory is created under the persistent data directory if it does
    not exist yet.

    Returns:
        Path to persistent results directory if available, None otherwise.
    """
    base_dir = get_persistent_data_dir()
    if not base_dir:
        return None

    target = base_dir / "results"
    target.mkdir(exist_ok=True)
    return target
def get_persistent_logs_dir() -> Optional[Path]:
    """Return the ``logs`` directory inside persistent storage.

    The directory is created under the persistent data directory if it does
    not exist yet.

    Returns:
        Path to persistent logs directory if available, None otherwise.
    """
    base_dir = get_persistent_data_dir()
    if not base_dir:
        return None

    target = base_dir / "logs"
    target.mkdir(exist_ok=True)
    return target
def get_storage_info() -> dict:
    """Collect a summary of the storage locations used by the app.

    Returns:
        Dictionary with the keys ``persistent_available``, ``data_dir``,
        ``cache_dir``, ``hf_home`` and ``storage_paths``. When persistent
        storage is available and disk usage can be read, ``storage_paths``
        holds ``total_gb``, ``used_gb`` and ``free_gb`` (GiB, rounded to two
        decimals); otherwise it stays an empty dict.
    """
    bytes_per_gib = 1024**3

    report = {
        "persistent_available": is_persistent_storage_available(),
        "data_dir": None,
        "cache_dir": str(get_cache_dir()),
        "hf_home": str(get_hf_home_dir()),
        "storage_paths": {},
    }
    if not report["persistent_available"]:
        return report

    persistent_dir = get_persistent_data_dir()
    report["data_dir"] = str(persistent_dir)

    # Disk usage is best-effort: on failure the sizes are simply left out.
    try:
        usage = shutil.disk_usage(persistent_dir)
    except OSError:
        return report

    total_bytes, used_bytes, free_bytes = usage
    report["storage_paths"] = {
        "total_gb": round(total_bytes / bytes_per_gib, 2),
        "used_gb": round(used_bytes / bytes_per_gib, 2),
        "free_gb": round(free_bytes / bytes_per_gib, 2),
    }
    return report