Spaces:
Running
Running
Commit
·
ca54b04
1
Parent(s):
1dc37e8
Enhance Dockerfile and Streamlit app for Hugging Face directory management
Browse files
- Updated Dockerfile to create additional directories for Hugging Face caching with appropriate permissions.
- Enhanced Streamlit app to configure environment variables for Hugging Face and implement robust directory creation with error handling and logging.
- Improved fallback mechanisms for directory creation to ensure stability in various environments.
- Dockerfile +6 -2
- TROUBLESHOOTING.md +31 -1
- src/streamlit_app.py +23 -0
Dockerfile
CHANGED
@@ -10,13 +10,17 @@ RUN apt-get update && apt-get install -y \
|
|
10 |
&& rm -rf /var/lib/apt/lists/*
|
11 |
|
12 |
# Create necessary directories with proper permissions
|
13 |
-
RUN mkdir -p /app/.streamlit /tmp/docling_temp /tmp/easyocr_models /tmp/cache /tmp/config /tmp/data && \
|
14 |
chmod 755 /app/.streamlit && \
|
15 |
chmod 777 /tmp/docling_temp && \
|
16 |
chmod 777 /tmp/easyocr_models && \
|
17 |
chmod 777 /tmp/cache && \
|
18 |
chmod 777 /tmp/config && \
|
19 |
-
chmod 777 /tmp/data
|
|
|
|
|
|
|
|
|
20 |
|
21 |
COPY requirements.txt ./
|
22 |
COPY src/ ./src/
|
|
|
10 |
&& rm -rf /var/lib/apt/lists/*
|
11 |
|
12 |
# Create necessary directories with proper permissions
|
13 |
+
RUN mkdir -p /app/.streamlit /tmp/docling_temp /tmp/easyocr_models /tmp/cache /tmp/config /tmp/data /tmp/huggingface /tmp/huggingface_cache /tmp/transformers_cache /tmp/datasets_cache && \
|
14 |
chmod 755 /app/.streamlit && \
|
15 |
chmod 777 /tmp/docling_temp && \
|
16 |
chmod 777 /tmp/easyocr_models && \
|
17 |
chmod 777 /tmp/cache && \
|
18 |
chmod 777 /tmp/config && \
|
19 |
+
chmod 777 /tmp/data && \
|
20 |
+
chmod 777 /tmp/huggingface && \
|
21 |
+
chmod 777 /tmp/huggingface_cache && \
|
22 |
+
chmod 777 /tmp/transformers_cache && \
|
23 |
+
chmod 777 /tmp/datasets_cache
|
24 |
|
25 |
COPY requirements.txt ./
|
26 |
COPY src/ ./src/
|
TROUBLESHOOTING.md
CHANGED
@@ -95,4 +95,34 @@ The app automatically sets these environment variables:
|
|
95 |
- `HOME=/tmp/docling_temp` (or fallback)
|
96 |
- `XDG_CACHE_HOME=/tmp/cache` (or fallback)
|
97 |
- `XDG_CONFIG_HOME=/tmp/config` (or fallback)
|
98 |
-
- `XDG_DATA_HOME=/tmp/data` (or fallback)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
- `HOME=/tmp/docling_temp` (or fallback)
|
96 |
- `XDG_CACHE_HOME=/tmp/cache` (or fallback)
|
97 |
- `XDG_CONFIG_HOME=/tmp/config` (or fallback)
|
98 |
+
- `XDG_DATA_HOME=/tmp/data` (or fallback)
|
99 |
+
|
100 |
+
### Hugging Face Hub Permission Errors
|
101 |
+
|
102 |
+
If you encounter Hugging Face Hub permission errors like:
|
103 |
+
```
|
104 |
+
PermissionError: [Errno 13] Permission denied: '/.cache'
|
105 |
+
```
|
106 |
+
|
107 |
+
The app now handles these by:
|
108 |
+
1. Setting `HF_HOME`, `HF_CACHE_HOME`, `TRANSFORMERS_CACHE`, and `HF_DATASETS_CACHE` to writable directories
|
109 |
+
2. Creating all necessary Hugging Face cache directories with proper permissions
|
110 |
+
3. Using fallback directories if the primary ones fail
|
111 |
+
|
112 |
+
### Environment Variables
|
113 |
+
|
114 |
+
The app automatically sets these environment variables:
|
115 |
+
- `STREAMLIT_SERVER_FILE_WATCHER_TYPE=none`
|
116 |
+
- `STREAMLIT_SERVER_HEADLESS=true`
|
117 |
+
- `STREAMLIT_BROWSER_GATHER_USAGE_STATS=false`
|
118 |
+
- `STREAMLIT_SERVER_ENABLE_CORS=false`
|
119 |
+
- `STREAMLIT_SERVER_ENABLE_XSRF_PROTECTION=false`
|
120 |
+
- `EASYOCR_MODULE_PATH=/tmp/easyocr_models` (or fallback)
|
121 |
+
- `HOME=/tmp/docling_temp` (or fallback)
|
122 |
+
- `XDG_CACHE_HOME=/tmp/cache` (or fallback)
|
123 |
+
- `XDG_CONFIG_HOME=/tmp/config` (or fallback)
|
124 |
+
- `XDG_DATA_HOME=/tmp/data` (or fallback)
|
125 |
+
- `HF_HOME=/tmp/huggingface` (or fallback)
|
126 |
+
- `HF_CACHE_HOME=/tmp/huggingface_cache` (or fallback)
|
127 |
+
- `TRANSFORMERS_CACHE=/tmp/transformers_cache` (or fallback)
|
128 |
+
- `HF_DATASETS_CACHE=/tmp/datasets_cache` (or fallback)
|
src/streamlit_app.py
CHANGED
@@ -97,6 +97,29 @@ for env_var in ['XDG_CACHE_HOME', 'XDG_CONFIG_HOME', 'XDG_DATA_HOME']:
|
|
97 |
except Exception as e:
|
98 |
logging.warning(f"Could not create directory for {env_var}: {e}")
|
99 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
100 |
# Log startup information
|
101 |
logging.info("=" * 50)
|
102 |
logging.info("Docling Streamlit App Starting")
|
|
|
97 |
except Exception as e:
|
98 |
logging.warning(f"Could not create directory for {env_var}: {e}")
|
99 |
|
100 |
+
# Configure Hugging Face Hub to use writable directories
|
101 |
+
os.environ['HF_HOME'] = os.path.join(TEMP_DIR, 'huggingface')
|
102 |
+
os.environ['HF_CACHE_HOME'] = os.path.join(TEMP_DIR, 'huggingface_cache')
|
103 |
+
os.environ['TRANSFORMERS_CACHE'] = os.path.join(TEMP_DIR, 'transformers_cache')
|
104 |
+
os.environ['HF_DATASETS_CACHE'] = os.path.join(TEMP_DIR, 'datasets_cache')
|
105 |
+
|
106 |
+
# Create Hugging Face directories
|
107 |
+
hf_dirs = ['HF_HOME', 'HF_CACHE_HOME', 'TRANSFORMERS_CACHE', 'HF_DATASETS_CACHE']
|
108 |
+
for env_var in hf_dirs:
|
109 |
+
try:
|
110 |
+
os.makedirs(os.environ[env_var], exist_ok=True)
|
111 |
+
logging.info(f"Created Hugging Face directory for {env_var}: {os.environ[env_var]}")
|
112 |
+
except Exception as e:
|
113 |
+
logging.warning(f"Could not create Hugging Face directory for {env_var}: {e}")
|
114 |
+
# Fallback to /tmp if the temp directory fails
|
115 |
+
fallback_path = os.path.join('/tmp', env_var.lower())
|
116 |
+
os.environ[env_var] = fallback_path
|
117 |
+
try:
|
118 |
+
os.makedirs(fallback_path, exist_ok=True)
|
119 |
+
logging.info(f"Using fallback Hugging Face directory for {env_var}: {fallback_path}")
|
120 |
+
except Exception as e2:
|
121 |
+
logging.error(f"Failed to create fallback Hugging Face directory for {env_var}: {e2}")
|
122 |
+
|
123 |
# Log startup information
|
124 |
logging.info("=" * 50)
|
125 |
logging.info("Docling Streamlit App Starting")
|