levalencia committed on
Commit
ca54b04
·
1 Parent(s): 1dc37e8

Enhance Dockerfile and Streamlit app for Hugging Face directory management

Browse files

- Updated Dockerfile to create additional directories for Hugging Face caching with appropriate permissions.
- Enhanced Streamlit app to configure environment variables for Hugging Face and implement robust directory creation with error handling and logging.
- Improved fallback mechanisms for directory creation to ensure stability in various environments.

Files changed (3) hide show
  1. Dockerfile +6 -2
  2. TROUBLESHOOTING.md +31 -1
  3. src/streamlit_app.py +23 -0
Dockerfile CHANGED
@@ -10,13 +10,17 @@ RUN apt-get update && apt-get install -y \
10
  && rm -rf /var/lib/apt/lists/*
11
 
12
  # Create necessary directories with proper permissions
13
- RUN mkdir -p /app/.streamlit /tmp/docling_temp /tmp/easyocr_models /tmp/cache /tmp/config /tmp/data && \
14
  chmod 755 /app/.streamlit && \
15
  chmod 777 /tmp/docling_temp && \
16
  chmod 777 /tmp/easyocr_models && \
17
  chmod 777 /tmp/cache && \
18
  chmod 777 /tmp/config && \
19
- chmod 777 /tmp/data
 
 
 
 
20
 
21
  COPY requirements.txt ./
22
  COPY src/ ./src/
 
10
  && rm -rf /var/lib/apt/lists/*
11
 
12
  # Create necessary directories with proper permissions
13
+ RUN mkdir -p /app/.streamlit /tmp/docling_temp /tmp/easyocr_models /tmp/cache /tmp/config /tmp/data /tmp/huggingface /tmp/huggingface_cache /tmp/transformers_cache /tmp/datasets_cache && \
14
  chmod 755 /app/.streamlit && \
15
  chmod 777 /tmp/docling_temp && \
16
  chmod 777 /tmp/easyocr_models && \
17
  chmod 777 /tmp/cache && \
18
  chmod 777 /tmp/config && \
19
+ chmod 777 /tmp/data && \
20
+ chmod 777 /tmp/huggingface && \
21
+ chmod 777 /tmp/huggingface_cache && \
22
+ chmod 777 /tmp/transformers_cache && \
23
+ chmod 777 /tmp/datasets_cache
24
 
25
  COPY requirements.txt ./
26
  COPY src/ ./src/
TROUBLESHOOTING.md CHANGED
@@ -95,4 +95,34 @@ The app automatically sets these environment variables:
95
  - `HOME=/tmp/docling_temp` (or fallback)
96
  - `XDG_CACHE_HOME=/tmp/cache` (or fallback)
97
  - `XDG_CONFIG_HOME=/tmp/config` (or fallback)
98
- - `XDG_DATA_HOME=/tmp/data` (or fallback)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  - `HOME=/tmp/docling_temp` (or fallback)
96
  - `XDG_CACHE_HOME=/tmp/cache` (or fallback)
97
  - `XDG_CONFIG_HOME=/tmp/config` (or fallback)
98
+ - `XDG_DATA_HOME=/tmp/data` (or fallback)
99
+
100
+ ### Hugging Face Hub Permission Errors
101
+
102
+ If you encounter Hugging Face Hub permission errors like:
103
+ ```
104
+ PermissionError: [Errno 13] Permission denied: '/.cache'
105
+ ```
106
+
107
+ The app now handles these by:
108
+ 1. Setting `HF_HOME`, `HF_CACHE_HOME`, `TRANSFORMERS_CACHE`, and `HF_DATASETS_CACHE` to writable directories
109
+ 2. Creating all necessary Hugging Face cache directories with proper permissions
110
+ 3. Using fallback directories if the primary ones fail
111
+
112
+ ### Environment Variables
113
+
114
+ The app automatically sets these environment variables:
115
+ - `STREAMLIT_SERVER_FILE_WATCHER_TYPE=none`
116
+ - `STREAMLIT_SERVER_HEADLESS=true`
117
+ - `STREAMLIT_BROWSER_GATHER_USAGE_STATS=false`
118
+ - `STREAMLIT_SERVER_ENABLE_CORS=false`
119
+ - `STREAMLIT_SERVER_ENABLE_XSRF_PROTECTION=false`
120
+ - `EASYOCR_MODULE_PATH=/tmp/easyocr_models` (or fallback)
121
+ - `HOME=/tmp/docling_temp` (or fallback)
122
+ - `XDG_CACHE_HOME=/tmp/cache` (or fallback)
123
+ - `XDG_CONFIG_HOME=/tmp/config` (or fallback)
124
+ - `XDG_DATA_HOME=/tmp/data` (or fallback)
125
+ - `HF_HOME=/tmp/huggingface` (or fallback)
126
+ - `HF_CACHE_HOME=/tmp/huggingface_cache` (or fallback)
127
+ - `TRANSFORMERS_CACHE=/tmp/transformers_cache` (or fallback)
128
+ - `HF_DATASETS_CACHE=/tmp/datasets_cache` (or fallback)
src/streamlit_app.py CHANGED
@@ -97,6 +97,29 @@ for env_var in ['XDG_CACHE_HOME', 'XDG_CONFIG_HOME', 'XDG_DATA_HOME']:
97
  except Exception as e:
98
  logging.warning(f"Could not create directory for {env_var}: {e}")
99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  # Log startup information
101
  logging.info("=" * 50)
102
  logging.info("Docling Streamlit App Starting")
 
97
  except Exception as e:
98
  logging.warning(f"Could not create directory for {env_var}: {e}")
99
 
100
+ # Configure Hugging Face Hub to use writable directories
101
+ os.environ['HF_HOME'] = os.path.join(TEMP_DIR, 'huggingface')
102
+ os.environ['HF_CACHE_HOME'] = os.path.join(TEMP_DIR, 'huggingface_cache')
103
+ os.environ['TRANSFORMERS_CACHE'] = os.path.join(TEMP_DIR, 'transformers_cache')
104
+ os.environ['HF_DATASETS_CACHE'] = os.path.join(TEMP_DIR, 'datasets_cache')
105
+
106
+ # Create Hugging Face directories
107
+ hf_dirs = ['HF_HOME', 'HF_CACHE_HOME', 'TRANSFORMERS_CACHE', 'HF_DATASETS_CACHE']
108
+ for env_var in hf_dirs:
109
+ try:
110
+ os.makedirs(os.environ[env_var], exist_ok=True)
111
+ logging.info(f"Created Hugging Face directory for {env_var}: {os.environ[env_var]}")
112
+ except Exception as e:
113
+ logging.warning(f"Could not create Hugging Face directory for {env_var}: {e}")
114
+ # Fallback to /tmp if the temp directory fails
115
+ fallback_path = os.path.join('/tmp', env_var.lower())
116
+ os.environ[env_var] = fallback_path
117
+ try:
118
+ os.makedirs(fallback_path, exist_ok=True)
119
+ logging.info(f"Using fallback Hugging Face directory for {env_var}: {fallback_path}")
120
+ except Exception as e2:
121
+ logging.error(f"Failed to create fallback Hugging Face directory for {env_var}: {e2}")
122
+
123
  # Log startup information
124
  logging.info("=" * 50)
125
  logging.info("Docling Streamlit App Starting")