levalencia committed on
Commit
479ced5
·
1 Parent(s): 0633369

Enhance Dockerfile and Streamlit app for improved temporary file management

Browse files

- Added directory creation and permission settings in Dockerfile for temp files.
- Updated Streamlit app to use system temp directory, with fallback options for permission issues.
- Improved error handling and logging for temp file operations, ensuring better user feedback and stability.

Files changed (3) hide show
  1. Dockerfile +6 -0
  2. src/streamlit_app.py +85 -22
  3. test_permissions.py +80 -0
Dockerfile CHANGED
@@ -9,8 +9,14 @@ RUN apt-get update && apt-get install -y \
9
  git \
10
  && rm -rf /var/lib/apt/lists/*
11
 
 
 
 
 
 
12
  COPY requirements.txt ./
13
  COPY src/ ./src/
 
14
 
15
  RUN pip3 install -r requirements.txt
16
 
 
9
  git \
10
  && rm -rf /var/lib/apt/lists/*
11
 
12
+ # Create necessary directories with proper permissions
13
+ RUN mkdir -p /app/.streamlit /tmp/docling_temp && \
14
+ chmod 755 /app/.streamlit && \
15
+ chmod 777 /tmp/docling_temp
16
+
17
  COPY requirements.txt ./
18
  COPY src/ ./src/
19
+ COPY .streamlit/ ./.streamlit/
20
 
21
  RUN pip3 install -r requirements.txt
22
 
src/streamlit_app.py CHANGED
@@ -13,6 +13,14 @@ import difflib
13
  import re
14
  import time
15
 
 
 
 
 
 
 
 
 
16
  # Load environment variables from .env
17
  load_dotenv()
18
 
@@ -21,9 +29,47 @@ AZURE_OPENAI_KEY = os.getenv("AZURE_OPENAI_KEY")
21
  AZURE_OPENAI_VERSION = os.getenv("AZURE_OPENAI_VERSION")
22
  AZURE_OPENAI_DEPLOYMENT = os.getenv("AZURE_OPENAI_DEPLOYMENT")
23
 
24
- # Create temp directory if it doesn't exist
25
- TEMP_DIR = "temp_files"
26
- os.makedirs(TEMP_DIR, exist_ok=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
  def cleanup_temp_files():
29
  """Clean up temporary files in the temp directory."""
@@ -32,8 +78,18 @@ def cleanup_temp_files():
32
  for filename in os.listdir(TEMP_DIR):
33
  file_path = os.path.join(TEMP_DIR, filename)
34
  if os.path.isfile(file_path):
35
- os.remove(file_path)
 
 
 
 
 
 
36
  logging.info(f"Cleaned up temporary files in {TEMP_DIR}")
 
 
 
 
37
  except Exception as e:
38
  logging.warning(f"Error cleaning up temp files: {e}")
39
 
@@ -67,18 +123,29 @@ def clear_all_data():
67
 
68
  def get_temp_files_info():
69
  """Get information about temporary files (count and total size)."""
70
- if not os.path.exists(TEMP_DIR):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  return 0, 0
72
-
73
- files = os.listdir(TEMP_DIR)
74
- total_size = 0
75
-
76
- for filename in files:
77
- file_path = os.path.join(TEMP_DIR, filename)
78
- if os.path.isfile(file_path):
79
- total_size += os.path.getsize(file_path)
80
-
81
- return len(files), total_size
82
 
83
  def format_file_size(size_bytes):
84
  """Format file size in human readable format."""
@@ -103,17 +170,13 @@ def save_uploaded_file(uploaded_file, filename):
103
  f.write(file_bytes)
104
  logging.info(f"Saved uploaded file to {temp_path}")
105
  return temp_path
 
 
 
106
  except Exception as e:
107
  logging.error(f"Error saving uploaded file: {e}")
108
  raise
109
 
110
- logging.basicConfig(
111
- level=logging.INFO, # or DEBUG for more verbosity
112
- format="%(asctime)s %(levelname)s %(name)s: %(message)s",
113
- stream=sys.stdout,
114
- force=True
115
- )
116
-
117
  # Configure page layout to use wide mode
118
  st.set_page_config(
119
  page_title="Medical Document Parser & Redactor",
 
13
  import re
14
  import time
15
 
16
+ # Configure logging early to avoid issues
17
+ logging.basicConfig(
18
+ level=logging.INFO,
19
+ format="%(asctime)s %(levelname)s %(name)s: %(message)s",
20
+ stream=sys.stdout,
21
+ force=True
22
+ )
23
+
24
  # Load environment variables from .env
25
  load_dotenv()
26
 
 
29
  AZURE_OPENAI_VERSION = os.getenv("AZURE_OPENAI_VERSION")
30
  AZURE_OPENAI_DEPLOYMENT = os.getenv("AZURE_OPENAI_DEPLOYMENT")
31
 
32
+ # Use system temp directory instead of local directory to avoid permission issues
33
+ TEMP_DIR = os.path.join(tempfile.gettempdir(), "docling_temp")
34
+ try:
35
+ os.makedirs(TEMP_DIR, exist_ok=True)
36
+ logging.info(f"Using temp directory: {TEMP_DIR}")
37
+ except PermissionError as e:
38
+ logging.warning(f"Permission error creating temp dir {TEMP_DIR}: {e}")
39
+ # Fallback to a subdirectory of the current working directory if temp dir fails
40
+ TEMP_DIR = os.path.join(os.getcwd(), "temp_files")
41
+ try:
42
+ os.makedirs(TEMP_DIR, exist_ok=True)
43
+ logging.info(f"Using fallback temp directory: {TEMP_DIR}")
44
+ except PermissionError as e2:
45
+ logging.warning(f"Permission error creating fallback temp dir {TEMP_DIR}: {e2}")
46
+ # Last resort: use a directory that should be writable
47
+ TEMP_DIR = "/tmp/docling_temp"
48
+ try:
49
+ os.makedirs(TEMP_DIR, exist_ok=True)
50
+ logging.info(f"Using last resort temp directory: {TEMP_DIR}")
51
+ except Exception as e3:
52
+ logging.error(f"Failed to create any temp directory: {e3}")
53
+ # Use current directory as absolute last resort
54
+ TEMP_DIR = "."
55
+ logging.warning(f"Using current directory as temp: {TEMP_DIR}")
56
+
57
+ # Configure Streamlit to use writable directories
58
+ os.environ['STREAMLIT_SERVER_FILE_WATCHER_TYPE'] = 'none'
59
+ os.environ['STREAMLIT_SERVER_HEADLESS'] = 'true'
60
+ os.environ['STREAMLIT_BROWSER_GATHER_USAGE_STATS'] = 'false'
61
+
62
+ # Additional environment variables for Hugging Face deployment
63
+ os.environ['STREAMLIT_SERVER_ENABLE_CORS'] = 'false'
64
+ os.environ['STREAMLIT_SERVER_ENABLE_XSRF_PROTECTION'] = 'false'
65
+
66
+ # Log startup information
67
+ logging.info("=" * 50)
68
+ logging.info("Docling Streamlit App Starting")
69
+ logging.info(f"Temp directory: {TEMP_DIR}")
70
+ logging.info(f"Current working directory: {os.getcwd()}")
71
+ logging.info(f"Python version: {sys.version}")
72
+ logging.info("=" * 50)
73
 
74
  def cleanup_temp_files():
75
  """Clean up temporary files in the temp directory."""
 
78
  for filename in os.listdir(TEMP_DIR):
79
  file_path = os.path.join(TEMP_DIR, filename)
80
  if os.path.isfile(file_path):
81
+ try:
82
+ os.remove(file_path)
83
+ logging.info(f"Removed temp file: {filename}")
84
+ except PermissionError as e:
85
+ logging.warning(f"Permission error removing {filename}: {e}")
86
+ except Exception as e:
87
+ logging.warning(f"Error removing {filename}: {e}")
88
  logging.info(f"Cleaned up temporary files in {TEMP_DIR}")
89
+ else:
90
+ logging.info(f"Temp directory {TEMP_DIR} does not exist")
91
+ except PermissionError as e:
92
+ logging.warning(f"Permission error accessing temp directory {TEMP_DIR}: {e}")
93
  except Exception as e:
94
  logging.warning(f"Error cleaning up temp files: {e}")
95
 
 
123
 
124
  def get_temp_files_info():
125
  """Get information about temporary files (count and total size)."""
126
+ try:
127
+ if not os.path.exists(TEMP_DIR):
128
+ return 0, 0
129
+
130
+ files = os.listdir(TEMP_DIR)
131
+ total_size = 0
132
+
133
+ for filename in files:
134
+ try:
135
+ file_path = os.path.join(TEMP_DIR, filename)
136
+ if os.path.isfile(file_path):
137
+ total_size += os.path.getsize(file_path)
138
+ except (PermissionError, OSError) as e:
139
+ logging.warning(f"Error accessing file {filename}: {e}")
140
+ continue
141
+
142
+ return len(files), total_size
143
+ except PermissionError as e:
144
+ logging.warning(f"Permission error accessing temp directory {TEMP_DIR}: {e}")
145
+ return 0, 0
146
+ except Exception as e:
147
+ logging.warning(f"Error getting temp files info: {e}")
148
  return 0, 0
 
 
 
 
 
 
 
 
 
 
149
 
150
  def format_file_size(size_bytes):
151
  """Format file size in human readable format."""
 
170
  f.write(file_bytes)
171
  logging.info(f"Saved uploaded file to {temp_path}")
172
  return temp_path
173
+ except PermissionError as e:
174
+ logging.error(f"Permission error saving uploaded file to {temp_path}: {e}")
175
+ raise PermissionError(f"Cannot save file due to permission restrictions. Please try clearing data or contact support.")
176
  except Exception as e:
177
  logging.error(f"Error saving uploaded file: {e}")
178
  raise
179
 
 
 
 
 
 
 
 
180
  # Configure page layout to use wide mode
181
  st.set_page_config(
182
  page_title="Medical Document Parser & Redactor",
test_permissions.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Test script to check permissions and directory creation in Hugging Face environment.
4
+ """
5
+
6
+ import os
7
+ import tempfile
8
+ import logging
9
+
10
+ def test_permissions():
11
+ """Test if we can create directories and files in various locations."""
12
+ print("=== Testing Permissions ===")
13
+
14
+ # Test 1: System temp directory
15
+ print("\n1. Testing system temp directory...")
16
+ try:
17
+ temp_dir = os.path.join(tempfile.gettempdir(), "docling_test")
18
+ os.makedirs(temp_dir, exist_ok=True)
19
+ test_file = os.path.join(temp_dir, "test.txt")
20
+ with open(test_file, "w") as f:
21
+ f.write("test")
22
+ os.remove(test_file)
23
+ os.rmdir(temp_dir)
24
+ print(f"✅ Success: {temp_dir}")
25
+ except Exception as e:
26
+ print(f"❌ Failed: {e}")
27
+
28
+ # Test 2: Current working directory
29
+ print("\n2. Testing current working directory...")
30
+ try:
31
+ cwd = os.getcwd()
32
+ test_dir = os.path.join(cwd, "test_temp")
33
+ os.makedirs(test_dir, exist_ok=True)
34
+ test_file = os.path.join(test_dir, "test.txt")
35
+ with open(test_file, "w") as f:
36
+ f.write("test")
37
+ os.remove(test_file)
38
+ os.rmdir(test_dir)
39
+ print(f"✅ Success: {test_dir}")
40
+ except Exception as e:
41
+ print(f"❌ Failed: {e}")
42
+
43
+ # Test 3: /tmp directory
44
+ print("\n3. Testing /tmp directory...")
45
+ try:
46
+ tmp_dir = "/tmp/docling_test"
47
+ os.makedirs(tmp_dir, exist_ok=True)
48
+ test_file = os.path.join(tmp_dir, "test.txt")
49
+ with open(test_file, "w") as f:
50
+ f.write("test")
51
+ os.remove(test_file)
52
+ os.rmdir(tmp_dir)
53
+ print(f"✅ Success: {tmp_dir}")
54
+ except Exception as e:
55
+ print(f"❌ Failed: {e}")
56
+
57
+ # Test 4: Environment variables
58
+ print("\n4. Testing environment variables...")
59
+ env_vars = [
60
+ 'STREAMLIT_SERVER_FILE_WATCHER_TYPE',
61
+ 'STREAMLIT_SERVER_HEADLESS',
62
+ 'STREAMLIT_BROWSER_GATHER_USAGE_STATS'
63
+ ]
64
+ for var in env_vars:
65
+ value = os.environ.get(var, 'NOT_SET')
66
+ print(f" {var}: {value}")
67
+
68
+ # Test 5: Current directory permissions
69
+ print("\n5. Testing current directory permissions...")
70
+ try:
71
+ cwd = os.getcwd()
72
+ print(f" Current directory: {cwd}")
73
+ print(f" Readable: {os.access(cwd, os.R_OK)}")
74
+ print(f" Writable: {os.access(cwd, os.W_OK)}")
75
+ print(f" Executable: {os.access(cwd, os.X_OK)}")
76
+ except Exception as e:
77
+ print(f"❌ Failed: {e}")
78
+
79
+ if __name__ == "__main__":
80
+ test_permissions()