File size: 6,065 Bytes
479ced5
 
98aae70
 
479ced5
 
 
 
98aae70
479ced5
98aae70
 
 
 
 
479ced5
98aae70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
479ced5
 
98aae70
 
 
479ced5
98aae70
 
479ced5
98aae70
 
 
 
 
 
 
479ced5
98aae70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
479ced5
98aae70
 
 
 
 
479ced5
98aae70
 
 
 
 
 
 
 
479ced5
98aae70
 
 
 
 
 
 
 
 
479ced5
 
98aae70
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
#!/usr/bin/env python3
"""
Test script to verify environment variables and cache directory permissions.
This should be run before the main application to ensure everything is set up correctly.
"""

import os
import tempfile
import sys

def test_environment_setup():
    """Report whether every required environment variable is set.

    Prints one status line per variable. A variable counts as missing when
    it is absent or set to an empty string.

    Returns:
        True if all required variables have a non-empty value, else False.
    """
    rule = "=" * 60
    print(rule)
    print("Testing Environment Setup")
    print(rule)

    # Variables the application expects to find before it starts.
    required = (
        'HF_HOME',
        'HF_CACHE_HOME',
        'HF_HUB_CACHE',
        'TRANSFORMERS_CACHE',
        'HF_DATASETS_CACHE',
        'TEMP_DIR',
        'HOME',
        'TMPDIR',
    )

    missing = 0
    for name in required:
        current = os.environ.get(name)
        if not current:
            print(f"❌ {name}: NOT SET")
            missing += 1
            continue
        print(f"βœ… {name}: {current}")

    return missing == 0

def test_cache_directories():
    """Verify that each cache directory can be created and written to.

    Each directory comes from its environment variable, falling back to a
    default path when the variable is unset. The directory is created if
    needed, then a small file is written into it and removed again.

    Returns:
        True if every directory passed the write probe, else False.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("Testing Cache Directory Access")
    print(rule)

    # (environment variable, fallback path used when it is unset)
    locations = (
        ('HF_HOME', '/tmp/docling_temp/huggingface'),
        ('HF_CACHE_HOME', '/tmp/docling_temp/huggingface_cache'),
        ('HF_HUB_CACHE', '/tmp/docling_temp/huggingface_cache'),
        ('TRANSFORMERS_CACHE', '/tmp/docling_temp/transformers_cache'),
        ('HF_DATASETS_CACHE', '/tmp/docling_temp/datasets_cache'),
        ('TORCH_HOME', '/tmp/docling_temp/torch'),
        ('TENSORFLOW_HOME', '/tmp/docling_temp/tensorflow'),
        ('KERAS_HOME', '/tmp/docling_temp/keras'),
    )
    targets = [os.environ.get(name, fallback) for name, fallback in locations]

    failures = 0
    for directory in targets:
        try:
            os.makedirs(directory, exist_ok=True)
            # Write and delete a probe file to prove the directory is usable.
            probe = os.path.join(directory, 'test_write.txt')
            with open(probe, 'w') as handle:
                handle.write('test')
            os.remove(probe)
            print(f"βœ… {directory}: WRITABLE")
        except Exception as err:
            print(f"❌ {directory}: ERROR - {err}")
            failures += 1

    return failures == 0

def test_root_filesystem_access(root_paths=None):
    """Check that locations on the root filesystem cannot be written to.

    A path that already exists as a directory is probed by creating a
    throwaway temporary file inside it; a missing path is probed by trying
    to create it. Directories created by the probe are removed again, so
    the check leaves nothing behind.

    Args:
        root_paths: Optional iterable of paths to probe. When omitted, a
            built-in list of root-filesystem locations is used.

    Returns:
        True if none of the paths could be written to, False if at least
        one could. Errors other than PermissionError are printed as
        warnings and do not fail the check.
    """
    print("\n" + "=" * 60)
    print("Testing Root Filesystem Access Prevention")
    print("=" * 60)

    if root_paths is None:
        root_paths = [
            '/.cache',
            '/root',
            '/etc/test',
            '/var/test',
        ]

    all_good = True
    for path in root_paths:
        try:
            if os.path.isdir(path):
                # os.makedirs(..., exist_ok=True) returns silently for any
                # existing directory, writable or not, so it proves nothing
                # here. Attempt a real write instead; the temporary file is
                # deleted as soon as it is closed.
                with tempfile.TemporaryFile(dir=path):
                    pass
                outcome = "WRITABLE"
            else:
                # Note which path components are missing (deepest first) so
                # that only directories created by this probe get removed.
                created = []
                candidate = os.path.abspath(path)
                while not os.path.lexists(candidate):
                    created.append(candidate)
                    candidate = os.path.dirname(candidate)
                try:
                    os.makedirs(path)
                finally:
                    # Best-effort cleanup: a failure to remove a directory
                    # must not change the verdict for this path.
                    for directory in created:
                        try:
                            os.rmdir(directory)
                        except OSError:
                            pass
                outcome = "SUCCESSFULLY CREATED"
        except PermissionError:
            print(f"βœ… {path}: PERMISSION DENIED (GOOD)")
        except Exception as e:
            print(f"⚠️  {path}: OTHER ERROR - {e}")
        else:
            print(f"❌ {path}: {outcome} (SHOULD FAIL)")
            all_good = False

    return all_good

def test_temp_directory():
    """Verify that the temp directory can be created and written to.

    The directory is read from the TEMP_DIR environment variable, falling
    back to '/tmp/docling_temp' when it is unset. A small file is written
    into it and removed again.

    Returns:
        True if the write probe succeeded, False if any step raised.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("Testing Temp Directory Access")
    print(rule)

    scratch = os.environ.get('TEMP_DIR', '/tmp/docling_temp')
    try:
        os.makedirs(scratch, exist_ok=True)
        # Write and delete a probe file to prove the directory is usable.
        probe = os.path.join(scratch, 'test_temp.txt')
        with open(probe, 'w') as handle:
            handle.write('temp test')
        os.remove(probe)
        print(f"βœ… {scratch}: WRITABLE")
    except Exception as err:
        print(f"❌ {scratch}: ERROR - {err}")
        return False
    return True

def main():
    """Run every check, print a summary, and exit with a status code.

    When TEMP_DIR is unset or empty, the home, temp, and cache environment
    variables are first pointed at a "docling_temp" directory under the
    system temp directory. The process exits with 0 if all checks pass and
    1 otherwise.
    """
    print("Docling Environment and Permission Test")
    print("This script tests that the environment is set up correctly for Hugging Face Spaces")

    # Set environment variables if not already set
    if not os.environ.get('TEMP_DIR'):
        base = os.path.join(tempfile.gettempdir(), "docling_temp")

        # Variables that point directly at the base directory.
        overrides = {
            name: base
            for name in ('TEMP_DIR', 'HOME', 'USERPROFILE', 'TMPDIR', 'TEMP', 'TMP')
        }

        # Variables that point at a dedicated subdirectory of the base.
        subdirectories = {
            'HF_HOME': 'huggingface',
            'HF_CACHE_HOME': 'huggingface_cache',
            'HF_HUB_CACHE': 'huggingface_cache',
            'TRANSFORMERS_CACHE': 'transformers_cache',
            'HF_DATASETS_CACHE': 'datasets_cache',
            'DIFFUSERS_CACHE': 'diffusers_cache',
            'ACCELERATE_CACHE': 'accelerate_cache',
            'TORCH_HOME': 'torch',
            'TENSORFLOW_HOME': 'tensorflow',
            'KERAS_HOME': 'keras',
            'XDG_CACHE_HOME': 'cache',
            'XDG_CONFIG_HOME': 'config',
            'XDG_DATA_HOME': 'data',
        }
        for name, leaf in subdirectories.items():
            overrides[name] = os.path.join(base, leaf)

        os.environ.update(overrides)

    # Run the checks in a fixed order, pairing each result with its label.
    results = [
        ("Environment Variables", test_environment_setup()),
        ("Cache Directories", test_cache_directories()),
        ("Root Access Prevention", test_root_filesystem_access()),
        ("Temp Directory", test_temp_directory()),
    ]

    # Summary
    rule = "=" * 60
    print("\n" + rule)
    print("TEST SUMMARY")
    print(rule)
    for label, passed in results:
        verdict = 'βœ… PASS' if passed else '❌ FAIL'
        print(f"{label}: {verdict}")

    overall_success = all(passed for _, passed in results)
    headline = 'βœ… ALL TESTS PASSED' if overall_success else '❌ SOME TESTS FAILED'
    print(f"\nOverall Result: {headline}")

    if overall_success:
        print("\nπŸŽ‰ All tests passed! The environment is ready for Docling.")
        sys.exit(0)

    print("\n⚠️  Some tests failed. Please check the environment setup.")
    sys.exit(1)

# Run the checks only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()