Spaces:
Sleeping
Sleeping
Upload 9 files
Browse files- Dockerfile +11 -2
- app.py +27 -7
- web_api.py +96 -27
Dockerfile
CHANGED
@@ -18,8 +18,16 @@ COPY web_api.py .
|
|
18 |
COPY converter.py .
|
19 |
COPY preserve_linebreaks.lua .
|
20 |
|
21 |
-
# Create
|
22 |
-
RUN
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
|
24 |
# Expose port
|
25 |
EXPOSE 7860
|
@@ -27,6 +35,7 @@ EXPOSE 7860
|
|
27 |
# Set environment variables
|
28 |
ENV PYTHONPATH=/app
|
29 |
ENV PORT=7860
|
|
|
30 |
|
31 |
# Run the application
|
32 |
CMD ["python", "app.py"]
|
|
|
18 |
COPY converter.py .
|
19 |
COPY preserve_linebreaks.lua .
|
20 |
|
21 |
+
# Create a user for the application (better security)
|
22 |
+
RUN useradd -m -u 1000 appuser
|
23 |
+
|
24 |
+
# Create necessary directories with proper permissions
|
25 |
+
RUN mkdir -p /tmp/docx_converter && \
|
26 |
+
chmod 777 /tmp/docx_converter && \
|
27 |
+
chown -R appuser:appuser /app /tmp/docx_converter
|
28 |
+
|
29 |
+
# Switch to the application user
|
30 |
+
USER appuser
|
31 |
|
32 |
# Expose port
|
33 |
EXPOSE 7860
|
|
|
35 |
# Set environment variables
|
36 |
ENV PYTHONPATH=/app
|
37 |
ENV PORT=7860
|
38 |
+
ENV TMPDIR=/tmp/docx_converter
|
39 |
|
40 |
# Run the application
|
41 |
CMD ["python", "app.py"]
|
app.py
CHANGED
@@ -1,17 +1,32 @@
|
|
1 |
#!/usr/bin/env python3
|
2 |
"""
|
3 |
DOCX to LaTeX Converter API
|
4 |
-
Main entry point for Hugging Face Spaces deployment
|
5 |
"""
|
6 |
|
7 |
import os
|
8 |
import sys
|
|
|
9 |
|
10 |
# Set up environment for Hugging Face Spaces
|
11 |
if 'SPACE_ID' in os.environ:
|
12 |
# Running on Hugging Face Spaces
|
13 |
PORT = int(os.environ.get('PORT', 7860))
|
14 |
HOST = '0.0.0.0'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
else:
|
16 |
# Running locally
|
17 |
PORT = 5001
|
@@ -24,10 +39,15 @@ if __name__ == "__main__":
|
|
24 |
print(f"🚀 Starting DOCX to LaTeX Converter API")
|
25 |
print(f"🌐 Server running on http://{HOST}:{PORT}")
|
26 |
print(f"📖 Health check: http://{HOST}:{PORT}/api/health")
|
27 |
-
print(f"📚 API Documentation: https://huggingface.co/spaces/
|
28 |
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
1 |
#!/usr/bin/env python3
|
2 |
"""
|
3 |
DOCX to LaTeX Converter API
|
4 |
+
Main entry point for Hugging Face Spaces deployment with improved file handling
|
5 |
"""
|
6 |
|
7 |
import os
|
8 |
import sys
|
9 |
+
import tempfile
|
10 |
|
11 |
# Set up environment for Hugging Face Spaces
|
12 |
if 'SPACE_ID' in os.environ:
|
13 |
# Running on Hugging Face Spaces
|
14 |
PORT = int(os.environ.get('PORT', 7860))
|
15 |
HOST = '0.0.0.0'
|
16 |
+
|
17 |
+
# Ensure we have a writable temp directory
|
18 |
+
temp_dir = os.environ.get('TMPDIR', tempfile.gettempdir())
|
19 |
+
print(f"📁 Using temp directory: {temp_dir}")
|
20 |
+
|
21 |
+
# Test write permissions
|
22 |
+
try:
|
23 |
+
test_file = os.path.join(temp_dir, 'test_write.txt')
|
24 |
+
with open(test_file, 'w') as f:
|
25 |
+
f.write('test')
|
26 |
+
os.unlink(test_file)
|
27 |
+
print("✅ Write permissions confirmed")
|
28 |
+
except Exception as e:
|
29 |
+
print(f"⚠️ Write permission test failed: {e}")
|
30 |
else:
|
31 |
# Running locally
|
32 |
PORT = 5001
|
|
|
39 |
print(f"🚀 Starting DOCX to LaTeX Converter API")
|
40 |
print(f"🌐 Server running on http://{HOST}:{PORT}")
|
41 |
print(f"📖 Health check: http://{HOST}:{PORT}/api/health")
|
42 |
+
print(f"📚 API Documentation: https://huggingface.co/spaces/shayan5422/Docx_to_latex")
|
43 |
|
44 |
+
# Enable proper error handling for file operations
|
45 |
+
try:
|
46 |
+
app.run(
|
47 |
+
host=HOST,
|
48 |
+
port=PORT,
|
49 |
+
debug=False # Disable debug in production
|
50 |
+
)
|
51 |
+
except Exception as e:
|
52 |
+
print(f"❌ Failed to start server: {e}")
|
53 |
+
sys.exit(1)
|
web_api.py
CHANGED
@@ -6,18 +6,33 @@ import uuid
|
|
6 |
from werkzeug.utils import secure_filename
|
7 |
from converter import convert_docx_to_latex
|
8 |
import shutil
|
|
|
9 |
|
10 |
app = Flask(__name__)
|
11 |
CORS(app) # Enable CORS for all routes
|
12 |
|
13 |
# Configuration
|
14 |
app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024 # 16MB max file size
|
15 |
-
UPLOAD_FOLDER = 'temp/uploads'
|
16 |
-
OUTPUT_FOLDER = 'temp/outputs'
|
17 |
|
18 |
-
#
|
19 |
-
|
20 |
-
os.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
|
22 |
# Store conversion tasks
|
23 |
conversion_tasks = {}
|
@@ -25,7 +40,13 @@ conversion_tasks = {}
|
|
25 |
@app.route('/api/health', methods=['GET'])
|
26 |
def health_check():
|
27 |
"""Health check endpoint"""
|
28 |
-
return jsonify({
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
|
30 |
@app.route('/api/upload', methods=['POST'])
|
31 |
def upload_file():
|
@@ -44,26 +65,51 @@ def upload_file():
|
|
44 |
# Generate unique task ID
|
45 |
task_id = str(uuid.uuid4())
|
46 |
|
47 |
-
# Save uploaded file
|
48 |
filename = secure_filename(file.filename)
|
49 |
-
file_path = os.path.join(UPLOAD_FOLDER, f"{task_id}_{filename}")
|
50 |
-
file.save(file_path)
|
51 |
-
|
52 |
-
# Store task info
|
53 |
-
conversion_tasks[task_id] = {
|
54 |
-
'status': 'uploaded',
|
55 |
-
'original_filename': filename,
|
56 |
-
'file_path': file_path,
|
57 |
-
'output_filename': filename.replace('.docx', '.tex'),
|
58 |
-
'created_at': os.path.getctime(file_path)
|
59 |
-
}
|
60 |
|
61 |
-
|
62 |
-
|
63 |
-
'filename'
|
64 |
-
'
|
65 |
-
|
66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
|
68 |
except Exception as e:
|
69 |
return jsonify({'error': f'Upload failed: {str(e)}'}), 500
|
@@ -95,9 +141,18 @@ def convert_document():
|
|
95 |
task['status'] = 'converting'
|
96 |
task['output_filename'] = output_filename
|
97 |
|
98 |
-
# Prepare output paths
|
99 |
-
output_path =
|
100 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
101 |
|
102 |
# Perform conversion
|
103 |
success, message = convert_docx_to_latex(
|
@@ -425,12 +480,26 @@ def cleanup_old_files():
|
|
425 |
except Exception as e:
|
426 |
print(f"Warning: Failed to cleanup old files: {e}")
|
427 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
428 |
if __name__ == '__main__':
|
429 |
# Cleanup old files on startup
|
430 |
cleanup_old_files()
|
431 |
|
432 |
# Run the Flask app
|
433 |
print("Starting DOCX to LaTeX API server...")
|
|
|
434 |
print("API endpoints:")
|
435 |
print(" POST /api/upload - Upload DOCX file")
|
436 |
print(" POST /api/convert - Convert to LaTeX")
|
|
|
6 |
from werkzeug.utils import secure_filename
|
7 |
from converter import convert_docx_to_latex
|
8 |
import shutil
|
9 |
+
import stat
|
10 |
|
11 |
app = Flask(__name__)
|
12 |
CORS(app) # Enable CORS for all routes
|
13 |
|
14 |
# Configuration
|
15 |
app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024 # 16MB max file size
|
|
|
|
|
16 |
|
17 |
+
# Use system temp directory for better compatibility with Hugging Face Spaces
|
18 |
+
TEMP_BASE_DIR = tempfile.mkdtemp(prefix='docx_converter_')
|
19 |
+
UPLOAD_FOLDER = os.path.join(TEMP_BASE_DIR, 'uploads')
|
20 |
+
OUTPUT_FOLDER = os.path.join(TEMP_BASE_DIR, 'outputs')
|
21 |
+
|
22 |
+
# Ensure directories exist with proper permissions
|
23 |
+
def create_temp_dirs():
|
24 |
+
"""Create temporary directories with proper permissions"""
|
25 |
+
for directory in [UPLOAD_FOLDER, OUTPUT_FOLDER]:
|
26 |
+
os.makedirs(directory, exist_ok=True)
|
27 |
+
# Set full permissions for the directory
|
28 |
+
try:
|
29 |
+
os.chmod(directory, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)
|
30 |
+
except OSError:
|
31 |
+
# If chmod fails, continue anyway (some systems don't allow it)
|
32 |
+
pass
|
33 |
+
|
34 |
+
# Create directories on startup
|
35 |
+
create_temp_dirs()
|
36 |
|
37 |
# Store conversion tasks
|
38 |
conversion_tasks = {}
|
|
|
40 |
@app.route('/api/health', methods=['GET'])
|
41 |
def health_check():
|
42 |
"""Health check endpoint"""
|
43 |
+
return jsonify({
|
44 |
+
'status': 'healthy',
|
45 |
+
'message': 'DOCX to LaTeX API is running',
|
46 |
+
'temp_dir': TEMP_BASE_DIR,
|
47 |
+
'upload_dir': UPLOAD_FOLDER,
|
48 |
+
'output_dir': OUTPUT_FOLDER
|
49 |
+
})
|
50 |
|
51 |
@app.route('/api/upload', methods=['POST'])
|
52 |
def upload_file():
|
|
|
65 |
# Generate unique task ID
|
66 |
task_id = str(uuid.uuid4())
|
67 |
|
68 |
+
# Save uploaded file using tempfile for better compatibility
|
69 |
filename = secure_filename(file.filename)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
|
71 |
+
# Create a temporary file instead of using a fixed path
|
72 |
+
temp_fd, temp_path = tempfile.mkstemp(
|
73 |
+
suffix=f'_{filename}',
|
74 |
+
prefix=f'{task_id}_',
|
75 |
+
dir=UPLOAD_FOLDER
|
76 |
+
)
|
77 |
+
|
78 |
+
try:
|
79 |
+
# Close the file descriptor and save the file
|
80 |
+
os.close(temp_fd)
|
81 |
+
file.save(temp_path)
|
82 |
+
|
83 |
+
# Set proper permissions on the file
|
84 |
+
try:
|
85 |
+
os.chmod(temp_path, stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IWGRP)
|
86 |
+
except OSError:
|
87 |
+
# If chmod fails, continue anyway
|
88 |
+
pass
|
89 |
+
|
90 |
+
# Store task info
|
91 |
+
conversion_tasks[task_id] = {
|
92 |
+
'status': 'uploaded',
|
93 |
+
'original_filename': filename,
|
94 |
+
'file_path': temp_path,
|
95 |
+
'output_filename': filename.replace('.docx', '.tex'),
|
96 |
+
'created_at': os.path.getctime(temp_path)
|
97 |
+
}
|
98 |
+
|
99 |
+
return jsonify({
|
100 |
+
'task_id': task_id,
|
101 |
+
'filename': filename,
|
102 |
+
'status': 'uploaded',
|
103 |
+
'message': 'File uploaded successfully'
|
104 |
+
})
|
105 |
+
|
106 |
+
except Exception as e:
|
107 |
+
# Clean up the temp file if something goes wrong
|
108 |
+
try:
|
109 |
+
os.unlink(temp_path)
|
110 |
+
except:
|
111 |
+
pass
|
112 |
+
raise e
|
113 |
|
114 |
except Exception as e:
|
115 |
return jsonify({'error': f'Upload failed: {str(e)}'}), 500
|
|
|
141 |
task['status'] = 'converting'
|
142 |
task['output_filename'] = output_filename
|
143 |
|
144 |
+
# Prepare output paths using tempfile for better compatibility
|
145 |
+
output_fd, output_path = tempfile.mkstemp(
|
146 |
+
suffix=f'_{output_filename}',
|
147 |
+
prefix=f'{task_id}_',
|
148 |
+
dir=OUTPUT_FOLDER
|
149 |
+
)
|
150 |
+
os.close(output_fd) # Close file descriptor, we'll write to the path directly
|
151 |
+
|
152 |
+
media_path = tempfile.mkdtemp(
|
153 |
+
prefix=f'{task_id}_media_',
|
154 |
+
dir=OUTPUT_FOLDER
|
155 |
+
)
|
156 |
|
157 |
# Perform conversion
|
158 |
success, message = convert_docx_to_latex(
|
|
|
480 |
except Exception as e:
|
481 |
print(f"Warning: Failed to cleanup old files: {e}")
|
482 |
|
483 |
+
# Add cleanup on application exit
|
484 |
+
import atexit
|
485 |
+
|
486 |
+
def cleanup_on_exit():
|
487 |
+
"""Clean up temporary directory on exit"""
|
488 |
+
try:
|
489 |
+
shutil.rmtree(TEMP_BASE_DIR)
|
490 |
+
print(f"Cleaned up temporary directory: {TEMP_BASE_DIR}")
|
491 |
+
except OSError:
|
492 |
+
pass
|
493 |
+
|
494 |
+
atexit.register(cleanup_on_exit)
|
495 |
+
|
496 |
if __name__ == '__main__':
|
497 |
# Cleanup old files on startup
|
498 |
cleanup_old_files()
|
499 |
|
500 |
# Run the Flask app
|
501 |
print("Starting DOCX to LaTeX API server...")
|
502 |
+
print(f"Using temporary directory: {TEMP_BASE_DIR}")
|
503 |
print("API endpoints:")
|
504 |
print(" POST /api/upload - Upload DOCX file")
|
505 |
print(" POST /api/convert - Convert to LaTeX")
|