shayan5422 committed on
Commit
de80732
·
verified ·
1 Parent(s): a469ee1

Upload 9 files

Browse files
Files changed (3) hide show
  1. Dockerfile +11 -2
  2. app.py +27 -7
  3. web_api.py +96 -27
Dockerfile CHANGED
@@ -18,8 +18,16 @@ COPY web_api.py .
18
  COPY converter.py .
19
  COPY preserve_linebreaks.lua .
20
 
21
- # Create necessary directories
22
- RUN mkdir -p temp/uploads temp/outputs
 
 
 
 
 
 
 
 
23
 
24
  # Expose port
25
  EXPOSE 7860
@@ -27,6 +35,7 @@ EXPOSE 7860
27
  # Set environment variables
28
  ENV PYTHONPATH=/app
29
  ENV PORT=7860
 
30
 
31
  # Run the application
32
  CMD ["python", "app.py"]
 
18
  COPY converter.py .
19
  COPY preserve_linebreaks.lua .
20
 
21
+ # Create a user for the application (better security)
22
+ RUN useradd -m -u 1000 appuser
23
+
24
+ # Create necessary directories with proper permissions
25
+ RUN mkdir -p /tmp/docx_converter && \
26
+ chmod 777 /tmp/docx_converter && \
27
+ chown -R appuser:appuser /app /tmp/docx_converter
28
+
29
+ # Switch to the application user
30
+ USER appuser
31
 
32
  # Expose port
33
  EXPOSE 7860
 
35
  # Set environment variables
36
  ENV PYTHONPATH=/app
37
  ENV PORT=7860
38
+ ENV TMPDIR=/tmp/docx_converter
39
 
40
  # Run the application
41
  CMD ["python", "app.py"]
app.py CHANGED
@@ -1,17 +1,32 @@
1
  #!/usr/bin/env python3
2
  """
3
  DOCX to LaTeX Converter API
4
- Main entry point for Hugging Face Spaces deployment
5
  """
6
 
7
  import os
8
  import sys
 
9
 
10
  # Set up environment for Hugging Face Spaces
11
  if 'SPACE_ID' in os.environ:
12
  # Running on Hugging Face Spaces
13
  PORT = int(os.environ.get('PORT', 7860))
14
  HOST = '0.0.0.0'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  else:
16
  # Running locally
17
  PORT = 5001
@@ -24,10 +39,15 @@ if __name__ == "__main__":
24
  print(f"🚀 Starting DOCX to LaTeX Converter API")
25
  print(f"🌐 Server running on http://{HOST}:{PORT}")
26
  print(f"📖 Health check: http://{HOST}:{PORT}/api/health")
27
- print(f"📚 API Documentation: https://huggingface.co/spaces/YOUR_USERNAME/docx-to-latex")
28
 
29
- app.run(
30
- host=HOST,
31
- port=PORT,
32
- debug=False # Disable debug in production
33
- )
 
 
 
 
 
 
1
  #!/usr/bin/env python3
2
  """
3
  DOCX to LaTeX Converter API
4
+ Main entry point for Hugging Face Spaces deployment with improved file handling
5
  """
6
 
7
  import os
8
  import sys
9
+ import tempfile
10
 
11
  # Set up environment for Hugging Face Spaces
12
  if 'SPACE_ID' in os.environ:
13
  # Running on Hugging Face Spaces
14
  PORT = int(os.environ.get('PORT', 7860))
15
  HOST = '0.0.0.0'
16
+
17
+ # Ensure we have a writable temp directory
18
+ temp_dir = os.environ.get('TMPDIR', tempfile.gettempdir())
19
+ print(f"📁 Using temp directory: {temp_dir}")
20
+
21
+ # Test write permissions
22
+ try:
23
+ test_file = os.path.join(temp_dir, 'test_write.txt')
24
+ with open(test_file, 'w') as f:
25
+ f.write('test')
26
+ os.unlink(test_file)
27
+ print("✅ Write permissions confirmed")
28
+ except Exception as e:
29
+ print(f"⚠️ Write permission test failed: {e}")
30
  else:
31
  # Running locally
32
  PORT = 5001
 
39
  print(f"🚀 Starting DOCX to LaTeX Converter API")
40
  print(f"🌐 Server running on http://{HOST}:{PORT}")
41
  print(f"📖 Health check: http://{HOST}:{PORT}/api/health")
42
+ print(f"📚 API Documentation: https://huggingface.co/spaces/shayan5422/Docx_to_latex")
43
 
44
+ # Report a server startup failure and exit with a non-zero status
45
+ try:
46
+ app.run(
47
+ host=HOST,
48
+ port=PORT,
49
+ debug=False # Disable debug in production
50
+ )
51
+ except Exception as e:
52
+ print(f"❌ Failed to start server: {e}")
53
+ sys.exit(1)
web_api.py CHANGED
@@ -6,18 +6,33 @@ import uuid
6
  from werkzeug.utils import secure_filename
7
  from converter import convert_docx_to_latex
8
  import shutil
 
9
 
10
  app = Flask(__name__)
11
  CORS(app) # Enable CORS for all routes
12
 
13
  # Configuration
14
  app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024 # 16MB max file size
15
- UPLOAD_FOLDER = 'temp/uploads'
16
- OUTPUT_FOLDER = 'temp/outputs'
17
 
18
- # Ensure directories exist
19
- os.makedirs(UPLOAD_FOLDER, exist_ok=True)
20
- os.makedirs(OUTPUT_FOLDER, exist_ok=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
  # Store conversion tasks
23
  conversion_tasks = {}
@@ -25,7 +40,13 @@ conversion_tasks = {}
25
  @app.route('/api/health', methods=['GET'])
26
  def health_check():
27
  """Health check endpoint"""
28
- return jsonify({'status': 'healthy', 'message': 'DOCX to LaTeX API is running'})
 
 
 
 
 
 
29
 
30
  @app.route('/api/upload', methods=['POST'])
31
  def upload_file():
@@ -44,26 +65,51 @@ def upload_file():
44
  # Generate unique task ID
45
  task_id = str(uuid.uuid4())
46
 
47
- # Save uploaded file
48
  filename = secure_filename(file.filename)
49
- file_path = os.path.join(UPLOAD_FOLDER, f"{task_id}_{filename}")
50
- file.save(file_path)
51
-
52
- # Store task info
53
- conversion_tasks[task_id] = {
54
- 'status': 'uploaded',
55
- 'original_filename': filename,
56
- 'file_path': file_path,
57
- 'output_filename': filename.replace('.docx', '.tex'),
58
- 'created_at': os.path.getctime(file_path)
59
- }
60
 
61
- return jsonify({
62
- 'task_id': task_id,
63
- 'filename': filename,
64
- 'status': 'uploaded',
65
- 'message': 'File uploaded successfully'
66
- })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
  except Exception as e:
69
  return jsonify({'error': f'Upload failed: {str(e)}'}), 500
@@ -95,9 +141,18 @@ def convert_document():
95
  task['status'] = 'converting'
96
  task['output_filename'] = output_filename
97
 
98
- # Prepare output paths
99
- output_path = os.path.join(OUTPUT_FOLDER, f"{task_id}_{output_filename}")
100
- media_path = os.path.join(OUTPUT_FOLDER, f"{task_id}_media")
 
 
 
 
 
 
 
 
 
101
 
102
  # Perform conversion
103
  success, message = convert_docx_to_latex(
@@ -425,12 +480,26 @@ def cleanup_old_files():
425
  except Exception as e:
426
  print(f"Warning: Failed to cleanup old files: {e}")
427
 
 
 
 
 
 
 
 
 
 
 
 
 
 
428
  if __name__ == '__main__':
429
  # Cleanup old files on startup
430
  cleanup_old_files()
431
 
432
  # Run the Flask app
433
  print("Starting DOCX to LaTeX API server...")
 
434
  print("API endpoints:")
435
  print(" POST /api/upload - Upload DOCX file")
436
  print(" POST /api/convert - Convert to LaTeX")
 
6
  from werkzeug.utils import secure_filename
7
  from converter import convert_docx_to_latex
8
  import shutil
9
+ import stat
10
 
11
  app = Flask(__name__)
12
  CORS(app) # Enable CORS for all routes
13
 
14
  # Configuration
15
  app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024 # 16MB max file size
 
 
16
 
17
+ # Use system temp directory for better compatibility with Hugging Face Spaces
18
+ TEMP_BASE_DIR = tempfile.mkdtemp(prefix='docx_converter_')
19
+ UPLOAD_FOLDER = os.path.join(TEMP_BASE_DIR, 'uploads')
20
+ OUTPUT_FOLDER = os.path.join(TEMP_BASE_DIR, 'outputs')
21
+
22
+ # Ensure directories exist with proper permissions
23
+ def create_temp_dirs():
24
+ """Create temporary directories with proper permissions"""
25
+ for directory in [UPLOAD_FOLDER, OUTPUT_FOLDER]:
26
+ os.makedirs(directory, exist_ok=True)
27
+ # Set full permissions for the directory
28
+ try:
29
+ os.chmod(directory, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)
30
+ except OSError:
31
+ # If chmod fails, continue anyway (some systems don't allow it)
32
+ pass
33
+
34
+ # Create directories on startup
35
+ create_temp_dirs()
36
 
37
  # Store conversion tasks
38
  conversion_tasks = {}
 
40
  @app.route('/api/health', methods=['GET'])
41
  def health_check():
42
  """Health check endpoint"""
43
+ return jsonify({
44
+ 'status': 'healthy',
45
+ 'message': 'DOCX to LaTeX API is running',
46
+ 'temp_dir': TEMP_BASE_DIR,
47
+ 'upload_dir': UPLOAD_FOLDER,
48
+ 'output_dir': OUTPUT_FOLDER
49
+ })
50
 
51
  @app.route('/api/upload', methods=['POST'])
52
  def upload_file():
 
65
  # Generate unique task ID
66
  task_id = str(uuid.uuid4())
67
 
68
+ # Save uploaded file using tempfile for better compatibility
69
  filename = secure_filename(file.filename)
 
 
 
 
 
 
 
 
 
 
 
70
 
71
+ # Create a temporary file instead of using a fixed path
72
+ temp_fd, temp_path = tempfile.mkstemp(
73
+ suffix=f'_{filename}',
74
+ prefix=f'{task_id}_',
75
+ dir=UPLOAD_FOLDER
76
+ )
77
+
78
+ try:
79
+ # Close the file descriptor and save the file
80
+ os.close(temp_fd)
81
+ file.save(temp_path)
82
+
83
+ # Set proper permissions on the file
84
+ try:
85
+ os.chmod(temp_path, stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IWGRP)
86
+ except OSError:
87
+ # If chmod fails, continue anyway
88
+ pass
89
+
90
+ # Store task info
91
+ conversion_tasks[task_id] = {
92
+ 'status': 'uploaded',
93
+ 'original_filename': filename,
94
+ 'file_path': temp_path,
95
+ 'output_filename': filename.replace('.docx', '.tex'),
96
+ 'created_at': os.path.getctime(temp_path)
97
+ }
98
+
99
+ return jsonify({
100
+ 'task_id': task_id,
101
+ 'filename': filename,
102
+ 'status': 'uploaded',
103
+ 'message': 'File uploaded successfully'
104
+ })
105
+
106
+ except Exception as e:
107
+ # Clean up the temp file if something goes wrong
108
+ try:
109
+ os.unlink(temp_path)
110
+ except:
111
+ pass
112
+ raise e
113
 
114
  except Exception as e:
115
  return jsonify({'error': f'Upload failed: {str(e)}'}), 500
 
141
  task['status'] = 'converting'
142
  task['output_filename'] = output_filename
143
 
144
+ # Prepare output paths using tempfile for better compatibility
145
+ output_fd, output_path = tempfile.mkstemp(
146
+ suffix=f'_{output_filename}',
147
+ prefix=f'{task_id}_',
148
+ dir=OUTPUT_FOLDER
149
+ )
150
+ os.close(output_fd) # Close file descriptor, we'll write to the path directly
151
+
152
+ media_path = tempfile.mkdtemp(
153
+ prefix=f'{task_id}_media_',
154
+ dir=OUTPUT_FOLDER
155
+ )
156
 
157
  # Perform conversion
158
  success, message = convert_docx_to_latex(
 
480
  except Exception as e:
481
  print(f"Warning: Failed to cleanup old files: {e}")
482
 
483
+ # Add cleanup on application exit
484
+ import atexit
485
+
486
+ def cleanup_on_exit():
487
+ """Clean up temporary directory on exit"""
488
+ try:
489
+ shutil.rmtree(TEMP_BASE_DIR)
490
+ print(f"Cleaned up temporary directory: {TEMP_BASE_DIR}")
491
+ except OSError:
492
+ pass
493
+
494
+ atexit.register(cleanup_on_exit)
495
+
496
  if __name__ == '__main__':
497
  # Cleanup old files on startup
498
  cleanup_old_files()
499
 
500
  # Run the Flask app
501
  print("Starting DOCX to LaTeX API server...")
502
+ print(f"Using temporary directory: {TEMP_BASE_DIR}")
503
  print("API endpoints:")
504
  print(" POST /api/upload - Upload DOCX file")
505
  print(" POST /api/convert - Convert to LaTeX")