File size: 1,336 Bytes
7ec6a6a
 
 
0d08e31
 
 
7ec6a6a
 
 
 
6eec827
 
 
 
 
 
 
7ec6a6a
 
 
 
 
 
 
 
 
 
 
 
 
e168ec6
 
 
 
 
 
0d08e31
e168ec6
3760b8e
 
 
7ec6a6a
e168ec6
 
 
 
7ec6a6a
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# Image for the Flask app in extract_img_pdf.py (listens on port 7860).
#
# Layers are ordered from least to most frequently changing so that editing
# application code does not invalidate the OS-package, pip, or NLTK layers:
#   base -> OS packages -> runtime user -> Python deps -> NLTK data -> app code
FROM python:3.10-slim

# Build-time only: keeps apt/debconf from prompting during `docker build`
# without leaking the setting into the running container's environment.
ARG DEBIAN_FRONTEND=noninteractive

# Python runtime behaviour: unbuffered stdout/stderr, no .pyc files.
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1

# Set working directory (created automatically if missing)
WORKDIR /app

# Install system dependencies. update + install + list cleanup stay in one
# layer so the package index is never stale and never shipped in the image.
# NOTE(review): --no-install-recommends would shrink the image, but confirm the
# app does not rely on any recommended packages before adding it.
RUN apt-get update && apt-get install -y \
    build-essential \
    libgl1 \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender-dev \
    poppler-utils \
    tesseract-ocr \
    && apt-get clean && rm -rf /var/lib/apt/lists/*

# Unprivileged runtime user that owns /app and its writable directories.
# This replaces the previous world-writable `chmod -R 777 /app`.
# UID 1000 matches the user ID Hugging Face Spaces runs containers as.
RUN useradd --create-home --uid 1000 appuser \
    && mkdir -p /app/OUTPUTS /app/cache /app/data /app/nltk_data \
    && chown -R appuser:appuser /app

# Copy only the dependency manifest first so the pip layer is reused until
# requirements.txt itself changes.
COPY --chown=appuser:appuser requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Fontconfig Warning Suppression (optional)
ENV FONTCONFIG_PATH=/etc/fonts \
    FONTCONFIG_FILE=/etc/fonts/fonts.conf

# Set writable path for nltk data
ENV NLTK_DATA=/app/nltk_data

# Everything below runs without root privileges, including the container's
# main process.
USER appuser

# Pre-download required NLTK models (as appuser, so the files are owned by the
# runtime user; relies on nltk being installed via requirements.txt).
RUN python -m nltk.downloader -d /app/nltk_data \
    punkt averaged_perceptron_tagger averaged_perceptron_tagger_eng

# Application code last: it changes most often.
COPY --chown=appuser:appuser extract_img_pdf.py extract_img_pdf.py
COPY --chown=appuser:appuser templates/ /app/templates
# NOTE(review): copying .env bakes its contents (likely credentials) into an
# image layer that anyone with the image can read. Prefer runtime environment
# variables / platform secrets; confirm the app can start without this file
# before removing the line.
COPY --chown=appuser:appuser .env .env

# Expose the required port for HF Spaces (documentation only; does not publish)
EXPOSE 7860

# Set the command to run your Flask app (exec form so it receives signals)
CMD ["python", "extract_img_pdf.py"]