# Spaces:
# Sleeping
# Sleeping
# Image for the extract_img_pdf.py Flask app, built for Hugging Face Spaces.
# Layers are ordered least -> most frequently changing so that editing the
# application source does not invalidate the OS / Python dependency layers.
FROM python:3.10-slim

# Build-time only: stops apt/debconf from prompting during package install.
# Declared as ARG (not ENV) so it does not leak into the runtime environment.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime environment:
#   PYTHONUNBUFFERED / PYTHONDONTWRITEBYTECODE - unbuffered logs, no .pyc files
#   FONTCONFIG_*                               - suppress fontconfig warnings (optional)
#   NLTK_DATA                                  - writable location for NLTK corpora/models
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    FONTCONFIG_PATH=/etc/fonts \
    FONTCONFIG_FILE=/etc/fonts/fonts.conf \
    NLTK_DATA=/app/nltk_data

WORKDIR /app

# System dependencies. update + install + list cleanup happen in one layer so
# no stale package index is cached or shipped. poppler-data is listed
# explicitly because --no-install-recommends would otherwise drop it.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        libgl1 \
        libglib2.0-0 \
        libsm6 \
        libxext6 \
        libxrender-dev \
        poppler-data \
        poppler-utils \
        tesseract-ocr \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Python dependencies. Only the manifest is copied first so this layer stays
# cached until requirements.txt itself changes.
COPY requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Unprivileged runtime user. UID 1000 is the user ID Hugging Face Spaces runs
# containers as; owning /app replaces the previous blanket `chmod -R 777 /app`
# while still letting the app write to its cache/data/output directories.
RUN useradd --create-home --uid 1000 appuser \
    && mkdir -p /app/OUTPUTS /app/cache /app/data /app/nltk_data \
    && chown -R appuser:appuser /app
USER appuser

# Pre-download the required NLTK models into $NLTK_DATA at build time.
RUN python -m nltk.downloader -d /app/nltk_data \
    punkt averaged_perceptron_tagger averaged_perceptron_tagger_eng

# Application code goes last: it changes most often.
COPY --chown=appuser:appuser extract_img_pdf.py extract_img_pdf.py
COPY --chown=appuser:appuser templates/ /app/templates
# NOTE(review): copying .env bakes its contents (possibly secrets) into an
# image layer. Kept so the app still finds its configuration; prefer injecting
# these values at runtime (e.g. Space secrets / `docker run --env-file`).
COPY --chown=appuser:appuser .env .env

# Port expected by Hugging Face Spaces (documentation only; does not publish).
EXPOSE 7860

# Exec form so the Python process is PID 1 and receives stop signals directly.
CMD ["python", "extract_img_pdf.py"]