File size: 1,871 Bytes
43aa272
 
 
 
 
 
ad61798
89561b9
5b68aa9
 
 
 
 
 
 
 
 
43aa272
 
ad61798
 
 
43aa272
 
 
 
 
 
 
 
 
 
 
 
 
ad61798
43aa272
 
ad61798
 
89561b9
ad61798
 
 
 
43aa272
 
 
 
ad61798
 
 
89561b9
 
ad61798
5b68aa9
 
 
 
 
43aa272
5b68aa9
 
89561b9
 
 
 
43aa272
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# Base image: slim Python 3.10 variant; the package steps below use apt-get on it.
FROM python:3.10-slim

# Install system dependencies: download/signing tools, a compiler toolchain,
# Xvfb, and shared libraries for the browser installed later in this file.
# --no-install-recommends keeps optional packages out of the image, so the
# recommended packages this build may still rely on are listed explicitly:
# ca-certificates (HTTPS downloads via wget) and xauth (needed by xvfb-run).
# Packages are sorted alphabetically to keep diffs small.
# The apt lists are removed in the same layer so they never reach the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    ca-certificates \
    fontconfig \
    gnupg \
    libasound2 \
    libatk1.0-0 \
    libgbm1 \
    libgtk-3-0 \
    libnss3 \
    libxrandr2 \
    libxshmfence1 \
    libxss1 \
    wget \
    xauth \
    xvfb \
    && rm -rf /var/lib/apt/lists/*

# Unprivileged account "crawler" (UID 1000) with its own home directory
RUN useradd --create-home --uid 1000 crawler

# Install Google Chrome (stable channel) plus additional font packages.
# The repository signing key is converted to a dedicated keyring and bound to
# the source entry with signed-by, because apt-key is deprecated and no longer
# shipped by newer apt releases. The key is downloaded to a file instead of
# being piped, so a failed download aborts the build rather than being masked
# by the exit status of the last command in a pipeline.
# The source entry is written with ">" so the list file holds exactly one line.
RUN wget -q -O /tmp/google-linux-signing-key.pub https://dl-ssl.google.com/linux/linux_signing_key.pub \
    && gpg --batch --yes --output /usr/share/keyrings/google-chrome.gpg --dearmor /tmp/google-linux-signing-key.pub \
    && rm -f /tmp/google-linux-signing-key.pub \
    && echo "deb [arch=amd64 signed-by=/usr/share/keyrings/google-chrome.gpg] http://dl.google.com/linux/chrome/deb/ stable main" > /etc/apt/sources.list.d/google.list \
    && apt-get update \
    && apt-get install -y --no-install-recommends \
    fonts-freefont-ttf \
    fonts-ipafont-gothic \
    fonts-kacst \
    fonts-thai-tlwg \
    fonts-wqy-zenhei \
    google-chrome-stable \
    && rm -rf /var/lib/apt/lists/*

# Application directory; relative paths in later instructions resolve against it
WORKDIR /app

# Pre-create the crawler user's cache and data directories, then give that
# user ownership of both its home directory and /app
RUN mkdir -p \
        /home/crawler/.cache/fontconfig \
        /home/crawler/.cache/pip \
        /home/crawler/.crawl4ai \
    && chown -R crawler:crawler /home/crawler /app

# Environment for the remaining build steps and the running container,
# declared in one instruction: Python flags, Gradio bind address/port, and
# home/cache locations under /home/crawler
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_SERVER_PORT=7860 \
    HOME=/home/crawler \
    FONTCONFIG_PATH=/etc/fonts \
    XDG_CACHE_HOME=/home/crawler/.cache \
    PIP_CACHE_DIR=/home/crawler/.cache/pip

# Python requirements and the Playwright Chromium browser (with its system
# dependencies) are installed while the build still runs as root.
# Only requirements.txt is copied here, ahead of the rest of the build context.
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt \
    && playwright install --with-deps chromium \
    && rm -rf /root/.cache/*

# From here on, build steps and the running container use the unprivileged account
USER crawler

# Bring the build context into the working directory, owned by crawler
COPY --chown=crawler:crawler . ./

# Documented listening port (same value as GRADIO_SERVER_PORT)
EXPOSE 7860/tcp

# Default command: run app.py with the image's Python interpreter
CMD ["python", "app.py"]