# Hosting-page status text captured alongside this file ("Spaces: Sleeping"); not part of the build.
# Base image.
# Pinned to the bullseye variant: the MongoDB 6.0 apt repository publishes
# Debian packages for buster/bullseye only, so a floating "-slim" tag that
# moves to a newer Debian release breaks the mongodb-org install below.
FROM python:3.10-slim-bullseye

# Set working directory (created automatically if missing)
WORKDIR /app

# Install system dependencies.
# ca-certificates is listed explicitly because the MongoDB key and repository
# are fetched over HTTPS; gnupg is needed to de-armor the repository key.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        gnupg \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Install MongoDB 6.0.
# The signing key goes into a dedicated keyring referenced via "signed-by"
# instead of the deprecated `apt-key add`, and is downloaded to a file first
# so a failed download stops the build (no pipe that could mask the error).
RUN curl -fsSL -o /tmp/mongodb-server-6.0.asc https://www.mongodb.org/static/pgp/server-6.0.asc \
    && gpg --dearmor -o /usr/share/keyrings/mongodb-server-6.0.gpg /tmp/mongodb-server-6.0.asc \
    && rm -f /tmp/mongodb-server-6.0.asc \
    && echo "deb [ signed-by=/usr/share/keyrings/mongodb-server-6.0.gpg ] https://repo.mongodb.org/apt/debian bullseye/mongodb-org/6.0 main" \
        > /etc/apt/sources.list.d/mongodb-org-6.0.list \
    && apt-get update \
    && apt-get install -y mongodb-org \
    && mkdir -p /data/db \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Install Redis
RUN apt-get update && apt-get install -y --no-install-recommends \
        redis-server \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
# Copy only the dependency manifest first so the pip layer below stays cached
# until requirements.txt itself changes.
COPY requirements.txt .

# Install Python dependencies (no pip cache kept in the image)
RUN pip install --no-cache-dir -r requirements.txt

# Copy the crawler code into the working directory
COPY . .
# Create the storage directories (HTML pages, logs, exports) in one call;
# -p also creates the /data/storage parent.
RUN mkdir -p \
        /data/storage/html_pages \
        /data/storage/logs \
        /data/storage/exports
# Expose ports (documentation only; publishing still happens at `docker run`):
#   9100  - Prometheus metrics
#   27017 - MongoDB
#   6379  - Redis
EXPOSE 9100 27017 6379
# Set environment variables.
# Both URIs point at localhost, matching the MongoDB and Redis ports exposed
# by this image; PYTHONUNBUFFERED=1 makes Python write output unbuffered.
ENV MONGODB_URI=mongodb://localhost:27017/ \
    REDIS_URI=redis://localhost:6379/0 \
    PYTHONUNBUFFERED=1
# Create the entrypoint script.
# It starts MongoDB and Redis in the background, waits until both answer, then
# hands control to the requested command.
# - Written with printf (one argument per script line) so the result does not
#   depend on how the build shell's `echo` treats "\n" escapes.
# - The readiness probe uses `mongosh`: the legacy `mongo` shell is not part of
#   MongoDB 6.0, so probing with it would never succeed.
# - The single quotes are intentional: $# and "$@" must reach the script
#   unexpanded.
RUN printf '%s\n' \
        '#!/bin/bash' \
        '# Abort if a service fails to launch instead of waiting forever.' \
        'set -e' \
        '' \
        '# Start MongoDB' \
        'mongod --fork --logpath /var/log/mongodb.log' \
        '' \
        '# Start Redis' \
        'redis-server --daemonize yes' \
        '' \
        '# Check if services are running' \
        'echo "Waiting for MongoDB to start..."' \
        'until mongosh --quiet --eval "db.adminCommand({ ping: 1 })" > /dev/null 2>&1; do' \
        '    sleep 1' \
        'done' \
        '' \
        'echo "Waiting for Redis to start..."' \
        'until redis-cli ping > /dev/null 2>&1; do' \
        '    sleep 1' \
        'done' \
        '' \
        'echo "All services are running!"' \
        '' \
        '# Execute the provided command or default to help' \
        'if [ $# -eq 0 ]; then' \
        '    exec python crawl.py --help' \
        'else' \
        '    exec "$@"' \
        'fi' \
        > /app/entrypoint.sh \
    && chmod +x /app/entrypoint.sh
# Set entrypoint (exec form, so the script is run directly rather than through
# `/bin/sh -c`).
ENTRYPOINT ["/app/entrypoint.sh"]

# Default command: show the crawler's help. It is passed to the entrypoint as
# its arguments and can be overridden at `docker run`.
CMD ["python", "crawl.py", "--help"]