# chatCPU / Dockerfile
# Use the official Ollama base image, which already includes Ollama
FROM ollama/ollama:latest
# Set environment variables to prevent interactive prompts during apt operations
ENV DEBIAN_FRONTEND=noninteractive
# Install system dependencies: git (optional), curl (used below to wait for the Ollama API,
# installed explicitly in case the base image does not ship it), and python3/pip for the application
RUN apt-get update && apt-get install -y --no-install-recommends git curl python3 python3-pip && \
    rm -rf /var/lib/apt/lists/*
# Set up a non-root user as recommended for Hugging Face Spaces
RUN useradd -m -u 1000 user
USER user
# OLLAMA_HOST=0.0.0.0 allows Ollama to be accessed from outside localhost
# (the comment must not trail the ENV instruction itself, or it becomes part of the value)
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH \
    OLLAMA_HOST=0.0.0.0
# Set the working directory inside the container
WORKDIR $HOME/app
# Define the model to be pulled
ENV OLLAMA_HF_MODEL="hf.co/unsloth/gemma-3-4b-it-qat-GGUF:Q4_K_M"
# Pull the model during the build process
# Start ollama serve in background, wait for it, pull the model, then kill ollama.
# This ensures the model is downloaded and cached within the image.
RUN ollama serve & \
    echo "Waiting for Ollama to start for model pull..." && \
    sleep 5 && \
    while ! curl -s http://localhost:11434 > /dev/null; do \
        sleep 1; \
    done && \
    echo "Ollama started. Pulling model: ${OLLAMA_HF_MODEL}" && \
    ollama pull ${OLLAMA_HF_MODEL} && \
    echo "Model pull complete. Stopping Ollama for build process." && \
    pkill ollama || true  # '|| true' prevents a build failure if pkill returns non-zero because Ollama already stopped
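# Once the container is running, the cached model can be exercised through the
# Ollama REST API; this is how app.py is expected to reach it. An illustrative
# request (not taken from app.py) would be:
#
#   curl http://localhost:11434/api/generate -d '{
#     "model": "hf.co/unsloth/gemma-3-4b-it-qat-GGUF:Q4_K_M",
#     "prompt": "Hello",
#     "stream": false
#   }'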
# Copy the application files, owned by the non-root user so they can be
# chmod-ed and executed below (a plain COPY would leave them owned by root)
COPY --chown=user:user app.py requirements.txt run.sh ./
# Install Python dependencies required by your Gradio application
RUN pip install --no-cache-dir -r requirements.txt
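# requirements.txt is not shown in this file; at a minimum it is assumed to list
# the Gradio UI library and an HTTP client for talking to the Ollama API, e.g.:
#
#   gradio
#   requests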
# Make the startup script executable
RUN chmod +x run.sh
# Expose the port that your Gradio application will listen on.
# Hugging Face Spaces typically use port 7860 for Gradio apps.
EXPOSE 7860
# The ollama/ollama base image defines ENTRYPOINT ["/bin/ollama"], so it must be
# overridden here; otherwise a bare CMD would be passed to the ollama binary as
# arguments. run.sh starts Ollama and then the Gradio application.
ENTRYPOINT ["./run.sh"]
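# run.sh itself is not included in this file. A minimal sketch of what it is
# assumed to contain (start Ollama, wait for the API, then launch the app on
# port 7860) could look like:
#
#   #!/bin/bash
#   # Start the Ollama server in the background.
#   ollama serve &
#   # Wait until the Ollama API answers on its default port.
#   until curl -s http://localhost:11434 > /dev/null; do sleep 1; done
#   # Launch the Gradio app; it must bind 0.0.0.0:7860 for Spaces to reach it.
#   exec python3 app.py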