# Hugging Face Space: SkyNetWalker/chatCPU (status: Running)
# File size: 1,821 Bytes

# Base image: the official Python 3.9 image. It supplies the Python interpreter
# and pip that are used further down to install and run the Gradio app.
# NOTE(review): the tag is pinned only to the minor version; consider pinning a
# patch release or an image digest if reproducible builds are required.
FROM python:3.9

# Install the system tools needed during the build: curl fetches the Ollama
# installer and wget downloads the model file; ca-certificates lets both verify
# HTTPS endpoints.
# DEBIAN_FRONTEND is declared as a build ARG rather than an ENV so apt runs
# without interactive prompts during the build, but the setting is not baked
# into the environment of the running container.
# --no-install-recommends keeps optional packages out of the image, and the apt
# package lists are removed in the same layer so they never add to its size.
ARG DEBIAN_FRONTEND=noninteractive
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Install Ollama using its official installation script.
# The script is piped from curl into sh. Under the default /bin/sh only the exit
# status of the last command in a pipeline is checked, so a failed download
# would go unnoticed. Switching the build shell to bash with pipefail makes this
# step fail when curl fails. The SHELL setting also applies to the RUN
# instructions that follow.
# NOTE(review): this installs whichever Ollama release is current at build
# time; pin a version if reproducible builds are required.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
RUN curl -fsSL https://ollama.com/install.sh | sh

# Model selection: the Hugging Face repository and the GGUF file inside it.
# Edit these two values to switch to a different model.
ENV MODEL_REPO="unsloth/gemma-3-4b-it-qat-GGUF" \
    MODEL_FILENAME="gemma-3-4b-it-qat.Q4_K_M.gguf"

# Add an unprivileged account (UID 1000) with its own home directory, following
# the Hugging Face Docker guide. [1] Running as a non-root user is a security
# best practice and avoids permission errors with model caching.
RUN useradd --create-home --uid 1000 user

# Use an app directory under that user's home as the working directory for the
# instructions that follow.
WORKDIR /home/user/app

# Download the GGUF model file directly from Hugging Face Hub into the working
# directory.
# - The URL and output name are quoted so that overridden MODEL_REPO /
#   MODEL_FILENAME values containing shell-special characters cannot split the
#   command.
# - --progress=dot:giga keeps the build log from filling up with progress-bar
#   output during a large download.
# - This step runs as root, so ownership of the working directory and the model
#   file is handed to the non-root user in the same layer; otherwise the runtime
#   user could not create files next to the application.
RUN wget --progress=dot:giga -O "${MODEL_FILENAME}" \
        "https://huggingface.co/${MODEL_REPO}/resolve/main/${MODEL_FILENAME}" \
    && chown -R user:user .

# Install Python dependencies before copying the application sources.
# requirements.txt is copied on its own so the pip layer is rebuilt only when
# the dependency list changes, not on every edit to app.py, run.sh or the
# Modelfile.
COPY --chown=user:user requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application files and set their ownership to the non-root user. [1]
COPY --chown=user:user Modelfile app.py run.sh ./

# Make the startup script executable.
RUN chmod +x run.sh

# Drop root privileges: the remaining instructions, including the container's
# startup command, run as the unprivileged account. [1]
USER user

# Runtime environment for the user and for Ollama. [1]
#   HOME        - the user's home directory, so cached models are stored in a
#                 writable location
#   PATH        - puts the user's local bin directory ahead of the existing PATH
#   OLLAMA_HOST - 0.0.0.0 so Ollama is accessible within the container network
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH \
    OLLAMA_HOST=0.0.0.0

# Document the TCP port used by the Gradio application.
EXPOSE 7860/tcp

# Default command: launch the startup script from the working directory
# (exec form, so no intermediate shell is involved).
CMD ["./run.sh"]