# chatCPU / Dockerfile
# Use a base Ubuntu image
FROM ubuntu:22.04
# Set environment variables to prevent interactive prompts during apt operations
ENV DEBIAN_FRONTEND=noninteractive
# Install system dependencies: curl for the Ollama installer, wget for the model
# download, git (optional, but handy for debugging), and Python 3 with pip for
# the application. apt-get is preferred over apt in scripts, and removing the
# package lists afterwards keeps the image layer smaller.
RUN apt-get update && apt-get install -y --no-install-recommends \
        curl wget git python3 python3-pip \
    && rm -rf /var/lib/apt/lists/*
# Install Ollama using its official installation script
# This script automatically detects the system architecture and installs the correct binary.
RUN curl -fsSL https://ollama.com/install.sh | sh
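# The script above always installs the latest release. Per Ollama's install
# documentation, it also honors OLLAMA_VERSION if you want reproducible builds
# (the version number below is illustrative, not a recommendation):
#   RUN curl -fsSL https://ollama.com/install.sh | OLLAMA_VERSION=0.5.7 sh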
# Set the working directory inside the container
WORKDIR /app
# Define environment variables for the model repository and filename
# This makes it easy to change the model later without editing the RUN command directly.
ENV MODEL_REPO="unsloth/gemma-3-4b-it-qat-GGUF"
ENV MODEL_FILENAME="gemma-3-4b-it-qat.Q4_K_M.gguf"
# Download the specific GGUF model file directly from Hugging Face Hub.
# We use 'wget -O' to save the file with the desired filename in the current directory.
# The 'resolve/main' path ensures we get the raw file content.
RUN wget -O "${MODEL_FILENAME}" "https://huggingface.co/${MODEL_REPO}/resolve/main/${MODEL_FILENAME}"
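# With the default values above, this resolves to:
#   https://huggingface.co/unsloth/gemma-3-4b-it-qat-GGUF/resolve/main/gemma-3-4b-it-qat.Q4_K_M.gguf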
# Copy the Modelfile, application script, Python requirements, and the startup
# script into the container in a single layer
COPY Modelfile app.py requirements.txt run.sh ./
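# The Modelfile tells Ollama how to build a local model from the downloaded
# GGUF file. A minimal sketch of what it might contain (an assumption, not the
# repo's actual file; the temperature value is illustrative):
#   FROM ./gemma-3-4b-it-qat.Q4_K_M.gguf
#   PARAMETER temperature 0.7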
# Install Python dependencies required by your Gradio application
RUN pip3 install --no-cache-dir -r requirements.txt
# Make the startup script executable
RUN chmod +x run.sh
# Expose the port that your Gradio application will listen on.
# Hugging Face Spaces typically use port 7860 for Gradio apps.
EXPOSE 7860
# Set the entrypoint for the container to execute our startup script.
# This script will start Ollama and then your application.
CMD ["./run.sh"]