# nanoVLM-inference / Dockerfile
FROM python:3.9-slim
WORKDIR /app
# Install git
RUN apt-get update && apt-get install -y git && rm -rf /var/lib/apt/lists/*
# Copy requirements and install
COPY requirements.txt requirements.txt
RUN echo "DEBUG: Installing packages from requirements.txt" && \
pip install --no-cache-dir -r requirements.txt && \
echo "DEBUG: Finished installing packages."
# Clone the nanoVLM repository which contains generate.py and the models directory
# This also ensures the 'models' module is available for VisionLanguageModel import
RUN echo "DEBUG: Cloning huggingface/nanoVLM repository..." && \
git clone https://github.com/huggingface/nanoVLM.git /app/nanoVLM && \
echo "DEBUG: nanoVLM repository cloned to /app/nanoVLM."
# Add a test image to the Space. You need to create a simple 'test_image.jpg'
# and add it to the root of your Space repo (a one-liner to generate one is
# sketched below).
COPY ./test_image.jpg /app/test_image.jpg
RUN if [ ! -f /app/test_image.jpg ]; then echo "ERROR: test_image.jpg not found!"; exit 1; fi
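# If you don't have a test image handy, a solid-color one can be generated
# locally with Pillow before pushing to the Space (run this outside the build):
#   python -c "from PIL import Image; Image.new('RGB', (256, 256), 'blue').save('test_image.jpg')"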
# Set Python path to include the nanoVLM models directory, so `from models...` works
ENV PYTHONPATH="/app/nanoVLM:${PYTHONPATH}"
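# With /app/nanoVLM on PYTHONPATH, generate.py's package-style import resolves,
# e.g. (module path assumed from the repo layout described above):
#   from models.vision_language_model import VisionLanguageModel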
# Define a writable cache directory for Hugging Face downloads
ENV HF_HOME=/app/.cache/huggingface
# Create cache directory with write permissions
RUN mkdir -p $HF_HOME && chmod -R 777 $HF_HOME
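# Downloads then land under $HF_HOME/hub following huggingface_hub's standard
# cache layout, e.g. /app/.cache/huggingface/hub/models--lusxvr--nanoVLM-222M
# (path layout is an assumption based on the hub's cache conventions).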
# The generate.py script is at /app/nanoVLM/generate.py
# It takes arguments like --model_path, --image_path, --prompt, --device
# We will run it directly. Its output will go to the Space's container logs.
CMD ["python", "-u", "/app/nanoVLM/generate.py", \
"--model_path", "lusxvr/nanoVLM-222M", \
"--image_path", "/app/test_image.jpg", \
"--prompt", "describe this image in detail", \
"--device", "cpu", \
"--num_generations", "1", \
"--max_new_tokens", "50"]