# nanoVLM-inference / Dockerfile
FROM python:3.9-slim
WORKDIR /app
# Install git
RUN apt-get update && apt-get install -y git && rm -rf /var/lib/apt/lists/*
# Copy requirements and install
COPY requirements.txt requirements.txt
RUN echo "DEBUG: Installing packages from requirements.txt" && \
pip install --no-cache-dir -r requirements.txt && \
echo "DEBUG: Finished installing packages."
# Clone the nanoVLM repository which contains generate.py and the models directory
# This also ensures the 'models' module is available for VisionLanguageModel import
RUN echo "DEBUG: Cloning huggingface/nanoVLM repository..." && \
git clone https://github.com/huggingface/nanoVLM.git /app/nanoVLM && \
echo "DEBUG: nanoVLM repository cloned to /app/nanoVLM."
# Add a test image to the Space. You need to create a simple 'test_image.jpg'
# and add it to the root of your Space repo (a one-liner to generate one is
# sketched below).
COPY ./test_image.jpg /app/test_image.jpg
RUN if [ ! -f /app/test_image.jpg ]; then echo "ERROR: test_image.jpg not found!"; exit 1; fi
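# If you don't have a test image handy, a solid-color one can be generated
# locally with Pillow before pushing to the Space (run this outside the build):
#   python -c "from PIL import Image; Image.new('RGB', (256, 256), 'blue').save('test_image.jpg')"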
# Set Python path to include the nanoVLM models directory, so `from models...` works
ENV PYTHONPATH="/app/nanoVLM:${PYTHONPATH}"
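# With /app/nanoVLM on PYTHONPATH, generate.py's package-style import resolves,
# e.g. (module path assumed from the repo layout described above):
#   from models.vision_language_model import VisionLanguageModel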
# Define a writable cache directory for Hugging Face downloads
ENV HF_HOME=/app/.cache/huggingface
# Create cache directory with write permissions
RUN mkdir -p $HF_HOME && chmod -R 777 $HF_HOME
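# Downloads then land under $HF_HOME/hub following huggingface_hub's standard
# cache layout, e.g. /app/.cache/huggingface/hub/models--lusxvr--nanoVLM-222M
# (path layout is an assumption based on the hub's cache conventions).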
# The generate.py script is at /app/nanoVLM/generate.py
# It takes arguments like --model_path, --image_path, --prompt, --device
# We will run it directly. Its output will go to the Space's container logs.
CMD ["python", "-u", "/app/nanoVLM/generate.py", \
"--model_path", "lusxvr/nanoVLM-222M", \
"--image_path", "/app/test_image.jpg", \
"--prompt", "describe this image in detail", \
"--device", "cpu", \
"--num_generations", "1", \
"--max_new_tokens", "50"]