# Hugging Face Space: witcher23/nanoVLM-inference (status: Running)
# File size: 1,813 Bytes

# Base image: the slim variant of the official Python 3.9 image.
FROM python:3.9-slim

# Relative paths in the instructions below resolve against /app.
WORKDIR /app

# Install git, which a later step uses to clone the nanoVLM sources over HTTPS.
# --no-install-recommends keeps optional packages out of the image;
# ca-certificates is listed explicitly so the HTTPS clone does not depend on it
# being pulled in as a recommendation. The apt package lists are removed in the
# same layer so they never end up in the image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        ca-certificates \
        git && \
    rm -rf /var/lib/apt/lists/*

# Bring in only the dependency manifest and install from it. Files copied by
# later instructions therefore cannot invalidate this layer's build cache.
# --no-cache-dir keeps pip's download cache out of the image.
COPY requirements.txt ./
RUN echo "DEBUG: Installing packages from requirements.txt" \
 && pip install --no-cache-dir --requirement requirements.txt \
 && echo "DEBUG: Finished installing packages."

# Clone the nanoVLM repository, which contains generate.py and the `models`
# directory needed for the VisionLanguageModel import.
#
# --depth 1 fetches only the tip commit: the image needs the source files, not
# the project's history.
#
# NANOVLM_REF optionally selects a branch or tag to clone, e.g.
#   docker build --build-arg NANOVLM_REF=<branch-or-tag> .
# When it is empty (the default) the repository's default branch is cloned,
# exactly as before.
ARG NANOVLM_REF=""
RUN echo "DEBUG: Cloning huggingface/nanoVLM repository..." && \
    git clone --depth 1 ${NANOVLM_REF:+--branch "$NANOVLM_REF"} \
        https://github.com/huggingface/nanoVLM.git /app/nanoVLM && \
    echo "DEBUG: nanoVLM repository cloned to /app/nanoVLM."

# Add the test image that the default command passes to generate.py.
# A 'test_image.jpg' file must exist at the root of the Space repo (the build
# context). COPY aborts the build when its source file is missing, so no
# separate existence check is needed after it.
COPY ./test_image.jpg /app/test_image.jpg

# Put the nanoVLM checkout on the Python path so `from models...` works.
# The value is set outright rather than as "/app/nanoVLM:${PYTHONPATH}":
# PYTHONPATH is not defined earlier in this file, so the appended form expands
# to "/app/nanoVLM:" and leaves a trailing empty entry, which Python resolves
# to the current working directory.
ENV PYTHONPATH=/app/nanoVLM

# Define the cache directory for Hugging Face downloads.
ENV HF_HOME=/app/.cache/huggingface

# Create an unprivileged user and give it ownership of the cache directory,
# instead of making the directory world-writable with chmod 777.
# UID 1000 is the user ID that Hugging Face Docker Spaces run containers with,
# so the cache stays writable there; elsewhere the container no longer runs
# as root.
RUN useradd --create-home --uid 1000 user && \
    mkdir -p "$HF_HOME" && \
    chown -R user:user "$HF_HOME"

# Everything after this point (including the container's command) runs as the
# unprivileged user. Keep instructions that need root above this line.
USER user

# Default command: run nanoVLM's generate.py from the cloned checkout against
# the bundled test image, on CPU. The arguments are passed in exec (JSON array)
# form, so no shell is involved, and `python -u` keeps stdout/stderr unbuffered
# so the script's output shows up in the Space's container logs.
CMD [ \
    "python", "-u", "/app/nanoVLM/generate.py", \
    "--model_path",      "lusxvr/nanoVLM-222M", \
    "--image_path",      "/app/test_image.jpg", \
    "--prompt",          "describe this image in detail", \
    "--device",          "cpu", \
    "--num_generations", "1", \
    "--max_new_tokens",  "50" \
]