# Spaces:
# Runtime error
# Runtime error
# File size: 750 Bytes
# fe7c322 4a15a33 fe7c322 4a15a33 fe7c322 4a15a33 fe7c322 4a15a33 fe7c322 4a15a33 fe7c322 4a15a33 |
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 |
# Image for a custom FastAPI/Gradio application served by uvicorn on top of
# the vLLM OpenAI-compatible base image.

# 1. Start from the official vLLM OpenAI-compatible image.
# The tag is a build argument so a build can be pinned for reproducibility:
#   docker build --build-arg VLLM_TAG=<release-tag> .
# The default keeps the previous behaviour of tracking `latest`.
# NOTE(review): `latest` moves over time; pin a release tag (or digest) that is
# known to work with gradio==4.31.0 once one has been validated.
ARG VLLM_TAG=latest
FROM vllm/vllm-openai:${VLLM_TAG}

# 2. Clear the entrypoint inherited from the base image so that the CMD below
# is executed as-is instead of being passed as arguments to it.
ENTRYPOINT []

# 3. Install the extra packages needed for the custom Gradio UI.
# The base image is expected to already provide vllm, fastapi, pydantic, and
# torch, so only the UI and the ASGI server are requested here.
# `--no-cache-dir` keeps pip's download cache out of the image layer.
RUN pip3 install --no-cache-dir \
        gradio==4.31.0 \
        uvicorn

# 4. Set the application directory (created if missing) and copy the app into
# it. uvicorn resolves `app:app` relative to this working directory.
WORKDIR /app
COPY ./app.py /app/app.py

# 5. Document the port the application listens on (does not publish it).
EXPOSE 7860

# 6. Run the combined FastAPI/Gradio application with uvicorn.
# Exec (JSON-array) form: the line must contain nothing but the array, otherwise
# Docker falls back to shell form and the command is not run as intended.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]