# Start with the NVIDIA CUDA 12.2 base image
FROM nvidia/cuda:12.2.0-base-ubuntu22.04 AS base
ENV DEBIAN_FRONTEND=noninteractive
ENV DEBCONF_NOWARNINGS="yes"
ENV CUDA_VISIBLE_DEVICES=0
ENV LLAMA_CUBLAS=1
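# (LLAMA_CUBLAS=1 is the older llama.cpp/llama-cpp-python build flag; the pip install
# further down uses the newer GGML_CUDA CMake option, so this env var is likely
# redundant and kept only for compatibility with older builds.)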
# Install necessary dependencies and musl
RUN apt-get update && apt-get install -y \
software-properties-common \
wget \
musl \
musl-dev \
musl-tools \
libffi-dev \
git \
build-essential \
pkg-config \
cuda-toolkit-12-2 \
cuda-nvcc-12-2 \
libcublas-12-2 \
libcudnn8 \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
# Install Python 3.11
RUN add-apt-repository ppa:deadsnakes/ppa && \
apt-get update && \
apt-get install -y \
python3.11 \
python3.11-venv \
python3.11-dev \
python3-pip \
tzdata \
&& rm -rf /var/lib/apt/lists/*
# Set Python 3.11 as the default python version
RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1
RUN update-alternatives --set python3 /usr/bin/python3.11
# Create a symlink so that text2vec-transformers can invoke python3.11 from /usr/local/bin.
RUN echo "#### ls /usr/bin | grep -i python; ls -l /usr/local/bin | grep -i python"
RUN ls -l /usr/bin | grep -i python; ls -l /usr/local/bin | grep -i python || true
RUN ln -s /usr/bin/python3.11 /usr/local/bin/python3.11
RUN ls -l /usr/local/bin | grep -i python || true
# Set up environment variables
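# (The CUDA bin directory is put on PATH so nvcc can be found when llama-cpp-python is
# compiled below; LD_LIBRARY_PATH points at the CUDA/cuBLAS libraries for runtime loading.)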
ENV LD_LIBRARY_PATH="/usr/lib/x86_64-linux-gnu:/usr/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH"
ENV PATH="/usr/local/cuda/bin:/app:/app/text2vec-transformers:/app/text2vec-transformers/bin:/usr/local/bin:/usr/bin:$PATH"
# Upgrade pip to support --break-system-packages.
#RUN python3 -m pip install --upgrade pip
# Install the requirements packages, semantic-text-splitter, and llama-cpp-python.
COPY ./requirements.txt /app/requirements.txt
#RUN pip3 install --break-system-packages --no-cache-dir --upgrade -r /app/requirements.txt
#RUN pip3 install --break-system-packages https://files.pythonhosted.org/packages/13/87/e0cb08c2d4bd7d38ab63816b306c8b1e7cfdc0e59bd54462e8b0df069078/semantic_text_splitter-0.6.3-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
RUN pip3 install --no-cache-dir --upgrade -r /app/requirements.txt
RUN pip3 install https://files.pythonhosted.org/packages/13/87/e0cb08c2d4bd7d38ab63816b306c8b1e7cfdc0e59bd54462e8b0df069078/semantic_text_splitter-0.6.3-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
#RUN pip3 install --break-system-packages llama_cpp_python
#RUN FORCE_CMAKE=1 CMAKE_SYSTEM_PROCESSOR=AMD64 pip3 install --break-system-packages --verbose --no-cache-dir llama-cpp-python --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu
#RUN FORCE_CMAKE=1 CMAKE_SYSTEM_PROCESSOR=AMD64 pip3 install --break-system-packages --verbose --no-cache-dir llama-cpp-python
RUN pip3 install ninja
#RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on -DCUDA_PATH=/usr/local/cuda-12.2 -DCUDAToolkit_ROOT=/usr/local/cuda-12.2 -DCUDAToolkit_INCLUDE_DIR=/usr/local/cuda-12/include -DCUDAToolkit_LIBRARY_DIR=/usr/local/cuda-12.2/lib64" FORCE_CMAKE=1 pip install llama-cpp-python - no-cache-dir
#RUN CMAKE_ARGS="GGML_CUDA=on" FORCE_CMAKE=1 pip install --break-system-packages llama-cpp-python --no-cache-dir
RUN CMAKE_ARGS="GGML_CUDA=on" FORCE_CMAKE=1 pip install llama-cpp-python --no-cache-dir
#RUN pip3 install --break-system-packages cffi
RUN pip3 install cffi
RUN pip install huggingface-hub==0.25.2
# Install text2vec-transformers
WORKDIR /app/text2vec-transformers
COPY --from=semitechnologies/transformers-inference:sentence-transformers-multi-qa-MiniLM-L6-cos-v1 /app /app/text2vec-transformers
COPY --from=semitechnologies/transformers-inference:sentence-transformers-multi-qa-MiniLM-L6-cos-v1 /usr/local/bin /app/text2vec-transformers/bin
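# The inference app and its launch scripts are copied out of the official
# semitechnologies/transformers-inference image; custom_prerequisites.py presumably
# pre-downloads/caches the sentence-transformers model at build time so the container
# can start without network access.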
RUN ./custom_prerequisites.py
COPY ./multi-qa-MiniLM-L6-cos-v1 /app/multi-qa-MiniLM-L6-cos-v1
# Copy application files
WORKDIR /app
COPY ./semsearch.py /app/semsearch.py
COPY ./startup.sh /app/startup.sh
COPY ./.streamlit/main.css /app/.streamlit/main.css
COPY ./app.py /app/app.py
RUN chmod 755 /app/startup.sh
# Copy input documents
RUN mkdir -p /app/inputDocs
COPY ./inputDocs/* /app/inputDocs/
# Install Weaviate
WORKDIR /app/weaviate
RUN wget -qO- https://github.com/weaviate/weaviate/releases/download/v1.24.10/weaviate-v1.24.10-linux-amd64.tar.gz | tar -xzf -
# Download the Llama 2 7B Chat model (Q4_0 GGUF quantization)
WORKDIR /app
RUN wget -v https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_0.gguf
#RUN wget -v https://huggingface.co/bartowski/Llama-3.2-3B-Instruct-GGUF/resolve/main/Llama-3.2-3B-Instruct-Q4_0.gguf
#RUN wget -v https://huggingface.co/bartowski/Meta-Llama-3.1-8B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-8B-Instruct-Q8_0.gguf
# Create a non-root user
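# (Hugging Face Spaces runs Docker Spaces as a non-root user with UID 1000, hence the
# fixed uid/gid here.)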
RUN groupadd -g 1000 user && useradd -m -u 1000 -g user user
# Set permissions
RUN chown -R user:user /app
RUN chmod -R 755 /app
# Switch to non-root user
USER user
# Verify Python and musl installations
#RUN python3 --version && \
# ldd --version | grep musl
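# 8080 is Weaviate's default HTTP port; 8501 is Streamlit's default UI port.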
EXPOSE 8080 8501
CMD ["streamlit", "run", "/app/app.py", "--server.headless", "true", "--server.enableCORS", "false", "--server.enableXsrfProtection", "false", "--server.fileWatcherType", "none"]
#CMD ["/app/delay.sh", "1200"]