# Start with the NVIDIA CUDA 12.2 base image
FROM nvidia/cuda:12.2.0-base-ubuntu22.04 AS base

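# Non-interactive apt installs, restrict the container to GPU 0, and enable llama.cpp's cuBLAS backend.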
ENV DEBIAN_FRONTEND=noninteractive
ENV DEBCONF_NOWARNINGS="yes"
ENV CUDA_VISIBLE_DEVICES=0
ENV LLAMA_CUBLAS=1

# Install necessary dependencies and musl
RUN apt-get update && apt-get install -y \
    software-properties-common \
    wget \
    musl \
    musl-dev \
    musl-tools \
    libffi-dev \
    git \
    build-essential \
    pkg-config \
    cuda-toolkit-12-2 \
    cuda-nvcc-12-2 \
    libcublas-12-2 \
    libcudnn8 \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Install Python 3.11
RUN add-apt-repository ppa:deadsnakes/ppa && \
    apt-get update && \
    apt-get install -y \
    python3.11 \
    python3.11-venv \
    python3.11-dev \
    python3-pip \
    tzdata \
    && rm -rf /var/lib/apt/lists/*

# Set Python 3.11 as the default python version
RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1
RUN update-alternatives --set python3 /usr/bin/python3.11

# Create a symlink so that text2vec-transformers can invoke python3 via /usr/local/bin.
RUN echo "#### ls /usr/bin | grep -i python; ls -l /usr/local/bin | grep -i python"
RUN ls -l /usr/bin | grep -i python; ls -l /usr/local/bin | grep -i python || true
RUN ln -s /usr/bin/python3.11 /usr/local/bin/python3.11
RUN ls -l /usr/local/bin | grep -i python || true

# Set up environment variables
ENV LD_LIBRARY_PATH="/usr/lib/x86_64-linux-gnu:/usr/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH"
ENV PATH="/usr/local/cuda/bin:/app:/app/text2vec-transformers:/app/text2vec-transformers/bin:/usr/local/bin:/usr/bin:$PATH"

# Upgrade pip to support --break-system-packages (currently disabled).
#RUN python3 -m pip install --upgrade pip

# Install the requirements packages and the semantic text splitter (llama_cpp is built further below).
COPY ./requirements.txt /app/requirements.txt
#RUN pip3 install --break-system-packages --no-cache-dir --upgrade -r /app/requirements.txt
#RUN pip3 install --break-system-packages https://files.pythonhosted.org/packages/13/87/e0cb08c2d4bd7d38ab63816b306c8b1e7cfdc0e59bd54462e8b0df069078/semantic_text_splitter-0.6.3-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
RUN pip3 install --no-cache-dir --upgrade -r /app/requirements.txt
RUN pip3 install https://files.pythonhosted.org/packages/13/87/e0cb08c2d4bd7d38ab63816b306c8b1e7cfdc0e59bd54462e8b0df069078/semantic_text_splitter-0.6.3-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
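# The semantic_text_splitter wheel above is pinned to 0.6.3 (abi3, manylinux2014 x86_64) via a direct file URL.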

#RUN pip3 install --break-system-packages llama_cpp_python
#RUN FORCE_CMAKE=1 CMAKE_SYSTEM_PROCESSOR=AMD64 pip3 install --break-system-packages --verbose --no-cache-dir llama-cpp-python --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu
#RUN FORCE_CMAKE=1 CMAKE_SYSTEM_PROCESSOR=AMD64 pip3 install --break-system-packages --verbose --no-cache-dir llama-cpp-python 

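# Ninja accelerates the CMake-driven source build of llama-cpp-python below.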
RUN pip3 install ninja

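# Build llama-cpp-python from source with the CUDA backend (GGML_CUDA) enabled; earlier attempts are kept for reference.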
#RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on -DCUDA_PATH=/usr/local/cuda-12.2 -DCUDAToolkit_ROOT=/usr/local/cuda-12.2 -DCUDAToolkit_INCLUDE_DIR=/usr/local/cuda-12/include -DCUDAToolkit_LIBRARY_DIR=/usr/local/cuda-12.2/lib64" FORCE_CMAKE=1 pip install llama-cpp-python --no-cache-dir
#RUN CMAKE_ARGS="-DGGML_CUDA=on" FORCE_CMAKE=1 pip install --break-system-packages llama-cpp-python --no-cache-dir
RUN CMAKE_ARGS="-DGGML_CUDA=on" FORCE_CMAKE=1 pip install llama-cpp-python --no-cache-dir
#RUN pip3 install --break-system-packages cffi
RUN pip3 install cffi
RUN pip install huggingface-hub==0.25.2

# Install text2vec-transformers
WORKDIR /app/text2vec-transformers
COPY --from=semitechnologies/transformers-inference:sentence-transformers-multi-qa-MiniLM-L6-cos-v1 /app /app/text2vec-transformers
COPY --from=semitechnologies/transformers-inference:sentence-transformers-multi-qa-MiniLM-L6-cos-v1 /usr/local/bin /app/text2vec-transformers/bin
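# custom_prerequisites.py ships with the copied transformers-inference image; presumably it downloads/prepares model prerequisites.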
RUN ./custom_prerequisites.py

COPY ./multi-qa-MiniLM-L6-cos-v1 /app/multi-qa-MiniLM-L6-cos-v1

# Copy application files
WORKDIR /app
COPY ./semsearch.py /app/semsearch.py
COPY ./startup.sh /app/startup.sh
COPY ./.streamlit/main.css /app/.streamlit/main.css
COPY ./app.py /app/app.py
RUN chmod 755 /app/startup.sh
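# Note: startup.sh is made executable, but the CMD below launches Streamlit directly.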

# Copy input documents
RUN mkdir -p /app/inputDocs
COPY ./inputDocs/* /app/inputDocs/

# Install Weaviate
WORKDIR /app/weaviate
RUN wget -qO- https://github.com/weaviate/weaviate/releases/download/v1.24.10/weaviate-v1.24.10-linux-amd64.tar.gz | tar -xzf -
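# The release tarball should contain the weaviate binary, landing at /app/weaviate/weaviate.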

# Download Llama model
WORKDIR /app
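# llama-2-7b-chat.Q4_0.gguf is roughly 3.8 GB, so expect a long build step here.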
RUN wget -v https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_0.gguf
#RUN wget -v https://huggingface.co/bartowski/Llama-3.2-3B-Instruct-GGUF/resolve/main/Llama-3.2-3B-Instruct-Q4_0.gguf
#RUN wget -v https://huggingface.co/bartowski/Meta-Llama-3.1-8B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-8B-Instruct-Q8_0.gguf

# Create a non-root user
RUN groupadd -g 1000 user && useradd -m -u 1000 -g user user
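# UID/GID 1000 typically matches the first user on a Linux host, which keeps bind-mount ownership sane.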

# Set permissions
RUN chown -R user:user /app
RUN chmod -R 755 /app

# Switch to non-root user
USER user

# Verify Python and musl installations
#RUN python3 --version && \
#    ldd --version | grep musl

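# 8080: Weaviate HTTP API; 8501: Streamlit UI (both defaults).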
EXPOSE 8080 8501

CMD ["streamlit", "run", "/app/app.py", "--server.headless", "true", "--server.enableCORS", "false", "--server.enableXsrfProtection", "false", "--server.fileWatcherType", "none"]
#CMD ["/app/delay.sh", "1200"]
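
# Build/run sketch (the image name "semsearch-gpu" is illustrative, not from this repo):
#   docker build -t semsearch-gpu .
#   docker run --gpus all -p 8501:8501 -p 8080:8080 semsearch-gpu
# --gpus all requires the NVIDIA Container Toolkit on the host.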