MVPilgrim committed on
Commit
720552c
·
1 Parent(s): b1f73d1

8vcpu invalid instruction

Browse files
Files changed (6) hide show
  1. Dockerfile +4 -22
  2. Dockerfile.Hld02 +93 -0
  3. app.py +1 -1
  4. app.py.Hld02 +622 -0
  5. startup.sh +3 -0
  6. startup.sh.Hld02 +60 -0
Dockerfile CHANGED
@@ -5,7 +5,9 @@ FROM python:3.11.5
5
  #FROM python:3.11.9-alpine
6
  #FROM python:3.11-bookworm
7
 
8
- RUN echo "\n\n############################################# Dockerfile DbgUI ######################################\n\n"
 
 
9
 
10
  #ENTRYPOINT ["/app/startup.sh"]
11
  #RUN apt-get update && \
@@ -18,14 +20,9 @@ WORKDIR /app
18
  COPY ./requirements.txt /app/requirements.txt
19
  COPY ./semsearch.py /app/semsearch.py
20
  COPY ./startup.sh /app/startup.sh
21
- COPY ./semsearchDbgUI.py /app/semsearchDbgUI.py
22
- COPY ./startupDbgUI.sh /app/startupDbgUI.sh
23
  COPY ./.streamlit/main.css /app/.streamlit/main.css
24
  COPY ./app.py /app/app.py
25
- COPY ./cmd.sh /app/cmd.sh
26
- RUN chmod 755 /app/startup.sh /app/cmd.sh
27
-
28
- COPY ./multi-qa-MiniLM-L6-cos-v1 /app/multi-qa-MiniLM-L6-cos-v1
29
 
30
  RUN mkdir -p /app/inputDocs
31
  COPY ./inputDocs/* /app/inputDocs
@@ -58,8 +55,6 @@ COPY --from=semitechnologies/transformers-inference:sentence-transformers-multi-
58
  COPY ./multi-qa-MiniLM-L6-cos-v1 /app/app/text2vec-transformers
59
 
60
  ENV PATH="/usr/bin/local:/app/text2vec-transformers:/app/text2vec-transformers/bin:${PATH}"
61
- #RUN pip install -r requirements.txt--server.port=8501 --server.address=0.0.0.0
62
- #RUN pip install nltk==3.8.1 optimum==1.13.2 onnxruntime==1.16.1 onnx==1.14.1
63
  RUN ./custom_prerequisites.py
64
 
65
  ##############################
@@ -69,25 +64,12 @@ RUN chmod -R 755 /app
69
  RUN chown -R user /app
70
  RUN chgrp -R user /app
71
 
72
- #############################################
73
- # Specify /data volume.
74
- #VOLUME /data
75
-
76
  WORKDIR /app
77
  USER user
78
 
79
- ##############################################################################
80
- # Start the weaviate vector database, text2vec-transformers and the semantic search app.
81
- #RUN /app/startup.sh
82
- #RUN --mount=type=cache,target=/data,mode=777 /app/startup.sh
83
- #RUN --mount=type=cache,target=/data,mode=777 echo "### Mounting /data"
84
- #CMD ["/app/startupDbgUI.sh"]
85
  EXPOSE 8501
86
- #CMD /app/startup.sh; /usr/local/bin/streamlit run semsearch.py --server.port=8501 --server.address=0.0.0.0
87
  CMD streamlit run /app/app.py \
88
  --server.headless true \
89
  --server.enableCORS false \
90
  --server.enableXsrfProtection false \
91
  --server.fileWatcherType none
92
-
93
- #CMD python apptst.py
 
5
  #FROM python:3.11.9-alpine
6
  #FROM python:3.11-bookworm
7
 
8
+ RUN echo "\n\n############################################# Dockerfile ######################################\n\n"
9
+ RUN echo "### cat /proc/cpuinfo"
10
+ RUN cat /proc/cpuinfo
11
 
12
  #ENTRYPOINT ["/app/startup.sh"]
13
  #RUN apt-get update && \
 
20
  COPY ./requirements.txt /app/requirements.txt
21
  COPY ./semsearch.py /app/semsearch.py
22
  COPY ./startup.sh /app/startup.sh
 
 
23
  COPY ./.streamlit/main.css /app/.streamlit/main.css
24
  COPY ./app.py /app/app.py
25
+ RUN chmod 755 /app/startup.sh
 
 
 
26
 
27
  RUN mkdir -p /app/inputDocs
28
  COPY ./inputDocs/* /app/inputDocs
 
55
  COPY ./multi-qa-MiniLM-L6-cos-v1 /app/app/text2vec-transformers
56
 
57
  ENV PATH="/usr/bin/local:/app/text2vec-transformers:/app/text2vec-transformers/bin:${PATH}"
 
 
58
  RUN ./custom_prerequisites.py
59
 
60
  ##############################
 
64
  RUN chown -R user /app
65
  RUN chgrp -R user /app
66
 
 
 
 
 
67
  WORKDIR /app
68
  USER user
69
 
 
 
 
 
 
 
70
  EXPOSE 8501
 
71
  CMD streamlit run /app/app.py \
72
  --server.headless true \
73
  --server.enableCORS false \
74
  --server.enableXsrfProtection false \
75
  --server.fileWatcherType none
 
 
Dockerfile.Hld02 ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ###############################################################################
2
+ # Python environment, main app and startup script.
3
+ FROM python:3.11.5
4
+ #FROM python:3.11.9-slim
5
+ #FROM python:3.11.9-alpine
6
+ #FROM python:3.11-bookworm
7
+
8
+ RUN echo "\n\n############################################# Dockerfile DbgUI ######################################\n\n"
9
+
10
+ #ENTRYPOINT ["/app/startup.sh"]
11
+ #RUN apt-get update && \
12
+ # apt-get install -y libc6 && \
13
+ # rm -rf /var/lib/apt/lists/*
14
+ WORKDIR /app
15
+
16
+ #RUN ls -l / || ls -l /lib || ls -l /usr || ls -l /usr/lib6 || echo "### An ls failed."
17
+
18
+ COPY ./requirements.txt /app/requirements.txt
19
+ COPY ./semsearch.py /app/semsearch.py
20
+ COPY ./startup.sh /app/startup.sh
21
+ COPY ./semsearchDbgUI.py /app/semsearchDbgUI.py
22
+ COPY ./startupDbgUI.sh /app/startupDbgUI.sh
23
+ COPY ./.streamlit/main.css /app/.streamlit/main.css
24
+ COPY ./app.py /app/app.py
25
+ COPY ./cmd.sh /app/cmd.sh
26
+ RUN chmod 755 /app/startup.sh /app/cmd.sh
27
+
28
+ COPY ./multi-qa-MiniLM-L6-cos-v1 /app/multi-qa-MiniLM-L6-cos-v1
29
+
30
+ RUN mkdir -p /app/inputDocs
31
+ COPY ./inputDocs/* /app/inputDocs
32
+ RUN pip install --no-cache-dir --upgrade -r /app/requirements.txt
33
+ RUN pip install https://files.pythonhosted.org/packages/13/87/e0cb08c2d4bd7d38ab63816b306c8b1e7cfdc0e59bd54462e8b0df069078/semantic_text_splitter-0.6.3-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
34
+ RUN pip show semantic-text-splitter
35
+
36
+ RUN pip install llama_cpp_python
37
+
38
+ ##############################################################################
39
+ # Install Weaviate
40
+ WORKDIR /app/weaviate
41
+ RUN wget -qO- https://github.com/weaviate/weaviate/releases/download/v1.24.10/weaviate-v1.24.10-linux-amd64.tar.gz | tar -xzf -
42
+ RUN ls -al /app/weaviate
43
+
44
+ # Set environment variables for Weaviate
45
+ ENV PATH="/app:/app/weaviate-v1.24.10-linux-x86_64:${PATH}"
46
+ # Expose the Weaviate port
47
+ EXPOSE 8080
48
+
49
+ #COPY Llama-2-7B-Chat-GGUF/llama-2-7b-chat.Q4_0.gguf /app
50
+ RUN cd /app; wget -v https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_0.gguf
51
+
52
+ ##############################################################################
53
+ # Install text2vec-transformers
54
+ WORKDIR /app/text2vec-transformers
55
+ COPY --from=semitechnologies/transformers-inference:sentence-transformers-multi-qa-MiniLM-L6-cos-v1 /app /app/text2vec-transformers
56
+ COPY --from=semitechnologies/transformers-inference:sentence-transformers-multi-qa-MiniLM-L6-cos-v1 /usr/local/bin /app/text2vec-transformers/bin
57
+
58
+ COPY ./multi-qa-MiniLM-L6-cos-v1 /app/app/text2vec-transformers
59
+
60
+ ENV PATH="/usr/bin/local:/app/text2vec-transformers:/app/text2vec-transformers/bin:${PATH}"
61
+ #RUN pip install -r requirements.txt--server.port=8501 --server.address=0.0.0.0
62
+ #RUN pip install nltk==3.8.1 optimum==1.13.2 onnxruntime==1.16.1 onnx==1.14.1
63
+ RUN ./custom_prerequisites.py
64
+
65
+ ##############################
66
+ RUN useradd -m -u 1000 user
67
+
68
+ RUN chmod -R 755 /app
69
+ RUN chown -R user /app
70
+ RUN chgrp -R user /app
71
+
72
+ #############################################
73
+ # Specify /data volume.
74
+ #VOLUME /data
75
+
76
+ WORKDIR /app
77
+ USER user
78
+
79
+ ##############################################################################
80
+ # Start the weaviate vector database, text2vec-transformers and the semantic search app.
81
+ #RUN /app/startup.sh
82
+ #RUN --mount=type=cache,target=/data,mode=777 /app/startup.sh
83
+ #RUN --mount=type=cache,target=/data,mode=777 echo "### Mounting /data"
84
+ #CMD ["/app/startupDbgUI.sh"]
85
+ EXPOSE 8501
86
+ #CMD /app/startup.sh; /usr/local/bin/streamlit run semsearch.py --server.port=8501 --server.address=0.0.0.0
87
+ CMD streamlit run /app/app.py \
88
+ --server.headless true \
89
+ --server.enableCORS false \
90
+ --server.enableXsrfProtection false \
91
+ --server.fileWatcherType none
92
+
93
+ #CMD python apptst.py
app.py CHANGED
@@ -510,7 +510,7 @@ try:
510
  stop = ["Q", "\n"]
511
 
512
  modelOutput = ""
513
- with st.spinner('Generating Completion...'):
514
  modelOutput = llm.create_chat_completion(
515
  prompt
516
  #max_tokens=max_tokens,
 
510
  stop = ["Q", "\n"]
511
 
512
  modelOutput = ""
513
+ with st.spinner('Generating Completion (but slowly)...'):
514
  modelOutput = llm.create_chat_completion(
515
  prompt
516
  #max_tokens=max_tokens,
app.py.Hld02 ADDED
@@ -0,0 +1,622 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import weaviate
2
+ from weaviate.connect import ConnectionParams
3
+ from weaviate.classes.init import AdditionalConfig, Timeout
4
+
5
+ from sentence_transformers import SentenceTransformer
6
+ from langchain_community.document_loaders import BSHTMLLoader
7
+ from pathlib import Path
8
+ from lxml import html
9
+ import logging
10
+ from semantic_text_splitter import HuggingFaceTextSplitter
11
+ from tokenizers import Tokenizer
12
+ import json
13
+ import os
14
+ import re
15
+
16
+ import llama_cpp
17
+ from llama_cpp import Llama
18
+
19
+ import streamlit as st
20
+ import subprocess
21
+ import time
22
+ import pprint
23
+ import io
24
+
25
+
26
+
27
+ try:
28
+ #############################################
29
+ # Logging setup including weaviate logging. #
30
+ #############################################
31
+ if 'logging' not in st.session_state:
32
+ weaviate_logger = logging.getLogger("httpx")
33
+ weaviate_logger.setLevel(logging.WARNING)
34
+ logger = logging.getLogger(__name__)
35
+ logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s',level=logging.INFO)
36
+ st.session_state.weaviate_logger = weaviate_logger
37
+ st.session_state.logger = logger
38
+ else:
39
+ weaviate_logger = st.session_state.weaviate_logger
40
+ logger = st.session_state.logger
41
+
42
+
43
+ logger.info("###################### Program Entry ############################")
44
+
45
+ ##########################################################################
46
+ # Asynchronously run startup.sh which runs text2vec-transformers #
47
+ # asynchronously and the Weaviate Vector Database server asynchronously. #
48
+ ##########################################################################
49
+ def runStartup():
50
+ logger.info("### Running startup.sh")
51
+ try:
52
+ subprocess.Popen(["/app/startup.sh"])
53
+ # Wait for text2vec-transformers and Weaviate DB to initialize.
54
+ time.sleep(180)
55
+ #subprocess.run(["/app/cmd.sh 'ps -ef'"])
56
+ except Exception as e:
57
+ emsg = str(e)
58
+ logger.error(f"### subprocess.run EXCEPTION. e: {emsg}")
59
+ logger.info("### Running startup.sh complete")
60
+ if 'runStartup' not in st.session_state:
61
+ st.session_state.runStartup = False
62
+ if 'runStartup' not in st.session_state:
63
+ logger.info("### runStartup still not in st.session_state after setting variable.")
64
+ with st.spinner('Initializing Weaviate DB and text2vec-transformer...'):
65
+ runStartup()
66
+ try:
67
+ logger.info("### Displaying /app/startup.log")
68
+ with open("/app/startup.log", "r") as file:
69
+ line = file.readline().rstrip()
70
+ while line:
71
+ logger.info(line)
72
+ line = file.readline().rstrip()
73
+ except Exception as e2:
74
+ emsg = str(e2)
75
+ logger.error(f"#### Displaying startup.log EXCEPTION. e2: {emsg}")
76
+
77
+
78
+ #########################################
79
+ # Function to load the CSS styling file. #
80
+ #########################################
81
+ def load_css(file_name):
82
+ logger.info("#### load_css entered.")
83
+ with open(file_name) as f:
84
+ st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)
85
+ logger.info("#### load_css exited.")
86
+ if 'load_css' not in st.session_state:
87
+ load_css(".streamlit/main.css")
88
+ st.session_state.load_css = True
89
+
90
+ # Display UI heading.
91
+ st.markdown("<h1 style='text-align: center; color: #666666;'>LLM with RAG Prompting <br style='page-break-after: always;'>Proof of Concept</h1>",
92
+ unsafe_allow_html=True)
93
+
94
+ pathString = "/app/inputDocs"
95
+ chunks = []
96
+ webpageDocNames = []
97
+ page_contentArray = []
98
+ webpageChunks = []
99
+ webpageTitles = []
100
+ webpageChunksDocNames = []
101
+
102
+
103
+ ############################################
104
+ # Connect to the Weaviate vector database. #
105
+ ############################################
106
+ if 'client' not in st.session_state:
107
+ logger.info("#### Create Weaviate db client connection.")
108
+ client = weaviate.WeaviateClient(
109
+ connection_params=ConnectionParams.from_params(
110
+ http_host="localhost",
111
+ http_port="8080",
112
+ http_secure=False,
113
+ grpc_host="localhost",
114
+ grpc_port="50051",
115
+ grpc_secure=False
116
+ ),
117
+ additional_config=AdditionalConfig(
118
+ timeout=Timeout(init=60, query=1800, insert=1800), # Values in seconds
119
+ )
120
+ )
121
+ for i in range(3):
122
+ try:
123
+ client.connect()
124
+ st.session_state.client = client
125
+ logger.info("#### Create Weaviate db client connection exited.")
126
+ break
127
+ except Exception as e:
128
+ emsg = str(e)
129
+ logger.error(f"### client.connect() EXCEPTION. e2: {emsg}")
130
+ time.sleep(45)
131
+ if i >= 3:
132
+ raise Exception("client.connect retries exhausted.")
133
+ else:
134
+ client = st.session_state.client
135
+
136
+
137
+ ########################################################
138
+ # Read each text input file, parse it into a document, #
139
+ # chunk it, collect chunks and document names. #
140
+ ########################################################
141
+ if not client.collections.exists("Documents") or not client.collections.exists("Chunks") :
142
+ logger.info("#### Read and chunk input RAG document files.")
143
+ for filename in os.listdir(pathString):
144
+ logger.debug(filename)
145
+ path = Path(pathString + "/" + filename)
146
+ filename = filename.rstrip(".html")
147
+ webpageDocNames.append(filename)
148
+ htmlLoader = BSHTMLLoader(path,"utf-8")
149
+ htmlData = htmlLoader.load()
150
+
151
+ title = htmlData[0].metadata['title']
152
+ page_content = htmlData[0].page_content
153
+
154
+ # Clean data. Remove multiple newlines, etc.
155
+ page_content = re.sub(r'\n+', '\n',page_content)
156
+
157
+ page_contentArray.append(page_content)
158
+ webpageTitles.append(title)
159
+ max_tokens = 1000
160
+ tokenizer = Tokenizer.from_pretrained("bert-base-uncased")
161
+ logger.info(f"### tokenizer: {tokenizer}")
162
+ splitter = HuggingFaceTextSplitter(tokenizer, trim_chunks=True)
163
+ chunksOnePage = splitter.chunks(page_content, chunk_capacity=50)
164
+
165
+ chunks = []
166
+ for chnk in chunksOnePage:
167
+ logger.debug(f"#### chnk in file: {chnk}")
168
+ chunks.append(chnk)
169
+ logger.debug(f"chunks: {chunks}")
170
+ webpageChunks.append(chunks)
171
+ webpageChunksDocNames.append(filename + "Chunks")
172
+
173
+ logger.info(f"### filename, title: {filename}, {title}")
174
+ logger.info(f"### webpageDocNames: {webpageDocNames}")
175
+ logger.info("#### Read and chunk input RAG document files.")
176
+
177
+
178
+ #############################################################
179
+ # Create database documents and chunks schemas/collections. #
180
+ # Each chunk schema points to its corresponding document. #
181
+ #############################################################
182
+ if not client.collections.exists("Documents"):
183
+ logger.info("#### Create documents schema/collection started.")
184
+ class_obj = {
185
+ "class": "Documents",
186
+ "description": "For first attempt at loading a Weviate database.",
187
+ "vectorizer": "text2vec-transformers",
188
+ "moduleConfig": {
189
+ "text2vec-transformers": {
190
+ "vectorizeClassName": False
191
+ }
192
+ },
193
+ "vectorIndexType": "hnsw",
194
+ "vectorIndexConfig": {
195
+ "distance": "cosine",
196
+ },
197
+ "properties": [
198
+ {
199
+ "name": "title",
200
+ "dataType": ["text"],
201
+ "description": "HTML doc title.",
202
+ "vectorizer": "text2vec-transformers",
203
+ "moduleConfig": {
204
+ "text2vec-transformers": {
205
+ "vectorizePropertyName": True,
206
+ "skip": False,
207
+ "tokenization": "lowercase"
208
+ }
209
+ },
210
+ "invertedIndexConfig": {
211
+ "bm25": {
212
+ "b": 0.75,
213
+ "k1": 1.2
214
+ },
215
+ }
216
+ },
217
+ {
218
+ "name": "content",
219
+ "dataType": ["text"],
220
+ "description": "HTML page content.",
221
+ "moduleConfig": {
222
+ "text2vec-transformers": {
223
+ "vectorizePropertyName": True,
224
+ "tokenization": "whitespace"
225
+ }
226
+ }
227
+ }
228
+ ]
229
+ }
230
+ wpCollection = client.collections.create_from_dict(class_obj)
231
+ st.session_state.wpCollection = wpCollection
232
+ logger.info("#### Create documents schema/collection ended.")
233
+ else:
234
+ wpCollection = client.collections.get("Documents")
235
+ st.session_state.wpCollection = wpCollection
236
+
237
+ # Create chunks in db.
238
+ if not client.collections.exists("Chunks"):
239
+ logger.info("#### create document chunks schema/collection started.")
240
+ #client.collections.delete("Chunks")
241
+ class_obj = {
242
+ "class": "Chunks",
243
+ "description": "Collection for document chunks.",
244
+ "vectorizer": "text2vec-transformers",
245
+ "moduleConfig": {
246
+ "text2vec-transformers": {
247
+ "vectorizeClassName": True
248
+ }
249
+ },
250
+ "vectorIndexType": "hnsw",
251
+ "vectorIndexConfig": {
252
+ "distance": "cosine"
253
+ },
254
+ "properties": [
255
+ {
256
+ "name": "chunk",
257
+ "dataType": ["text"],
258
+ "description": "Single webpage chunk.",
259
+ "vectorizer": "text2vec-transformers",
260
+ "moduleConfig": {
261
+ "text2vec-transformers": {
262
+ "vectorizePropertyName": False,
263
+ "skip": False,
264
+ "tokenization": "lowercase"
265
+ }
266
+ }
267
+ },
268
+ {
269
+ "name": "chunk_index",
270
+ "dataType": ["int"]
271
+ },
272
+ {
273
+ "name": "webpage",
274
+ "dataType": ["Documents"],
275
+ "description": "Webpage content chunks.",
276
+
277
+ "invertedIndexConfig": {
278
+ "bm25": {
279
+ "b": 0.75,
280
+ "k1": 1.2
281
+ }
282
+ }
283
+ }
284
+ ]
285
+ }
286
+ wpChunksCollection = client.collections.create_from_dict(class_obj)
287
+ st.session_state.wpChunksCollection = wpChunksCollection
288
+ logger.info("#### create document chunks schedma/collection ended.")
289
+ else:
290
+ wpChunksCollection = client.collections.get("Chunks")
291
+ st.session_state.wpChunksCollection = wpChunksCollection
292
+
293
+
294
+ ##################################################################
295
+ # Create the actual document and chunks objects in the database. #
296
+ ##################################################################
297
+ if 'dbObjsCreated' not in st.session_state:
298
+ logger.info("#### Create db document and chunk objects started.")
299
+ st.session_state.dbObjsCreated = True
300
+ for i, className in enumerate(webpageDocNames):
301
+ logger.info("#### Creating document object.")
302
+ title = webpageTitles[i]
303
+ logger.debug(f"## className, title: {className}, {title}")
304
+ # Create Webpage Object
305
+ page_content = page_contentArray[i]
306
+ # Insert the document.
307
+ wpCollectionObj_uuid = wpCollection.data.insert(
308
+ {
309
+ "name": className,
310
+ "title": title,
311
+ "content": page_content
312
+ }
313
+ )
314
+ logger.info("#### Document object created.")
315
+
316
+ logger.info("#### Create chunk db objects.")
317
+ st.session_state.wpChunksCollection = wpChunksCollection
318
+ # Insert the chunks for the document.
319
+ for i2, chunk in enumerate(webpageChunks[i]):
320
+ chunk_uuid = wpChunksCollection.data.insert(
321
+ {
322
+ "title": title,
323
+ "chunk": chunk,
324
+ "chunk_index": i2,
325
+ "references":
326
+ {
327
+ "webpage": wpCollectionObj_uuid
328
+ }
329
+ }
330
+ )
331
+ logger.info("#### Create chunk db objects created.")
332
+ logger.info("#### Create db document and chunk objects ended.")
333
+
334
+
335
+ #######################
336
+ # Initialize the LLM. #
337
+ #######################
338
+ model_path = "/app/llama-2-7b-chat.Q4_0.gguf"
339
+ if 'llm' not in st.session_state:
340
+ logger.info("### Initializing LLM.")
341
+ llm = Llama(model_path,
342
+ #*,
343
+ n_gpu_layers=0,
344
+ split_mode=llama_cpp.LLAMA_SPLIT_MODE_LAYER,
345
+ main_gpu=0,
346
+ tensor_split=None,
347
+ vocab_only=False,
348
+ use_mmap=True,
349
+ use_mlock=False,
350
+ kv_overrides=None,
351
+ seed=llama_cpp.LLAMA_DEFAULT_SEED,
352
+ n_ctx=2048,
353
+ n_batch=512,
354
+ n_threads=8,
355
+ n_threads_batch=16,
356
+ rope_scaling_type=llama_cpp.LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED,
357
+ pooling_type=llama_cpp.LLAMA_POOLING_TYPE_UNSPECIFIED,
358
+ rope_freq_base=0.0,
359
+ rope_freq_scale=0.0,
360
+ yarn_ext_factor=-1.0,
361
+ yarn_attn_factor=1.0,
362
+ yarn_beta_fast=32.0,
363
+ yarn_beta_slow=1.0,
364
+ yarn_orig_ctx=0,
365
+ logits_all=False,
366
+ embedding=False,
367
+ offload_kqv=True,
368
+ last_n_tokens_size=64,
369
+ lora_base=None,
370
+ lora_scale=1.0,
371
+ lora_path=None,
372
+ numa=False,
373
+ chat_format="llama-2",
374
+ chat_handler=None,
375
+ draft_model=None,
376
+ tokenizer=None,
377
+ type_k=None,
378
+ type_v=None,
379
+ verbose=False
380
+ )
381
+ st.session_state.llm = llm
382
+ logger.info("### Initializing LLM completed.")
383
+ else:
384
+ llm = st.session_state.llm
385
+
386
+
387
+ #####################################################
388
+ # Get RAG data from vector db based on user prompt. #
389
+ #####################################################
390
+ def getRagData(promptText):
391
+ logger.info("#### getRagData() entered.")
392
+ ###############################################################################
393
+ # Initialize the sentence transformer and encode the query prompt.
394
+ logger.debug(f"#### Encode text query prompt to create vectors. {promptText}")
395
+ model = SentenceTransformer('/app/multi-qa-MiniLM-L6-cos-v1')
396
+ vector = model.encode(promptText)
397
+
398
+ logLevel = logger.getEffectiveLevel()
399
+ if logLevel >= logging.DEBUG:
400
+ wrks = str(vector)
401
+ logger.debug(f"### vector: {wrks}")
402
+
403
+
404
+ vectorList = []
405
+ for vec in vector:
406
+ vectorList.append(vec)
407
+
408
+ if logLevel >= logging.DEBUG:
409
+ logger.debug("#### Print vectors.")
410
+ wrks = str(vectorList)
411
+ logger.debug(f"vectorList: {wrks}")
412
+
413
+ # Fetch chunks and print chunks.
414
+ logger.debug("#### Retrieve semchunks from db using vectors from prompt.")
415
+ wpChunksCollection = st.session_state.wpChunksCollection
416
+ semChunks = wpChunksCollection.query.near_vector(
417
+ near_vector=vectorList,
418
+ distance=0.7,
419
+ limit=3
420
+ )
421
+
422
+ if logLevel >= logging.DEBUG:
423
+ wrks = str(semChunks)
424
+ logger.debug(f"### semChunks[0]: {wrks}")
425
+
426
+ # Print chunks, corresponding document and document title.
427
+ ragData = ""
428
+ logger.debug("#### Print individual retrieved chunks.")
429
+ wpCollection = st.session_state.wpCollection
430
+ for chunk in enumerate(semChunks.objects):
431
+ logger.debug(f"#### chunk: {chunk}")
432
+ ragData = ragData + chunk[1].properties['chunk'] + "\n"
433
+ webpage_uuid = chunk[1].properties['references']['webpage']
434
+ logger.debug(f"webpage_uuid: {webpage_uuid}")
435
+ wpFromChunk = wpCollection.query.fetch_object_by_id(webpage_uuid)
436
+ logger.debug(f"### wpFromChunk title: {wpFromChunk.properties['title']}")
437
+ #collection = client.collections.get("Chunks")
438
+ logger.debug("#### ragData: {ragData}")
439
+ if ragData == "" or ragData == None:
440
+ ragData = "None found."
441
+ logger.info("#### getRagData() exited.")
442
+ return ragData
443
+
444
+
445
+ #################################################
446
+ # Retrieve all RAG data for the user to review. #
447
+ #################################################
448
+ def getAllRagData():
449
+ logger.info("#### getAllRagData() entered.")
450
+
451
+ chunksCollection = client.collections.get("Chunks")
452
+ response = chunksCollection.query.fetch_objects()
453
+ wstrObjs = str(response.objects)
454
+ logger.debug(f"### response.objects: {wstrObjs}")
455
+ for o in response.objects:
456
+ wstr = o.properties
457
+ logger.debug(f"### o.properties: {wstr}")
458
+ logger.info("#### getAllRagData() exited.")
459
+ return wstrObjs
460
+
461
+ ##########################
462
+ # Display UI text areas. #
463
+ ##########################
464
+ col1, col2 = st.columns(2)
465
+ with col1:
466
+ if "sysTA" not in st.session_state:
467
+ st.session_state.sysTA = st.text_area(label="System Prompt",placeholder="You are a helpful AI assistant", help="Instruct the LLM about how to handle the user prompt.")
468
+ elif "sysTAtext" in st.session_state:
469
+ st.session_state.sysTA = st.text_area(label="System Prompt",value=st.session_state.sysTAtext,placeholder="You are a helpful AI assistant", help="Instruct the LLM about how to handle the user prompt.")
470
+ else:
471
+ st.session_state.sysTA = st.text_area(label="System Prompt",value=st.session_state.sysTA,placeholder="You are a helpful AI assistant", help="Instruct the LLM about how to handle the user prompt.")
472
+
473
+ if "userpTA" not in st.session_state:
474
+ st.session_state.userpTA = st.text_area(label="User Prompt",placeholder="Prompt the LLM with a question or instruction.", \
475
+ help="Enter a prompt for the LLM. No special characters needed.")
476
+ elif "userpTAtext" in st.session_state:
477
+ st.session_state.userpTA = st.text_area (label="User Prompt",value=st.session_state.userpTAtext,placeholder="Prompt the LLM with a question or instruction.", \
478
+ help="Enter a prompt for the LLM. No special characters needed.")
479
+ else:
480
+ st.session_state.userpTA = st.text_area(label="User Prompt",value=st.session_state.userpTA,placeholder="Prompt the LLM with a question or instruction.", \
481
+ help="Enter a prompt for the LLM. No special characters needed.")
482
+
483
+ with col2:
484
+ if "ragpTA" not in st.session_state:
485
+ st.session_state.ragpTA = st.text_area(label="RAG Response",placeholder="Output if RAG selected.",help="RAG output if enabled.")
486
+ elif "ragpTAtext" in st.session_state:
487
+ st.session_state.ragpTA = st.text_area(label="RAG Response",value=st.session_state.ragpTAtext,placeholder="Output if RAG selected.",help="RAG output if enabled.")
488
+ else:
489
+ st.session_state.ragpTA = st.text_area(label="RAG Response",value=st.session_state.ragpTA,placeholder="Output if RAG selected.",help="RAG output if enabled.")
490
+
491
+ if "rspTA" not in st.session_state:
492
+ st.session_state.rspTA = st.text_area(label="LLM Completion",placeholder="LLM completion.",help="Output area for LLM completion (response).")
493
+ elif "rspTAtext" in st.session_state:
494
+ st.session_state.rspTA = st.text_area(label="LLM Completion",value=st.session_state.rspTAtext,placeholder="LLM completion.",help="Output area for LLM completion (response).")
495
+ else:
496
+ st.session_state.rspTA = st.text_area(label="LLM Completion",value=st.session_state.rspTA,placeholder="LLM completion.",help="Output area for LLM completion (response).")
497
+
498
+
499
+ ####################################################################
500
+ # Prompt the LLM with the user's input and return the completion. #
501
+ ####################################################################
502
+ def runLLM(prompt):
503
+ logger = st.session_state.logger
504
+ logger.info("### runLLM entered.")
505
+
506
+ max_tokens = 1000
507
+ temperature = 0.3
508
+ top_p = 0.1
509
+ echoVal = True
510
+ stop = ["Q", "\n"]
511
+
512
+ modelOutput = ""
513
+ with st.spinner('Generating Completion (but slowly)...'):
514
+ modelOutput = llm.create_chat_completion(
515
+ prompt
516
+ #max_tokens=max_tokens,
517
+ #temperature=temperature,
518
+ #top_p=top_p,
519
+ #echo=echoVal,
520
+ #stop=stop,
521
+ )
522
+ result = modelOutput["choices"][0]["message"]["content"]
523
+ #result = str(modelOutput)
524
+ logger.debug(f"### llmResult: {result}")
525
+ logger.info("### runLLM exited.")
526
+ return result
527
+
528
+
529
+ ##########################################################################
530
+ # Build a llama-2 prompt from the user prompt and RAG input if selected. #
531
+ ##########################################################################
532
+ def setPrompt(pprompt,ragFlag):
533
+ logger = st.session_state.logger
534
+ logger.info(f"### setPrompt() entered. ragFlag: {ragFlag}")
535
+ if ragFlag:
536
+ ragPrompt = getRagData(pprompt)
537
+ st.session_state.ragpTA = ragPrompt
538
+ if ragFlag != "None found.":
539
+ userPrompt = pprompt + " " \
540
+ + "Also, combine the following information with information in the LLM itself. " \
541
+ + "Use the combined information to generate the response. " \
542
+ + ragPrompt + " "
543
+ else:
544
+ userPrompt = pprompt
545
+ else:
546
+ userPrompt = pprompt
547
+
548
+ fullPrompt = [
549
+ {"role": "system", "content": st.session_state.sysTA},
550
+ {"role": "user", "content": userPrompt}
551
+ ]
552
+
553
+ logger.debug(f"### userPrompt: {userPrompt}")
554
+ logger.info("setPrompt exited.")
555
+ return fullPrompt
556
+
557
+
558
+ #####################################
559
+ # Run the LLM with the user prompt. #
560
+ #####################################
561
+ def on_runLLMButton_Clicked():
562
+ logger = st.session_state.logger
563
+ logger.info("### on_runLLMButton_Clicked entered.")
564
+ st.session_state.sysTAtext = st.session_state.sysTA
565
+ logger.debug(f"sysTAtext: {st.session_state.sysTAtext}")
566
+
567
+ wrklist = setPrompt(st.session_state.userpTA,st.selectRag)
568
+ st.session_state.userpTA = wrklist[1]["content"]
569
+ logger.debug(f"userpTAtext: {st.session_state.userpTA}")
570
+
571
+ rsp = runLLM(wrklist)
572
+ st.session_state.rspTA = rsp
573
+ logger.debug(f"rspTAtext: {st.session_state.rspTA}")
574
+
575
+ logger.info("### on_runLLMButton_Clicked exited.")
576
+
577
+
578
+ #########################################
579
+ # Get all the RAG data for user review. #
580
+ #########################################
581
+ def on_getAllRagDataButton_Clicked():
582
+ logger = st.session_state.logger
583
+ logger.info("### on_getAllRagButton_Clicked entered.")
584
+ st.session_state.ragpTA = getAllRagData();
585
+ logger.info("### on_getAllRagButton_Clicked exited.")
586
+
587
+
588
+ #######################################
589
+ # Reset all the input, output fields. #
590
+ #######################################
591
+ def on_resetButton_Clicked():
592
+ logger = st.session_state.logger
593
+ logger.info("### on_resetButton_Clicked entered.")
594
+ st.session_state.sysTA = ""
595
+ st.session_state.userpTA = ""
596
+ st.session_state.ragpTA = ""
597
+ st.session_state.rspTA = ""
598
+ logger.info("### on_resetButton_Clicked exited.")
599
+
600
+
601
+ ###########################################
602
+ # Display the sidebar with a checkbox and #
603
+ # buttons. #
604
+ ###########################################
605
+ with st.sidebar:
606
+ st.selectRag = st.checkbox("Enable RAG",value=False,key="selectRag",help=None,on_change=None,args=None,kwargs=None,disabled=False,label_visibility="visible")
607
+ st.runLLMButton = st.button("Run LLM Prompt",key=None,help=None,on_click=on_runLLMButton_Clicked,args=None,kwargs=None,type="secondary",disabled=False,use_container_width=False)
608
+ st.getAllRagDataButton = st.button("Get All Rag Data",key=None,help=None,on_click=on_getAllRagDataButton_Clicked,args=None,kwargs=None,type="secondary",disabled=False,use_container_width=False)
609
+ st.resetButton = st.button("Reset",key=None,help=None,on_click=on_resetButton_Clicked,args=None,kwargs=None,type="secondary",disabled=False,use_container_width=False)
610
+
611
+ logger.info("#### Program End Execution.")
612
+
613
+ except Exception as e:
614
+ try:
615
+ emsg = str(e)
616
+ logger.error(f"Program-wide EXCEPTION. e: {emsg}")
617
+ with open("/app/startup.log", "r") as file:
618
+ content = file.read()
619
+ logger.debug(content)
620
+ except Exception as e2:
621
+ emsg = str(e2)
622
+ logger.error(f"#### Displaying startup.log EXCEPTION. e2: {emsg}")
startup.sh CHANGED
@@ -1,5 +1,8 @@
1
  #! /bin/bash
2
 
 
 
 
3
  #####################################
4
  # Start text2vec-transformers and #
5
  # Weaviate DB to run asynchronously #
 
1
  #! /bin/bash
2
 
3
+ echo "### cat /proc/cpuinfo"
4
+ cat /proc/cpuinfo
5
+
6
  #####################################
7
  # Start text2vec-transformers and #
8
  # Weaviate DB to run asynchronously #
startup.sh.Hld02 ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #! /bin/bash
2
+
3
+ #####################################
4
+ # Start text2vec-transformers and #
5
+ # Weaviate DB to run asynchronously #
6
+ # and wait. #
7
+ #####################################
8
+ exec &> /app/startup.log
9
+
10
+ echo "#### startup.sh entered."
11
+ echo "### ps -ef 1"; ps -ef
12
+
13
+ # Is startup.sh already running?
14
+ echo " "
15
+ echo "### before ps and grep startup.sh"
16
+ ps -ef | grep -i startup.sh
17
+ cnt=$(ps -ef | grep -i startup.sh | wc -l)
18
+ echo "### cnt: $cnt"
19
+ if [ $cnt -gt 3 ];then
20
+ echo "#### startup.sh already running. Exiting."
21
+ exit 0
22
+ fi
23
+
24
+ # Make sure Weaviate DB directory exists.
25
+ echo "### Before mkdir -p ~/data/var/lib/weaviate"
26
+ weaviateDir=~/data/var/lib/weaviate
27
+ mkdir -p $weaviateDir
28
+ chmod -R 777 $weaviateDir
29
+
30
+
31
+ # Start text2vec-transformers
32
+ echo "#### Before /app/text2vec-transformers"
33
+ cd /app/text2vec-transformers
34
+ /app/text2vec-transformers/bin/uvicorn app:app --host 0.0.0.0 --port 8081 --log-level warning --timeout-keep-alive 1440 &
35
+ echo "### After text2vec start. RC=$?"
36
+ cd /app
37
+
38
+
39
+ # Start the weaviate vector database server.
40
+ echo "#### Before /app/weaviate"
41
+
42
+ export AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED=true \
43
+ PERSISTENCE_DATA_PATH=$weaviateDir \
44
+ DEFAULT_VECTORIZER_MODULE=text2vec-transformers \
45
+ ENABLE_MODULES=text2vec-transformers \
46
+ TRANSFORMERS_INFERENCE_API=http://127.0.0.1:8081 \
47
+ LOG_LEVEL=warning \
48
+ MODULES_CLIENT_TIMEOUT=600s
49
+ /app/weaviate/weaviate --host 127.0.0.1 --port 8080 --scheme http --write-timeout 600s &
50
+ echo "### After Weaviate DB start. RC=$?"
51
+
52
+ #echo "### Before sleep 120"
53
+ #sleep 120
54
+
55
+ echo "### Before wait."
56
+ echo "### ps -ef 2: "; ps -ef
57
+ wait
58
+
59
+
60
+