MVPilgrim committed on
Commit
45bc919
·
1 Parent(s): 0d4153f

Got it running.

Browse files
Dockerfile CHANGED
@@ -30,6 +30,8 @@ RUN go mod download
30
  ###############################################################################
31
  # This image builds the weaviate server
32
  FROM build_base AS server_builder
 
 
33
  ARG TARGETARCH
34
  ARG GITHASH="unknown"
35
  ARG EXTRA_BUILD_ARGS=""
@@ -40,9 +42,11 @@ RUN CGO_ENABLED=0 GOARCH=$TARGETARCH go build $EXTRA_BUILD_ARGS \
40
 
41
  ###############################################################################
42
  #python environment and app.
43
- FROM python:3.11.5
44
  #ENTRYPOINT ["/app/startup.sh"]
45
- RUN apt update
 
 
46
  WORKDIR /app
47
 
48
  #RUN ls -l / || ls -l /lib || ls -l /usr || ls -l /usr/lib6 || echo "### An ls failed."
@@ -55,10 +59,11 @@ RUN chmod 755 /app/startup.sh
55
  COPY --from=weaviate /bin/weaviate /app/weaviate
56
  COPY --from=weaviate ./modules ./
57
 
58
- COPY --from=server_builder /lib/libc.musl-x86_64.so.1 /lib
59
- RUN mkdir -p /usr/lib64 y
60
- RUN ls -l /usr/lib64
61
- RUN ln -s /usr/lib64/libc.so.6 /usr/lib64/libc.musl-x86_64.so.1
 
62
 
63
  RUN mkdir -p /var/lib/weaviate/data y
64
  RUN chmod -R 777 /var
 
30
  ###############################################################################
31
  # This image builds the weaviate server
32
  FROM build_base AS server_builder
33
+ RUN apk add python3.11.5
34
+
35
  ARG TARGETARCH
36
  ARG GITHASH="unknown"
37
  ARG EXTRA_BUILD_ARGS=""
 
42
 
43
  ###############################################################################
44
  #python environment and app.
45
+ #FROM python:3.11.5
46
  #ENTRYPOINT ["/app/startup.sh"]
47
+ #RUN apt-get update && \
48
+ # apt-get install -y libc6 && \
49
+ # rm -rf /var/lib/apt/lists/*
50
  WORKDIR /app
51
 
52
  #RUN ls -l / || ls -l /lib || ls -l /usr || ls -l /usr/lib6 || echo "### An ls failed."
 
59
  COPY --from=weaviate /bin/weaviate /app/weaviate
60
  COPY --from=weaviate ./modules ./
61
 
62
+ #COPY --from=server_builder /lib/libc.musl-x86_64.so.1 /lib
63
+ #COPY /lib/libc.musl-x86_64.so.1 /lib
64
+ #RUN mkdir -p /usr/lib64 y
65
+ #RUN ls -l /usr/lib64
66
+ #RUN ln -s /usr/lib64/libc.so.6 /usr/lib64/libc.musl-x86_64.so.1
67
 
68
  RUN mkdir -p /var/lib/weaviate/data y
69
  RUN chmod -R 777 /var
DockerfilePythonWeaviate ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ###############################################################################
2
+ #python environment, main app and startup script.
3
+ FROM python:3.11.5
4
+ #FROM python:3.11.9-slim
5
+ #FROM python:3.11.9-alpine
6
+ #FROM python:3.11-bookworm
7
+
8
+ ENTRYPOINT ["/app/startup.sh"]
9
+ #RUN apt-get update && \
10
+ # apt-get install -y libc6 && \
11
+ # rm -rf /var/lib/apt/lists/*
12
+ WORKDIR /app
13
+
14
+ #RUN ls -l / || ls -l /lib || ls -l /usr || ls -l /usr/lib6 || echo "### An ls failed."
15
+
16
+ COPY ./requirements.txt /app/requirements.txt
17
+ COPY ./semsearch.py /app/semsearch.py
18
+ COPY ./startup.sh /app/startup.sh
19
+ RUN chmod 755 /app/startup.sh
20
+
21
+ COPY ./multi-qa-MiniLM-L6-cos-v1 /app/multi-qa-MiniLM-L6-cos-v1
22
+
23
+ RUN mkdir -p /app/inputDocs
24
+ COPY ./inputDocs/* /app/inputDocs
25
+ RUN pip install --no-cache-dir --upgrade -r /app/requirements.txt
26
+ RUN pip install https://files.pythonhosted.org/packages/13/87/e0cb08c2d4bd7d38ab63816b306c8b1e7cfdc0e59bd54462e8b0df069078/semantic_text_splitter-0.6.3-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
27
+ RUN pip show semantic-text-splitter
28
+
29
+ ##############################################################################
30
+ # Install Weaviate
31
+ WORKDIR /app/weaviate
32
+ RUN wget -qO- https://github.com/weaviate/weaviate/releases/download/v1.24.10/weaviate-v1.24.10-linux-amd64.tar.gz | tar -xzf -
33
+ RUN ls -al /app/weaviate
34
+
35
+ # Set environment variables for Weaviate
36
+ ENV PATH="/app:/app/weaviate-v1.24.10-linux-x86_64:${PATH}"
37
+ # Expose the Weaviate port
38
+ EXPOSE 8080
39
+
40
+ ##############################################################################
41
+ # Install text2vec-transformers
42
+ WORKDIR /app/text2vec-transformers
43
+ COPY --from=semitechnologies/transformers-inference:sentence-transformers-multi-qa-MiniLM-L6-cos-v1 /app /app/text2vec-transformers
44
+ COPY --from=semitechnologies/transformers-inference:sentence-transformers-multi-qa-MiniLM-L6-cos-v1 /usr/local/bin /app/text2vec-transformers/bin
45
+
46
+ COPY ./multi-qa-MiniLM-L6-cos-v1 /app/app/text2vec-transformers
47
+
48
+ ENV PATH="/app/text2vec-transformers:/app/text2vec-transformers/bin:${PATH}"
49
+ #RUN pip install -r requirements.txt
50
+ #RUN pip install nltk==3.8.1 optimum==1.13.2 onnxruntime==1.16.1 onnx==1.14.1
51
+ RUN ./custom_prerequisites.py
52
+
53
+ ##############################
54
+ RUN useradd -m -u 1000 user
55
+
56
+ ##############################################################################
57
+ # Start the weaviate vector database, text2vec-transformers and the semantic search app.
58
+ #RUN /app/startup.sh
59
+ CMD ["/app/startup.sh"]
DockerfileTestWvT2v ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Start with the official Weaviate image
2
+ FROM semitechnologies/weaviate:latest
3
+
4
+ # Set environment variables
5
+ ENV WEAVIATE_SERVE_MODULES text2vec-transformers
6
+
7
+ # Install Python and pip via apk, the package manager for Alpine
8
+ RUN apk update && apk add --no-cache python3 py3-pip transformers
9
+ #RUN pip3 install --no-cache-dir transformers
10
+
11
+ # Expose the default port for Weaviate
12
+ EXPOSE 8080
13
+
14
+ # Start Weaviate
15
+ CMD ["weaviate", "start"]
multi-qa-MiniLM-L6-cos-v1 ADDED
@@ -0,0 +1 @@
 
 
1
+ Subproject commit 38845167a107b59398111f0cfb430897cf1a4639
requirements.txt CHANGED
@@ -1,15 +1,16 @@
1
- transformers
2
- torch
3
- gradio
4
- sentencepiece
5
- protobuf
6
- weaviate-client==4.5.1
7
  sentence-transformers
8
  langchain
9
  lxml
10
- huggingface-hub
11
- #semantic-text-splitter
12
- tokenizers
13
- json5
14
- regex
15
- beautifulsoup4
 
 
 
 
 
 
 
1
+ weaviate-client==4.*
 
 
 
 
 
2
  sentence-transformers
3
  langchain
4
  lxml
5
+ beautifulsoup4
6
+
7
+ transformers==4.34.1
8
+ fastapi==0.103.2
9
+ uvicorn==0.23.2
10
+ nltk==3.8.1
11
+ torch==2.0.1
12
+ sentencepiece==0.1.99
13
+ sentence-transformers==2.2.2
14
+ optimum==1.13.2
15
+ onnxruntime==1.16.1
16
+ onnx==1.14.1
requirements_Orig.txt ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ transformers==4.40.1
2
+ torch==2.3.0
3
+ #gradio
4
+ #sentencepiece
5
+ #protobuf
6
+ weaviate-client==4.*
7
+ sentence-transformers
8
+ langchain
9
+ lxml
10
+ #huggingface-hub
11
+ #semantic-text-splitter
12
+ #tokenizers
13
+ #json5
14
+ #regex
15
+ beautifulsoup4
16
+ uvicorn
17
+ fastapi
18
+ optimum==1.16.2
19
+ onnx
semsearch.py CHANGED
@@ -1,6 +1,5 @@
1
  import weaviate
2
- #import weaviate.classes as wvc
3
- #from weaviate.embedded import EmbeddedOptions
4
  from sentence_transformers import SentenceTransformer
5
  from langchain_community.document_loaders import BSHTMLLoader
6
  from pathlib import Path
@@ -11,9 +10,19 @@ from tokenizers import Tokenizer
11
  import json
12
  import os
13
  import re
 
 
 
 
 
 
 
 
14
 
 
 
15
  def createChunksCollection():
16
- print("#### createChunksCollection() entered.")
17
  if client.collections.exists("Chunks"):
18
  client.collections.delete("Chunks")
19
 
@@ -62,11 +71,13 @@ def createChunksCollection():
62
  }
63
  ]
64
  }
65
-
66
  return(client.collections.create_from_dict(class_obj))
67
 
 
 
 
68
  def createWebpageCollection():
69
- print("#### createWebpageCollection() entered.")
70
  if client.collections.exists("Documents"):
71
  client.collections.delete("Documents")
72
 
@@ -84,11 +95,6 @@ def createWebpageCollection():
84
  "distance": "cosine",
85
  },
86
  "properties": [
87
- #{
88
- # "docname": "fdsa",
89
- # "dataType": ["text"],
90
- # "description": "Name of document"
91
- #},
92
  {
93
  "name": "title",
94
  "dataType": ["text"],
@@ -121,61 +127,43 @@ def createWebpageCollection():
121
  }
122
  ]
123
  }
124
-
125
  return(client.collections.create_from_dict(class_obj))
126
 
127
 
128
- #
129
  # MAINLINE
130
  #
 
 
131
  #pathString = "/Users/660565/KPSAllInOne/ProgramFilesX86/WebCopy/DownloadedWebSites/LLMPOC_HTML"
132
- pathString = "inputDocs"
133
  chunks = []
134
  webpageDocNames = []
135
- #webpageChunksClassesNames = []
136
  page_contentArray = []
137
  webpageChunks = []
138
  webpageTitles = []
139
  webpageChunksDocNames = []
140
 
141
- #client = weaviate.WeaviateClient(
142
- # embedded_options=EmbeddedOptions(
143
- # additional_env_vars={
144
- # "ENABLE_MODULES": "backup-filesystem,text2vec-transformers",
145
- # "BACKUP_FILESYSTEM_PATH": "/tmp/backups",
146
- # "PERSISTENCE_DATA_PATH": "/var/lib/weaviate",
147
- # "DEFAULT_VECTORIZER_MODULE": "text2vec-transformers"
148
- # #"TRANSFORMERS_INFERENCE_API": "http://huggingface.co/spaces/MVPilgrim/WeaviateDB:8080"
149
- #
150
- # }
151
- # )
152
- #)
153
-
154
- #client = weaviate.connect_to_custom(
155
- # #http_host="http://huggingface.co/spaces/MVPilgrim/WeaviateDB",
156
- # http_host="http://weaviate",
157
- # http_port=8080,
158
- # http_secure=False,
159
- # #grpc_host="huggingface.co",
160
- # grpc_host="127.0.0.1",
161
- # grpc_port=50051,
162
- # grpc_secure=False
163
- # #auth_credentials=AuthApiKey(weaviate_key), # `weaviate_key`: your Weaviate API key
164
- #)
165
 
166
- client = weaviate.Client(
167
- url="http://localhost:8080"
 
 
 
 
 
 
 
 
168
  )
169
-
170
- #client = weaviate.connect_to_local(
171
- # #cluster_url="http://localhost:8080"
172
- #)
173
- print("#### client: ",client)
174
-
175
  client.connect()
176
 
 
 
 
 
177
  for filename in os.listdir(pathString):
178
- print(filename)
179
  path = Path(pathString + "/" + filename)
180
  filename = filename.rstrip(".html")
181
  webpageDocNames.append(filename)
@@ -185,38 +173,43 @@ for filename in os.listdir(pathString):
185
  title = htmlData[0].metadata['title']
186
  page_content = htmlData[0].page_content
187
 
188
- # Clean data. Remove multiple newlines, etc.
189
  page_content = re.sub(r'\n+', '\n',page_content)
190
 
191
  page_contentArray.append(page_content);
192
  webpageTitles.append(title)
193
- #htmlDocument = htmlData[0]
194
  max_tokens = 1000
195
  tokenizer = Tokenizer.from_pretrained("bert-base-uncased")
 
196
  splitter = HuggingFaceTextSplitter(tokenizer, trim_chunks=True)
197
  chunksOnePage = splitter.chunks(page_content, chunk_capacity=50)
198
 
199
  chunks = []
200
  for chnk in chunksOnePage:
201
- #print("\n\n#### chnk: ",chnk)
202
  chunks.append(chnk)
203
- #print("chunks: ",chunks)
204
  webpageChunks.append(chunks)
205
  webpageChunksDocNames.append(filename + "Chunks")
206
 
207
- print("### filename, title: ",filename,",",title)
208
- print("### webpageDocNames: ",webpageDocNames)
 
209
 
 
 
210
  wpCollection = createWebpageCollection()
211
  wpChunkCollection = createChunksCollection()
212
 
 
 
 
213
  for i, className in enumerate(webpageDocNames):
214
  title = webpageTitles[i]
215
- print("## className, title: ",className,",",title)
216
  # Create Webpage Object
217
  page_content = page_contentArray[i]
218
- #print("\n#### page_content: ",page_content)
219
-
220
  wpCollectionObj_uuid = wpCollection.data.insert(
221
  {
222
  "name": className,
@@ -225,8 +218,8 @@ for i, className in enumerate(webpageDocNames):
225
  }
226
  )
227
 
 
228
  for i2, chunk in enumerate(webpageChunks[i]):
229
- #print("#### chunk: ",chunk)
230
  chunk_uuid = wpChunkCollection.data.insert(
231
  {
232
  "title": title,
@@ -238,55 +231,44 @@ for i, className in enumerate(webpageDocNames):
238
  }
239
  }
240
  )
241
- #print("### chunk_index,chunk: ",i2,",",chunk[0:20])
242
 
243
- #text = "List the main capabilities of artificial intelligence."
244
- #text = "List three of the greatest Norwegian authors."
245
- #text = "turkey burgers golden fried with lots of mayonaise"
246
  text = "human-made computer cognitive ability"
247
- #text = "literature authors"
248
- #text = "artifical intelligence"
249
 
250
 
251
- model = SentenceTransformer('../multi-qa-MiniLM-L6-cos-v1')
 
 
 
 
252
  vector = model.encode(text)
253
- #print("#### vector: ",vector[0])
254
  vectorList = []
255
 
 
256
  for vec in vector:
257
  vectorList.append(vec)
258
- print("vectorList: ",vectorList[2])
259
 
 
 
260
  semChunks = wpChunkCollection.query.near_vector(
261
  near_vector=vectorList,
262
  distance=0.7,
263
  limit=3
264
  )
265
- print("### semChunks[0]: ",semChunks)
266
- #print("### semChunks.objects[0]: ",semChunks.objects[0])
267
 
 
 
268
  for chunk in enumerate(semChunks.objects):
269
- print("\n\n#### chunk: ",chunk)
270
- #webpage_uuid = chunk.properties['references']['webpage']
271
- #webpage_uuid = chunk.references.webpage
272
  webpage_uuid = chunk[1].properties['references']['webpage']
273
- print("\nwebpage_uuid: ",webpage_uuid)
274
  wpFromChunk = wpCollection.query.fetch_object_by_id(webpage_uuid)
275
- print("\n\n### wpFromChunk title: ",wpFromChunk.properties['title'])
276
 
 
 
277
 
278
- #print("response: ",response)
279
-
280
- if False:
281
- client = weaviate.connect_to_local(
282
- #cluster_url="http://localhost:8080"
283
- )
284
-
285
- for item in wpCollection.iterator():
286
- print(print("\n## webpage collection: ",item.uuid, item.properties))
287
-
288
- for item in wpChunkCollection.iterator():
289
- print(print("\n## chunk collection: ",item.uuid, item.properties))
290
-
291
- client.close()
292
-
 
1
  import weaviate
2
+
 
3
  from sentence_transformers import SentenceTransformer
4
  from langchain_community.document_loaders import BSHTMLLoader
5
  from pathlib import Path
 
10
  import json
11
  import os
12
  import re
13
+ import logging
14
+
15
+ weaviate_logger = logging.getLogger("httpx")
16
+ weaviate_logger.setLevel(logging.WARNING)
17
+
18
+ logger = logging.getLogger(__name__)
19
+ logging.basicConfig(level=logging.INFO)
20
+
21
 
22
+ #################################################################
23
+ # Create the chunks collection for the Weaviate database.
24
  def createChunksCollection():
25
+ logger.info("#### createChunksCollection() entered.")
26
  if client.collections.exists("Chunks"):
27
  client.collections.delete("Chunks")
28
 
 
71
  }
72
  ]
73
  }
 
74
  return(client.collections.create_from_dict(class_obj))
75
 
76
+
77
+ #####################################################################
78
+ # Create the document collection for the Weaviate database.
79
  def createWebpageCollection():
80
+ logger.info("#### createWebpageCollection() entered.")
81
  if client.collections.exists("Documents"):
82
  client.collections.delete("Documents")
83
 
 
95
  "distance": "cosine",
96
  },
97
  "properties": [
 
 
 
 
 
98
  {
99
  "name": "title",
100
  "dataType": ["text"],
 
127
  }
128
  ]
129
  }
 
130
  return(client.collections.create_from_dict(class_obj))
131
 
132
 
133
+ ######################################################################
134
  # MAINLINE
135
  #
136
+ logger.info("#### MAINLINE ENTERED.")
137
+
138
  #pathString = "/Users/660565/KPSAllInOne/ProgramFilesX86/WebCopy/DownloadedWebSites/LLMPOC_HTML"
139
+ pathString = "/app/inputDocs"
140
  chunks = []
141
  webpageDocNames = []
 
142
  page_contentArray = []
143
  webpageChunks = []
144
  webpageTitles = []
145
  webpageChunksDocNames = []
146
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
 
148
+ ######################################################
149
+ # Connect to the Weaviate vector database.
150
+ logger.info("#### Create Weaviate db client connection.")
151
+ client = weaviate.connect_to_custom(
152
+ http_host="127.0.0.1",
153
+ http_port=8080,
154
+ http_secure=False,
155
+ grpc_host="127.0.0.1",
156
+ grpc_port=50051,
157
+ grpc_secure=False
158
  )
 
 
 
 
 
 
159
  client.connect()
160
 
161
+ #######################################################
162
+ # Read each text input file, parse it into a document,
163
+ # chunk it, collect chunks and document name.
164
+ logger.info("#### Read and chunk input text files.")
165
  for filename in os.listdir(pathString):
166
+ logger.info(filename)
167
  path = Path(pathString + "/" + filename)
168
  filename = filename.rstrip(".html")
169
  webpageDocNames.append(filename)
 
173
  title = htmlData[0].metadata['title']
174
  page_content = htmlData[0].page_content
175
 
176
+ # Clean data. Remove multiple newlines, etc.
177
  page_content = re.sub(r'\n+', '\n',page_content)
178
 
179
  page_contentArray.append(page_content);
180
  webpageTitles.append(title)
 
181
  max_tokens = 1000
182
  tokenizer = Tokenizer.from_pretrained("bert-base-uncased")
183
+ logger.debug(f"### tokenizer: {tokenizer}")
184
  splitter = HuggingFaceTextSplitter(tokenizer, trim_chunks=True)
185
  chunksOnePage = splitter.chunks(page_content, chunk_capacity=50)
186
 
187
  chunks = []
188
  for chnk in chunksOnePage:
189
+ logger.debug(f"#### chnk in file: {chnk}")
190
  chunks.append(chnk)
191
+ logger.debug(f"chunks: {chunks}")
192
  webpageChunks.append(chunks)
193
  webpageChunksDocNames.append(filename + "Chunks")
194
 
195
+ logger.debug(f"### filename, title: {filename}, {title}")
196
+
197
+ logger.debug(f"### webpageDocNames: {webpageDocNames}")
198
 
199
+ ######################################################
200
+ # Create database webpage and chunks collections.
201
  wpCollection = createWebpageCollection()
202
  wpChunkCollection = createChunksCollection()
203
 
204
+ ###########################################################
205
+ # Create document and chunks objects in the database.
206
+ logger.info("#### Create page/doc and chunk db objects.")
207
  for i, className in enumerate(webpageDocNames):
208
  title = webpageTitles[i]
209
+ logger.debug(f"## className, title: {className}, {title}")
210
  # Create Webpage Object
211
  page_content = page_contentArray[i]
212
+ # Insert the document.
 
213
  wpCollectionObj_uuid = wpCollection.data.insert(
214
  {
215
  "name": className,
 
218
  }
219
  )
220
 
221
+ # Insert the chunks for the document.
222
  for i2, chunk in enumerate(webpageChunks[i]):
 
223
  chunk_uuid = wpChunkCollection.data.insert(
224
  {
225
  "title": title,
 
231
  }
232
  }
233
  )
 
234
 
235
+ ###############################################################################
236
+ # text contains prompt for vector DB.
 
237
  text = "human-made computer cognitive ability"
 
 
238
 
239
 
240
+ ###############################################################################
241
+ # Initialize the sentence transformer and encode the query prompt.
242
+ logger.info(f"#### Encode text query prompt to create vectors. {text}")
243
+ model = SentenceTransformer('/app/multi-qa-MiniLM-L6-cos-v1')
244
+
245
  vector = model.encode(text)
 
246
  vectorList = []
247
 
248
+ logger.debug("#### Print vectors.")
249
  for vec in vector:
250
  vectorList.append(vec)
251
+ logger.debug(f"vectorList: {vectorList[2]}")
252
 
253
+ # Fetch chunks and print chunks.
254
+ logger.info("#### Retrieve semchunks from db using vectors from prompt.")
255
  semChunks = wpChunkCollection.query.near_vector(
256
  near_vector=vectorList,
257
  distance=0.7,
258
  limit=3
259
  )
260
+ logger.debug(f"### semChunks[0]: {semChunks}")
 
261
 
262
+ # Print chunks, corresponding document and document title.
263
+ logger.info("#### Print individual retrieved chunks.")
264
  for chunk in enumerate(semChunks.objects):
265
+ logger.info(f"#### chunk: {chunk}")
 
 
266
  webpage_uuid = chunk[1].properties['references']['webpage']
267
+ logger.info(f"webpage_uuid: {webpage_uuid}")
268
  wpFromChunk = wpCollection.query.fetch_object_by_id(webpage_uuid)
269
+ logger.info(f"### wpFromChunk title: {wpFromChunk.properties['title']}")
270
 
271
+ logger.info("#### Closing client db connection.")
272
+ client.close()
273
 
274
+ logger.info("#### Program terminating.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
startup.sh CHANGED
@@ -1,20 +1,32 @@
1
  #! /bin/bash
2
 
3
  echo "#### startup.sh entered."
4
- ls -l /
5
- ls -l /lib
6
- ls -l /usr/lib64
7
 
8
- echo "#### ldd weaviate"
9
- ldd /app/weaviate
 
 
10
 
 
 
11
  echo "#### Before /app/weaviate"
12
- /app/weaviate --host 127.0.0.1 --port 8080 --scheme http &
 
 
 
 
 
 
13
 
14
  echo "#### Before sleep."
15
- sleep 10
16
 
17
  echo "#### Before /app/semsearch.py"
18
  python /app/semsearch.py &
19
 
20
- wait
 
 
 
1
  #! /bin/bash
2
 
3
  echo "#### startup.sh entered."
4
+ echo "### ls -l /app"; ls -l /app
5
+ echo "### ls -l /app/weaviate"; ls -l /app/weaviate
6
+ echo "### ls -l /app/text2vec-transformers"; ls -l /app/text2vec-transformers
7
 
8
+ ################################################
9
+ # Start text2vec-transformers
10
+ echo "#### Before /app/text2vec-transformers"
11
+ /app/text2vec-transformers/bin/uvicorn app:app --host 0.0.0.0 --port 8081 --log-level warning &
12
 
13
+ ###############################################
14
+ # Start the weaviate vector database server.
15
  echo "#### Before /app/weaviate"
16
+ export AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED=true \
17
+ PERSISTENCE_DATA_PATH=/var/lib/weaviate \
18
+ DEFAULT_VECTORIZER_MODULE=text2vec-transformers \
19
+ ENABLE_MODULES=text2vec-transformers \
20
+ TRANSFORMERS_INFERENCE_API=http://127.0.0.1:8081 \
21
+ LOG_LEVEL=warning
22
+ /app/weaviate/weaviate --host 127.0.0.1 --port 8080 --scheme http &
23
 
24
  echo "#### Before sleep."
25
+ sleep 60
26
 
27
  echo "#### Before /app/semsearch.py"
28
  python /app/semsearch.py &
29
 
30
+ wait
31
+
32
+