MVPilgrim committed on
Commit
8ca4dcf
·
1 Parent(s): 43e405a
Files changed (2) hide show
  1. Dockerfile +5 -1
  2. app.py +26 -10
Dockerfile CHANGED
@@ -61,15 +61,19 @@ ENV PATH="/usr/bin/local:/app/text2vec-transformers:/app/text2vec-transformers/b
61
  #RUN pip install nltk==3.8.1 optimum==1.13.2 onnxruntime==1.16.1 onnx==1.14.1
62
  RUN ./custom_prerequisites.py
63
 
64
-
65
  ##############################
66
  RUN useradd -m -u 1000 user
67
 
 
 
 
 
68
  #############################################
69
  # Specify /data volume.
70
  #VOLUME /data
71
 
72
  WORKDIR /app
 
73
 
74
  ##############################################################################
75
  # Start the weaviate vector database, text2vec-transformers and the semantic search app.
 
61
  #RUN pip install nltk==3.8.1 optimum==1.13.2 onnxruntime==1.16.1 onnx==1.14.1
62
  RUN ./custom_prerequisites.py
63
 
 
64
  ##############################
65
  RUN useradd -m -u 1000 user
66
 
67
+ RUN chmod -R 755 /app
68
+ RUN chown -R user /app
69
+ RUN chgrp -R user /app
70
+
71
  #############################################
72
  # Specify /data volume.
73
  #VOLUME /data
74
 
75
  WORKDIR /app
76
+ USER user
77
 
78
  ##############################################################################
79
  # Start the weaviate vector database, text2vec-transformers and the semantic search app.
app.py CHANGED
@@ -36,7 +36,7 @@ try:
36
 
37
 
38
  def runStartup():
39
- logger.info("### Running startup.sh")
40
  result = ""
41
  try:
42
  #result = subprocess.run("/app/startup.sh",shell=False,capture_output=None,text=None,timeout=300)
@@ -55,7 +55,7 @@ try:
55
  except Exception as e2:
56
  emsg = str(e2)
57
  logger.error(f"#### Displaying startup.log EXCEPTION. e2: {emsg}")
58
- logger.info("### Running startup.sh complete")
59
  if 'runStartup' not in st.session_state:
60
  st.session_state.runStartup = True
61
  runStartup()
@@ -69,8 +69,10 @@ try:
69
 
70
  # Function to load the CSS file
71
  def load_css(file_name):
 
72
  with open(file_name) as f:
73
  st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)
 
74
 
75
  # Load the custom CSS
76
  if 'load_css' not in st.session_state:
@@ -93,7 +95,7 @@ try:
93
  # Connect to the Weaviate vector database.
94
  #if 'client' not in st.session_state:
95
  if 'client' not in st.session_state:
96
- logger.info("#### Create Weaviate db client connection.")
97
  client = weaviate.WeaviateClient(
98
  connection_params=ConnectionParams.from_params(
99
  http_host="localhost",
@@ -109,6 +111,7 @@ try:
109
  )
110
  client.connect()
111
  st.session_state.client = client
 
112
  else:
113
  client = st.session_state.client
114
 
@@ -116,8 +119,8 @@ try:
116
  #######################################################
117
  # Read each text input file, parse it into a document,
118
  # chunk it, collect chunks and document name.
119
- if not client.collections.exists("Documents") or not client.collections.exists("Documes") :
120
- logger.info("#### Read and chunk input text files.")
121
  for filename in os.listdir(pathString):
122
  logger.info(filename)
123
  path = Path(pathString + "/" + filename)
@@ -150,6 +153,7 @@ try:
150
 
151
  logger.debug(f"### filename, title: {filename}, {title}")
152
  logger.debug(f"### webpageDocNames: {webpageDocNames}")
 
153
 
154
 
155
 
@@ -158,7 +162,7 @@ try:
158
  #wpCollection = createWebpageCollection()
159
  #wpChunkCollection = createChunksCollection()
160
  if not client.collections.exists("Documents"):
161
- logger.info("#### createWebpageCollection() entered.")
162
  #client.collections.delete("Documents")
163
  class_obj = {
164
  "class": "Documents",
@@ -207,10 +211,11 @@ try:
207
  ]
208
  }
209
  wpCollection = client.collections.create_from_dict(class_obj)
 
210
 
211
 
212
  if not client.collections.exists("Chunks"):
213
- logger.info("#### createChunksCollection() entered.")
214
  #client.collections.delete("Chunks")
215
  class_obj = {
216
  "class": "Chunks",
@@ -258,12 +263,13 @@ try:
258
  ]
259
  }
260
  wpChunkCollection = client.collections.create_from_dict(class_obj)
 
261
 
262
 
263
  ###########################################################
264
  # Create document and chunks objects in the database.
265
  if not client.collections.exists("Documents") :
266
- logger.info("#### Create page/doc db objects.")
267
  for i, className in enumerate(webpageDocNames):
268
  title = webpageTitles[i]
269
  logger.debug(f"## className, title: {className}, {title}")
@@ -277,9 +283,10 @@ try:
277
  "content": page_content
278
  }
279
  )
 
280
 
281
  if not client.collections.exists("Chunks") :
282
- logger.info("#### Create chunk db objects.")
283
  # Insert the chunks for the document.
284
  for i2, chunk in enumerate(webpageChunks[i]):
285
  chunk_uuid = wpChunkCollection.data.insert(
@@ -293,12 +300,14 @@ try:
293
  }
294
  }
295
  )
 
296
 
297
 
298
  #################################################################
299
  # Initialize the LLM.
300
  model_path = "/app/llama-2-7b-chat.Q4_0.gguf"
301
  if 'llm' not in st.session_state:
 
302
  llm = Llama(model_path,
303
  #*,
304
  n_gpu_layers=0,
@@ -340,6 +349,7 @@ try:
340
  verbose=True
341
  )
342
  st.session_state.llm = llm
 
343
  else:
344
  llm = st.session_state.llm
345
 
@@ -415,6 +425,7 @@ try:
415
  st.session_state.rspTA = st.text_area(label="rspTA",value=st.session_state.rspTA)
416
 
417
  def runLLM(prompt):
 
418
  max_tokens = 1000
419
  temperature = 0.3
420
  top_p = 0.1
@@ -430,6 +441,7 @@ try:
430
  stop=stop,
431
  )
432
  result = modelOutput["choices"][0]["text"].strip()
 
433
  return(result)
434
 
435
  def setPrompt(pprompt,ragFlag):
@@ -445,11 +457,13 @@ try:
445
  else:
446
  userPrompt = pprompt
447
  #prompt = f""" <s> [INST] <<SYS>> {systemTextArea.value} </SYS>> Q: {userPrompt} A: [/INST]"""
 
448
  return userPrompt
449
 
450
 
451
  def on_submitButton_clicked():
452
  logger = st.session_state.logger
 
453
  logger.debug("\n### on_submitButton_clicked")
454
  st.session_state.sysTAtext = st.session_state.sysTA
455
  logger.info(f"sysTAtext: {st.session_state.sysTAtext}")
@@ -461,13 +475,15 @@ try:
461
  st.session_state.rspTAtext = runLLM(st.session_state.userpTAtext)
462
  st.session_state.rspTA = st.session_state.rspTAtext
463
  logger.info(f"rspTAtext: {st.session_state.rspTAtext}")
 
 
464
 
465
 
466
  with st.sidebar:
467
  st.selectRag = st.checkbox("Enable Query With RAG",value=False,key="selectRag",help=None,on_change=None,args=None,kwargs=None,disabled=False,label_visibility="visible")
468
  st.submitButton = st.button("Run LLM Query",key=None,help=None,on_click=on_submitButton_clicked,args=None,kwargs=None,type="secondary",disabled=False,use_container_width=False)
469
 
470
- logger.info("#### semsearch.py terminating.")
471
  except Exception as e:
472
  try:
473
  emsg = str(e)
 
36
 
37
 
38
  def runStartup():
39
+ logger.debug("### Running startup.sh")
40
  result = ""
41
  try:
42
  #result = subprocess.run("/app/startup.sh",shell=False,capture_output=None,text=None,timeout=300)
 
55
  except Exception as e2:
56
  emsg = str(e2)
57
  logger.error(f"#### Displaying startup.log EXCEPTION. e2: {emsg}")
58
+ logger.debug("### Running startup.sh complete")
59
  if 'runStartup' not in st.session_state:
60
  st.session_state.runStartup = True
61
  runStartup()
 
69
 
70
  # Function to load the CSS file
71
  def load_css(file_name):
72
+ logger.debug("#### load_css entered.")
73
  with open(file_name) as f:
74
  st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)
75
+ logger.debug("#### load_css exited.")
76
 
77
  # Load the custom CSS
78
  if 'load_css' not in st.session_state:
 
95
  # Connect to the Weaviate vector database.
96
  #if 'client' not in st.session_state:
97
  if 'client' not in st.session_state:
98
+ logger.debug("#### Create Weaviate db client connection.")
99
  client = weaviate.WeaviateClient(
100
  connection_params=ConnectionParams.from_params(
101
  http_host="localhost",
 
111
  )
112
  client.connect()
113
  st.session_state.client = client
114
+ logger.debug("#### Create Weaviate db client connection exited.")
115
  else:
116
  client = st.session_state.client
117
 
 
119
  #######################################################
120
  # Read each text input file, parse it into a document,
121
  # chunk it, collect chunks and document name.
122
+ if not client.collections.exists("Documents") or not client.collections.exists("Chunks") :
123
+ logger.debug("#### Read and chunk input text files.")
124
  for filename in os.listdir(pathString):
125
  logger.info(filename)
126
  path = Path(pathString + "/" + filename)
 
153
 
154
  logger.debug(f"### filename, title: {filename}, {title}")
155
  logger.debug(f"### webpageDocNames: {webpageDocNames}")
156
+ logger.debug("#### Read and chunk input text files exited.")
157
 
158
 
159
 
 
162
  #wpCollection = createWebpageCollection()
163
  #wpChunkCollection = createChunksCollection()
164
  if not client.collections.exists("Documents"):
165
+ logger.debug("#### createWebpageCollection() entered.")
166
  #client.collections.delete("Documents")
167
  class_obj = {
168
  "class": "Documents",
 
211
  ]
212
  }
213
  wpCollection = client.collections.create_from_dict(class_obj)
214
+ logger.debug("#### createWebpageCollection() exited.")
215
 
216
 
217
  if not client.collections.exists("Chunks"):
218
+ logger.debug("#### createChunksCollection() entered.")
219
  #client.collections.delete("Chunks")
220
  class_obj = {
221
  "class": "Chunks",
 
263
  ]
264
  }
265
  wpChunkCollection = client.collections.create_from_dict(class_obj)
266
+ logger.debug("#### createChunksCollection() exited.")
267
 
268
 
269
  ###########################################################
270
  # Create document and chunks objects in the database.
271
  if not client.collections.exists("Documents") :
272
+ logger.debug("#### Create page/doc db objects.")
273
  for i, className in enumerate(webpageDocNames):
274
  title = webpageTitles[i]
275
  logger.debug(f"## className, title: {className}, {title}")
 
283
  "content": page_content
284
  }
285
  )
286
+ logger.debug("#### Create page/doc/db/objects exited.")
287
 
288
  if not client.collections.exists("Chunks") :
289
+ logger.debug("#### Create chunk db objects.")
290
  # Insert the chunks for the document.
291
  for i2, chunk in enumerate(webpageChunks[i]):
292
  chunk_uuid = wpChunkCollection.data.insert(
 
300
  }
301
  }
302
  )
303
+ logger.debug("#### Create chunk db objects exited.")
304
 
305
 
306
  #################################################################
307
  # Initialize the LLM.
308
  model_path = "/app/llama-2-7b-chat.Q4_0.gguf"
309
  if 'llm' not in st.session_state:
310
+ logger.debug("### Initializing LLM.")
311
  llm = Llama(model_path,
312
  #*,
313
  n_gpu_layers=0,
 
349
  verbose=True
350
  )
351
  st.session_state.llm = llm
352
+ logger.debug("### Initializing LLM exited.")
353
  else:
354
  llm = st.session_state.llm
355
 
 
425
  st.session_state.rspTA = st.text_area(label="rspTA",value=st.session_state.rspTA)
426
 
427
  def runLLM(prompt):
428
+ logger.debug("### runLLM entered.")
429
  max_tokens = 1000
430
  temperature = 0.3
431
  top_p = 0.1
 
441
  stop=stop,
442
  )
443
  result = modelOutput["choices"][0]["text"].strip()
444
+ logger.debug("### runLLM exited.")
445
  return(result)
446
 
447
  def setPrompt(pprompt,ragFlag):
 
457
  else:
458
  userPrompt = pprompt
459
  #prompt = f""" <s> [INST] <<SYS>> {systemTextArea.value} </SYS>> Q: {userPrompt} A: [/INST]"""
460
+ logger.debug("setPrompt exited.")
461
  return userPrompt
462
 
463
 
464
  def on_submitButton_clicked():
465
  logger = st.session_state.logger
466
+ logger.debug("### on_submitButton_clicked entered.")
467
  logger.debug("\n### on_submitButton_clicked")
468
  st.session_state.sysTAtext = st.session_state.sysTA
469
  logger.info(f"sysTAtext: {st.session_state.sysTAtext}")
 
475
  st.session_state.rspTAtext = runLLM(st.session_state.userpTAtext)
476
  st.session_state.rspTA = st.session_state.rspTAtext
477
  logger.info(f"rspTAtext: {st.session_state.rspTAtext}")
478
+
479
+ logger.debug("### on_submitButton_clicked exited.")
480
 
481
 
482
  with st.sidebar:
483
  st.selectRag = st.checkbox("Enable Query With RAG",value=False,key="selectRag",help=None,on_change=None,args=None,kwargs=None,disabled=False,label_visibility="visible")
484
  st.submitButton = st.button("Run LLM Query",key=None,help=None,on_click=on_submitButton_clicked,args=None,kwargs=None,type="secondary",disabled=False,use_container_width=False)
485
 
486
+ logger.info("#### semsearch.py end of code.")
487
  except Exception as e:
488
  try:
489
  emsg = str(e)