MVPilgrim committed on
Commit
2430b06
·
1 Parent(s): 00cc465
Files changed (2) hide show
  1. app.py +86 -63
  2. startup.sh +6 -52
app.py CHANGED
@@ -47,16 +47,12 @@ try:
47
  def runStartup():
48
  logger.info("### Running startup.sh")
49
  try:
50
- #result = subprocess.run("/app/startup.sh",shell=False,capture_output=None,
51
- # text=None,timeout=300)
52
- #logger.info(f"startup.sh stdout: {result.stdout}")
53
- #logger.info(f"startup.sh stderr: {result.stderr}")
54
- #logger.info(f"Return code: {result.returncode}")
55
  subprocess.Popen(["/app/startup.sh"])
 
56
  time.sleep(180)
57
  except Exception as e:
58
  emsg = str(e)
59
- logger.ERROR(f"subprocess.run EXCEPTION. e: {emsg}")
60
  try:
61
  with open("/app/startup.log", "r") as file:
62
  content = file.read()
@@ -70,10 +66,9 @@ try:
70
  runStartup()
71
 
72
 
73
-
74
-
75
-
76
- # Function to load the CSS file
77
  def load_css(file_name):
78
  logger.info("#### load_css entered.")
79
  with open(file_name) as f:
@@ -83,11 +78,10 @@ try:
83
  load_css(".streamlit/main.css")
84
  st.session_state.load_css = True
85
 
86
- st.markdown("<h1 style='text-align: center; color: #666666;'>RAG Vector Database</h1>",
 
87
  unsafe_allow_html=True)
88
- st.markdown("<h1 style='text-align: center; color: #666666;'>RAG Vector Database</h1>",
89
- unsafe_allow_html=True)
90
-
91
  pathString = "/app/inputDocs"
92
  chunks = []
93
  webpageDocNames = []
@@ -96,6 +90,7 @@ try:
96
  webpageTitles = []
97
  webpageChunksDocNames = []
98
 
 
99
  ############################################
100
  # Connect to the Weaviate vector database. #
101
  ############################################
@@ -126,7 +121,7 @@ try:
126
  # chunk it, collect chunks and document names. #
127
  ########################################################
128
  if not client.collections.exists("Documents") or not client.collections.exists("Chunks") :
129
- logger.info("#### Read and chunk input text files.")
130
  for filename in os.listdir(pathString):
131
  logger.debug(filename)
132
  path = Path(pathString + "/" + filename)
@@ -159,7 +154,7 @@ try:
159
 
160
  logger.info(f"### filename, title: {filename}, {title}")
161
  logger.info(f"### webpageDocNames: {webpageDocNames}")
162
- logger.info("#### Read and chunk input text files exited.")
163
 
164
 
165
  #############################################################
@@ -167,8 +162,7 @@ try:
167
  # Each chunk schema points to its corresponding document. #
168
  #############################################################
169
  if not client.collections.exists("Documents"):
170
- logger.info("#### createWebpageCollection() entered.")
171
- #client.collections.delete("Documents")
172
  class_obj = {
173
  "class": "Documents",
174
  "description": "For first attempt at loading a Weviate database.",
@@ -217,14 +211,14 @@ try:
217
  }
218
  wpCollection = client.collections.create_from_dict(class_obj)
219
  st.session_state.wpCollection = wpCollection
220
- logger.info("#### createWebpageCollection() exited.")
221
  else:
222
  wpCollection = st.session_state.wpCollection
223
  st.session_state.wpCollection = wpCollection
224
 
225
-
226
  if not client.collections.exists("Chunks"):
227
- logger.info("#### createChunksCollection() entered.")
228
  #client.collections.delete("Chunks")
229
  class_obj = {
230
  "class": "Chunks",
@@ -273,7 +267,7 @@ try:
273
  }
274
  wpChunksCollection = client.collections.create_from_dict(class_obj)
275
  st.session_state.wpChunksCollection = wpChunksCollection
276
- logger.info("#### createChunksCollection() exited.")
277
  else:
278
  wpChunksCollection = client.collections.get("Chunks")
279
  st.session_state.wpChunksCollection = wpChunksCollection
@@ -283,9 +277,10 @@ try:
283
  # Create the actual document and chunks objects in the database. #
284
  ##################################################################
285
  if 'dbObjsCreated' not in st.session_state:
286
- logger.info("#### Create db objects.")
287
  st.session_state.dbObjsCreated = True
288
  for i, className in enumerate(webpageDocNames):
 
289
  title = webpageTitles[i]
290
  logger.debug(f"## className, title: {className}, {title}")
291
  # Create Webpage Object
@@ -298,7 +293,7 @@ try:
298
  "content": page_content
299
  }
300
  )
301
- logger.info("#### page/doc/db/objects created.")
302
 
303
  logger.info("#### Create chunk db objects.")
304
  st.session_state.wpChunksCollection = wpChunksCollection
@@ -316,7 +311,7 @@ try:
316
  }
317
  )
318
  logger.info("#### Create chunk db objects created.")
319
- logger.info("#### db objects created.")
320
 
321
 
322
  #######################
@@ -366,30 +361,34 @@ try:
366
  verbose=False
367
  )
368
  st.session_state.llm = llm
369
- logger.info("### Initializing LLM exited.")
370
  else:
371
  llm = st.session_state.llm
372
 
 
 
 
 
373
  def getRagData(promptText):
374
  logger.info("#### getRagData() entered.")
375
  ###############################################################################
376
  # Initialize the sentence transformer and encode the query prompt.
377
- logger.info(f"#### Encode text query prompt to create vectors. {promptText}")
378
  model = SentenceTransformer('/app/multi-qa-MiniLM-L6-cos-v1')
379
 
380
  vector = model.encode(promptText)
381
  wrks = str(vector)
382
- logger.info(f"### vector: {wrks}")
383
  vectorList = []
384
 
385
- logger.info("#### Print vectors.")
386
  for vec in vector:
387
  vectorList.append(vec)
388
  wrks = str(vectorList)
389
- logger.info(f"vectorList: {wrks}")
390
 
391
  # Fetch chunks and print chunks.
392
- logger.info("#### Retrieve semchunks from db using vectors from prompt.")
393
  wpChunksCollection = st.session_state.wpChunksCollection
394
  semChunks = wpChunksCollection.query.near_vector(
395
  near_vector=vectorList,
@@ -397,25 +396,28 @@ try:
397
  limit=3
398
  )
399
  wrks = str(semChunks)
400
- logger.info(f"### semChunks[0]: {wrks}")
401
 
402
  # Print chunks, corresponding document and document title.
403
  ragData = ""
404
- logger.info("#### Print individual retrieved chunks.")
405
  wpCollection = st.session_state.wpCollection
406
  for chunk in enumerate(semChunks.objects):
407
- logger.info(f"#### chunk: {chunk}")
408
  ragData = ragData + chunk[1].properties['chunk'] + "\n"
409
  webpage_uuid = chunk[1].properties['references']['webpage']
410
- logger.info(f"webpage_uuid: {webpage_uuid}")
411
  wpFromChunk = wpCollection.query.fetch_object_by_id(webpage_uuid)
412
- logger.info(f"### wpFromChunk title: {wpFromChunk.properties['title']}")
413
  #collection = client.collections.get("Chunks")
414
- logger.info("#### ragData: {ragData}")
415
  logger.info("#### getRagData() exited.")
416
  return ragData
417
-
418
-
 
 
 
419
  def getAllRagData():
420
  logger.info("#### getAllRagData() entered.")
421
 
@@ -423,16 +425,17 @@ try:
423
  response = chunksCollection.query.fetch_objects()
424
 
425
  wstrObjs = str(response.objects)
426
- logger.info(f"### response.objects: {wstrObjs}")
427
  for o in response.objects:
428
  wstr = o.properties
429
- logger.info(f"### o.properties: {wstr}")
 
430
  return wstrObjs
431
 
432
-
433
- # Display UI
 
434
  col1, col2 = st.columns(2)
435
-
436
  with col1:
437
  if "sysTA" not in st.session_state:
438
  st.session_state.sysTA = st.text_area(label="System Prompt",placeholder="You are a helpful AI assistant", help="Instruct the LLM about how to handle the user prompt.")
@@ -465,7 +468,11 @@ try:
465
  st.session_state.rspTA = st.text_area(label="LLM Completion",value=st.session_state.rspTAtext,placeholder="LLM completion.",help="Output area for LLM completion (response).")
466
  else:
467
  st.session_state.rspTA = st.text_area(label="LLM Completion",value=st.session_state.rspTA,placeholder="LLM completion.",help="Output area for LLM completion (response).")
468
-
 
 
 
 
469
  def runLLM(prompt):
470
  logger = st.session_state.logger
471
  logger.info("### runLLM entered.")
@@ -487,25 +494,22 @@ try:
487
  )
488
  result = modelOutput["choices"][0]["message"]["content"]
489
  result = str(modelOutput)
490
- logger.info(f"### llmResult: {result}")
491
  logger.info("### runLLM exited.")
492
  return result
493
 
 
 
 
 
494
  def setPrompt(pprompt,ragFlag):
495
  logger = st.session_state.logger
496
- logger.info(f"\n### setPrompt() entered. ragFlag: {ragFlag}")
497
  if ragFlag:
498
  ragPrompt = getRagData(pprompt)
499
  st.session_state.ragpTA = ragPrompt
500
  userPrompt = pprompt + "\n" + ragPrompt
501
  prompt = userPrompt
502
- #userPrompt = "This prompt is divided into two main sections. " \
503
- # "The first section starts with 'MAINPROMPT:' which is the actual question or instruction of the prompt. " \
504
- # + "The second section of the prompt starts with 'ADDITIONALCONTEXT:'. It contains additional information to evaluate along with " \
505
- # + "information within from the large language model itself. " \
506
- # + "Use it to clarify and supplement the prompt, but otherwise make sure to process the prompt in the standard manner. " \
507
- # + "MAINPROMPT: " + pprompt + " " \
508
- # + "ADDITIONALCONTEXT: " + ragPrompt
509
  userPrompt = "<prompt>" \
510
  + "Answer the following question or carry out the following instruction and also supplement " \
511
  + "the LLM processing of the question or instruction using the retrieved information from the knowledge base. " \
@@ -518,7 +522,6 @@ try:
518
  + "</prompt>"
519
 
520
  else:
521
- #userPrompt = st.session_state.sysTA + " " + pprompt
522
  userPrompt = pprompt
523
 
524
  fullPrompt = [
@@ -528,48 +531,62 @@ try:
528
  "content": userPrompt
529
  }
530
  ]
531
- #fullPrompt = userPrompt
532
- logger.info(f"### userPrompt: {userPrompt}")
533
  logger.info("setPrompt exited.")
534
  return fullPrompt
535
 
536
 
 
 
 
537
  def formatJson(jsonText):
538
  try:
539
- logger.info(f"#### formatJson jsonText: {jsonText}")
540
  if not isinstance(jsonText,str):
541
  jsonText = str(jsonText)
542
  jsonData = json.loads(jsonText)
543
  formattedJson = json.dumps(jsonData, indent=2)
544
  return formattedJson
545
  except json.JSONDecodeError as e:
546
- logger.error(f"Invalid JSON text - {str(e)}")
547
  return jsonText
548
 
 
 
 
 
549
  def on_runLLMButton_Clicked():
550
  logger = st.session_state.logger
551
  logger.info("### on_runLLMButton_Clicked entered.")
552
  st.session_state.sysTAtext = st.session_state.sysTA
553
- logger.info(f"sysTAtext: {st.session_state.sysTAtext}")
554
 
555
  #st.session_state.userpTAtext = st.session_state.userpTA
556
  wrkList = setPrompt(st.session_state.userpTA,st.selectRag)
557
  st.session_state.userpTA = formatJson(wrkList)
558
- logger.info(f"userpTAtext: {st.session_state.userpTA}")
559
 
560
  #st.session_state.rspTAtext = runLLM(st.session_state.userpTAtext)
561
  rsp = runLLM(wrkList)
562
  st.session_state.rspTA = formatJson(rsp)
563
- logger.info(f"rspTAtext: {st.session_state.rspTA}")
564
 
565
  logger.info("### on_runLLMButton_Clicked exited.")
566
 
 
 
 
 
567
  def on_getAllRagDataButton_Clicked():
568
  logger = st.session_state.logger
569
  logger.info("### on_getAllRagButton_Clicked entered.")
570
  st.session_state.ragpTA = formatJson(getAllRagData())
571
  logger.info("### on_getAllRagButton_Clicked exited.")
572
 
 
 
 
 
573
  def on_resetButton_Clicked():
574
  logger = st.session_state.logger
575
  logger.info("### on_Button_Clicked entered.")
@@ -577,14 +594,20 @@ try:
577
  st.session_state.userpTA = ""
578
  st.session_state.ragpTA = ""
579
  st.session_state.rspTA = ""
 
580
 
 
 
 
 
581
  with st.sidebar:
582
  st.selectRag = st.checkbox("Enable RAG",value=False,key="selectRag",help=None,on_change=None,args=None,kwargs=None,disabled=False,label_visibility="visible")
583
  st.runLLMButton = st.button("Run LLM Prompt",key=None,help=None,on_click=on_runLLMButton_Clicked,args=None,kwargs=None,type="secondary",disabled=False,use_container_width=False)
584
  st.getAllRagDataButton = st.button("Get All Rag Data",key=None,help=None,on_click=on_getAllRagDataButton_Clicked,args=None,kwargs=None,type="secondary",disabled=False,use_container_width=False)
585
  st.resetButton = st.button("Reset",key=None,help=None,on_click=on_resetButton_Clicked,args=None,kwargs=None,type="secondary",disabled=False,use_container_width=False)
586
 
587
- logger.info("#### semsearch.py end of code.")
 
588
  except Exception as e:
589
  try:
590
  emsg = str(e)
 
47
  def runStartup():
48
  logger.info("### Running startup.sh")
49
  try:
 
 
 
 
 
50
  subprocess.Popen(["/app/startup.sh"])
51
+ # Wait for text2vec-transformers and Weaviate DB to initialize.
52
  time.sleep(180)
53
  except Exception as e:
54
  emsg = str(e)
55
+ logger.ERROR(f"### subprocess.run EXCEPTION. e: {emsg}")
56
  try:
57
  with open("/app/startup.log", "r") as file:
58
  content = file.read()
 
66
  runStartup()
67
 
68
 
69
+ ##########################################
70
+ # Function to load the CSS styling file. #
71
+ ##########################################
 
72
  def load_css(file_name):
73
  logger.info("#### load_css entered.")
74
  with open(file_name) as f:
 
78
  load_css(".streamlit/main.css")
79
  st.session_state.load_css = True
80
 
81
+ # Display UI heading.
82
+ st.markdown("<h1 style='text-align: center; color: #666666;'>LLM with RAG Vector Database Proof of Concept</h1>",
83
  unsafe_allow_html=True)
84
+
 
 
85
  pathString = "/app/inputDocs"
86
  chunks = []
87
  webpageDocNames = []
 
90
  webpageTitles = []
91
  webpageChunksDocNames = []
92
 
93
+
94
  ############################################
95
  # Connect to the Weaviate vector database. #
96
  ############################################
 
121
  # chunk it, collect chunks and document names. #
122
  ########################################################
123
  if not client.collections.exists("Documents") or not client.collections.exists("Chunks") :
124
+ logger.info("#### Read and chunk input RAG document files.")
125
  for filename in os.listdir(pathString):
126
  logger.debug(filename)
127
  path = Path(pathString + "/" + filename)
 
154
 
155
  logger.info(f"### filename, title: {filename}, {title}")
156
  logger.info(f"### webpageDocNames: {webpageDocNames}")
157
+ logger.info("#### Read and chunk input RAG document files.")
158
 
159
 
160
  #############################################################
 
162
  # Each chunk schema points to its corresponding document. #
163
  #############################################################
164
  if not client.collections.exists("Documents"):
165
+ logger.info("#### Create documents schema/collection started.")
 
166
  class_obj = {
167
  "class": "Documents",
168
  "description": "For first attempt at loading a Weviate database.",
 
211
  }
212
  wpCollection = client.collections.create_from_dict(class_obj)
213
  st.session_state.wpCollection = wpCollection
214
+ logger.info("#### Create documents schema/collection ended.")
215
  else:
216
  wpCollection = st.session_state.wpCollection
217
  st.session_state.wpCollection = wpCollection
218
 
219
+ # Create the Chunks schema/collection in the db if it does not already exist.
220
  if not client.collections.exists("Chunks"):
221
+ logger.info("#### create document chunks schema/collection started.")
222
  #client.collections.delete("Chunks")
223
  class_obj = {
224
  "class": "Chunks",
 
267
  }
268
  wpChunksCollection = client.collections.create_from_dict(class_obj)
269
  st.session_state.wpChunksCollection = wpChunksCollection
270
+ logger.info("#### create document chunks schedma/collection ended.")
271
  else:
272
  wpChunksCollection = client.collections.get("Chunks")
273
  st.session_state.wpChunksCollection = wpChunksCollection
 
277
  # Create the actual document and chunks objects in the database. #
278
  ##################################################################
279
  if 'dbObjsCreated' not in st.session_state:
280
+ logger.info("#### Create db document and chunk objects started.")
281
  st.session_state.dbObjsCreated = True
282
  for i, className in enumerate(webpageDocNames):
283
+ logger.info("#### Creating document object.")
284
  title = webpageTitles[i]
285
  logger.debug(f"## className, title: {className}, {title}")
286
  # Create Webpage Object
 
293
  "content": page_content
294
  }
295
  )
296
+ logger.info("#### Document object created.")
297
 
298
  logger.info("#### Create chunk db objects.")
299
  st.session_state.wpChunksCollection = wpChunksCollection
 
311
  }
312
  )
313
  logger.info("#### Create chunk db objects created.")
314
+ logger.info("#### Create db document and chunk objects ended.")
315
 
316
 
317
  #######################
 
361
  verbose=False
362
  )
363
  st.session_state.llm = llm
364
+ logger.info("### Initializing LLM completed.")
365
  else:
366
  llm = st.session_state.llm
367
 
368
+
369
+ #####################################################
370
+ # Get RAG data from vector db based on user prompt. #
371
+ #####################################################
372
  def getRagData(promptText):
373
  logger.info("#### getRagData() entered.")
374
  ###############################################################################
375
  # Initialize the sentence transformer and encode the query prompt.
376
+ logger.debug(f"#### Encode text query prompt to create vectors. {promptText}")
377
  model = SentenceTransformer('/app/multi-qa-MiniLM-L6-cos-v1')
378
 
379
  vector = model.encode(promptText)
380
  wrks = str(vector)
381
+ logger.debug(f"### vector: {wrks}")
382
  vectorList = []
383
 
384
+ logger.debug("#### Print vectors.")
385
  for vec in vector:
386
  vectorList.append(vec)
387
  wrks = str(vectorList)
388
+ logger.debug(f"vectorList: {wrks}")
389
 
390
  # Fetch chunks and print chunks.
391
+ logger.debug("#### Retrieve semchunks from db using vectors from prompt.")
392
  wpChunksCollection = st.session_state.wpChunksCollection
393
  semChunks = wpChunksCollection.query.near_vector(
394
  near_vector=vectorList,
 
396
  limit=3
397
  )
398
  wrks = str(semChunks)
399
+ logger.debug(f"### semChunks[0]: {wrks}")
400
 
401
  # Print chunks, corresponding document and document title.
402
  ragData = ""
403
+ logger.debug("#### Print individual retrieved chunks.")
404
  wpCollection = st.session_state.wpCollection
405
  for chunk in enumerate(semChunks.objects):
406
+ logger.debug(f"#### chunk: {chunk}")
407
  ragData = ragData + chunk[1].properties['chunk'] + "\n"
408
  webpage_uuid = chunk[1].properties['references']['webpage']
409
+ logger.debug(f"webpage_uuid: {webpage_uuid}")
410
  wpFromChunk = wpCollection.query.fetch_object_by_id(webpage_uuid)
411
+ logger.debug(f"### wpFromChunk title: {wpFromChunk.properties['title']}")
412
  #collection = client.collections.get("Chunks")
413
+ logger.debug("#### ragData: {ragData}")
414
  logger.info("#### getRagData() exited.")
415
  return ragData
416
+
417
+
418
+ #################################################
419
+ # Retrieve all RAG data for the user to review. #
420
+ #################################################
421
  def getAllRagData():
422
  logger.info("#### getAllRagData() entered.")
423
 
 
425
  response = chunksCollection.query.fetch_objects()
426
 
427
  wstrObjs = str(response.objects)
428
+ logger.debug(f"### response.objects: {wstrObjs}")
429
  for o in response.objects:
430
  wstr = o.properties
431
+ logger.info(f"### o.properties: {wstr}")
432
+ logger.info("#### getAllRagData() exited.")
433
  return wstrObjs
434
 
435
+ ##########################
436
+ # Display UI text areas. #
437
+ ##########################
438
  col1, col2 = st.columns(2)
 
439
  with col1:
440
  if "sysTA" not in st.session_state:
441
  st.session_state.sysTA = st.text_area(label="System Prompt",placeholder="You are a helpful AI assistant", help="Instruct the LLM about how to handle the user prompt.")
 
468
  st.session_state.rspTA = st.text_area(label="LLM Completion",value=st.session_state.rspTAtext,placeholder="LLM completion.",help="Output area for LLM completion (response).")
469
  else:
470
  st.session_state.rspTA = st.text_area(label="LLM Completion",value=st.session_state.rspTA,placeholder="LLM completion.",help="Output area for LLM completion (response).")
471
+
472
+
473
+ ####################################################################
474
+ # Prompt the LLM with the user's input and return the completion. #
475
+ ####################################################################
476
  def runLLM(prompt):
477
  logger = st.session_state.logger
478
  logger.info("### runLLM entered.")
 
494
  )
495
  result = modelOutput["choices"][0]["message"]["content"]
496
  result = str(modelOutput)
497
+ logger.debug(f"### llmResult: {result}")
498
  logger.info("### runLLM exited.")
499
  return result
500
 
501
+
502
+ ##########################################################################
503
+ # Build a llama-2 prompt from the user prompt and RAG input if selected. #
504
+ ##########################################################################
505
  def setPrompt(pprompt,ragFlag):
506
  logger = st.session_state.logger
507
+ logger.info(f"### setPrompt() entered. ragFlag: {ragFlag}")
508
  if ragFlag:
509
  ragPrompt = getRagData(pprompt)
510
  st.session_state.ragpTA = ragPrompt
511
  userPrompt = pprompt + "\n" + ragPrompt
512
  prompt = userPrompt
 
 
 
 
 
 
 
513
  userPrompt = "<prompt>" \
514
  + "Answer the following question or carry out the following instruction and also supplement " \
515
  + "the LLM processing of the question or instruction using the retrieved information from the knowledge base. " \
 
522
  + "</prompt>"
523
 
524
  else:
 
525
  userPrompt = pprompt
526
 
527
  fullPrompt = [
 
531
  "content": userPrompt
532
  }
533
  ]
534
+ logger.debug(f"### userPrompt: {userPrompt}")
 
535
  logger.info("setPrompt exited.")
536
  return fullPrompt
537
 
538
 
539
+ #################################################
540
+ # Format text for easier reading in text areas. #
541
+ #################################################
542
  def formatJson(jsonText):
543
  try:
544
+ logger.info(f"### formatJson jsonText: {jsonText}")
545
  if not isinstance(jsonText,str):
546
  jsonText = str(jsonText)
547
  jsonData = json.loads(jsonText)
548
  formattedJson = json.dumps(jsonData, indent=2)
549
  return formattedJson
550
  except json.JSONDecodeError as e:
551
+ logger.error(f"formatJson: iInvalid JSON text - {str(e)}")
552
  return jsonText
553
 
554
+
555
+ #####################################
556
+ # Run the LLM with the user prompt. #
557
+ #####################################
558
  def on_runLLMButton_Clicked():
559
  logger = st.session_state.logger
560
  logger.info("### on_runLLMButton_Clicked entered.")
561
  st.session_state.sysTAtext = st.session_state.sysTA
562
+ logger.debug(f"sysTAtext: {st.session_state.sysTAtext}")
563
 
564
  #st.session_state.userpTAtext = st.session_state.userpTA
565
  wrkList = setPrompt(st.session_state.userpTA,st.selectRag)
566
  st.session_state.userpTA = formatJson(wrkList)
567
+ logger.debug(f"userpTAtext: {st.session_state.userpTA}")
568
 
569
  #st.session_state.rspTAtext = runLLM(st.session_state.userpTAtext)
570
  rsp = runLLM(wrkList)
571
  st.session_state.rspTA = formatJson(rsp)
572
+ logger.debug(f"rspTAtext: {st.session_state.rspTA}")
573
 
574
  logger.info("### on_runLLMButton_Clicked exited.")
575
 
576
+
577
+ #########################################
578
+ # Get all the RAG data for user review. #
579
+ #########################################
580
  def on_getAllRagDataButton_Clicked():
581
  logger = st.session_state.logger
582
  logger.info("### on_getAllRagButton_Clicked entered.")
583
  st.session_state.ragpTA = formatJson(getAllRagData())
584
  logger.info("### on_getAllRagButton_Clicked exited.")
585
 
586
+
587
+ #######################################
588
+ # Reset all the input, output fields. #
589
+ #######################################
590
  def on_resetButton_Clicked():
591
  logger = st.session_state.logger
592
  logger.info("### on_Button_Clicked entered.")
 
594
  st.session_state.userpTA = ""
595
  st.session_state.ragpTA = ""
596
  st.session_state.rspTA = ""
597
+ logger.info("### on_Button_Clicked exited.")
598
 
599
+ ###########################################
600
+ # Display the sidebar with a checkbox and #
601
+ # buttons.                                #
602
+ ###########################################
603
  with st.sidebar:
604
  st.selectRag = st.checkbox("Enable RAG",value=False,key="selectRag",help=None,on_change=None,args=None,kwargs=None,disabled=False,label_visibility="visible")
605
  st.runLLMButton = st.button("Run LLM Prompt",key=None,help=None,on_click=on_runLLMButton_Clicked,args=None,kwargs=None,type="secondary",disabled=False,use_container_width=False)
606
  st.getAllRagDataButton = st.button("Get All Rag Data",key=None,help=None,on_click=on_getAllRagDataButton_Clicked,args=None,kwargs=None,type="secondary",disabled=False,use_container_width=False)
607
  st.resetButton = st.button("Reset",key=None,help=None,on_click=on_resetButton_Clicked,args=None,kwargs=None,type="secondary",disabled=False,use_container_width=False)
608
 
609
+ logger.info("#### semsearch.py: streamlit pass through code..")
610
+
611
  except Exception as e:
612
  try:
613
  emsg = str(e)
startup.sh CHANGED
@@ -1,64 +1,32 @@
1
  #! /bin/bash
2
 
 
 
 
 
 
3
  exec &> /app/startup.log
4
 
5
  echo "#### startup.sh entered."
6
 
7
- #if [ -z "$1" ]; then
8
- # echo "#### Re-executing startup.sh asynchonously."
9
- # /app/startup.sh "no re-execute" &
10
- # exit 0
11
- #fi
12
 
13
- #echo "### find streamlit "; find / | grep -i streamlit
14
- echo "### pwd "; pwd
15
- #echo "### df -h"; df -h
16
- echo "### ls -al /app"; ls -al /app
17
- #echo "### ls -al /app/.streamlit/main.css"; ls -al /app/.streamlit/main.css
18
- #echo "### ls -l /app/weaviate"; ls -l /app/weaviate
19
- #echo "### ls -l /app/text2vec-transformers"; ls -l /app/text2vec-transformers
20
- #echo "### ls -l /data"; ls -l /data
21
-
22
- #mkdir -p /data/var/lib/weaviate
23
- #chmod -R 777 /data/var/lib/weaviate
24
- #echo "### ls -al /data/var/lib/weaviate"; ls -al /data/var/lib/weaviate
25
-
26
- # For huggingface space.
27
  echo "### Before mkdir -p ~/data/var/lib/weaviate"
28
  weaviateDir=~/data/var/lib/weaviate
29
  mkdir -p $weaviateDir
30
  chmod -R 777 $weaviateDir
31
 
32
- #ls -al ~/data/var/lib/weaviate
33
- #echo "### ls -al ~"; ls -al ~
34
- #sudo ln -s ~/data/var/lib/weaviate /data/var/lib/weaviate
35
- #else
36
- # echo "### /data/var/lib/weaviate already exists."
37
- #fi
38
 
39
- ################################################
40
  # Start text2vec-transformers
41
  echo "#### Before /app/text2vec-transformers"
42
  cd /app/text2vec-transformers
43
  /app/text2vec-transformers/bin/uvicorn app:app --host 0.0.0.0 --port 8081 --log-level warning --timeout-keep-alive 1440 & #2>& 1 | tee /data/var/lib/weaviate/t2v.log &
44
  cd /app
45
 
46
- #sleep 5
47
- #echo "\n######## curl t2 "
48
- #for (( ; ; )) do curl localhost:8081/vectors -H 'Content-Type: application/json' -d '{"text": "foo bar"}'; sleep 61; done &
49
-
50
 
51
- ###############################################
52
  # Start the weaviate vector database server.
53
  echo "#### Before /app/weaviate"
54
 
55
- #echo "### pwd"; pwd
56
- #echo "### ls -al ~"; ls -al ~
57
-
58
- #echo "### ls -l /var/lib/weaviate"; ls -l /var/lib/weaviate
59
- #echo "### ls -l /data"; ls -l /data
60
- #echo "### ls -l /data/var/lib/weaviate"; ls -l /data/var/lib/weaviate
61
-
62
  export AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED=true \
63
  PERSISTENCE_DATA_PATH=$weaviateDir \
64
  DEFAULT_VECTORIZER_MODULE=text2vec-transformers \
@@ -66,22 +34,8 @@ export AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED=true \
66
  TRANSFORMERS_INFERENCE_API=http://127.0.0.1:8081 \
67
  LOG_LEVEL=warning \
68
  MODULES_CLIENT_TIMEOUT=600s
69
- #env
70
  /app/weaviate/weaviate --host 127.0.0.1 --port 8080 --scheme http --write-timeout 600s & #2>& 1 | tee /data/var/lib/weaviate/ws.log &
71
 
72
- echo "### ps -ef"; ps -ef
73
-
74
- #echo "#### Before sleep."
75
- #sleep 120
76
- #echo "#### startup.sh exiting."
77
-
78
- #echo "#### Before /app/semsearch.py"
79
- #python /app/semsearch.py & #2>& 1 | tee /data/var/lib/weaviate/ss.log &
80
- #streamlit run /app/semsearch.py &
81
-
82
- # Display timestamps.
83
- #for (( ; ; )) do date; sleep 60; done &
84
-
85
  echo "### Before wait."
86
  wait
87
 
 
1
  #! /bin/bash
2
 
3
+ #####################################
4
+ # Start text2vec-transformers and #
5
+ # Weaviate DB to run asynchronously #
6
+ # and wait. #
7
+ #####################################
8
  exec &> /app/startup.log
9
 
10
  echo "#### startup.sh entered."
11
 
 
 
 
 
 
12
 
13
+ # Make sure Weaviate DB directory exists.
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  echo "### Before mkdir -p ~/data/var/lib/weaviate"
15
  weaviateDir=~/data/var/lib/weaviate
16
  mkdir -p $weaviateDir
17
  chmod -R 777 $weaviateDir
18
 
 
 
 
 
 
 
19
 
 
20
  # Start text2vec-transformers
21
  echo "#### Before /app/text2vec-transformers"
22
  cd /app/text2vec-transformers
23
  /app/text2vec-transformers/bin/uvicorn app:app --host 0.0.0.0 --port 8081 --log-level warning --timeout-keep-alive 1440 & #2>& 1 | tee /data/var/lib/weaviate/t2v.log &
24
  cd /app
25
 
 
 
 
 
26
 
 
27
  # Start the weaviate vector database server.
28
  echo "#### Before /app/weaviate"
29
 
 
 
 
 
 
 
 
30
  export AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED=true \
31
  PERSISTENCE_DATA_PATH=$weaviateDir \
32
  DEFAULT_VECTORIZER_MODULE=text2vec-transformers \
 
34
  TRANSFORMERS_INFERENCE_API=http://127.0.0.1:8081 \
35
  LOG_LEVEL=warning \
36
  MODULES_CLIENT_TIMEOUT=600s
 
37
  /app/weaviate/weaviate --host 127.0.0.1 --port 8080 --scheme http --write-timeout 600s & #2>& 1 | tee /data/var/lib/weaviate/ws.log &
38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  echo "### Before wait."
40
  wait
41