Spaces:

MVPilgrim
/

SemanticSearchPOC

Running

App Files Files Community

MVPilgrim committed on Jun 14, 2024

Commit

2430b06

1 Parent(s): 00cc465

debug

Browse files

Files changed (2) hide show

app.py +86 -63
startup.sh +6 -52

app.py CHANGED Viewed

@@ -47,16 +47,12 @@ try:
     def runStartup():
         logger.info("### Running startup.sh")
         try:
-            #result = subprocess.run("/app/startup.sh",shell=False,capture_output=None,
-            # text=None,timeout=300)
-            #logger.info(f"startup.sh stdout:  {result.stdout}")
-            #logger.info(f"startup.sh stderr:  {result.stderr}")
-            #logger.info(f"Return code: {result.returncode}")
             subprocess.Popen(["/app/startup.sh"])
             time.sleep(180)
         except Exception as e:
             emsg = str(e)
-            logger.ERROR(f"subprocess.run  EXCEPTION. e: {emsg}")
             try:
                 with open("/app/startup.log", "r") as file:
                     content = file.read()
@@ -70,10 +66,9 @@ try:
         runStartup()
-    # Function to load the CSS file
     def load_css(file_name):
         logger.info("#### load_css entered.")
         with open(file_name) as f:
@@ -83,11 +78,10 @@ try:
         load_css(".streamlit/main.css")
         st.session_state.load_css = True
-    st.markdown("<h1 style='text-align: center; color: #666666;'>RAG Vector Database</h1>",
                 unsafe_allow_html=True)
-    st.markdown("<h1 style='text-align: center; color: #666666;'>RAG Vector Database</h1>",
-                unsafe_allow_html=True)
     pathString = "/app/inputDocs"
     chunks = []
     webpageDocNames = []
@@ -96,6 +90,7 @@ try:
     webpageTitles = []
     webpageChunksDocNames = []
     ############################################
     # Connect to the Weaviate vector database. #
     ############################################
@@ -126,7 +121,7 @@ try:
     # chunk it, collect chunks and document names.         #
     ########################################################
     if not client.collections.exists("Documents") or not client.collections.exists("Chunks") :
-        logger.info("#### Read and chunk input text files.")
         for filename in os.listdir(pathString):
             logger.debug(filename)
             path = Path(pathString + "/" + filename)
@@ -159,7 +154,7 @@ try:
             logger.info(f"### filename, title: {filename}, {title}")
             logger.info(f"### webpageDocNames: {webpageDocNames}")
-        logger.info("#### Read and chunk input text files exited.")
     #############################################################
@@ -167,8 +162,7 @@ try:
     # Each chunk schema points to its corresponding document.   #
     #############################################################
     if not client.collections.exists("Documents"):
-        logger.info("#### createWebpageCollection() entered.")
-        #client.collections.delete("Documents")
         class_obj = {
           "class": "Documents",
           "description": "For first attempt at loading a Weviate database.",
@@ -217,14 +211,14 @@ try:
         }
         wpCollection = client.collections.create_from_dict(class_obj)
         st.session_state.wpCollection = wpCollection
-        logger.info("#### createWebpageCollection() exited.")
     else:
         wpCollection = st.session_state.wpCollection
         st.session_state.wpCollection = wpCollection
     if not client.collections.exists("Chunks"):
-        logger.info("#### createChunksCollection() entered.")
         #client.collections.delete("Chunks")
         class_obj = {
             "class": "Chunks",
@@ -273,7 +267,7 @@ try:
         }
         wpChunksCollection = client.collections.create_from_dict(class_obj)
         st.session_state.wpChunksCollection = wpChunksCollection
-        logger.info("#### createChunksCollection() exited.")
     else:
         wpChunksCollection = client.collections.get("Chunks")
         st.session_state.wpChunksCollection = wpChunksCollection
@@ -283,9 +277,10 @@ try:
     # Create the actual document and chunks objects in the database. #
     ##################################################################
     if 'dbObjsCreated' not in st.session_state:
-        logger.info("#### Create db objects.")
         st.session_state.dbObjsCreated = True
         for i, className in enumerate(webpageDocNames):
             title = webpageTitles[i]
             logger.debug(f"## className, title: {className}, {title}")
             # Create Webpage Object
@@ -298,7 +293,7 @@ try:
                 "content": page_content
               }
             )
-            logger.info("#### page/doc/db/objects created.")
             logger.info("#### Create chunk db objects.")
             st.session_state.wpChunksCollection = wpChunksCollection
@@ -316,7 +311,7 @@ try:
                   }
                 )
             logger.info("#### Create chunk db objects created.")
-        logger.info("#### db objects created.")
     #######################
@@ -366,30 +361,34 @@ try:
                     verbose=False
                    )
         st.session_state.llm = llm
-        logger.info("### Initializing LLM exited.")
     else:
         llm = st.session_state.llm
     def getRagData(promptText):
         logger.info("#### getRagData() entered.")
         ###############################################################################
         # Initial the the sentence transformer and encode the query prompt.
-        logger.info(f"#### Encode text query prompt to create vectors. {promptText}")
         model = SentenceTransformer('/app/multi-qa-MiniLM-L6-cos-v1')
         vector = model.encode(promptText)
         wrks = str(vector)
-        logger.info(f"### vector: {wrks}")
         vectorList = []
-        logger.info("#### Print vectors.")
         for vec in vector:
             vectorList.append(vec)
         wrks = str(vectorList)
-        logger.info(f"vectorList: {wrks}")
         # Fetch chunks and print chunks.
-        logger.info("#### Retrieve semchunks from db using vectors from prompt.")
         wpChunksCollection = st.session_state.wpChunksCollection
         semChunks = wpChunksCollection.query.near_vector(
             near_vector=vectorList,
@@ -397,25 +396,28 @@ try:
             limit=3
         )
         wrks = str(semChunks)
-        logger.info(f"### semChunks[0]: {wrks}")
         # Print chunks, corresponding document and document title.
         ragData = ""
-        logger.info("#### Print individual retrieved chunks.")
         wpCollection = st.session_state.wpCollection
         for chunk in enumerate(semChunks.objects):
-            logger.info(f"#### chunk: {chunk}")
             ragData = ragData + chunk[1].properties['chunk'] + "\n"
             webpage_uuid = chunk[1].properties['references']['webpage']
-            logger.info(f"webpage_uuid: {webpage_uuid}")
             wpFromChunk = wpCollection.query.fetch_object_by_id(webpage_uuid)
-            logger.info(f"### wpFromChunk title: {wpFromChunk.properties['title']}")
         #collection = client.collections.get("Chunks")
-        logger.info("#### ragData: {ragData}")
         logger.info("#### getRagData() exited.")
         return  ragData
     def getAllRagData():
         logger.info("#### getAllRagData() entered.")
@@ -423,16 +425,17 @@ try:
         response = chunksCollection.query.fetch_objects()
         wstrObjs = str(response.objects)
-        logger.info(f"### response.objects: {wstrObjs}")
         for o in response.objects:
             wstr = o.properties
-            logger.info(f"### o.properties: {wstr}")
         return  wstrObjs
-    # Display UI
     col1, col2 = st.columns(2)
     with col1:
         if "sysTA" not in st.session_state:
             st.session_state.sysTA = st.text_area(label="System Prompt",placeholder="You are a helpful AI assistant", help="Instruct the LLM about how to handle the user prompt.")
@@ -465,7 +468,11 @@ try:
             st.session_state.rspTA = st.text_area(label="LLM Completion",value=st.session_state.rspTAtext,placeholder="LLM completion.",help="Output area for LLM completion (response).")
         else:
             st.session_state.rspTA = st.text_area(label="LLM Completion",value=st.session_state.rspTA,placeholder="LLM completion.",help="Output area for LLM completion (response).")
     def runLLM(prompt):
         logger = st.session_state.logger
         logger.info("### runLLM entered.")
@@ -487,25 +494,22 @@ try:
         )
         result = modelOutput["choices"][0]["message"]["content"]
         result = str(modelOutput)
-        logger.info(f"### llmResult: {result}")
         logger.info("### runLLM exited.")
         return result
     def setPrompt(pprompt,ragFlag):
         logger = st.session_state.logger
-        logger.info(f"\n### setPrompt() entered. ragFlag: {ragFlag}")
         if ragFlag:
             ragPrompt = getRagData(pprompt)
             st.session_state.ragpTA = ragPrompt
             userPrompt = pprompt + "\n" + ragPrompt
             prompt = userPrompt
-            #userPrompt = "This prompt is divided into two main sections. " \
-            #             "The first section starts with 'MAINPROMPT:' which is the actual question or instruction of the prompt. " \
-            #             + "The second section of the prompt starts with 'ADDITIONALCONTEXT:'. It contains additional information to evaluate along with " \
-            #             + "information within from the large language model itself. " \
-            #             + "Use it to clarify and supplement the prompt, but otherwise make sure to process the prompt in the standard manner. " \
-            #             + "MAINPROMPT: " + pprompt + " " \
-            #             + "ADDITIONALCONTEXT: " + ragPrompt
             userPrompt = "<prompt>" \
                            + "Answer the following question or carry out the following instruction and also supplement " \
                            + "the LLM processing of the question or instruction using the retrieved information from the knowledge base. " \
@@ -518,7 +522,6 @@ try:
                            + "</prompt>"
         else:
-            #userPrompt = st.session_state.sysTA + " " + pprompt
             userPrompt = pprompt
         fullPrompt = [
@@ -528,48 +531,62 @@ try:
               "content": userPrompt
           }
         ]
-        #fullPrompt = userPrompt
-        logger.info(f"### userPrompt: {userPrompt}")
         logger.info("setPrompt exited.")
         return fullPrompt
     def formatJson(jsonText):
         try:
-            logger.info(f"#### formatJson jsonText: {jsonText}")
             if not isinstance(jsonText,str):
                 jsonText = str(jsonText)
             jsonData = json.loads(jsonText)
             formattedJson = json.dumps(jsonData, indent=2)
             return formattedJson
         except json.JSONDecodeError as e:
-            logger.error(f"Invalid JSON text - {str(e)}")
             return jsonText
     def on_runLLMButton_Clicked():
         logger = st.session_state.logger
         logger.info("### on_runLLMButton_Clicked entered.")
         st.session_state.sysTAtext = st.session_state.sysTA
-        logger.info(f"sysTAtext: {st.session_state.sysTAtext}")
         #st.session_state.userpTAtext = st.session_state.userpTA
         wrkList = setPrompt(st.session_state.userpTA,st.selectRag)
         st.session_state.userpTA = formatJson(wrkList)
-        logger.info(f"userpTAtext: {st.session_state.userpTA}")
         #st.session_state.rspTAtext = runLLM(st.session_state.userpTAtext)
         rsp = runLLM(wrkList)
         st.session_state.rspTA = formatJson(rsp)
-        logger.info(f"rspTAtext: {st.session_state.rspTA}")
         logger.info("### on_runLLMButton_Clicked exited.")
     def on_getAllRagDataButton_Clicked():
         logger = st.session_state.logger
         logger.info("### on_getAllRagButton_Clicked entered.")
         st.session_state.ragpTA = formatJson(getAllRagData())
         logger.info("### on_getAllRagButton_Clicked exited.")
     def on_resetButton_Clicked():
         logger = st.session_state.logger
         logger.info("### on_Button_Clicked entered.")
@@ -577,14 +594,20 @@ try:
         st.session_state.userpTA = ""
         st.session_state.ragpTA  = ""
         st.session_state.rspTA   = ""
     with st.sidebar:
         st.selectRag           = st.checkbox("Enable RAG",value=False,key="selectRag",help=None,on_change=None,args=None,kwargs=None,disabled=False,label_visibility="visible")
         st.runLLMButton        = st.button("Run LLM Prompt",key=None,help=None,on_click=on_runLLMButton_Clicked,args=None,kwargs=None,type="secondary",disabled=False,use_container_width=False)
         st.getAllRagDataButton = st.button("Get All Rag Data",key=None,help=None,on_click=on_getAllRagDataButton_Clicked,args=None,kwargs=None,type="secondary",disabled=False,use_container_width=False)
         st.resetButton         = st.button("Reset",key=None,help=None,on_click=on_resetButton_Clicked,args=None,kwargs=None,type="secondary",disabled=False,use_container_width=False)
-    logger.info("#### semsearch.py end of code.")
 except Exception as e:
     try:
         emsg = str(e)

     def runStartup():
         logger.info("### Running startup.sh")
         try:
             subprocess.Popen(["/app/startup.sh"])
+            # Wait for text2vec-transformers and Weaviate DB to initialize.
             time.sleep(180)
         except Exception as e:
             emsg = str(e)
+            logger.ERROR(f"### subprocess.run  EXCEPTION. e: {emsg}")
             try:
                 with open("/app/startup.log", "r") as file:
                     content = file.read()
         runStartup()
+    ##########################################
+    # Function to load the CSS styling file. #
+    ##########################################
     def load_css(file_name):
         logger.info("#### load_css entered.")
         with open(file_name) as f:
         load_css(".streamlit/main.css")
         st.session_state.load_css = True
+    # Display UI heading.
+    st.markdown("<h1 style='text-align: center; color: #666666;'>LLM with RAG Vector Database Proof of Concept</h1>",
                 unsafe_allow_html=True)
     pathString = "/app/inputDocs"
     chunks = []
     webpageDocNames = []
     webpageTitles = []
     webpageChunksDocNames = []
     ############################################
     # Connect to the Weaviate vector database. #
     ############################################
     # chunk it, collect chunks and document names.         #
     ########################################################
     if not client.collections.exists("Documents") or not client.collections.exists("Chunks") :
+        logger.info("#### Read and chunk input RAG document files.")
         for filename in os.listdir(pathString):
             logger.debug(filename)
             path = Path(pathString + "/" + filename)
             logger.info(f"### filename, title: {filename}, {title}")
             logger.info(f"### webpageDocNames: {webpageDocNames}")
+        logger.info("#### Read and chunk input RAG document files.")
     #############################################################
     # Each chunk schema points to its corresponding document.   #
     #############################################################
     if not client.collections.exists("Documents"):
+        logger.info("#### Create documents schema/collection started.")
         class_obj = {
           "class": "Documents",
           "description": "For first attempt at loading a Weviate database.",
         }
         wpCollection = client.collections.create_from_dict(class_obj)
         st.session_state.wpCollection = wpCollection
+        logger.info("#### Create documents schema/collection ended.")
     else:
         wpCollection = st.session_state.wpCollection
         st.session_state.wpCollection = wpCollection
+    # Create chunks in db.
     if not client.collections.exists("Chunks"):
+        logger.info("#### create document chunks schema/collection started.")
         #client.collections.delete("Chunks")
         class_obj = {
             "class": "Chunks",
         }
         wpChunksCollection = client.collections.create_from_dict(class_obj)
         st.session_state.wpChunksCollection = wpChunksCollection
+        logger.info("#### create document chunks schedma/collection ended.")
     else:
         wpChunksCollection = client.collections.get("Chunks")
         st.session_state.wpChunksCollection = wpChunksCollection
     # Create the actual document and chunks objects in the database. #
     ##################################################################
     if 'dbObjsCreated' not in st.session_state:
+        logger.info("#### Create db document and chunk objects started.")
         st.session_state.dbObjsCreated = True
         for i, className in enumerate(webpageDocNames):
+            logger.info("#### Creating document object.")
             title = webpageTitles[i]
             logger.debug(f"## className, title: {className}, {title}")
             # Create Webpage Object
                 "content": page_content
               }
             )
+            logger.info("#### Document object created.")
             logger.info("#### Create chunk db objects.")
             st.session_state.wpChunksCollection = wpChunksCollection
                   }
                 )
             logger.info("#### Create chunk db objects created.")
+        logger.info("#### Create db document and chunk objects ended.")
     #######################
                     verbose=False
                    )
         st.session_state.llm = llm
+        logger.info("### Initializing LLM completed.")
     else:
         llm = st.session_state.llm
+    #####################################################
+    # Get RAG data from vector db based on user prompt. #
+    #####################################################
     def getRagData(promptText):
         logger.info("#### getRagData() entered.")
         ###############################################################################
         # Initialize the sentence transformer and encode the query prompt.
+        logger.debug(f"#### Encode text query prompt to create vectors. {promptText}")
         model = SentenceTransformer('/app/multi-qa-MiniLM-L6-cos-v1')
         vector = model.encode(promptText)
         wrks = str(vector)
+        logger.debug(f"### vector: {wrks}")
         vectorList = []
+        logger.debug("#### Print vectors.")
         for vec in vector:
             vectorList.append(vec)
         wrks = str(vectorList)
+        logger.debug(f"vectorList: {wrks}")
         # Fetch chunks and print chunks.
+        logger.debug("#### Retrieve semchunks from db using vectors from prompt.")
         wpChunksCollection = st.session_state.wpChunksCollection
         semChunks = wpChunksCollection.query.near_vector(
             near_vector=vectorList,
             limit=3
         )
         wrks = str(semChunks)
+        logger.debug(f"### semChunks[0]: {wrks}")
         # Print chunks, corresponding document and document title.
         ragData = ""
+        logger.debug("#### Print individual retrieved chunks.")
         wpCollection = st.session_state.wpCollection
         for chunk in enumerate(semChunks.objects):
+            logger.debug(f"#### chunk: {chunk}")
             ragData = ragData + chunk[1].properties['chunk'] + "\n"
             webpage_uuid = chunk[1].properties['references']['webpage']
+            logger.debug(f"webpage_uuid: {webpage_uuid}")
             wpFromChunk = wpCollection.query.fetch_object_by_id(webpage_uuid)
+            logger.debug(f"### wpFromChunk title: {wpFromChunk.properties['title']}")
         #collection = client.collections.get("Chunks")
+        logger.debug("#### ragData: {ragData}")
         logger.info("#### getRagData() exited.")
         return  ragData
+    #################################################
+    # Retrieve all RAG data for the user to review. #
+    #################################################
     def getAllRagData():
         logger.info("#### getAllRagData() entered.")
         response = chunksCollection.query.fetch_objects()
         wstrObjs = str(response.objects)
+        logger.debug(f"### response.objects: {wstrObjs}")
         for o in response.objects:
             wstr = o.properties
+            logger.info(f"### o.properties: {wstr}")
+        logger.info("#### getAllRagData() exited.")
         return  wstrObjs
+    ##########################
+    # Display UI text areas. #
+    ##########################
     col1, col2 = st.columns(2)
     with col1:
         if "sysTA" not in st.session_state:
             st.session_state.sysTA = st.text_area(label="System Prompt",placeholder="You are a helpful AI assistant", help="Instruct the LLM about how to handle the user prompt.")
             st.session_state.rspTA = st.text_area(label="LLM Completion",value=st.session_state.rspTAtext,placeholder="LLM completion.",help="Output area for LLM completion (response).")
         else:
             st.session_state.rspTA = st.text_area(label="LLM Completion",value=st.session_state.rspTA,placeholder="LLM completion.",help="Output area for LLM completion (response).")
+    ####################################################################
+    # Prompt the LLM with the user's input and return the completion.  #
+    ####################################################################
     def runLLM(prompt):
         logger = st.session_state.logger
         logger.info("### runLLM entered.")
         )
         result = modelOutput["choices"][0]["message"]["content"]
         result = str(modelOutput)
+        logger.debug(f"### llmResult: {result}")
         logger.info("### runLLM exited.")
         return result
+    ##########################################################################
+    # Build a llama-2 prompt from the user prompt and RAG input if selected. #
+    ##########################################################################
     def setPrompt(pprompt,ragFlag):
         logger = st.session_state.logger
+        logger.info(f"### setPrompt() entered. ragFlag: {ragFlag}")
         if ragFlag:
             ragPrompt = getRagData(pprompt)
             st.session_state.ragpTA = ragPrompt
             userPrompt = pprompt + "\n" + ragPrompt
             prompt = userPrompt
             userPrompt = "<prompt>" \
                            + "Answer the following question or carry out the following instruction and also supplement " \
                            + "the LLM processing of the question or instruction using the retrieved information from the knowledge base. " \
                            + "</prompt>"
         else:
             userPrompt = pprompt
         fullPrompt = [
               "content": userPrompt
           }
         ]
+        logger.debug(f"### userPrompt: {userPrompt}")
         logger.info("setPrompt exited.")
         return fullPrompt
+    #################################################
+    # Format text for easier reading in text areas. #
+    #################################################
     def formatJson(jsonText):
         """Return jsonText pretty-printed as JSON with a 2-space indent.

         jsonText may be a JSON string or an already-parsed Python object
         (for example the list of message dicts built by setPrompt()).
         Strings are parsed with json.loads; any other object is serialized
         directly, because str() of a list/dict produces a Python repr with
         single quotes that json.loads always rejects.

         If the input cannot be parsed or serialized, the error is logged and
         the input is returned unformatted (as a string), so callers can
         always place the result in a text area.
         """
         try:
             logger.info(f"### formatJson jsonText: {jsonText}")
             if isinstance(jsonText, str):
                 jsonData = json.loads(jsonText)
             else:
                 # Already a Python object: no parsing step is needed.
                 jsonData = jsonText
             return json.dumps(jsonData, indent=2)
         except (TypeError, ValueError) as e:
             # ValueError covers json.JSONDecodeError (invalid JSON text);
             # TypeError covers objects json.dumps cannot serialize.
             logger.error(f"formatJson: Invalid JSON text - {str(e)}")
             return jsonText if isinstance(jsonText, str) else str(jsonText)
+    #####################################
+    # Run the LLM with the user prompt. #
+    #####################################
     def on_runLLMButton_Clicked():
         """Button callback: build the prompt, run the LLM, publish the results.

         Reads the system and user prompts from st.session_state, builds the
         chat prompt with setPrompt() (adding RAG data when the "Enable RAG"
         checkbox is ticked), runs it through runLLM(), and writes the
         formatted prompt and completion back to st.session_state.userpTA and
         st.session_state.rspTA.
         """
         logger = st.session_state.logger
         logger.info("### on_runLLMButton_Clicked entered.")
         st.session_state.sysTAtext = st.session_state.sysTA
         logger.debug(f"sysTAtext: {st.session_state.sysTAtext}")
         # Read the checkbox through its widget key in session state rather
         # than the st.selectRag module attribute: attributes set on the
         # streamlit module are shared by every session in the process.
         ragFlag = st.session_state.get("selectRag", False)
         wrkList = setPrompt(st.session_state.userpTA, ragFlag)
         st.session_state.userpTA = formatJson(wrkList)
         logger.debug(f"userpTAtext: {st.session_state.userpTA}")
         rsp = runLLM(wrkList)
         st.session_state.rspTA = formatJson(rsp)
         logger.debug(f"rspTAtext: {st.session_state.rspTA}")
         logger.info("### on_runLLMButton_Clicked exited.")
+    #########################################
+    # Get all the RAG data for user review. #
+    #########################################
     def on_getAllRagDataButton_Clicked():
         """Button callback: fetch all RAG data and store it, formatted, in ragpTA."""
         log = st.session_state.logger
         log.info("### on_getAllRagButton_Clicked entered.")
         # Retrieve first, then format, so each step is visible on its own line.
         allRagData = getAllRagData()
         formattedRagData = formatJson(allRagData)
         st.session_state.ragpTA = formattedRagData
         log.info("### on_getAllRagButton_Clicked exited.")
+    #######################################
+    # Reset all the input, output fields. #
+    #######################################
     def on_resetButton_Clicked():
         logger = st.session_state.logger
         logger.info("### on_Button_Clicked entered.")
         st.session_state.userpTA = ""
         st.session_state.ragpTA  = ""
         st.session_state.rspTA   = ""
+        logger.info("### on_Button_Clicked exited.")
+    ###########################################
+    # Display the sidebar with a checkbox and #
+    # buttons.                                #
+    ###########################################
     with st.sidebar:
         st.selectRag           = st.checkbox("Enable RAG",value=False,key="selectRag",help=None,on_change=None,args=None,kwargs=None,disabled=False,label_visibility="visible")
         st.runLLMButton        = st.button("Run LLM Prompt",key=None,help=None,on_click=on_runLLMButton_Clicked,args=None,kwargs=None,type="secondary",disabled=False,use_container_width=False)
         st.getAllRagDataButton = st.button("Get All Rag Data",key=None,help=None,on_click=on_getAllRagDataButton_Clicked,args=None,kwargs=None,type="secondary",disabled=False,use_container_width=False)
         st.resetButton         = st.button("Reset",key=None,help=None,on_click=on_resetButton_Clicked,args=None,kwargs=None,type="secondary",disabled=False,use_container_width=False)
+    logger.info("#### semsearch.py: streamlit pass through code..")
 except Exception as e:
     try:
         emsg = str(e)

startup.sh CHANGED Viewed

@@ -1,64 +1,32 @@
 #! /bin/bash
 exec &> /app/startup.log
 echo "#### startup.sh entered."
-#if [ -z "$1" ]; then
-#  echo "#### Re-executing startup.sh asynchonously."
-#  /app/startup.sh "no re-execute" &
-#  exit 0
-#fi
-#echo "### find streamlit "; find / | grep -i streamlit
-echo "### pwd "; pwd
-#echo "### df -h"; df -h
-echo "### ls -al /app"; ls -al /app
-#echo "### ls -al /app/.streamlit/main.css"; ls -al /app/.streamlit/main.css
-#echo "### ls -l /app/weaviate"; ls -l /app/weaviate
-#echo "### ls -l /app/text2vec-transformers"; ls -l /app/text2vec-transformers
-#echo "### ls -l /data"; ls -l /data
-#mkdir -p /data/var/lib/weaviate
-#chmod -R 777 /data/var/lib/weaviate
-#echo "### ls -al /data/var/lib/weaviate"; ls -al /data/var/lib/weaviate
-# For huggingface space.
 echo "### Before mkdir -p ~/data/var/lib/weaviate"
 weaviateDir=~/data/var/lib/weaviate
 mkdir -p $weaviateDir
 chmod -R 777 $weaviateDir
-#ls -al ~/data/var/lib/weaviate
-#echo "### ls -al ~"; ls -al ~
-#sudo ln -s ~/data/var/lib/weaviate /data/var/lib/weaviate
-#else
-#  echo "### /data/var/lib/weaviate already exists."
-#fi
-################################################
 # Start tex2vec-transformers
 echo "#### Before /app/text2vec-transformers"
 cd /app/text2vec-transformers
 /app/text2vec-transformers/bin/uvicorn app:app --host 0.0.0.0 --port 8081 --log-level warning --timeout-keep-alive 1440 &  #2>& 1 | tee /data/var/lib/weaviate/t2v.log &
 cd /app
-#sleep 5
-#echo "\n######## curl t2 "
-#for (( ; ; )) do curl localhost:8081/vectors -H 'Content-Type: application/json' -d '{"text": "foo bar"}'; sleep 61; done &
-###############################################
 # Start the weaviate vector database server.
 echo "#### Before /app/weaviate"
-#echo "### pwd"; pwd
-#echo "### ls -al ~"; ls -al ~
-#echo "### ls -l /var/lib/weaviate"; ls -l /var/lib/weaviate
-#echo "### ls -l /data"; ls -l /data
-#echo "### ls -l /data/var/lib/weaviate"; ls -l /data/var/lib/weaviate
 export AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED=true \
        PERSISTENCE_DATA_PATH=$weaviateDir \
        DEFAULT_VECTORIZER_MODULE=text2vec-transformers \
@@ -66,22 +34,8 @@ export AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED=true \
 	   TRANSFORMERS_INFERENCE_API=http://127.0.0.1:8081 \
 	   LOG_LEVEL=warning \
        MODULES_CLIENT_TIMEOUT=600s
-#env
 /app/weaviate/weaviate --host 127.0.0.1 --port 8080 --scheme http --write-timeout 600s &  #2>& 1 | tee /data/var/lib/weaviate/ws.log &
-echo "### ps -ef"; ps -ef
-#echo "#### Before sleep."
-#sleep 120
-#echo "#### startup.sh exiting."
-#echo "#### Before /app/semsearch.py"
-#python /app/semsearch.py &  #2>& 1 | tee /data/var/lib/weaviate/ss.log &
-#streamlit run /app/semsearch.py &
-# Display timestamps.
-#for (( ; ; )) do date; sleep 60; done &
 echo "### Before wait."
 wait

 #! /bin/bash
+#####################################
+# Start text2vec-transformers and   #
+# Weaviate DB to run asynchronously #
+# and wait.                         #
+#####################################
 exec &> /app/startup.log
 echo "#### startup.sh entered."
+# Make sure Weaviate DB directory exists.
 echo "### Before mkdir -p ~/data/var/lib/weaviate"
 weaviateDir=~/data/var/lib/weaviate
 mkdir -p $weaviateDir
 chmod -R 777 $weaviateDir
 # Start text2vec-transformers
 echo "#### Before /app/text2vec-transformers"
 cd /app/text2vec-transformers
 /app/text2vec-transformers/bin/uvicorn app:app --host 0.0.0.0 --port 8081 --log-level warning --timeout-keep-alive 1440 &  #2>& 1 | tee /data/var/lib/weaviate/t2v.log &
 cd /app
 # Start the weaviate vector database server.
 echo "#### Before /app/weaviate"
 export AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED=true \
        PERSISTENCE_DATA_PATH=$weaviateDir \
        DEFAULT_VECTORIZER_MODULE=text2vec-transformers \
 	   TRANSFORMERS_INFERENCE_API=http://127.0.0.1:8081 \
 	   LOG_LEVEL=warning \
        MODULES_CLIENT_TIMEOUT=600s
 /app/weaviate/weaviate --host 127.0.0.1 --port 8080 --scheme http --write-timeout 600s &  #2>& 1 | tee /data/var/lib/weaviate/ws.log &
 echo "### Before wait."
 wait