Spaces:

MVPilgrim
/

SemanticSearchPOC

Sleeping

App Files Community

MVPilgrim committed on Jun 1, 2024

Commit

f38ba8b

1 Parent(s): d1418cc

debug

Browse files

Files changed (2) hide show

app.py +78 -64
startup.sh +4 -4

app.py CHANGED Viewed

@@ -24,12 +24,16 @@ import subprocess
 st.markdown("<h1 style='text-align: center; color: #666666;'>Vector Database RAG Proof of Concept</h1>", unsafe_allow_html=True)
 st.markdown("<h6 style='text-align: center; color: #666666;'>V1</h6>", unsafe_allow_html=True)
-weaviate_logger = logging.getLogger("httpx")
-weaviate_logger.setLevel(logging.WARNING)
-logger = logging.getLogger(__name__)
-logging.basicConfig(level=logging.INFO)
 def runStartup():
@@ -43,7 +47,9 @@ def runStartup():
     except:
         logger.error(f"subprocess.run() encountered error.")
     logger.info("### startup.sh Exited")
-runStartup()
 # Function to load the CSS file
 def load_css(file_name):
@@ -51,7 +57,9 @@ def load_css(file_name):
         st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)
 # Load the custom CSS
-load_css(".streamlit/main.css")
 st.markdown("<h1 style='text-align: center; color: #666666;'>Vector Database RAG Proof of Concept</h1>", unsafe_allow_html=True)
 st.markdown("<h6 style='text-align: center; color: #666666;'>V1</h6>", unsafe_allow_html=True)
@@ -72,21 +80,24 @@ webpageChunksDocNames = []
 ######################################################
 # Connect to the Weaviate vector database.
-logger.info("#### Create Weaviate db client connection.")
-client = weaviate.WeaviateClient(
-    connection_params=ConnectionParams.from_params(
-        http_host="localhost",
-        http_port="8080",
-        http_secure=False,
-        grpc_host="localhost",
-        grpc_port="50051",
-        grpc_secure=False
-    ),
-    additional_config=AdditionalConfig(
-        timeout=Timeout(init=60, query=1800, insert=1800),  # Values in seconds
     )
-)
-client.connect()
 #######################################################
@@ -273,47 +284,50 @@ if not client.collections.exists("Chunks") :
 #################################################################
 # Initialize the LLM.
 model_path = "/app/llama-2-7b-chat.Q4_0.gguf"
-llm = Llama(model_path,
-            #*,
-            n_gpu_layers=0,
-            split_mode=llama_cpp.LLAMA_SPLIT_MODE_LAYER,
-            main_gpu=0,
-            tensor_split=None,
-            vocab_only=False,
-            use_mmap=True,
-            use_mlock=False,
-            kv_overrides=None,
-            seed=llama_cpp.LLAMA_DEFAULT_SEED,
-            n_ctx=512,
-            n_batch=512,
-            n_threads=8,
-            n_threads_batch=16,
-            rope_scaling_type=llama_cpp.LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED,
-            pooling_type=llama_cpp.LLAMA_POOLING_TYPE_UNSPECIFIED,
-            rope_freq_base=0.0,
-            rope_freq_scale=0.0,
-            yarn_ext_factor=-1.0,
-            yarn_attn_factor=1.0,
-            yarn_beta_fast=32.0,
-            yarn_beta_slow=1.0,
-            yarn_orig_ctx=0,
-            logits_all=False,
-            embedding=False,
-            offload_kqv=True,
-            last_n_tokens_size=64,
-            lora_base=None,
-            lora_scale=1.0,
-            lora_path=None,
-            numa=False,
-            chat_format=None,
-            chat_handler=None,
-            draft_model=None,
-            tokenizer=None,
-            type_k=None,
-            type_v=None,
-            verbose=True
-           )
 def getRagData(promptText):
     ###############################################################################
@@ -418,7 +432,7 @@ def setPrompt(pprompt,ragFlag):
     return userPrompt
-def on_submitButton_clicked(b):
     logger.debug("\n### on_submitButton_clicked")
     st.session_state.sysTAtext = st.session_state.sysTA
     logger.info(f"sysTAtext: {st.session_state.sysTAtext}")

 st.markdown("<h1 style='text-align: center; color: #666666;'>Vector Database RAG Proof of Concept</h1>", unsafe_allow_html=True)
 st.markdown("<h6 style='text-align: center; color: #666666;'>V1</h6>", unsafe_allow_html=True)
+if 'logging' not in st.session_state:
+    weaviate_logger = logging.getLogger("httpx")
+    weaviate_logger.setLevel(logging.WARNING)
+    logger = logging.getLogger(__name__)
+    logging.basicConfig(level=logging.INFO)
+    st.session_state.weaviate_logger = weaviate_logger
+    st.session_state.logger = logger
+else:
+    weaviate_logger = st.session_state.weaviate_logger
+    logger = st.session_state.logger
 def runStartup():
     except:
         logger.error(f"subprocess.run() encountered error.")
     logger.info("### startup.sh Exited")
+if 'runStartup' not in st.session_state:
+    runStartup()
+    st.session_state.runStartup = True
 # Function to load the CSS file
 def load_css(file_name):
         st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)
 # Load the custom CSS
+if 'load_css' not in st.session_state:
+    load_css(".streamlit/main.css")
+    st.session_state.load_css = True
 st.markdown("<h1 style='text-align: center; color: #666666;'>Vector Database RAG Proof of Concept</h1>", unsafe_allow_html=True)
 st.markdown("<h6 style='text-align: center; color: #666666;'>V1</h6>", unsafe_allow_html=True)
 ######################################################
 # Connect to the Weaviate vector database.
+if 'client' not in st.session_state:
+    logger.info("#### Create Weaviate db client connection.")
+    client = weaviate.WeaviateClient(
+        connection_params=ConnectionParams.from_params(
+            http_host="localhost",
+            http_port="8080",
+            http_secure=False,
+            grpc_host="localhost",
+            grpc_port="50051",
+            grpc_secure=False
+        ),
+        additional_config=AdditionalConfig(
+            timeout=Timeout(init=60, query=1800, insert=1800),  # Values in seconds
+        )
     )
+    client.connect()
+else:
+    client = st.session_state.client
 #######################################################
 #################################################################
 # Initialize the LLM.
 model_path = "/app/llama-2-7b-chat.Q4_0.gguf"
+if 'llm' not in st.session_state:
+    llm = Llama(model_path,
+                #*,
+                n_gpu_layers=0,
+                split_mode=llama_cpp.LLAMA_SPLIT_MODE_LAYER,
+                main_gpu=0,
+                tensor_split=None,
+                vocab_only=False,
+                use_mmap=True,
+                use_mlock=False,
+                kv_overrides=None,
+                seed=llama_cpp.LLAMA_DEFAULT_SEED,
+                n_ctx=512,
+                n_batch=512,
+                n_threads=8,
+                n_threads_batch=16,
+                rope_scaling_type=llama_cpp.LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED,
+                pooling_type=llama_cpp.LLAMA_POOLING_TYPE_UNSPECIFIED,
+                rope_freq_base=0.0,
+                rope_freq_scale=0.0,
+                yarn_ext_factor=-1.0,
+                yarn_attn_factor=1.0,
+                yarn_beta_fast=32.0,
+                yarn_beta_slow=1.0,
+                yarn_orig_ctx=0,
+                logits_all=False,
+                embedding=False,
+                offload_kqv=True,
+                last_n_tokens_size=64,
+                lora_base=None,
+                lora_scale=1.0,
+                lora_path=None,
+                numa=False,
+                chat_format=None,
+                chat_handler=None,
+                draft_model=None,
+                tokenizer=None,
+                type_k=None,
+                type_v=None,
+                verbose=True
+               )
+    st.session_state.llm = llm
+else:
+    llm = st.session_state.llm
 def getRagData(promptText):
     ###############################################################################
     return userPrompt
+def on_submitButton_clicked():
     logger.debug("\n### on_submitButton_clicked")
     st.session_state.sysTAtext = st.session_state.sysTA
     logger.info(f"sysTAtext: {st.session_state.sysTAtext}")

startup.sh CHANGED Viewed

@@ -1,11 +1,11 @@
 #! /bin/bash
 echo "#### startup.sh entered."
-echo "### find streamlit "; find / | grep -i streamlit
 echo "### pwd "; pwd
-echo "### df -h"; df -h
 echo "### ls -al /app"; ls -al /app
-echo "### ls -al /app/.streamlit/main.css"; ls -al /app/.streamlit/main.css
 #echo "### ls -l /app/weaviate"; ls -l /app/weaviate
 #echo "### ls -l /app/text2vec-transformers"; ls -l /app/text2vec-transformers
 #echo "### ls -l /data"; ls -l /data
@@ -57,7 +57,7 @@ export AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED=true \
 	   TRANSFORMERS_INFERENCE_API=http://127.0.0.1:8081 \
 	   LOG_LEVEL=warning \
        MODULES_CLIENT_TIMEOUT=600s
-env
 /app/weaviate/weaviate --host 127.0.0.1 --port 8080 --scheme http --write-timeout 600s &  #2>& 1 | tee /data/var/lib/weaviate/ws.log &
 echo "#### Before sleep."

 #! /bin/bash
 echo "#### startup.sh entered."
+#echo "### find streamlit "; find / | grep -i streamlit
 echo "### pwd "; pwd
+#echo "### df -h"; df -h
 echo "### ls -al /app"; ls -al /app
+#echo "### ls -al /app/.streamlit/main.css"; ls -al /app/.streamlit/main.css
 #echo "### ls -l /app/weaviate"; ls -l /app/weaviate
 #echo "### ls -l /app/text2vec-transformers"; ls -l /app/text2vec-transformers
 #echo "### ls -l /data"; ls -l /data
 	   TRANSFORMERS_INFERENCE_API=http://127.0.0.1:8081 \
 	   LOG_LEVEL=warning \
        MODULES_CLIENT_TIMEOUT=600s
+#env
 /app/weaviate/weaviate --host 127.0.0.1 --port 8080 --scheme http --write-timeout 600s &  #2>& 1 | tee /data/var/lib/weaviate/ws.log &
 echo "#### Before sleep."