MVPilgrim committed on
Commit
f38ba8b
·
1 Parent(s): d1418cc
Files changed (2) hide show
  1. app.py +78 -64
  2. startup.sh +4 -4
app.py CHANGED
@@ -24,12 +24,16 @@ import subprocess
24
  st.markdown("<h1 style='text-align: center; color: #666666;'>Vector Database RAG Proof of Concept</h1>", unsafe_allow_html=True)
25
  st.markdown("<h6 style='text-align: center; color: #666666;'>V1</h6>", unsafe_allow_html=True)
26
 
27
-
28
- weaviate_logger = logging.getLogger("httpx")
29
- weaviate_logger.setLevel(logging.WARNING)
30
-
31
- logger = logging.getLogger(__name__)
32
- logging.basicConfig(level=logging.INFO)
 
 
 
 
33
 
34
 
35
  def runStartup():
@@ -43,7 +47,9 @@ def runStartup():
43
  except:
44
  logger.error(f"subprocess.run() encountered error.")
45
  logger.info("### startup.sh Exited")
46
- runStartup()
 
 
47
 
48
  # Function to load the CSS file
49
  def load_css(file_name):
@@ -51,7 +57,9 @@ def load_css(file_name):
51
  st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)
52
 
53
  # Load the custom CSS
54
- load_css(".streamlit/main.css")
 
 
55
 
56
  st.markdown("<h1 style='text-align: center; color: #666666;'>Vector Database RAG Proof of Concept</h1>", unsafe_allow_html=True)
57
  st.markdown("<h6 style='text-align: center; color: #666666;'>V1</h6>", unsafe_allow_html=True)
@@ -72,21 +80,24 @@ webpageChunksDocNames = []
72
 
73
  ######################################################
74
  # Connect to the Weaviate vector database.
75
- logger.info("#### Create Weaviate db client connection.")
76
- client = weaviate.WeaviateClient(
77
- connection_params=ConnectionParams.from_params(
78
- http_host="localhost",
79
- http_port="8080",
80
- http_secure=False,
81
- grpc_host="localhost",
82
- grpc_port="50051",
83
- grpc_secure=False
84
- ),
85
- additional_config=AdditionalConfig(
86
- timeout=Timeout(init=60, query=1800, insert=1800), # Values in seconds
 
 
87
  )
88
- )
89
- client.connect()
 
90
 
91
 
92
  #######################################################
@@ -273,47 +284,50 @@ if not client.collections.exists("Chunks") :
273
  #################################################################
274
  # Initialize the LLM.
275
  model_path = "/app/llama-2-7b-chat.Q4_0.gguf"
276
- llm = Llama(model_path,
277
- #*,
278
- n_gpu_layers=0,
279
- split_mode=llama_cpp.LLAMA_SPLIT_MODE_LAYER,
280
- main_gpu=0,
281
- tensor_split=None,
282
- vocab_only=False,
283
- use_mmap=True,
284
- use_mlock=False,
285
- kv_overrides=None,
286
- seed=llama_cpp.LLAMA_DEFAULT_SEED,
287
- n_ctx=512,
288
- n_batch=512,
289
- n_threads=8,
290
- n_threads_batch=16,
291
- rope_scaling_type=llama_cpp.LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED,
292
- pooling_type=llama_cpp.LLAMA_POOLING_TYPE_UNSPECIFIED,
293
- rope_freq_base=0.0,
294
- rope_freq_scale=0.0,
295
- yarn_ext_factor=-1.0,
296
- yarn_attn_factor=1.0,
297
- yarn_beta_fast=32.0,
298
- yarn_beta_slow=1.0,
299
- yarn_orig_ctx=0,
300
- logits_all=False,
301
- embedding=False,
302
- offload_kqv=True,
303
- last_n_tokens_size=64,
304
- lora_base=None,
305
- lora_scale=1.0,
306
- lora_path=None,
307
- numa=False,
308
- chat_format=None,
309
- chat_handler=None,
310
- draft_model=None,
311
- tokenizer=None,
312
- type_k=None,
313
- type_v=None,
314
- verbose=True
315
- )
316
-
 
 
 
317
 
318
  def getRagData(promptText):
319
  ###############################################################################
@@ -418,7 +432,7 @@ def setPrompt(pprompt,ragFlag):
418
  return userPrompt
419
 
420
 
421
- def on_submitButton_clicked(b):
422
  logger.debug("\n### on_submitButton_clicked")
423
  st.session_state.sysTAtext = st.session_state.sysTA
424
  logger.info(f"sysTAtext: {st.session_state.sysTAtext}")
 
24
  st.markdown("<h1 style='text-align: center; color: #666666;'>Vector Database RAG Proof of Concept</h1>", unsafe_allow_html=True)
25
  st.markdown("<h6 style='text-align: center; color: #666666;'>V1</h6>", unsafe_allow_html=True)
26
 
27
+ if 'logging' not in st.session_state:
28
+ weaviate_logger = logging.getLogger("httpx")
29
+ weaviate_logger.setLevel(logging.WARNING)
30
+ logger = logging.getLogger(__name__)
31
+ logging.basicConfig(level=logging.INFO)
32
+ st.session_state.weaviate_logger = weaviate_logger
33
+ st.session_state.logger = logger
34
+ else:
35
+ weaviate_logger = st.session_state.weaviate_logger
36
+ logger = st.session_state.logger
37
 
38
 
39
  def runStartup():
 
47
  except:
48
  logger.error(f"subprocess.run() encountered error.")
49
  logger.info("### startup.sh Exited")
50
+ if 'runStartup' not in st.session_state:
51
+ runStartup()
52
+ st.session_state.runStartup = True
53
 
54
  # Function to load the CSS file
55
  def load_css(file_name):
 
57
  st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)
58
 
59
  # Load the custom CSS
60
+ if 'load_css' not in st.session_state:
61
+ load_css(".streamlit/main.css")
62
+ st.session_state.load_css = True
63
 
64
  st.markdown("<h1 style='text-align: center; color: #666666;'>Vector Database RAG Proof of Concept</h1>", unsafe_allow_html=True)
65
  st.markdown("<h6 style='text-align: center; color: #666666;'>V1</h6>", unsafe_allow_html=True)
 
80
 
81
  ######################################################
82
  # Connect to the Weaviate vector database.
83
+ if 'client' not in st.session_state:
84
+ logger.info("#### Create Weaviate db client connection.")
85
+ client = weaviate.WeaviateClient(
86
+ connection_params=ConnectionParams.from_params(
87
+ http_host="localhost",
88
+ http_port="8080",
89
+ http_secure=False,
90
+ grpc_host="localhost",
91
+ grpc_port="50051",
92
+ grpc_secure=False
93
+ ),
94
+ additional_config=AdditionalConfig(
95
+ timeout=Timeout(init=60, query=1800, insert=1800), # Values in seconds
96
+ )
97
  )
98
+ client.connect()
99
+ else:
100
+ client = st.session_state.client
101
 
102
 
103
  #######################################################
 
284
  #################################################################
285
  # Initialize the LLM.
286
  model_path = "/app/llama-2-7b-chat.Q4_0.gguf"
287
+ if 'llm' not in st.session_state:
288
+ llm = Llama(model_path,
289
+ #*,
290
+ n_gpu_layers=0,
291
+ split_mode=llama_cpp.LLAMA_SPLIT_MODE_LAYER,
292
+ main_gpu=0,
293
+ tensor_split=None,
294
+ vocab_only=False,
295
+ use_mmap=True,
296
+ use_mlock=False,
297
+ kv_overrides=None,
298
+ seed=llama_cpp.LLAMA_DEFAULT_SEED,
299
+ n_ctx=512,
300
+ n_batch=512,
301
+ n_threads=8,
302
+ n_threads_batch=16,
303
+ rope_scaling_type=llama_cpp.LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED,
304
+ pooling_type=llama_cpp.LLAMA_POOLING_TYPE_UNSPECIFIED,
305
+ rope_freq_base=0.0,
306
+ rope_freq_scale=0.0,
307
+ yarn_ext_factor=-1.0,
308
+ yarn_attn_factor=1.0,
309
+ yarn_beta_fast=32.0,
310
+ yarn_beta_slow=1.0,
311
+ yarn_orig_ctx=0,
312
+ logits_all=False,
313
+ embedding=False,
314
+ offload_kqv=True,
315
+ last_n_tokens_size=64,
316
+ lora_base=None,
317
+ lora_scale=1.0,
318
+ lora_path=None,
319
+ numa=False,
320
+ chat_format=None,
321
+ chat_handler=None,
322
+ draft_model=None,
323
+ tokenizer=None,
324
+ type_k=None,
325
+ type_v=None,
326
+ verbose=True
327
+ )
328
+ st.session_state.llm = llm
329
+ else:
330
+ llm = st.session_state.llm
331
 
332
  def getRagData(promptText):
333
  ###############################################################################
 
432
  return userPrompt
433
 
434
 
435
+ def on_submitButton_clicked():
436
  logger.debug("\n### on_submitButton_clicked")
437
  st.session_state.sysTAtext = st.session_state.sysTA
438
  logger.info(f"sysTAtext: {st.session_state.sysTAtext}")
startup.sh CHANGED
@@ -1,11 +1,11 @@
1
  #! /bin/bash
2
 
3
  echo "#### startup.sh entered."
4
- echo "### find streamlit "; find / | grep -i streamlit
5
  echo "### pwd "; pwd
6
- echo "### df -h"; df -h
7
  echo "### ls -al /app"; ls -al /app
8
- echo "### ls -al /app/.streamlit/main.css"; ls -al /app/.streamlit/main.css
9
  #echo "### ls -l /app/weaviate"; ls -l /app/weaviate
10
  #echo "### ls -l /app/text2vec-transformers"; ls -l /app/text2vec-transformers
11
  #echo "### ls -l /data"; ls -l /data
@@ -57,7 +57,7 @@ export AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED=true \
57
  TRANSFORMERS_INFERENCE_API=http://127.0.0.1:8081 \
58
  LOG_LEVEL=warning \
59
  MODULES_CLIENT_TIMEOUT=600s
60
- env
61
  /app/weaviate/weaviate --host 127.0.0.1 --port 8080 --scheme http --write-timeout 600s & #2>& 1 | tee /data/var/lib/weaviate/ws.log &
62
 
63
  echo "#### Before sleep."
 
1
  #! /bin/bash
2
 
3
  echo "#### startup.sh entered."
4
+ #echo "### find streamlit "; find / | grep -i streamlit
5
  echo "### pwd "; pwd
6
+ #echo "### df -h"; df -h
7
  echo "### ls -al /app"; ls -al /app
8
+ #echo "### ls -al /app/.streamlit/main.css"; ls -al /app/.streamlit/main.css
9
  #echo "### ls -l /app/weaviate"; ls -l /app/weaviate
10
  #echo "### ls -l /app/text2vec-transformers"; ls -l /app/text2vec-transformers
11
  #echo "### ls -l /data"; ls -l /data
 
57
  TRANSFORMERS_INFERENCE_API=http://127.0.0.1:8081 \
58
  LOG_LEVEL=warning \
59
  MODULES_CLIENT_TIMEOUT=600s
60
+ #env
61
  /app/weaviate/weaviate --host 127.0.0.1 --port 8080 --scheme http --write-timeout 600s & #2>& 1 | tee /data/var/lib/weaviate/ws.log &
62
 
63
  echo "#### Before sleep."