MVPilgrim committed on
Commit
00cc465
·
1 Parent(s): c53ceaa
Files changed (1) hide show
  1. app.py +47 -29
app.py CHANGED
@@ -21,7 +21,11 @@ import subprocess
21
  import time
22
 
23
 
 
24
  try:
 
 
 
25
  if 'logging' not in st.session_state:
26
  weaviate_logger = logging.getLogger("httpx")
27
  weaviate_logger.setLevel(logging.WARNING)
@@ -36,6 +40,10 @@ try:
36
 
37
  logger.info("###################### PROGRAM ENTRY ###########################")
38
 
 
 
 
 
39
  def runStartup():
40
  logger.info("### Running startup.sh")
41
  try:
@@ -63,10 +71,7 @@ try:
63
 
64
 
65
 
66
- ######################################################################
67
- # MAINLINE
68
- #
69
- logger.info("#### MAINLINE ENTERED.")
70
 
71
  # Function to load the CSS file
72
  def load_css(file_name):
@@ -74,17 +79,15 @@ try:
74
  with open(file_name) as f:
75
  st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)
76
  logger.info("#### load_css exited.")
77
-
78
- # Load the custom CSS
79
  if 'load_css' not in st.session_state:
80
  load_css(".streamlit/main.css")
81
  st.session_state.load_css = True
82
 
83
- st.markdown("<h1 style='text-align: center; color: #666666;'>Vector Database RAG Proof of Concept</h1>", \
 
 
84
  unsafe_allow_html=True)
85
- st.markdown("<h6 style='text-align: center; color: #666666;'>V1</h6>", unsafe_allow_html=True)
86
 
87
- #pathString = "/Users/660565/KPSAllInOne/ProgramFilesX86/WebCopy/DownloadedWebSites/LLMPOC_HTML"
88
  pathString = "/app/inputDocs"
89
  chunks = []
90
  webpageDocNames = []
@@ -93,9 +96,9 @@ try:
93
  webpageTitles = []
94
  webpageChunksDocNames = []
95
 
96
- ######################################################
97
- # Connect to the Weaviate vector database.
98
- #if 'client' not in st.session_state:
99
  if 'client' not in st.session_state:
100
  logger.info("#### Create Weaviate db client connection.")
101
  client = weaviate.WeaviateClient(
@@ -118,9 +121,10 @@ try:
118
  client = st.session_state.client
119
 
120
 
121
- #######################################################
122
- # Read each text input file, parse it into a document,
123
- # chunk it, collect chunks and document name.
 
124
  if not client.collections.exists("Documents") or not client.collections.exists("Chunks") :
125
  logger.info("#### Read and chunk input text files.")
126
  for filename in os.listdir(pathString):
@@ -158,11 +162,10 @@ try:
158
  logger.info("#### Read and chunk input text files exited.")
159
 
160
 
161
-
162
- ######################################################
163
- # Create database webpage and chunks collections.
164
- #wpCollection = createWebpageCollection()
165
- #wpChunksCollection = createChunksCollection()
166
  if not client.collections.exists("Documents"):
167
  logger.info("#### createWebpageCollection() entered.")
168
  #client.collections.delete("Documents")
@@ -216,7 +219,6 @@ try:
216
  st.session_state.wpCollection = wpCollection
217
  logger.info("#### createWebpageCollection() exited.")
218
  else:
219
- #wpCollection = client.collections.get("Documents")
220
  wpCollection = st.session_state.wpCollection
221
  st.session_state.wpCollection = wpCollection
222
 
@@ -277,8 +279,9 @@ try:
277
  st.session_state.wpChunksCollection = wpChunksCollection
278
 
279
 
280
- ###########################################################
281
- # Create document and chunks objects in the database.
 
282
  if 'dbObjsCreated' not in st.session_state:
283
  logger.info("#### Create db objects.")
284
  st.session_state.dbObjsCreated = True
@@ -315,8 +318,10 @@ try:
315
  logger.info("#### Create chunk db objects created.")
316
  logger.info("#### db objects created.")
317
 
318
- #################################################################
319
- # Initialize the LLM.
 
 
320
  model_path = "/app/llama-2-7b-chat.Q4_0.gguf"
321
  if 'llm' not in st.session_state:
322
  logger.info("### Initializing LLM.")
@@ -528,14 +533,18 @@ try:
528
  logger.info("setPrompt exited.")
529
  return fullPrompt
530
 
 
531
  def formatJson(jsonText):
532
  try:
 
 
 
533
  jsonData = json.loads(jsonText)
534
  formattedJson = json.dumps(jsonData, indent=2)
535
  return formattedJson
536
  except json.JSONDecodeError as e:
537
  logger.error(f"Invalid JSON text - {str(e)}")
538
- return None
539
 
540
  def on_runLLMButton_Clicked():
541
  logger = st.session_state.logger
@@ -545,12 +554,12 @@ try:
545
 
546
  #st.session_state.userpTAtext = st.session_state.userpTA
547
  wrkList = setPrompt(st.session_state.userpTA,st.selectRag)
548
- st.session_state.userpTA = formatJson(str(wrkList))
549
  logger.info(f"userpTAtext: {st.session_state.userpTA}")
550
 
551
  #st.session_state.rspTAtext = runLLM(st.session_state.userpTAtext)
552
  rsp = runLLM(wrkList)
553
- st.session_state.rspTA = formatJson(str(rsp))
554
  logger.info(f"rspTAtext: {st.session_state.rspTA}")
555
 
556
  logger.info("### on_runLLMButton_Clicked exited.")
@@ -558,13 +567,22 @@ try:
558
  def on_getAllRagDataButton_Clicked():
559
  logger = st.session_state.logger
560
  logger.info("### on_getAllRagButton_Clicked entered.")
561
- st.session_state.ragpTA = formatJson(str(getAllRagData()))
562
  logger.info("### on_getAllRagButton_Clicked exited.")
563
 
 
 
 
 
 
 
 
 
564
  with st.sidebar:
565
  st.selectRag = st.checkbox("Enable RAG",value=False,key="selectRag",help=None,on_change=None,args=None,kwargs=None,disabled=False,label_visibility="visible")
566
  st.runLLMButton = st.button("Run LLM Prompt",key=None,help=None,on_click=on_runLLMButton_Clicked,args=None,kwargs=None,type="secondary",disabled=False,use_container_width=False)
567
  st.getAllRagDataButton = st.button("Get All Rag Data",key=None,help=None,on_click=on_getAllRagDataButton_Clicked,args=None,kwargs=None,type="secondary",disabled=False,use_container_width=False)
 
568
 
569
  logger.info("#### semsearch.py end of code.")
570
  except Exception as e:
 
21
  import time
22
 
23
 
24
+
25
  try:
26
+ #############################################
27
+ # Logging setup including weaviate logging. #
28
+ #############################################
29
  if 'logging' not in st.session_state:
30
  weaviate_logger = logging.getLogger("httpx")
31
  weaviate_logger.setLevel(logging.WARNING)
 
40
 
41
  logger.info("###################### PROGRAM ENTRY ###########################")
42
 
43
+ ##########################################################################
44
+ # Asynchronously run startup.sh, which runs text2vec-transformers #
45
+ # asynchronously and the Weaviate Vector Database server asynchronously. #
46
+ ##########################################################################
47
  def runStartup():
48
  logger.info("### Running startup.sh")
49
  try:
 
71
 
72
 
73
 
74
+
 
 
 
75
 
76
  # Function to load the CSS file
77
  def load_css(file_name):
 
79
  with open(file_name) as f:
80
  st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)
81
  logger.info("#### load_css exited.")
 
 
82
  if 'load_css' not in st.session_state:
83
  load_css(".streamlit/main.css")
84
  st.session_state.load_css = True
85
 
86
+ st.markdown("<h1 style='text-align: center; color: #666666;'>RAG Vector Database</h1>",
87
+ unsafe_allow_html=True)
88
+ st.markdown("<h1 style='text-align: center; color: #666666;'>RAG Vector Database</h1>",
89
  unsafe_allow_html=True)
 
90
 
 
91
  pathString = "/app/inputDocs"
92
  chunks = []
93
  webpageDocNames = []
 
96
  webpageTitles = []
97
  webpageChunksDocNames = []
98
 
99
+ ############################################
100
+ # Connect to the Weaviate vector database. #
101
+ ############################################
102
  if 'client' not in st.session_state:
103
  logger.info("#### Create Weaviate db client connection.")
104
  client = weaviate.WeaviateClient(
 
121
  client = st.session_state.client
122
 
123
 
124
+ ########################################################
125
+ # Read each text input file, parse it into a document, #
126
+ # chunk it, collect chunks and document names. #
127
+ ########################################################
128
  if not client.collections.exists("Documents") or not client.collections.exists("Chunks") :
129
  logger.info("#### Read and chunk input text files.")
130
  for filename in os.listdir(pathString):
 
162
  logger.info("#### Read and chunk input text files exited.")
163
 
164
 
165
+ #############################################################
166
+ # Create database documents and chunks schemas/collections. #
167
+ # Each chunk schema points to its corresponding document. #
168
+ #############################################################
 
169
  if not client.collections.exists("Documents"):
170
  logger.info("#### createWebpageCollection() entered.")
171
  #client.collections.delete("Documents")
 
219
  st.session_state.wpCollection = wpCollection
220
  logger.info("#### createWebpageCollection() exited.")
221
  else:
 
222
  wpCollection = st.session_state.wpCollection
223
  st.session_state.wpCollection = wpCollection
224
 
 
279
  st.session_state.wpChunksCollection = wpChunksCollection
280
 
281
 
282
+ ##################################################################
283
+ # Create the actual document and chunks objects in the database. #
284
+ ##################################################################
285
  if 'dbObjsCreated' not in st.session_state:
286
  logger.info("#### Create db objects.")
287
  st.session_state.dbObjsCreated = True
 
318
  logger.info("#### Create chunk db objects created.")
319
  logger.info("#### db objects created.")
320
 
321
+
322
+ #######################
323
+ # Initialize the LLM. #
324
+ #######################
325
  model_path = "/app/llama-2-7b-chat.Q4_0.gguf"
326
  if 'llm' not in st.session_state:
327
  logger.info("### Initializing LLM.")
 
533
  logger.info("setPrompt exited.")
534
  return fullPrompt
535
 
536
+
537
  def formatJson(jsonText):
538
  try:
539
+ logger.info(f"#### formatJson jsonText: {jsonText}")
540
+ if not isinstance(jsonText,str):
541
+ jsonText = str(jsonText)
542
  jsonData = json.loads(jsonText)
543
  formattedJson = json.dumps(jsonData, indent=2)
544
  return formattedJson
545
  except json.JSONDecodeError as e:
546
  logger.error(f"Invalid JSON text - {str(e)}")
547
+ return jsonText
548
 
549
  def on_runLLMButton_Clicked():
550
  logger = st.session_state.logger
 
554
 
555
  #st.session_state.userpTAtext = st.session_state.userpTA
556
  wrkList = setPrompt(st.session_state.userpTA,st.selectRag)
557
+ st.session_state.userpTA = formatJson(wrkList)
558
  logger.info(f"userpTAtext: {st.session_state.userpTA}")
559
 
560
  #st.session_state.rspTAtext = runLLM(st.session_state.userpTAtext)
561
  rsp = runLLM(wrkList)
562
+ st.session_state.rspTA = formatJson(rsp)
563
  logger.info(f"rspTAtext: {st.session_state.rspTA}")
564
 
565
  logger.info("### on_runLLMButton_Clicked exited.")
 
567
  def on_getAllRagDataButton_Clicked():
568
  logger = st.session_state.logger
569
  logger.info("### on_getAllRagButton_Clicked entered.")
570
+ st.session_state.ragpTA = formatJson(getAllRagData())
571
  logger.info("### on_getAllRagButton_Clicked exited.")
572
 
573
+ def on_resetButton_Clicked():
574
+ logger = st.session_state.logger
575
+ logger.info("### on_Button_Clicked entered.")
576
+ st.session_state.sysTA = ""
577
+ st.session_state.userpTA = ""
578
+ st.session_state.ragpTA = ""
579
+ st.session_state.rspTA = ""
580
+
581
  with st.sidebar:
582
  st.selectRag = st.checkbox("Enable RAG",value=False,key="selectRag",help=None,on_change=None,args=None,kwargs=None,disabled=False,label_visibility="visible")
583
  st.runLLMButton = st.button("Run LLM Prompt",key=None,help=None,on_click=on_runLLMButton_Clicked,args=None,kwargs=None,type="secondary",disabled=False,use_container_width=False)
584
  st.getAllRagDataButton = st.button("Get All Rag Data",key=None,help=None,on_click=on_getAllRagDataButton_Clicked,args=None,kwargs=None,type="secondary",disabled=False,use_container_width=False)
585
+ st.resetButton = st.button("Reset",key=None,help=None,on_click=on_resetButton_Clicked,args=None,kwargs=None,type="secondary",disabled=False,use_container_width=False)
586
 
587
  logger.info("#### semsearch.py end of code.")
588
  except Exception as e: