Spaces:
Running
Running
MVPilgrim
committed on
Commit
·
00cc465
1
Parent(s):
c53ceaa
debug
Browse files
app.py
CHANGED
@@ -21,7 +21,11 @@ import subprocess
|
|
21 |
import time
|
22 |
|
23 |
|
|
|
24 |
try:
|
|
|
|
|
|
|
25 |
if 'logging' not in st.session_state:
|
26 |
weaviate_logger = logging.getLogger("httpx")
|
27 |
weaviate_logger.setLevel(logging.WARNING)
|
@@ -36,6 +40,10 @@ try:
|
|
36 |
|
37 |
logger.info("###################### PROGRAM ENTRY ###########################")
|
38 |
|
|
|
|
|
|
|
|
|
39 |
def runStartup():
|
40 |
logger.info("### Running startup.sh")
|
41 |
try:
|
@@ -63,10 +71,7 @@ try:
|
|
63 |
|
64 |
|
65 |
|
66 |
-
|
67 |
-
# MAINLINE
|
68 |
-
#
|
69 |
-
logger.info("#### MAINLINE ENTERED.")
|
70 |
|
71 |
# Function to load the CSS file
|
72 |
def load_css(file_name):
|
@@ -74,17 +79,15 @@ try:
|
|
74 |
with open(file_name) as f:
|
75 |
st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)
|
76 |
logger.info("#### load_css exited.")
|
77 |
-
|
78 |
-
# Load the custom CSS
|
79 |
if 'load_css' not in st.session_state:
|
80 |
load_css(".streamlit/main.css")
|
81 |
st.session_state.load_css = True
|
82 |
|
83 |
-
st.markdown("<h1 style='text-align: center; color: #666666;'>Vector Database
|
|
|
|
|
84 |
unsafe_allow_html=True)
|
85 |
-
st.markdown("<h6 style='text-align: center; color: #666666;'>V1</h6>", unsafe_allow_html=True)
|
86 |
|
87 |
-
#pathString = "/Users/660565/KPSAllInOne/ProgramFilesX86/WebCopy/DownloadedWebSites/LLMPOC_HTML"
|
88 |
pathString = "/app/inputDocs"
|
89 |
chunks = []
|
90 |
webpageDocNames = []
|
@@ -93,9 +96,9 @@ try:
|
|
93 |
webpageTitles = []
|
94 |
webpageChunksDocNames = []
|
95 |
|
96 |
-
|
97 |
-
# Connect to the Weaviate vector database.
|
98 |
-
|
99 |
if 'client' not in st.session_state:
|
100 |
logger.info("#### Create Weaviate db client connection.")
|
101 |
client = weaviate.WeaviateClient(
|
@@ -118,9 +121,10 @@ try:
|
|
118 |
client = st.session_state.client
|
119 |
|
120 |
|
121 |
-
|
122 |
-
# Read each text input file, parse it into a document,
|
123 |
-
# chunk it, collect chunks and document
|
|
|
124 |
if not client.collections.exists("Documents") or not client.collections.exists("Chunks") :
|
125 |
logger.info("#### Read and chunk input text files.")
|
126 |
for filename in os.listdir(pathString):
|
@@ -158,11 +162,10 @@ try:
|
|
158 |
logger.info("#### Read and chunk input text files exited.")
|
159 |
|
160 |
|
161 |
-
|
162 |
-
|
163 |
-
#
|
164 |
-
|
165 |
-
#wpChunksCollection = createChunksCollection()
|
166 |
if not client.collections.exists("Documents"):
|
167 |
logger.info("#### createWebpageCollection() entered.")
|
168 |
#client.collections.delete("Documents")
|
@@ -216,7 +219,6 @@ try:
|
|
216 |
st.session_state.wpCollection = wpCollection
|
217 |
logger.info("#### createWebpageCollection() exited.")
|
218 |
else:
|
219 |
-
#wpCollection = client.collections.get("Documents")
|
220 |
wpCollection = st.session_state.wpCollection
|
221 |
st.session_state.wpCollection = wpCollection
|
222 |
|
@@ -277,8 +279,9 @@ try:
|
|
277 |
st.session_state.wpChunksCollection = wpChunksCollection
|
278 |
|
279 |
|
280 |
-
|
281 |
-
# Create document and chunks objects in the database.
|
|
|
282 |
if 'dbObjsCreated' not in st.session_state:
|
283 |
logger.info("#### Create db objects.")
|
284 |
st.session_state.dbObjsCreated = True
|
@@ -315,8 +318,10 @@ try:
|
|
315 |
logger.info("#### Create chunk db objects created.")
|
316 |
logger.info("#### db objects created.")
|
317 |
|
318 |
-
|
319 |
-
|
|
|
|
|
320 |
model_path = "/app/llama-2-7b-chat.Q4_0.gguf"
|
321 |
if 'llm' not in st.session_state:
|
322 |
logger.info("### Initializing LLM.")
|
@@ -528,14 +533,18 @@ try:
|
|
528 |
logger.info("setPrompt exited.")
|
529 |
return fullPrompt
|
530 |
|
|
|
531 |
def formatJson(jsonText):
|
532 |
try:
|
|
|
|
|
|
|
533 |
jsonData = json.loads(jsonText)
|
534 |
formattedJson = json.dumps(jsonData, indent=2)
|
535 |
return formattedJson
|
536 |
except json.JSONDecodeError as e:
|
537 |
logger.error(f"Invalid JSON text - {str(e)}")
|
538 |
-
return
|
539 |
|
540 |
def on_runLLMButton_Clicked():
|
541 |
logger = st.session_state.logger
|
@@ -545,12 +554,12 @@ try:
|
|
545 |
|
546 |
#st.session_state.userpTAtext = st.session_state.userpTA
|
547 |
wrkList = setPrompt(st.session_state.userpTA,st.selectRag)
|
548 |
-
st.session_state.userpTA = formatJson(
|
549 |
logger.info(f"userpTAtext: {st.session_state.userpTA}")
|
550 |
|
551 |
#st.session_state.rspTAtext = runLLM(st.session_state.userpTAtext)
|
552 |
rsp = runLLM(wrkList)
|
553 |
-
st.session_state.rspTA = formatJson(
|
554 |
logger.info(f"rspTAtext: {st.session_state.rspTA}")
|
555 |
|
556 |
logger.info("### on_runLLMButton_Clicked exited.")
|
@@ -558,13 +567,22 @@ try:
|
|
558 |
def on_getAllRagDataButton_Clicked():
|
559 |
logger = st.session_state.logger
|
560 |
logger.info("### on_getAllRagButton_Clicked entered.")
|
561 |
-
st.session_state.ragpTA = formatJson(
|
562 |
logger.info("### on_getAllRagButton_Clicked exited.")
|
563 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
564 |
with st.sidebar:
|
565 |
st.selectRag = st.checkbox("Enable RAG",value=False,key="selectRag",help=None,on_change=None,args=None,kwargs=None,disabled=False,label_visibility="visible")
|
566 |
st.runLLMButton = st.button("Run LLM Prompt",key=None,help=None,on_click=on_runLLMButton_Clicked,args=None,kwargs=None,type="secondary",disabled=False,use_container_width=False)
|
567 |
st.getAllRagDataButton = st.button("Get All Rag Data",key=None,help=None,on_click=on_getAllRagDataButton_Clicked,args=None,kwargs=None,type="secondary",disabled=False,use_container_width=False)
|
|
|
568 |
|
569 |
logger.info("#### semsearch.py end of code.")
|
570 |
except Exception as e:
|
|
|
21 |
import time
|
22 |
|
23 |
|
24 |
+
|
25 |
try:
|
26 |
+
#############################################
|
27 |
+
# Logging setup including weaviate logging. #
|
28 |
+
#############################################
|
29 |
if 'logging' not in st.session_state:
|
30 |
weaviate_logger = logging.getLogger("httpx")
|
31 |
weaviate_logger.setLevel(logging.WARNING)
|
|
|
40 |
|
41 |
logger.info("###################### PROGRAM ENTRY ###########################")
|
42 |
|
43 |
+
##########################################################################
|
44 |
+
# Asynchronously run startup.sh which runs text2vec-transformers #
|
45 |
+
# asynchronously and the Weaviate Vector Database server asynchronously. #
|
46 |
+
##########################################################################
|
47 |
def runStartup():
|
48 |
logger.info("### Running startup.sh")
|
49 |
try:
|
|
|
71 |
|
72 |
|
73 |
|
74 |
+
|
|
|
|
|
|
|
75 |
|
76 |
# Function to load the CSS file
|
77 |
def load_css(file_name):
|
|
|
79 |
with open(file_name) as f:
|
80 |
st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)
|
81 |
logger.info("#### load_css exited.")
|
|
|
|
|
82 |
if 'load_css' not in st.session_state:
|
83 |
load_css(".streamlit/main.css")
|
84 |
st.session_state.load_css = True
|
85 |
|
86 |
+
st.markdown("<h1 style='text-align: center; color: #666666;'>RAG Vector Database</h1>",
|
87 |
+
unsafe_allow_html=True)
|
88 |
+
st.markdown("<h1 style='text-align: center; color: #666666;'>RAG Vector Database</h1>",
|
89 |
unsafe_allow_html=True)
|
|
|
90 |
|
|
|
91 |
pathString = "/app/inputDocs"
|
92 |
chunks = []
|
93 |
webpageDocNames = []
|
|
|
96 |
webpageTitles = []
|
97 |
webpageChunksDocNames = []
|
98 |
|
99 |
+
############################################
|
100 |
+
# Connect to the Weaviate vector database. #
|
101 |
+
############################################
|
102 |
if 'client' not in st.session_state:
|
103 |
logger.info("#### Create Weaviate db client connection.")
|
104 |
client = weaviate.WeaviateClient(
|
|
|
121 |
client = st.session_state.client
|
122 |
|
123 |
|
124 |
+
########################################################
|
125 |
+
# Read each text input file, parse it into a document, #
|
126 |
+
# chunk it, collect chunks and document names. #
|
127 |
+
########################################################
|
128 |
if not client.collections.exists("Documents") or not client.collections.exists("Chunks") :
|
129 |
logger.info("#### Read and chunk input text files.")
|
130 |
for filename in os.listdir(pathString):
|
|
|
162 |
logger.info("#### Read and chunk input text files exited.")
|
163 |
|
164 |
|
165 |
+
#############################################################
|
166 |
+
# Create database documents and chunks schemas/collections. #
|
167 |
+
# Each chunk schema points to its corresponding document. #
|
168 |
+
#############################################################
|
|
|
169 |
if not client.collections.exists("Documents"):
|
170 |
logger.info("#### createWebpageCollection() entered.")
|
171 |
#client.collections.delete("Documents")
|
|
|
219 |
st.session_state.wpCollection = wpCollection
|
220 |
logger.info("#### createWebpageCollection() exited.")
|
221 |
else:
|
|
|
222 |
wpCollection = st.session_state.wpCollection
|
223 |
st.session_state.wpCollection = wpCollection
|
224 |
|
|
|
279 |
st.session_state.wpChunksCollection = wpChunksCollection
|
280 |
|
281 |
|
282 |
+
##################################################################
|
283 |
+
# Create the actual document and chunks objects in the database. #
|
284 |
+
##################################################################
|
285 |
if 'dbObjsCreated' not in st.session_state:
|
286 |
logger.info("#### Create db objects.")
|
287 |
st.session_state.dbObjsCreated = True
|
|
|
318 |
logger.info("#### Create chunk db objects created.")
|
319 |
logger.info("#### db objects created.")
|
320 |
|
321 |
+
|
322 |
+
#######################
|
323 |
+
# Initialize the LLM. #
|
324 |
+
#######################
|
325 |
model_path = "/app/llama-2-7b-chat.Q4_0.gguf"
|
326 |
if 'llm' not in st.session_state:
|
327 |
logger.info("### Initializing LLM.")
|
|
|
533 |
logger.info("setPrompt exited.")
|
534 |
return fullPrompt
|
535 |
|
536 |
+
|
537 |
def formatJson(jsonText):
|
538 |
try:
|
539 |
+
logger.info(f"#### formatJson jsonText: {jsonText}")
|
540 |
+
if not isinstance(jsonText,str):
|
541 |
+
jsonText = str(jsonText)
|
542 |
jsonData = json.loads(jsonText)
|
543 |
formattedJson = json.dumps(jsonData, indent=2)
|
544 |
return formattedJson
|
545 |
except json.JSONDecodeError as e:
|
546 |
logger.error(f"Invalid JSON text - {str(e)}")
|
547 |
+
return jsonText
|
548 |
|
549 |
def on_runLLMButton_Clicked():
|
550 |
logger = st.session_state.logger
|
|
|
554 |
|
555 |
#st.session_state.userpTAtext = st.session_state.userpTA
|
556 |
wrkList = setPrompt(st.session_state.userpTA,st.selectRag)
|
557 |
+
st.session_state.userpTA = formatJson(wrkList)
|
558 |
logger.info(f"userpTAtext: {st.session_state.userpTA}")
|
559 |
|
560 |
#st.session_state.rspTAtext = runLLM(st.session_state.userpTAtext)
|
561 |
rsp = runLLM(wrkList)
|
562 |
+
st.session_state.rspTA = formatJson(rsp)
|
563 |
logger.info(f"rspTAtext: {st.session_state.rspTA}")
|
564 |
|
565 |
logger.info("### on_runLLMButton_Clicked exited.")
|
|
|
567 |
def on_getAllRagDataButton_Clicked():
|
568 |
logger = st.session_state.logger
|
569 |
logger.info("### on_getAllRagButton_Clicked entered.")
|
570 |
+
st.session_state.ragpTA = formatJson(getAllRagData())
|
571 |
logger.info("### on_getAllRagButton_Clicked exited.")
|
572 |
|
573 |
+
def on_resetButton_Clicked():
|
574 |
+
logger = st.session_state.logger
|
575 |
+
logger.info("### on_Button_Clicked entered.")
|
576 |
+
st.session_state.sysTA = ""
|
577 |
+
st.session_state.userpTA = ""
|
578 |
+
st.session_state.ragpTA = ""
|
579 |
+
st.session_state.rspTA = ""
|
580 |
+
|
581 |
with st.sidebar:
|
582 |
st.selectRag = st.checkbox("Enable RAG",value=False,key="selectRag",help=None,on_change=None,args=None,kwargs=None,disabled=False,label_visibility="visible")
|
583 |
st.runLLMButton = st.button("Run LLM Prompt",key=None,help=None,on_click=on_runLLMButton_Clicked,args=None,kwargs=None,type="secondary",disabled=False,use_container_width=False)
|
584 |
st.getAllRagDataButton = st.button("Get All Rag Data",key=None,help=None,on_click=on_getAllRagDataButton_Clicked,args=None,kwargs=None,type="secondary",disabled=False,use_container_width=False)
|
585 |
+
st.resetButton = st.button("Reset",key=None,help=None,on_click=on_resetButton_Clicked,args=None,kwargs=None,type="secondary",disabled=False,use_container_width=False)
|
586 |
|
587 |
logger.info("#### semsearch.py end of code.")
|
588 |
except Exception as e:
|