Spaces:
Running
Running
MVPilgrim
committed on
Commit
·
8ca4dcf
1
Parent(s):
43e405a
debug
Browse files- Dockerfile +5 -1
- app.py +26 -10
Dockerfile
CHANGED
@@ -61,15 +61,19 @@ ENV PATH="/usr/bin/local:/app/text2vec-transformers:/app/text2vec-transformers/b
|
|
61 |
#RUN pip install nltk==3.8.1 optimum==1.13.2 onnxruntime==1.16.1 onnx==1.14.1
|
62 |
RUN ./custom_prerequisites.py
|
63 |
|
64 |
-
|
65 |
##############################
|
66 |
RUN useradd -m -u 1000 user
|
67 |
|
|
|
|
|
|
|
|
|
68 |
#############################################
|
69 |
# Specify /data volume.
|
70 |
#VOLUME /data
|
71 |
|
72 |
WORKDIR /app
|
|
|
73 |
|
74 |
##############################################################################
|
75 |
# Start the weaviate vector database, text2vec-transformers and the semantic search app.
|
|
|
61 |
#RUN pip install nltk==3.8.1 optimum==1.13.2 onnxruntime==1.16.1 onnx==1.14.1
|
62 |
RUN ./custom_prerequisites.py
|
63 |
|
|
|
64 |
##############################
|
65 |
RUN useradd -m -u 1000 user
|
66 |
|
67 |
+
RUN chmod -R 755 /app
|
68 |
+
RUN chown -R user /app
|
69 |
+
RUN chgrp -R user /app
|
70 |
+
|
71 |
#############################################
|
72 |
# Specify /data volume.
|
73 |
#VOLUME /data
|
74 |
|
75 |
WORKDIR /app
|
76 |
+
USER user
|
77 |
|
78 |
##############################################################################
|
79 |
# Start the weaviate vector database, text2vec-transformers and the semantic search app.
|
app.py
CHANGED
@@ -36,7 +36,7 @@ try:
|
|
36 |
|
37 |
|
38 |
def runStartup():
|
39 |
-
logger.
|
40 |
result = ""
|
41 |
try:
|
42 |
#result = subprocess.run("/app/startup.sh",shell=False,capture_output=None,text=None,timeout=300)
|
@@ -55,7 +55,7 @@ try:
|
|
55 |
except Exception as e2:
|
56 |
emsg = str(e2)
|
57 |
logger.error(f"#### Displaying startup.log EXCEPTION. e2: {emsg}")
|
58 |
-
logger.
|
59 |
if 'runStartup' not in st.session_state:
|
60 |
st.session_state.runStartup = True
|
61 |
runStartup()
|
@@ -69,8 +69,10 @@ try:
|
|
69 |
|
70 |
# Function to load the CSS file
|
71 |
def load_css(file_name):
|
|
|
72 |
with open(file_name) as f:
|
73 |
st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)
|
|
|
74 |
|
75 |
# Load the custom CSS
|
76 |
if 'load_css' not in st.session_state:
|
@@ -93,7 +95,7 @@ try:
|
|
93 |
# Connect to the Weaviate vector database.
|
94 |
#if 'client' not in st.session_state:
|
95 |
if 'client' not in st.session_state:
|
96 |
-
logger.
|
97 |
client = weaviate.WeaviateClient(
|
98 |
connection_params=ConnectionParams.from_params(
|
99 |
http_host="localhost",
|
@@ -109,6 +111,7 @@ try:
|
|
109 |
)
|
110 |
client.connect()
|
111 |
st.session_state.client = client
|
|
|
112 |
else:
|
113 |
client = st.session_state.client
|
114 |
|
@@ -116,8 +119,8 @@ try:
|
|
116 |
#######################################################
|
117 |
# Read each text input file, parse it into a document,
|
118 |
# chunk it, collect chunks and document name.
|
119 |
-
if not client.collections.exists("Documents") or not client.collections.exists("
|
120 |
-
logger.
|
121 |
for filename in os.listdir(pathString):
|
122 |
logger.info(filename)
|
123 |
path = Path(pathString + "/" + filename)
|
@@ -150,6 +153,7 @@ try:
|
|
150 |
|
151 |
logger.debug(f"### filename, title: {filename}, {title}")
|
152 |
logger.debug(f"### webpageDocNames: {webpageDocNames}")
|
|
|
153 |
|
154 |
|
155 |
|
@@ -158,7 +162,7 @@ try:
|
|
158 |
#wpCollection = createWebpageCollection()
|
159 |
#wpChunkCollection = createChunksCollection()
|
160 |
if not client.collections.exists("Documents"):
|
161 |
-
logger.
|
162 |
#client.collections.delete("Documents")
|
163 |
class_obj = {
|
164 |
"class": "Documents",
|
@@ -207,10 +211,11 @@ try:
|
|
207 |
]
|
208 |
}
|
209 |
wpCollection = client.collections.create_from_dict(class_obj)
|
|
|
210 |
|
211 |
|
212 |
if not client.collections.exists("Chunks"):
|
213 |
-
logger.
|
214 |
#client.collections.delete("Chunks")
|
215 |
class_obj = {
|
216 |
"class": "Chunks",
|
@@ -258,12 +263,13 @@ try:
|
|
258 |
]
|
259 |
}
|
260 |
wpChunkCollection = client.collections.create_from_dict(class_obj)
|
|
|
261 |
|
262 |
|
263 |
###########################################################
|
264 |
# Create document and chunks objects in the database.
|
265 |
if not client.collections.exists("Documents") :
|
266 |
-
logger.
|
267 |
for i, className in enumerate(webpageDocNames):
|
268 |
title = webpageTitles[i]
|
269 |
logger.debug(f"## className, title: {className}, {title}")
|
@@ -277,9 +283,10 @@ try:
|
|
277 |
"content": page_content
|
278 |
}
|
279 |
)
|
|
|
280 |
|
281 |
if not client.collections.exists("Chunks") :
|
282 |
-
logger.
|
283 |
# Insert the chunks for the document.
|
284 |
for i2, chunk in enumerate(webpageChunks[i]):
|
285 |
chunk_uuid = wpChunkCollection.data.insert(
|
@@ -293,12 +300,14 @@ try:
|
|
293 |
}
|
294 |
}
|
295 |
)
|
|
|
296 |
|
297 |
|
298 |
#################################################################
|
299 |
# Initialize the LLM.
|
300 |
model_path = "/app/llama-2-7b-chat.Q4_0.gguf"
|
301 |
if 'llm' not in st.session_state:
|
|
|
302 |
llm = Llama(model_path,
|
303 |
#*,
|
304 |
n_gpu_layers=0,
|
@@ -340,6 +349,7 @@ try:
|
|
340 |
verbose=True
|
341 |
)
|
342 |
st.session_state.llm = llm
|
|
|
343 |
else:
|
344 |
llm = st.session_state.llm
|
345 |
|
@@ -415,6 +425,7 @@ try:
|
|
415 |
st.session_state.rspTA = st.text_area(label="rspTA",value=st.session_state.rspTA)
|
416 |
|
417 |
def runLLM(prompt):
|
|
|
418 |
max_tokens = 1000
|
419 |
temperature = 0.3
|
420 |
top_p = 0.1
|
@@ -430,6 +441,7 @@ try:
|
|
430 |
stop=stop,
|
431 |
)
|
432 |
result = modelOutput["choices"][0]["text"].strip()
|
|
|
433 |
return(result)
|
434 |
|
435 |
def setPrompt(pprompt,ragFlag):
|
@@ -445,11 +457,13 @@ try:
|
|
445 |
else:
|
446 |
userPrompt = pprompt
|
447 |
#prompt = f""" <s> [INST] <<SYS>> {systemTextArea.value} </SYS>> Q: {userPrompt} A: [/INST]"""
|
|
|
448 |
return userPrompt
|
449 |
|
450 |
|
451 |
def on_submitButton_clicked():
|
452 |
logger = st.session_state.logger
|
|
|
453 |
logger.debug("\n### on_submitButton_clicked")
|
454 |
st.session_state.sysTAtext = st.session_state.sysTA
|
455 |
logger.info(f"sysTAtext: {st.session_state.sysTAtext}")
|
@@ -461,13 +475,15 @@ try:
|
|
461 |
st.session_state.rspTAtext = runLLM(st.session_state.userpTAtext)
|
462 |
st.session_state.rspTA = st.session_state.rspTAtext
|
463 |
logger.info(f"rspTAtext: {st.session_state.rspTAtext}")
|
|
|
|
|
464 |
|
465 |
|
466 |
with st.sidebar:
|
467 |
st.selectRag = st.checkbox("Enable Query With RAG",value=False,key="selectRag",help=None,on_change=None,args=None,kwargs=None,disabled=False,label_visibility="visible")
|
468 |
st.submitButton = st.button("Run LLM Query",key=None,help=None,on_click=on_submitButton_clicked,args=None,kwargs=None,type="secondary",disabled=False,use_container_width=False)
|
469 |
|
470 |
-
logger.info("#### semsearch.py
|
471 |
except Exception as e:
|
472 |
try:
|
473 |
emsg = str(e)
|
|
|
36 |
|
37 |
|
38 |
def runStartup():
|
39 |
+
logger.debug("### Running startup.sh")
|
40 |
result = ""
|
41 |
try:
|
42 |
#result = subprocess.run("/app/startup.sh",shell=False,capture_output=None,text=None,timeout=300)
|
|
|
55 |
except Exception as e2:
|
56 |
emsg = str(e2)
|
57 |
logger.error(f"#### Displaying startup.log EXCEPTION. e2: {emsg}")
|
58 |
+
logger.debug("### Running startup.sh complete")
|
59 |
if 'runStartup' not in st.session_state:
|
60 |
st.session_state.runStartup = True
|
61 |
runStartup()
|
|
|
69 |
|
70 |
# Function to load the CSS file
|
71 |
def load_css(file_name):
|
72 |
+
logger.debug("#### load_css entered.")
|
73 |
with open(file_name) as f:
|
74 |
st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)
|
75 |
+
logger.debug("#### load_css exited.")
|
76 |
|
77 |
# Load the custom CSS
|
78 |
if 'load_css' not in st.session_state:
|
|
|
95 |
# Connect to the Weaviate vector database.
|
96 |
#if 'client' not in st.session_state:
|
97 |
if 'client' not in st.session_state:
|
98 |
+
logger.debug("#### Create Weaviate db client connection.")
|
99 |
client = weaviate.WeaviateClient(
|
100 |
connection_params=ConnectionParams.from_params(
|
101 |
http_host="localhost",
|
|
|
111 |
)
|
112 |
client.connect()
|
113 |
st.session_state.client = client
|
114 |
+
logger.debug("#### Create Weaviate db client connection exited.")
|
115 |
else:
|
116 |
client = st.session_state.client
|
117 |
|
|
|
119 |
#######################################################
|
120 |
# Read each text input file, parse it into a document,
|
121 |
# chunk it, collect chunks and document name.
|
122 |
+
if not client.collections.exists("Documents") or not client.collections.exists("Chunks") :
|
123 |
+
logger.debug("#### Read and chunk input text files.")
|
124 |
for filename in os.listdir(pathString):
|
125 |
logger.info(filename)
|
126 |
path = Path(pathString + "/" + filename)
|
|
|
153 |
|
154 |
logger.debug(f"### filename, title: {filename}, {title}")
|
155 |
logger.debug(f"### webpageDocNames: {webpageDocNames}")
|
156 |
+
logger.debug("#### Read and chunk input text files exited.")
|
157 |
|
158 |
|
159 |
|
|
|
162 |
#wpCollection = createWebpageCollection()
|
163 |
#wpChunkCollection = createChunksCollection()
|
164 |
if not client.collections.exists("Documents"):
|
165 |
+
logger.debug("#### createWebpageCollection() entered.")
|
166 |
#client.collections.delete("Documents")
|
167 |
class_obj = {
|
168 |
"class": "Documents",
|
|
|
211 |
]
|
212 |
}
|
213 |
wpCollection = client.collections.create_from_dict(class_obj)
|
214 |
+
logger.debug("#### createWebpageCollection() exited.")
|
215 |
|
216 |
|
217 |
if not client.collections.exists("Chunks"):
|
218 |
+
logger.debug("#### createChunksCollection() entered.")
|
219 |
#client.collections.delete("Chunks")
|
220 |
class_obj = {
|
221 |
"class": "Chunks",
|
|
|
263 |
]
|
264 |
}
|
265 |
wpChunkCollection = client.collections.create_from_dict(class_obj)
|
266 |
+
logger.debug("#### createChunksCollection() exited.")
|
267 |
|
268 |
|
269 |
###########################################################
|
270 |
# Create document and chunks objects in the database.
|
271 |
if not client.collections.exists("Documents") :
|
272 |
+
logger.debug("#### Create page/doc db objects.")
|
273 |
for i, className in enumerate(webpageDocNames):
|
274 |
title = webpageTitles[i]
|
275 |
logger.debug(f"## className, title: {className}, {title}")
|
|
|
283 |
"content": page_content
|
284 |
}
|
285 |
)
|
286 |
+
logger.debug("#### Create page/doc db objects exited.")
|
287 |
|
288 |
if not client.collections.exists("Chunks") :
|
289 |
+
logger.debug("#### Create chunk db objects.")
|
290 |
# Insert the chunks for the document.
|
291 |
for i2, chunk in enumerate(webpageChunks[i]):
|
292 |
chunk_uuid = wpChunkCollection.data.insert(
|
|
|
300 |
}
|
301 |
}
|
302 |
)
|
303 |
+
logger.debug("#### Create chunk db objects exited.")
|
304 |
|
305 |
|
306 |
#################################################################
|
307 |
# Initialize the LLM.
|
308 |
model_path = "/app/llama-2-7b-chat.Q4_0.gguf"
|
309 |
if 'llm' not in st.session_state:
|
310 |
+
logger.debug("### Initializing LLM.")
|
311 |
llm = Llama(model_path,
|
312 |
#*,
|
313 |
n_gpu_layers=0,
|
|
|
349 |
verbose=True
|
350 |
)
|
351 |
st.session_state.llm = llm
|
352 |
+
logger.debug("### Initializing LLM exited.")
|
353 |
else:
|
354 |
llm = st.session_state.llm
|
355 |
|
|
|
425 |
st.session_state.rspTA = st.text_area(label="rspTA",value=st.session_state.rspTA)
|
426 |
|
427 |
def runLLM(prompt):
|
428 |
+
logger.debug("### runLLM entered.")
|
429 |
max_tokens = 1000
|
430 |
temperature = 0.3
|
431 |
top_p = 0.1
|
|
|
441 |
stop=stop,
|
442 |
)
|
443 |
result = modelOutput["choices"][0]["text"].strip()
|
444 |
+
logger.debug("### runLLM exited.")
|
445 |
return(result)
|
446 |
|
447 |
def setPrompt(pprompt,ragFlag):
|
|
|
457 |
else:
|
458 |
userPrompt = pprompt
|
459 |
#prompt = f""" <s> [INST] <<SYS>> {systemTextArea.value} </SYS>> Q: {userPrompt} A: [/INST]"""
|
460 |
+
logger.debug("setPrompt exited.")
|
461 |
return userPrompt
|
462 |
|
463 |
|
464 |
def on_submitButton_clicked():
|
465 |
logger = st.session_state.logger
|
466 |
+
logger.debug("### on_submitButton_clicked entered.")
|
467 |
logger.debug("\n### on_submitButton_clicked")
|
468 |
st.session_state.sysTAtext = st.session_state.sysTA
|
469 |
logger.info(f"sysTAtext: {st.session_state.sysTAtext}")
|
|
|
475 |
st.session_state.rspTAtext = runLLM(st.session_state.userpTAtext)
|
476 |
st.session_state.rspTA = st.session_state.rspTAtext
|
477 |
logger.info(f"rspTAtext: {st.session_state.rspTAtext}")
|
478 |
+
|
479 |
+
logger.debug("### on_submitButton_clicked exited.")
|
480 |
|
481 |
|
482 |
with st.sidebar:
|
483 |
st.selectRag = st.checkbox("Enable Query With RAG",value=False,key="selectRag",help=None,on_change=None,args=None,kwargs=None,disabled=False,label_visibility="visible")
|
484 |
st.submitButton = st.button("Run LLM Query",key=None,help=None,on_click=on_submitButton_clicked,args=None,kwargs=None,type="secondary",disabled=False,use_container_width=False)
|
485 |
|
486 |
+
logger.info("#### semsearch.py end of code.")
|
487 |
except Exception as e:
|
488 |
try:
|
489 |
emsg = str(e)
|