MVPilgrim committed on
Commit
c4f0f2d
·
1 Parent(s): 9f5bb83
Files changed (2) hide show
  1. app.py +12 -12
  2. pylint.out +2 -0
app.py CHANGED
@@ -12,7 +12,6 @@ from tokenizers import Tokenizer
12
  import json
13
  import os
14
  import re
15
- import logging
16
 
17
  import llama_cpp
18
  from llama_cpp import Llama
@@ -37,9 +36,9 @@ try:
37
 
38
  def runStartup():
39
  logger.info("### Running startup.sh")
40
- result = ""
41
  try:
42
- #result = subprocess.run("/app/startup.sh",shell=False,capture_output=None,text=None,timeout=300)
 
43
  #logger.info(f"startup.sh stdout: {result.stdout}")
44
  #logger.info(f"startup.sh stderr: {result.stderr}")
45
  #logger.info(f"Return code: {result.returncode}")
@@ -79,10 +78,11 @@ try:
79
  load_css(".streamlit/main.css")
80
  st.session_state.load_css = True
81
 
82
- st.markdown("<h1 style='text-align: center; color: #666666;'>Vector Database RAG Proof of Concept</h1>", unsafe_allow_html=True)
 
83
  st.markdown("<h6 style='text-align: center; color: #666666;'>V1</h6>", unsafe_allow_html=True)
84
 
85
- #pathString = "/Users/660565/KPSAllInOne/ProgramFilesX86/WebCopy/DownloadedWebSites/LLMPOC_HTML"
86
  pathString = "/app/inputDocs"
87
  chunks = []
88
  webpageDocNames = []
@@ -126,7 +126,7 @@ try:
126
  path = Path(pathString + "/" + filename)
127
  filename = filename.rstrip(".html")
128
  webpageDocNames.append(filename)
129
- htmlLoader = BSHTMLLoader(path,"utf-8")
130
  htmlData = htmlLoader.load()
131
 
132
  title = htmlData[0].metadata['title']
@@ -134,8 +134,8 @@ try:
134
 
135
  # Clean data. Remove multiple newlines, etc.
136
  page_content = re.sub(r'\n+', '\n',page_content)
137
-
138
- page_contentArray.append(page_content);
139
  webpageTitles.append(title)
140
  max_tokens = 1000
141
  tokenizer = Tokenizer.from_pretrained("bert-base-uncased")
@@ -298,7 +298,7 @@ try:
298
  if not client.collections.exists("Chunks") :
299
  logger.info("#### Create chunk db objects.")
300
  # Insert the chunks for the document.
301
- for i2, chunk in enumerate(webpageChunks[i]):
302
  chunk_uuid = wpChunksCollection.data.insert(
303
  {
304
  "title": title,
@@ -418,7 +418,7 @@ try:
418
  if "userpTA" not in st.session_state:
419
  st.session_state.userpTA = st.text_area(label="userpTA",value="fdsaf fsdafdsa")
420
  elif "userpTAtext" in st.session_state:
421
- st.session_state.userpTA = st.text_area(label="userpTA",value=st.session_state.userpTAtext)
422
  else:
423
  st.session_state.userpTA = st.text_area(label="userpTA",value=st.session_state.userpTA)
424
 
@@ -458,7 +458,7 @@ try:
458
  result = modelOutput["choices"][0]["text"].strip()
459
  logger.info(f"### llmResult: {result}")
460
  logger.info("### runLLM exited.")
461
- return(result)
462
 
463
  def setPrompt(pprompt,ragFlag):
464
  logger = st.session_state.logger
@@ -469,7 +469,7 @@ try:
469
  prompt = userPrompt
470
  userPrompt = "Using this information: " + ragPrompt \
471
  + "process the following statement or question and produce a response" \
472
- + intialPrompt
473
  else:
474
  userPrompt = st.session_state.sysTA + " " + pprompt
475
  #prompt = f""" <s> [INST] <<SYS>> {systemTextArea.value} </SYS>> Q: {userPrompt} A: [/INST]"""
 
12
  import json
13
  import os
14
  import re
 
15
 
16
  import llama_cpp
17
  from llama_cpp import Llama
 
36
 
37
  def runStartup():
38
  logger.info("### Running startup.sh")
 
39
  try:
40
+ #result = subprocess.run("/app/startup.sh",shell=False,capture_output=None,
41
+ # text=None,timeout=300)
42
  #logger.info(f"startup.sh stdout: {result.stdout}")
43
  #logger.info(f"startup.sh stderr: {result.stderr}")
44
  #logger.info(f"Return code: {result.returncode}")
 
78
  load_css(".streamlit/main.css")
79
  st.session_state.load_css = True
80
 
81
+ st.markdown("<h1 style='text-align: center; color: #666666;'>Vector Database RAG Proof of Concept</h1>", \
82
+ unsafe_allow_html=True)
83
  st.markdown("<h6 style='text-align: center; color: #666666;'>V1</h6>", unsafe_allow_html=True)
84
 
85
+ #pathString = "/Users/660565/KPSAllInOne/ProgramFilesX86/WebCopy/DownloadedWebSites/LLMPOC_HTML"
86
  pathString = "/app/inputDocs"
87
  chunks = []
88
  webpageDocNames = []
 
126
  path = Path(pathString + "/" + filename)
127
  filename = filename.rstrip(".html")
128
  webpageDocNames.append(filename)
129
+ htmlLoader = BSHTMLLoader(path,"utf-8")
130
  htmlData = htmlLoader.load()
131
 
132
  title = htmlData[0].metadata['title']
 
134
 
135
  # Clean data. Remove multiple newlines, etc.
136
  page_content = re.sub(r'\n+', '\n',page_content)
137
+
138
+ page_contentArray.append(page_content)
139
  webpageTitles.append(title)
140
  max_tokens = 1000
141
  tokenizer = Tokenizer.from_pretrained("bert-base-uncased")
 
298
  if not client.collections.exists("Chunks") :
299
  logger.info("#### Create chunk db objects.")
300
  # Insert the chunks for the document.
301
+ for i2, chunk in enumerate(webpageChunks):
302
  chunk_uuid = wpChunksCollection.data.insert(
303
  {
304
  "title": title,
 
418
  if "userpTA" not in st.session_state:
419
  st.session_state.userpTA = st.text_area(label="userpTA",value="fdsaf fsdafdsa")
420
  elif "userpTAtext" in st.session_state:
421
+ st.session_state.userpTA = st.text_area (label="userpTA",value=st.session_state.userpTAtext)
422
  else:
423
  st.session_state.userpTA = st.text_area(label="userpTA",value=st.session_state.userpTA)
424
 
 
458
  result = modelOutput["choices"][0]["text"].strip()
459
  logger.info(f"### llmResult: {result}")
460
  logger.info("### runLLM exited.")
461
+ return result
462
 
463
  def setPrompt(pprompt,ragFlag):
464
  logger = st.session_state.logger
 
469
  prompt = userPrompt
470
  userPrompt = "Using this information: " + ragPrompt \
471
  + "process the following statement or question and produce a response" \
472
+ + pprompt
473
  else:
474
  userPrompt = st.session_state.sysTA + " " + pprompt
475
  #prompt = f""" <s> [INST] <<SYS>> {systemTextArea.value} </SYS>> Q: {userPrompt} A: [/INST]"""
pylint.out ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ ************* Module app
2
+ app.py:138:51: E0001: Parsing failed: 'invalid syntax (app, line 138)' (syntax-error)