MVPilgrim committed on
Commit
7fea130
·
1 Parent(s): 13aea45
Files changed (1) hide show
  1. app.py +50 -48
app.py CHANGED
@@ -27,7 +27,7 @@ try:
27
  weaviate_logger = logging.getLogger("httpx")
28
  weaviate_logger.setLevel(logging.WARNING)
29
  logger = logging.getLogger(__name__)
30
- logging.basicConfig(level=logging.DEBUG)
31
  st.session_state.weaviate_logger = weaviate_logger
32
  st.session_state.logger = logger
33
  else:
@@ -36,7 +36,7 @@ try:
36
 
37
 
38
  def runStartup():
39
- logger.debug("### Running startup.sh")
40
  result = ""
41
  try:
42
  #result = subprocess.run("/app/startup.sh",shell=False,capture_output=None,text=None,timeout=300)
@@ -47,15 +47,15 @@ try:
47
  time.sleep(180)
48
  except Exception as e:
49
  emsg = str(e)
50
- logger.error(f"subprocess.run EXCEPTION. e: {emsg}")
51
  try:
52
  with open("/app/startup.log", "r") as file:
53
  content = file.read()
54
  print(content)
55
  except Exception as e2:
56
  emsg = str(e2)
57
- logger.error(f"#### Displaying startup.log EXCEPTION. e2: {emsg}")
58
- logger.debug("### Running startup.sh complete")
59
  if 'runStartup' not in st.session_state:
60
  st.session_state.runStartup = True
61
  runStartup()
@@ -65,14 +65,14 @@ try:
65
  ######################################################################
66
  # MAINLINE
67
  #
68
- logger.info("#### MAINLINE ENTERED.")
69
 
70
  # Function to load the CSS file
71
  def load_css(file_name):
72
- logger.debug("#### load_css entered.")
73
  with open(file_name) as f:
74
  st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)
75
- logger.debug("#### load_css exited.")
76
 
77
  # Load the custom CSS
78
  if 'load_css' not in st.session_state:
@@ -95,7 +95,7 @@ try:
95
  # Connect to the Weaviate vector database.
96
  #if 'client' not in st.session_state:
97
  if 'client' not in st.session_state:
98
- logger.debug("#### Create Weaviate db client connection.")
99
  client = weaviate.WeaviateClient(
100
  connection_params=ConnectionParams.from_params(
101
  http_host="localhost",
@@ -111,7 +111,7 @@ try:
111
  )
112
  client.connect()
113
  st.session_state.client = client
114
- logger.debug("#### Create Weaviate db client connection exited.")
115
  else:
116
  client = st.session_state.client
117
 
@@ -120,9 +120,9 @@ try:
120
  # Read each text input file, parse it into a document,
121
  # chunk it, collect chunks and document name.
122
  if not client.collections.exists("Documents") or not client.collections.exists("Chunks") :
123
- logger.debug("#### Read and chunk input text files.")
124
  for filename in os.listdir(pathString):
125
- logger.info(filename)
126
  path = Path(pathString + "/" + filename)
127
  filename = filename.rstrip(".html")
128
  webpageDocNames.append(filename)
@@ -139,21 +139,21 @@ try:
139
  webpageTitles.append(title)
140
  max_tokens = 1000
141
  tokenizer = Tokenizer.from_pretrained("bert-base-uncased")
142
- logger.debug(f"### tokenizer: {tokenizer}")
143
  splitter = HuggingFaceTextSplitter(tokenizer, trim_chunks=True)
144
  chunksOnePage = splitter.chunks(page_content, chunk_capacity=50)
145
 
146
  chunks = []
147
  for chnk in chunksOnePage:
148
- logger.debug(f"#### chnk in file: {chnk}")
149
  chunks.append(chnk)
150
- logger.debug(f"chunks: {chunks}")
151
  webpageChunks.append(chunks)
152
  webpageChunksDocNames.append(filename + "Chunks")
153
 
154
- logger.debug(f"### filename, title: {filename}, {title}")
155
- logger.debug(f"### webpageDocNames: {webpageDocNames}")
156
- logger.debug("#### Read and chunk input text files exited.")
157
 
158
 
159
 
@@ -162,7 +162,7 @@ try:
162
  #wpCollection = createWebpageCollection()
163
  #wpChunkCollection = createChunksCollection()
164
  if not client.collections.exists("Documents"):
165
- logger.debug("#### createWebpageCollection() entered.")
166
  #client.collections.delete("Documents")
167
  class_obj = {
168
  "class": "Documents",
@@ -211,11 +211,11 @@ try:
211
  ]
212
  }
213
  wpCollection = client.collections.create_from_dict(class_obj)
214
- logger.debug("#### createWebpageCollection() exited.")
215
 
216
 
217
  if not client.collections.exists("Chunks"):
218
- logger.debug("#### createChunksCollection() entered.")
219
  #client.collections.delete("Chunks")
220
  class_obj = {
221
  "class": "Chunks",
@@ -263,16 +263,16 @@ try:
263
  ]
264
  }
265
  wpChunkCollection = client.collections.create_from_dict(class_obj)
266
- logger.debug("#### createChunksCollection() exited.")
267
 
268
 
269
  ###########################################################
270
  # Create document and chunks objects in the database.
271
  if not client.collections.exists("Documents") :
272
- logger.debug("#### Create page/doc db objects.")
273
  for i, className in enumerate(webpageDocNames):
274
  title = webpageTitles[i]
275
- logger.debug(f"## className, title: {className}, {title}")
276
  # Create Webpage Object
277
  page_content = page_contentArray[i]
278
  # Insert the document.
@@ -283,10 +283,10 @@ try:
283
  "content": page_content
284
  }
285
  )
286
- logger.debug("#### Create page/doc/db/objects exited.")
287
 
288
  if not client.collections.exists("Chunks") :
289
- logger.debug("#### Create chunk db objects.")
290
  # Insert the chunks for the document.
291
  for i2, chunk in enumerate(webpageChunks[i]):
292
  chunk_uuid = wpChunkCollection.data.insert(
@@ -300,14 +300,14 @@ try:
300
  }
301
  }
302
  )
303
- logger.debug("#### Create chunk db objects exited.")
304
 
305
 
306
  #################################################################
307
  # Initialize the LLM.
308
  model_path = "/app/llama-2-7b-chat.Q4_0.gguf"
309
  if 'llm' not in st.session_state:
310
- logger.debug("### Initializing LLM.")
311
  llm = Llama(model_path,
312
  #*,
313
  n_gpu_layers=0,
@@ -349,45 +349,46 @@ try:
349
  verbose=True
350
  )
351
  st.session_state.llm = llm
352
- logger.debug("### Initializing LLM exited.")
353
  else:
354
  llm = st.session_state.llm
355
 
356
  def getRagData(promptText):
357
- logger.info("#### getRagData() entered.")
358
  ###############################################################################
359
  # Initial the the sentence transformer and encode the query prompt.
360
- logger.info(f"#### Encode text query prompt to create vectors. {text}")
361
  model = SentenceTransformer('/app/multi-qa-MiniLM-L6-cos-v1')
362
 
363
  vector = model.encode(promptText)
364
  vectorList = []
365
 
366
- logger.debug("#### Print vectors.")
367
  for vec in vector:
368
  vectorList.append(vec)
369
- logger.debug(f"vectorList: {vectorList[2]}")
370
 
371
  # Fetch chunks and print chunks.
372
- logger.info("#### Retrieve semchunks from db using vectors from prompt.")
373
  semChunks = wpChunkCollection.query.near_vector(
374
  near_vector=vectorList,
375
  distance=0.7,
376
  limit=3
377
  )
378
- logger.debug(f"### semChunks[0]: {semChunks}")
379
 
380
  # Print chunks, corresponding document and document title.
381
  ragData = ""
382
- logger.info("#### Print individual retrieved chunks.")
383
  for chunk in enumerate(semChunks.objects):
384
- logger.info(f"#### chunk: {chunk}")
385
  ragData = ragData + "\n" + chunk[0]
386
  webpage_uuid = chunk[1].properties['references']['webpage']
387
- logger.info(f"webpage_uuid: {webpage_uuid}")
388
  wpFromChunk = wpCollection.query.fetch_object_by_id(webpage_uuid)
389
- logger.info(f"### wpFromChunk title: {wpFromChunk.properties['title']}")
390
  #collection = client.collections.get("Chunks")
 
391
  return ragData
392
 
393
 
@@ -426,7 +427,7 @@ try:
426
 
427
  def runLLM(prompt):
428
  logger = st.session_state.logger
429
- logger.debug("### runLLM entered.")
430
 
431
  max_tokens = 1000
432
  temperature = 0.3
@@ -443,7 +444,8 @@ try:
443
  stop=stop,
444
  )
445
  result = modelOutput["choices"][0]["text"].strip()
446
- logger.debug("### runLLM exited.")
 
447
  return(result)
448
 
449
  def setPrompt(pprompt,ragFlag):
@@ -459,14 +461,14 @@ try:
459
  else:
460
  userPrompt = pprompt
461
  #prompt = f""" <s> [INST] <<SYS>> {systemTextArea.value} </SYS>> Q: {userPrompt} A: [/INST]"""
462
- logger.debug("setPrompt exited.")
 
463
  return userPrompt
464
 
465
 
466
  def on_submitButton_clicked():
467
  logger = st.session_state.logger
468
- logger.debug("### on_submitButton_clicked entered.")
469
- logger.debug("\n### on_submitButton_clicked")
470
  st.session_state.sysTAtext = st.session_state.sysTA
471
  logger.info(f"sysTAtext: {st.session_state.sysTAtext}")
472
 
@@ -478,7 +480,7 @@ try:
478
  st.session_state.rspTA = st.session_state.rspTAtext
479
  logger.info(f"rspTAtext: {st.session_state.rspTAtext}")
480
 
481
- logger.debug("### on_submitButton_clicked exited.")
482
 
483
 
484
  with st.sidebar:
@@ -489,10 +491,10 @@ try:
489
  except Exception as e:
490
  try:
491
  emsg = str(e)
492
- logger.error(f"Program-wide EXCEPTION. e: {emsg}")
493
  with open("/app/startup.log", "r") as file:
494
  content = file.read()
495
- print(content)
496
  except Exception as e2:
497
  emsg = str(e2)
498
- logger.error(f"#### Displaying startup.log EXCEPTION. e2: {emsg}")
 
27
  weaviate_logger = logging.getLogger("httpx")
28
  weaviate_logger.setLevel(logging.WARNING)
29
  logger = logging.getLogger(__name__)
30
+ logging.basicConfig(level=logging.INFO)
31
  st.session_state.weaviate_logger = weaviate_logger
32
  st.session_state.logger = logger
33
  else:
 
36
 
37
 
38
  def runStartup():
39
+ logger.INFO("### Running startup.sh")
40
  result = ""
41
  try:
42
  #result = subprocess.run("/app/startup.sh",shell=False,capture_output=None,text=None,timeout=300)
 
47
  time.sleep(180)
48
  except Exception as e:
49
  emsg = str(e)
50
+ logger.ERROR(f"subprocess.run EXCEPTION. e: {emsg}")
51
  try:
52
  with open("/app/startup.log", "r") as file:
53
  content = file.read()
54
  print(content)
55
  except Exception as e2:
56
  emsg = str(e2)
57
+ logger.ERROR(f"#### Displaying startup.log EXCEPTION. e2: {emsg}")
58
+ logger.INFO("### Running startup.sh complete")
59
  if 'runStartup' not in st.session_state:
60
  st.session_state.runStartup = True
61
  runStartup()
 
65
  ######################################################################
66
  # MAINLINE
67
  #
68
+ logger.INFO("#### MAINLINE ENTERED.")
69
 
70
  # Function to load the CSS file
71
  def load_css(file_name):
72
+ logger.INFO("#### load_css entered.")
73
  with open(file_name) as f:
74
  st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)
75
+ logger.INFO("#### load_css exited.")
76
 
77
  # Load the custom CSS
78
  if 'load_css' not in st.session_state:
 
95
  # Connect to the Weaviate vector database.
96
  #if 'client' not in st.session_state:
97
  if 'client' not in st.session_state:
98
+ logger.INFO("#### Create Weaviate db client connection.")
99
  client = weaviate.WeaviateClient(
100
  connection_params=ConnectionParams.from_params(
101
  http_host="localhost",
 
111
  )
112
  client.connect()
113
  st.session_state.client = client
114
+ logger.INFO("#### Create Weaviate db client connection exited.")
115
  else:
116
  client = st.session_state.client
117
 
 
120
  # Read each text input file, parse it into a document,
121
  # chunk it, collect chunks and document name.
122
  if not client.collections.exists("Documents") or not client.collections.exists("Chunks") :
123
+ logger.INFO("#### Read and chunk input text files.")
124
  for filename in os.listdir(pathString):
125
+ logger.DEBUG(filename)
126
  path = Path(pathString + "/" + filename)
127
  filename = filename.rstrip(".html")
128
  webpageDocNames.append(filename)
 
139
  webpageTitles.append(title)
140
  max_tokens = 1000
141
  tokenizer = Tokenizer.from_pretrained("bert-base-uncased")
142
+ logger.DEBUG(f"### tokenizer: {tokenizer}")
143
  splitter = HuggingFaceTextSplitter(tokenizer, trim_chunks=True)
144
  chunksOnePage = splitter.chunks(page_content, chunk_capacity=50)
145
 
146
  chunks = []
147
  for chnk in chunksOnePage:
148
+ logger.DEBUG(f"#### chnk in file: {chnk}")
149
  chunks.append(chnk)
150
+ logger.DEBUG(f"chunks: {chunks}")
151
  webpageChunks.append(chunks)
152
  webpageChunksDocNames.append(filename + "Chunks")
153
 
154
+ logger.DEBUG(f"### filename, title: {filename}, {title}")
155
+ logger.DEBUG(f"### webpageDocNames: {webpageDocNames}")
156
+ logger.INFO("#### Read and chunk input text files exited.")
157
 
158
 
159
 
 
162
  #wpCollection = createWebpageCollection()
163
  #wpChunkCollection = createChunksCollection()
164
  if not client.collections.exists("Documents"):
165
+ logger.INFO("#### createWebpageCollection() entered.")
166
  #client.collections.delete("Documents")
167
  class_obj = {
168
  "class": "Documents",
 
211
  ]
212
  }
213
  wpCollection = client.collections.create_from_dict(class_obj)
214
+ logger.INFO("#### createWebpageCollection() exited.")
215
 
216
 
217
  if not client.collections.exists("Chunks"):
218
+ logger.INFO("#### createChunksCollection() entered.")
219
  #client.collections.delete("Chunks")
220
  class_obj = {
221
  "class": "Chunks",
 
263
  ]
264
  }
265
  wpChunkCollection = client.collections.create_from_dict(class_obj)
266
+ logger.INFO("#### createChunksCollection() exited.")
267
 
268
 
269
  ###########################################################
270
  # Create document and chunks objects in the database.
271
  if not client.collections.exists("Documents") :
272
+ logger.INFO("#### Create page/doc db objects.")
273
  for i, className in enumerate(webpageDocNames):
274
  title = webpageTitles[i]
275
+ logger.DEBUG(f"## className, title: {className}, {title}")
276
  # Create Webpage Object
277
  page_content = page_contentArray[i]
278
  # Insert the document.
 
283
  "content": page_content
284
  }
285
  )
286
+ logger.INFO("#### Create page/doc/db/objects exited.")
287
 
288
  if not client.collections.exists("Chunks") :
289
+ logger.INFO("#### Create chunk db objects.")
290
  # Insert the chunks for the document.
291
  for i2, chunk in enumerate(webpageChunks[i]):
292
  chunk_uuid = wpChunkCollection.data.insert(
 
300
  }
301
  }
302
  )
303
+ logger.INFO("#### Create chunk db objects exited.")
304
 
305
 
306
  #################################################################
307
  # Initialize the LLM.
308
  model_path = "/app/llama-2-7b-chat.Q4_0.gguf"
309
  if 'llm' not in st.session_state:
310
+ logger.INFO("### Initializing LLM.")
311
  llm = Llama(model_path,
312
  #*,
313
  n_gpu_layers=0,
 
349
  verbose=True
350
  )
351
  st.session_state.llm = llm
352
+ logger.INFO("### Initializing LLM exited.")
353
  else:
354
  llm = st.session_state.llm
355
 
356
  def getRagData(promptText):
357
+ logger.INFO("#### getRagData() entered.")
358
  ###############################################################################
359
  # Initial the the sentence transformer and encode the query prompt.
360
+ logger.DEBUG(f"#### Encode text query prompt to create vectors. {text}")
361
  model = SentenceTransformer('/app/multi-qa-MiniLM-L6-cos-v1')
362
 
363
  vector = model.encode(promptText)
364
  vectorList = []
365
 
366
+ logger.DEBUG("#### Print vectors.")
367
  for vec in vector:
368
  vectorList.append(vec)
369
+ logger.DEBUG(f"vectorList: {vectorList[2]}")
370
 
371
  # Fetch chunks and print chunks.
372
+ logger.DEBUG("#### Retrieve semchunks from db using vectors from prompt.")
373
  semChunks = wpChunkCollection.query.near_vector(
374
  near_vector=vectorList,
375
  distance=0.7,
376
  limit=3
377
  )
378
+ logger.DEBUG(f"### semChunks[0]: {semChunks}")
379
 
380
  # Print chunks, corresponding document and document title.
381
  ragData = ""
382
+ logger.DEBUG("#### Print individual retrieved chunks.")
383
  for chunk in enumerate(semChunks.objects):
384
+ logger.INFO(f"#### chunk: {chunk}")
385
  ragData = ragData + "\n" + chunk[0]
386
  webpage_uuid = chunk[1].properties['references']['webpage']
387
+ logger.INFO(f"webpage_uuid: {webpage_uuid}")
388
  wpFromChunk = wpCollection.query.fetch_object_by_id(webpage_uuid)
389
+ logger.INFO(f"### wpFromChunk title: {wpFromChunk.properties['title']}")
390
  #collection = client.collections.get("Chunks")
391
+ logger.INFO("#### getRagData() exited.")
392
  return ragData
393
 
394
 
 
427
 
428
  def runLLM(prompt):
429
  logger = st.session_state.logger
430
+ logger.INFO("### runLLM entered.")
431
 
432
  max_tokens = 1000
433
  temperature = 0.3
 
444
  stop=stop,
445
  )
446
  result = modelOutput["choices"][0]["text"].strip()
447
+ logger.INFO(f"### llmResult: {result}")
448
+ logger.INFO("### runLLM exited.")
449
  return(result)
450
 
451
  def setPrompt(pprompt,ragFlag):
 
461
  else:
462
  userPrompt = pprompt
463
  #prompt = f""" <s> [INST] <<SYS>> {systemTextArea.value} </SYS>> Q: {userPrompt} A: [/INST]"""
464
+ logger.INFO("setPrompt exited.")
465
+ logger.INFO(f"### userPrompt: {userPrompt}")
466
  return userPrompt
467
 
468
 
469
  def on_submitButton_clicked():
470
  logger = st.session_state.logger
471
+ logger.INFO("### on_submitButton_clicked entered.")
 
472
  st.session_state.sysTAtext = st.session_state.sysTA
473
  logger.info(f"sysTAtext: {st.session_state.sysTAtext}")
474
 
 
480
  st.session_state.rspTA = st.session_state.rspTAtext
481
  logger.info(f"rspTAtext: {st.session_state.rspTAtext}")
482
 
483
+ logger.INFO("### on_submitButton_clicked exited.")
484
 
485
 
486
  with st.sidebar:
 
491
  except Exception as e:
492
  try:
493
  emsg = str(e)
494
+ logger.ERROR(f"Program-wide EXCEPTION. e: {emsg}")
495
  with open("/app/startup.log", "r") as file:
496
  content = file.read()
497
+ logger.DEBUG(content)
498
  except Exception as e2:
499
  emsg = str(e2)
500
+ logger.ERROR(f"#### Displaying startup.log EXCEPTION. e2: {emsg}")