MVPilgrim committed on
Commit
2b6582c
·
1 Parent(s): 0f87433
Files changed (1) hide show
  1. app.py +8 -7
app.py CHANGED
@@ -141,20 +141,20 @@ try:
141
  webpageTitles.append(title)
142
  max_tokens = 1000
143
  tokenizer = Tokenizer.from_pretrained("bert-base-uncased")
144
- logger.debug(f"### tokenizer: {tokenizer}")
145
  splitter = HuggingFaceTextSplitter(tokenizer, trim_chunks=True)
146
  chunksOnePage = splitter.chunks(page_content, chunk_capacity=50)
147
 
148
  chunks = []
149
  for chnk in chunksOnePage:
150
- logger.debug(f"#### chnk in file: {chnk}")
151
  chunks.append(chnk)
152
- logger.debug(f"chunks: {chunks}")
153
  webpageChunks.append(chunks)
154
  webpageChunksDocNames.append(filename + "Chunks")
155
 
156
- logger.debug(f"### filename, title: {filename}, {title}")
157
- logger.debug(f"### webpageDocNames: {webpageDocNames}")
158
  logger.info("#### Read and chunk input text files exited.")
159
 
160
 
@@ -281,6 +281,7 @@ try:
281
 
282
  ###########################################################
283
  # Create document and chunks objects in the database.
 
284
  if 'wpCollectionLoaded' not in st.session_state:
285
  logger.info("#### Create page/doc db objects.")
286
  for i, className in enumerate(webpageDocNames):
@@ -289,7 +290,7 @@ try:
289
  # Create Webpage Object
290
  page_content = page_contentArray[i]
291
  # Insert the document.
292
- wpCollectionObj_uuid = wpCollection.data.insert(
293
  {
294
  "name": className,
295
  "title": title,
@@ -311,7 +312,7 @@ try:
311
  "chunk_index": i2,
312
  "references":
313
  {
314
- "webpage": wpCollectionObj_uuid
315
  }
316
  }
317
  )
 
141
  webpageTitles.append(title)
142
  max_tokens = 1000
143
  tokenizer = Tokenizer.from_pretrained("bert-base-uncased")
144
+ logger.info(f"### tokenizer: {tokenizer}")
145
  splitter = HuggingFaceTextSplitter(tokenizer, trim_chunks=True)
146
  chunksOnePage = splitter.chunks(page_content, chunk_capacity=50)
147
 
148
  chunks = []
149
  for chnk in chunksOnePage:
150
+ logger.info(f"#### chnk in file: {chnk}")
151
  chunks.append(chnk)
152
+ logger.info(f"chunks: {chunks}")
153
  webpageChunks.append(chunks)
154
  webpageChunksDocNames.append(filename + "Chunks")
155
 
156
+ logger.info(f"### filename, title: {filename}, {title}")
157
+ logger.info(f"### webpageDocNames: {webpageDocNames}")
158
  logger.info("#### Read and chunk input text files exited.")
159
 
160
 
 
281
 
282
  ###########################################################
283
  # Create document and chunks objects in the database.
284
+ wpCollectionObj_uuid = []
285
  if 'wpCollectionLoaded' not in st.session_state:
286
  logger.info("#### Create page/doc db objects.")
287
  for i, className in enumerate(webpageDocNames):
 
290
  # Create Webpage Object
291
  page_content = page_contentArray[i]
292
  # Insert the document.
293
+ wpCollectionObj_uuid[i] = wpCollection.data.insert(
294
  {
295
  "name": className,
296
  "title": title,
 
312
  "chunk_index": i2,
313
  "references":
314
  {
315
+ "webpage": wpCollectionObj_uuid[i2]
316
  }
317
  }
318
  )