adibak committed on
Commit
b4b5aad
·
1 Parent(s): 8982261

Added page range slider by modifying get_pdf_contents and app.py

Browse files
Files changed (2) hide show
  1. app.py +10 -1
  2. helpers/file_manager.py +11 -4
app.py CHANGED
@@ -222,6 +222,12 @@ with st.sidebar:
222
  value='2024-05-01-preview',
223
  )
224
 
 
 
 
 
 
 
225
 
226
  def build_ui():
227
  """
@@ -284,7 +290,10 @@ def set_up_chat_ui():
284
  if prompt['files']:
285
  # Apparently, Streamlit stores uploaded files in memory and clears on browser close
286
  # https://docs.streamlit.io/knowledge-base/using-streamlit/where-file-uploader-store-when-deleted
287
- st.session_state[ADDITIONAL_INFO] = filem.get_pdf_contents(prompt['files'][0])
 
 
 
288
  print(f'{prompt["files"]=}')
289
 
290
  provider, llm_name = llm_helper.get_provider_model(
 
222
  value='2024-05-01-preview',
223
  )
224
 
225
+ page_range_slider = st.slider(label=('4: Specify a page range to examine:\n\n'
226
+ '(min=1, max=50)'),
227
+ min_value=1, max_value=50,
228
+ value=(1, 50))
229
+ st.session_state['page_range'] = page_range_slider
230
+
231
 
232
  def build_ui():
233
  """
 
290
  if prompt['files']:
291
  # Apparently, Streamlit stores uploaded files in memory and clears on browser close
292
  # https://docs.streamlit.io/knowledge-base/using-streamlit/where-file-uploader-store-when-deleted
293
+ page_range = st.session_state.get('page_range', (1, 50)) # fallback default
294
+ st.session_state[ADDITIONAL_INFO] = filem.get_pdf_contents(
295
+ prompt['files'][0], page_range
296
+ )
297
  print(f'{prompt["files"]=}')
298
 
299
  provider, llm_name = llm_helper.get_provider_model(
helpers/file_manager.py CHANGED
@@ -19,6 +19,7 @@ logger = logging.getLogger(__name__)
19
 
20
  def get_pdf_contents(
21
  pdf_file: st.runtime.uploaded_file_manager.UploadedFile,
 
22
  max_pages: int = GlobalConfig.MAX_PAGE_COUNT
23
  ) -> str:
24
  """
@@ -30,11 +31,17 @@ def get_pdf_contents(
30
  """
31
 
32
  reader = PdfReader(pdf_file)
33
- n_pages = min(max_pages, len(reader.pages))
34
- text = ''
35
 
36
- for page in range(n_pages):
37
- page = reader.pages[page]
 
 
 
 
 
 
 
 
38
  text += page.extract_text()
39
 
40
  return text
 
19
 
20
  def get_pdf_contents(
21
  pdf_file: st.runtime.uploaded_file_manager.UploadedFile,
22
+ page_range: tuple[int, int],
23
  max_pages: int = GlobalConfig.MAX_PAGE_COUNT
24
  ) -> str:
25
  """
 
31
  """
32
 
33
  reader = PdfReader(pdf_file)
 
 
34
 
35
+ total_pages = len(reader.pages)
36
+ n_pages = min(max_pages, total_pages)
37
+
38
+ start, end = page_range
39
+ start = max(1, start)
40
+ end = min(n_pages, end)
41
+
42
+ text = ''
43
+ for page_num in range(start - 1, end):
44
+ page = reader.pages[page_num]
45
  text += page.extract_text()
46
 
47
  return text