adibak committed on
Commit
1e2c128
·
1 Parent(s): da5eb4d

remove upload zone, allow chat uploads, set & update slider range

Browse files
Files changed (2) hide show
  1. app.py +31 -101
  2. helpers/file_manager.py +7 -1
app.py CHANGED
@@ -13,10 +13,8 @@ import httpx
13
  import huggingface_hub
14
  import json5
15
  import ollama
16
- from pypdf import PdfReader
17
  import requests
18
  import streamlit as st
19
- from streamlit_float import * # for floating UI elements
20
  from dotenv import load_dotenv
21
  from langchain_community.chat_message_histories import StreamlitChatMessageHistory
22
  from langchain_core.messages import HumanMessage
@@ -31,7 +29,6 @@ load_dotenv()
31
 
32
  RUN_IN_OFFLINE_MODE = os.getenv('RUN_IN_OFFLINE_MODE', 'False').lower() == 'true'
33
 
34
- float_init() # Initialize streamlit_float
35
 
36
  @st.cache_data
37
  def _load_strings() -> dict:
@@ -145,6 +142,7 @@ DOWNLOAD_FILE_KEY = 'download_file_name'
145
  IS_IT_REFINEMENT = 'is_it_refinement'
146
  ADDITIONAL_INFO = 'additional_info'
147
 
 
148
  logger = logging.getLogger(__name__)
149
 
150
  texts = list(GlobalConfig.PPTX_TEMPLATE_FILES.keys())
@@ -224,6 +222,11 @@ with st.sidebar:
224
  value='2024-05-01-preview',
225
  )
226
 
 
 
 
 
 
227
  def build_ui():
228
  """
229
  Display the input elements for content generation.
@@ -251,47 +254,12 @@ def build_ui():
251
 
252
  set_up_chat_ui()
253
 
254
- def apply_custom_css():
255
- # Custom CSS so that the file upload area is kind of transparent, remains near the bottom but is
256
- # a little enlarged for ease of use, and the extra things that are normally part of st.file_uploader,
257
- # i.e. the "Drag and Drop File Here" label, the pdf's name and size label, upload icon, and browse files button,
258
- # are hidden. What this CSS does is produce a simple 'zone' that the user can click or drop a file on.
259
- st.markdown(
260
- '''
261
- <style>
262
-
263
- div[data-testid="stFileUploader"]{
264
- position:relative;
265
- opacity:0.5;
266
- width:200%;
267
- height:100px;
268
- left:-105%;
269
- }
270
- section[data-testid="stFileUploaderDropzone"]{
271
- position:absolute;
272
- width:100%;
273
- height:100%;
274
- top:0;
275
- }
276
- div[data-testid="stFileUploaderDropzoneInstructions"]{
277
- display:none;
278
- }
279
- div[data-testid="stFileUploaderFile"]{
280
- display:none;
281
- }
282
- div[data-testid="stFileUploaderFileName"]{
283
- display:none;
284
- }
285
- </style>
286
- ''',
287
- unsafe_allow_html=True
288
- )
289
 
290
  def set_up_chat_ui():
291
  """
292
  Prepare the chat interface and related functionality.
293
  """
294
-
295
  with st.expander('Usage Instructions'):
296
  st.markdown(GlobalConfig.CHAT_USAGE_INSTRUCTIONS)
297
 
@@ -310,63 +278,28 @@ def set_up_chat_ui():
310
  for msg in history.messages:
311
  st.chat_message(msg.type).code(msg.content, language='json')
312
 
313
- # container to hold chat field
314
- prompt_container = st.container()
315
- with prompt_container:
316
- # Chat input below the uploader
317
- prompt = st.chat_input(
318
- placeholder=APP_TEXT['chat_placeholder'],
319
- max_chars=GlobalConfig.LLM_MODEL_MAX_INPUT_LENGTH,
320
- file_type=['pdf', ],
321
- )
322
- # make it stick near bottom
323
- prompt_container.float("bottom:40px;width:50%;z-index:999;font-size:10pt;")
324
-
325
- # some CSS to simplify the look of the upload area
326
- apply_custom_css()
327
-
328
- # container to hold uploader
329
- upload_container = st.container()
330
- with upload_container:
331
- uploaded_pdf = st.file_uploader(
332
- "",
333
- type=["pdf"],
334
- label_visibility="visible",
335
- )
336
-
337
- # PDF Processing and Slider Logic
338
- if uploaded_pdf:
339
- reader = PdfReader(uploaded_pdf)
340
- total_pages = len(reader.pages)
341
- st.session_state["pdf_page_count"] = total_pages
342
-
343
- # Slider for page range
344
- max_slider = min(50, total_pages) # enforce 50 page limit
345
-
346
- with st.sidebar:
347
- # display the pdf's name
348
- st.text(f"PDF Uploaded: {uploaded_pdf.name}")
349
-
350
- st.slider(
351
- label="4: Specify a page range to examine:",
352
- min_value=1,
353
- max_value=max_slider,
354
- value=(1, max_slider),
355
- key="page_range"
356
- )
357
-
358
- # make container stay near bottom too, but surround the chat and have dotted border for the visual cue
359
- upload_container.float("border-style:dashed solid;bottom:10px;width:150%;height:100px;font-size:10pt;left:0;")
360
-
361
- if prompt:
362
- prompt_text = prompt
363
-
364
- # if the user uploaded a pdf and specified a range, get the contents
365
- if uploaded_pdf and "page_range" in st.session_state:
366
- st.session_state[ADDITIONAL_INFO] = filem.get_pdf_contents(
367
- uploaded_pdf,
368
- st.session_state["page_range"]
369
- )
370
 
371
  provider, llm_name = llm_helper.get_provider_model(
372
  llm_provider_to_use,
@@ -654,17 +587,14 @@ def _display_download_button(file_path: pathlib.Path):
654
 
655
  :param file_path: The path of the .pptx file.
656
  """
 
657
  with open(file_path, 'rb') as download_file:
658
- print("entered")
659
- print(f"filepath={file_path}")
660
  st.download_button(
661
  'Download PPTX file ⬇️',
662
  data=download_file,
663
  file_name='Presentation.pptx',
664
  key=datetime.datetime.now()
665
  )
666
-
667
- print("download")
668
 
669
 
670
  def main():
@@ -676,4 +606,4 @@ def main():
676
 
677
 
678
  if __name__ == '__main__':
679
- main()
 
13
  import huggingface_hub
14
  import json5
15
  import ollama
 
16
  import requests
17
  import streamlit as st
 
18
  from dotenv import load_dotenv
19
  from langchain_community.chat_message_histories import StreamlitChatMessageHistory
20
  from langchain_core.messages import HumanMessage
 
29
 
30
  RUN_IN_OFFLINE_MODE = os.getenv('RUN_IN_OFFLINE_MODE', 'False').lower() == 'true'
31
 
 
32
 
33
  @st.cache_data
34
  def _load_strings() -> dict:
 
142
  IS_IT_REFINEMENT = 'is_it_refinement'
143
  ADDITIONAL_INFO = 'additional_info'
144
 
145
+
146
  logger = logging.getLogger(__name__)
147
 
148
  texts = list(GlobalConfig.PPTX_TEMPLATE_FILES.keys())
 
222
  value='2024-05-01-preview',
223
  )
224
 
225
+ page_range_slider = st.slider("7: Specify a page range:",
226
+ 1, 50, [1, 50])
227
+ st.session_state["page_range_slider"] = page_range_slider
228
+
229
+
230
  def build_ui():
231
  """
232
  Display the input elements for content generation.
 
254
 
255
  set_up_chat_ui()
256
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
257
 
258
  def set_up_chat_ui():
259
  """
260
  Prepare the chat interface and related functionality.
261
  """
262
+ print(f"slider={st.session_state["page_range_slider"][0], st.session_state["page_range_slider"][1]}")
263
  with st.expander('Usage Instructions'):
264
  st.markdown(GlobalConfig.CHAT_USAGE_INSTRUCTIONS)
265
 
 
278
  for msg in history.messages:
279
  st.chat_message(msg.type).code(msg.content, language='json')
280
 
281
+ if prompt := st.chat_input(
282
+ placeholder=APP_TEXT['chat_placeholder'],
283
+ max_chars=GlobalConfig.LLM_MODEL_MAX_INPUT_LENGTH,
284
+ accept_file=True,
285
+ file_type=['pdf', ],
286
+ ):
287
+ prompt_text = prompt.text or ''
288
+ if prompt['files']:
289
+ uploaded_pdf = prompt['files'][0]
290
+ # pdf_length = filem.get_pdf_length(uploaded_pdf)
291
+ # valid_pdf_length = min(50, pdf_length)
292
+
293
+ # st.session_state["page_range_slider"] = list(st.session_state["page_range_slider"])
294
+ # st.session_state["page_range_slider"][1] = valid_pdf_length
295
+ # print(f"length={pdf_length}, validated={valid_pdf_length}={st.session_state["page_range_slider"][-1]}")
296
+
297
+ # print(f"fname={uploaded_pdf.name}")
298
+ # Apparently, Streamlit stores uploaded files in memory and clears on browser close
299
+ # https://docs.streamlit.io/knowledge-base/using-streamlit/where-file-uploader-store-when-deleted
300
+ st.session_state[ADDITIONAL_INFO] = filem.get_pdf_contents(uploaded_pdf,
301
+ st.session_state["page_range_slider"])
302
+ print(f"extracting={st.session_state["page_range_slider"]}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
303
 
304
  provider, llm_name = llm_helper.get_provider_model(
305
  llm_provider_to_use,
 
587
 
588
  :param file_path: The path of the .pptx file.
589
  """
590
+
591
  with open(file_path, 'rb') as download_file:
 
 
592
  st.download_button(
593
  'Download PPTX file ⬇️',
594
  data=download_file,
595
  file_name='Presentation.pptx',
596
  key=datetime.datetime.now()
597
  )
 
 
598
 
599
 
600
  def main():
 
606
 
607
 
608
  if __name__ == '__main__':
609
+ main()
helpers/file_manager.py CHANGED
@@ -32,12 +32,18 @@ def get_pdf_contents(
32
  """
33
 
34
  reader = PdfReader(pdf_file)
 
35
 
36
  start, end = page_range # set start and end per the range (user-specified values)
 
 
 
 
 
37
 
38
  text = ''
39
  for page_num in range(start - 1, end):
40
  page = reader.pages[page_num]
41
  text += page.extract_text()
42
 
43
- return text
 
32
  """
33
 
34
  reader = PdfReader(pdf_file)
35
+ n_pages = len(reader.pages)
36
 
37
  start, end = page_range # set start and end per the range (user-specified values)
38
+ start = max(1, start)
39
+ end = min(n_pages, end)
40
+ if start >= end:
41
+ start = 1
42
+ print(f"starting at {start}, ending {end}")
43
 
44
  text = ''
45
  for page_num in range(start - 1, end):
46
  page = reader.pages[page_num]
47
  text += page.extract_text()
48
 
49
+ return text