Spaces:
Running
Running
adibak
committed on
Commit
·
1e2c128
1
Parent(s):
da5eb4d
remove upload zone, allow chat uploads, set & update slider range
Browse files- app.py +31 -101
- helpers/file_manager.py +7 -1
app.py
CHANGED
@@ -13,10 +13,8 @@ import httpx
|
|
13 |
import huggingface_hub
|
14 |
import json5
|
15 |
import ollama
|
16 |
-
from pypdf import PdfReader
|
17 |
import requests
|
18 |
import streamlit as st
|
19 |
-
from streamlit_float import * # for floating UI elements
|
20 |
from dotenv import load_dotenv
|
21 |
from langchain_community.chat_message_histories import StreamlitChatMessageHistory
|
22 |
from langchain_core.messages import HumanMessage
|
@@ -31,7 +29,6 @@ load_dotenv()
|
|
31 |
|
32 |
RUN_IN_OFFLINE_MODE = os.getenv('RUN_IN_OFFLINE_MODE', 'False').lower() == 'true'
|
33 |
|
34 |
-
float_init() # Initialize streamlit_float
|
35 |
|
36 |
@st.cache_data
|
37 |
def _load_strings() -> dict:
|
@@ -145,6 +142,7 @@ DOWNLOAD_FILE_KEY = 'download_file_name'
|
|
145 |
IS_IT_REFINEMENT = 'is_it_refinement'
|
146 |
ADDITIONAL_INFO = 'additional_info'
|
147 |
|
|
|
148 |
logger = logging.getLogger(__name__)
|
149 |
|
150 |
texts = list(GlobalConfig.PPTX_TEMPLATE_FILES.keys())
|
@@ -224,6 +222,11 @@ with st.sidebar:
|
|
224 |
value='2024-05-01-preview',
|
225 |
)
|
226 |
|
|
|
|
|
|
|
|
|
|
|
227 |
def build_ui():
|
228 |
"""
|
229 |
Display the input elements for content generation.
|
@@ -251,47 +254,12 @@ def build_ui():
|
|
251 |
|
252 |
set_up_chat_ui()
|
253 |
|
254 |
-
def apply_custom_css():
|
255 |
-
# Custom CSS so that the file upload area is kind of transparent, remains near the bottom but is
|
256 |
-
# a little enlarged for ease of use, and the extra things that are normally part of st.file_uploader,
|
257 |
-
# i.e. the "Drag and Drop File Here" label, the pdf's name and size label, upload icon, and browse files button,
|
258 |
-
# are hidden. What this CSS does is produce a simple 'zone' that the user can click or drop a file on.
|
259 |
-
st.markdown(
|
260 |
-
'''
|
261 |
-
<style>
|
262 |
-
|
263 |
-
div[data-testid="stFileUploader"]{
|
264 |
-
position:relative;
|
265 |
-
opacity:0.5;
|
266 |
-
width:200%;
|
267 |
-
height:100px;
|
268 |
-
left:-105%;
|
269 |
-
}
|
270 |
-
section[data-testid="stFileUploaderDropzone"]{
|
271 |
-
position:absolute;
|
272 |
-
width:100%;
|
273 |
-
height:100%;
|
274 |
-
top:0;
|
275 |
-
}
|
276 |
-
div[data-testid="stFileUploaderDropzoneInstructions"]{
|
277 |
-
display:none;
|
278 |
-
}
|
279 |
-
div[data-testid="stFileUploaderFile"]{
|
280 |
-
display:none;
|
281 |
-
}
|
282 |
-
div[data-testid="stFileUploaderFileName"]{
|
283 |
-
display:none;
|
284 |
-
}
|
285 |
-
</style>
|
286 |
-
''',
|
287 |
-
unsafe_allow_html=True
|
288 |
-
)
|
289 |
|
290 |
def set_up_chat_ui():
|
291 |
"""
|
292 |
Prepare the chat interface and related functionality.
|
293 |
"""
|
294 |
-
|
295 |
with st.expander('Usage Instructions'):
|
296 |
st.markdown(GlobalConfig.CHAT_USAGE_INSTRUCTIONS)
|
297 |
|
@@ -310,63 +278,28 @@ def set_up_chat_ui():
|
|
310 |
for msg in history.messages:
|
311 |
st.chat_message(msg.type).code(msg.content, language='json')
|
312 |
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
-
|
317 |
-
|
318 |
-
|
319 |
-
|
320 |
-
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
-
|
325 |
-
|
326 |
-
|
327 |
-
|
328 |
-
|
329 |
-
|
330 |
-
|
331 |
-
|
332 |
-
|
333 |
-
|
334 |
-
|
335 |
-
)
|
336 |
-
|
337 |
-
# PDF Processing and Slider Logic
|
338 |
-
if uploaded_pdf:
|
339 |
-
reader = PdfReader(uploaded_pdf)
|
340 |
-
total_pages = len(reader.pages)
|
341 |
-
st.session_state["pdf_page_count"] = total_pages
|
342 |
-
|
343 |
-
# Slider for page range
|
344 |
-
max_slider = min(50, total_pages) # enforce 50 page limit
|
345 |
-
|
346 |
-
with st.sidebar:
|
347 |
-
# display the pdf's name
|
348 |
-
st.text(f"PDF Uploaded: {uploaded_pdf.name}")
|
349 |
-
|
350 |
-
st.slider(
|
351 |
-
label="4: Specify a page range to examine:",
|
352 |
-
min_value=1,
|
353 |
-
max_value=max_slider,
|
354 |
-
value=(1, max_slider),
|
355 |
-
key="page_range"
|
356 |
-
)
|
357 |
-
|
358 |
-
# make container stay near bottom too, but surround the chat and have dotted border for the visual cue
|
359 |
-
upload_container.float("border-style:dashed solid;bottom:10px;width:150%;height:100px;font-size:10pt;left:0;")
|
360 |
-
|
361 |
-
if prompt:
|
362 |
-
prompt_text = prompt
|
363 |
-
|
364 |
-
# if the user uploaded a pdf and specified a range, get the contents
|
365 |
-
if uploaded_pdf and "page_range" in st.session_state:
|
366 |
-
st.session_state[ADDITIONAL_INFO] = filem.get_pdf_contents(
|
367 |
-
uploaded_pdf,
|
368 |
-
st.session_state["page_range"]
|
369 |
-
)
|
370 |
|
371 |
provider, llm_name = llm_helper.get_provider_model(
|
372 |
llm_provider_to_use,
|
@@ -654,17 +587,14 @@ def _display_download_button(file_path: pathlib.Path):
|
|
654 |
|
655 |
:param file_path: The path of the .pptx file.
|
656 |
"""
|
|
|
657 |
with open(file_path, 'rb') as download_file:
|
658 |
-
print("entered")
|
659 |
-
print(f"filepath={file_path}")
|
660 |
st.download_button(
|
661 |
'Download PPTX file ⬇️',
|
662 |
data=download_file,
|
663 |
file_name='Presentation.pptx',
|
664 |
key=datetime.datetime.now()
|
665 |
)
|
666 |
-
|
667 |
-
print("download")
|
668 |
|
669 |
|
670 |
def main():
|
@@ -676,4 +606,4 @@ def main():
|
|
676 |
|
677 |
|
678 |
if __name__ == '__main__':
|
679 |
-
main()
|
|
|
13 |
import huggingface_hub
|
14 |
import json5
|
15 |
import ollama
|
|
|
16 |
import requests
|
17 |
import streamlit as st
|
|
|
18 |
from dotenv import load_dotenv
|
19 |
from langchain_community.chat_message_histories import StreamlitChatMessageHistory
|
20 |
from langchain_core.messages import HumanMessage
|
|
|
29 |
|
30 |
RUN_IN_OFFLINE_MODE = os.getenv('RUN_IN_OFFLINE_MODE', 'False').lower() == 'true'
|
31 |
|
|
|
32 |
|
33 |
@st.cache_data
|
34 |
def _load_strings() -> dict:
|
|
|
142 |
IS_IT_REFINEMENT = 'is_it_refinement'
|
143 |
ADDITIONAL_INFO = 'additional_info'
|
144 |
|
145 |
+
|
146 |
logger = logging.getLogger(__name__)
|
147 |
|
148 |
texts = list(GlobalConfig.PPTX_TEMPLATE_FILES.keys())
|
|
|
222 |
value='2024-05-01-preview',
|
223 |
)
|
224 |
|
225 |
+
page_range_slider = st.slider("7: Specify a page range:",
|
226 |
+
1, 50, [1, 50])
|
227 |
+
st.session_state["page_range_slider"] = page_range_slider
|
228 |
+
|
229 |
+
|
230 |
def build_ui():
|
231 |
"""
|
232 |
Display the input elements for content generation.
|
|
|
254 |
|
255 |
set_up_chat_ui()
|
256 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
257 |
|
258 |
def set_up_chat_ui():
|
259 |
"""
|
260 |
Prepare the chat interface and related functionality.
|
261 |
"""
|
262 |
+
print(f"slider={st.session_state["page_range_slider"][0], st.session_state["page_range_slider"][1]}")
|
263 |
with st.expander('Usage Instructions'):
|
264 |
st.markdown(GlobalConfig.CHAT_USAGE_INSTRUCTIONS)
|
265 |
|
|
|
278 |
for msg in history.messages:
|
279 |
st.chat_message(msg.type).code(msg.content, language='json')
|
280 |
|
281 |
+
if prompt := st.chat_input(
|
282 |
+
placeholder=APP_TEXT['chat_placeholder'],
|
283 |
+
max_chars=GlobalConfig.LLM_MODEL_MAX_INPUT_LENGTH,
|
284 |
+
accept_file=True,
|
285 |
+
file_type=['pdf', ],
|
286 |
+
):
|
287 |
+
prompt_text = prompt.text or ''
|
288 |
+
if prompt['files']:
|
289 |
+
uploaded_pdf = prompt['files'][0]
|
290 |
+
# pdf_length = filem.get_pdf_length(uploaded_pdf)
|
291 |
+
# valid_pdf_length = min(50, pdf_length)
|
292 |
+
|
293 |
+
# st.session_state["page_range_slider"] = list(st.session_state["page_range_slider"])
|
294 |
+
# st.session_state["page_range_slider"][1] = valid_pdf_length
|
295 |
+
# print(f"length={pdf_length}, validated={valid_pdf_length}={st.session_state["page_range_slider"][-1]}")
|
296 |
+
|
297 |
+
# print(f"fname={uploaded_pdf.name}")
|
298 |
+
# Apparently, Streamlit stores uploaded files in memory and clears on browser close
|
299 |
+
# https://docs.streamlit.io/knowledge-base/using-streamlit/where-file-uploader-store-when-deleted
|
300 |
+
st.session_state[ADDITIONAL_INFO] = filem.get_pdf_contents(uploaded_pdf,
|
301 |
+
st.session_state["page_range_slider"])
|
302 |
+
print(f"extracting={st.session_state["page_range_slider"]}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
303 |
|
304 |
provider, llm_name = llm_helper.get_provider_model(
|
305 |
llm_provider_to_use,
|
|
|
587 |
|
588 |
:param file_path: The path of the .pptx file.
|
589 |
"""
|
590 |
+
|
591 |
with open(file_path, 'rb') as download_file:
|
|
|
|
|
592 |
st.download_button(
|
593 |
'Download PPTX file ⬇️',
|
594 |
data=download_file,
|
595 |
file_name='Presentation.pptx',
|
596 |
key=datetime.datetime.now()
|
597 |
)
|
|
|
|
|
598 |
|
599 |
|
600 |
def main():
|
|
|
606 |
|
607 |
|
608 |
if __name__ == '__main__':
|
609 |
+
main()
|
helpers/file_manager.py
CHANGED
@@ -32,12 +32,18 @@ def get_pdf_contents(
|
|
32 |
"""
|
33 |
|
34 |
reader = PdfReader(pdf_file)
|
|
|
35 |
|
36 |
start, end = page_range # set start and end per the range (user-specified values)
|
|
|
|
|
|
|
|
|
|
|
37 |
|
38 |
text = ''
|
39 |
for page_num in range(start - 1, end):
|
40 |
page = reader.pages[page_num]
|
41 |
text += page.extract_text()
|
42 |
|
43 |
-
return text
|
|
|
32 |
"""
|
33 |
|
34 |
reader = PdfReader(pdf_file)
|
35 |
+
n_pages = len(reader.pages)
|
36 |
|
37 |
start, end = page_range # set start and end per the range (user-specified values)
|
38 |
+
start = max(1, start)
|
39 |
+
end = min(n_pages, end)
|
40 |
+
if start >= end:
|
41 |
+
start = 1
|
42 |
+
print(f"starting at {start}, ending {end}")
|
43 |
|
44 |
text = ''
|
45 |
for page_num in range(start - 1, end):
|
46 |
page = reader.pages[page_num]
|
47 |
text += page.extract_text()
|
48 |
|
49 |
+
return text
|