PDFExtractor / application /services /streamlit_function.py
Vela
Created a PdfExtraction application with basic functionality
f7d4608
raw
history blame
2.95 kB
import streamlit as st
from typing import Union, List
from application.utils import logger
# Module-level logger. NOTE: this rebinds the name `logger` from the imported
# `application.utils.logger` module to the object returned by `get_logger()`,
# so the module itself is no longer reachable under that name below this line.
logger = logger.get_logger()

# Defaults used by `config_homepage()`: the browser-tab title (default value of
# its `page_title` parameter) and the layout passed to `st.set_page_config()`.
PAGE_TITLE = "PDF Extractor"
PAGE_LAYOUT = "wide"

# Disabled page settings. They match the `page_icon` / `menu_items` arguments
# that are likewise commented out in `config_homepage()`.
# NOTE(review): the link and description below look like they belong to a
# different project (a care-companion app) -- confirm before re-enabling.
# PAGE_ICON = "src/frontend/images/page_icon.jpg"
# GITHUB_LINK = "https://github.com/Vela-Test1993/yuvabe-care-companion-ai"
# ABOUT_US = "An AI-powered assistant for personalized healthcare guidance."
def config_homepage(page_title=PAGE_TITLE):
    """
    Apply the page-level Streamlit configuration for the app.

    Calls `st.set_page_config()` with the given browser-tab title, the
    module-wide `PAGE_LAYOUT` and a collapsed sidebar. The page icon and the
    custom menu items are currently disabled (see the commented-out options
    below).

    The call is skipped when a `page_config_set` key is present in
    `st.session_state`. This function does not set that key itself, so unless
    other code sets it the configuration is applied on every call.

    Args:
        page_title (str): The title displayed on the browser tab
            (default is PAGE_TITLE).

    Example:
        >>> config_homepage("My Custom App")
    """
    # Guard clause: nothing to do once the flag exists in the session.
    if "page_config_set" in st.session_state:
        return

    page_options = dict(
        page_title=page_title,
        layout=PAGE_LAYOUT,
        initial_sidebar_state="collapsed",
    )
    # Options kept for reference but intentionally not passed:
    #   page_icon=PAGE_ICON
    #   menu_items={
    #       "Get help": GITHUB_LINK,
    #       "Report a bug": GITHUB_LINK,
    #       "About": ABOUT_US,
    #   }
    st.set_page_config(**page_options)
    # The flag is deliberately left unset here:
    # st.session_state.page_config_set = True
def upload_file(
    file_types: Union[str, List[str]] = "pdf",
    label: str = "📤 Upload a file",
    help_text: str = "Upload your file for processing.",
    allow_multiple: bool = False,
):
    """
    Render a Streamlit file uploader followed by a "Submit" button.

    When the "Submit" button is clicked, the uploader's current value is also
    stored in `st.session_state.pdf_file`.

    Args:
        file_types (str or list): Allowed file type(s), e.g., "pdf" or
            ["pdf", "docx"]. A single string is wrapped in a list before
            being handed to the uploader.
        label (str): Label displayed above the uploader.
        help_text (str): Tooltip help text.
        allow_multiple (bool): Allow multiple file uploads.

    Returns:
        Uploaded file(s): The value returned by `st.file_uploader` -- a
        single file object, or a list of file objects when `allow_multiple`
        is True.
    """
    # Normalise to a list without rebinding the caller-facing parameter.
    allowed_types = [file_types] if isinstance(file_types, str) else file_types

    uploaded_files = st.file_uploader(
        label=label,
        type=allowed_types,
        help=help_text,
        accept_multiple_files=allow_multiple,
    )

    # Persist the selection only on an explicit Submit click.
    if st.button("Submit"):
        st.session_state.pdf_file = uploaded_files

    # NOTE(review): the uploader value is returned on every run, not only
    # after Submit is clicked -- confirm callers rely on this.
    return uploaded_files
# def extract_text_from_pdf(file) -> str:
# """
# Extracts and returns the full text content from a PDF file.
# :param file: PDF file object (BytesIO or UploadedFile from Streamlit)
# :return: Extracted text as a string
# """
# text = ""
# with fitz.open(stream=file.read(), filetype="pdf") as doc:
# for page in doc:
# text += page.get_text()
# return text.strip()