# File: PDFExtractor/application/services/streamlit_function.py
# Last commit d1ca23a (Vela): "modified gemini service module add API call file handling"
# Size: 4.49 kB
import streamlit as st
from typing import Union, List
import pandas as pd
from io import BytesIO
import json
import os
from application.utils import logger
# Module-level logger. NOTE: this rebinds the name `logger` (imported above as
# a module from application.utils) to whatever `get_logger()` returns, so the
# module itself is no longer reachable under that name after this line.
logger = logger.get_logger()

# Defaults handed to st.set_page_config() by config_homepage().
PAGE_TITLE = "PDF Extractor"
PAGE_LAYOUT = "wide"

# Page settings that are currently disabled; config_homepage() does not pass
# an icon or menu items while these stay commented out.
# PAGE_ICON = "src/frontend/images/page_icon.jpg"
# GITHUB_LINK = "https://github.com/Vela-Test1993/yuvabe-care-companion-ai"
# ABOUT_US = "An AI-powered assistant for personalized healthcare guidance."
def config_homepage(page_title=PAGE_TITLE):
    """
    Apply the Streamlit page configuration for the app's home page.

    Sets the browser-tab title, uses the module-wide PAGE_LAYOUT and starts
    with the sidebar collapsed. No page icon or custom menu items are passed.

    Args:
        page_title (str): Title shown on the browser tab (defaults to PAGE_TITLE).

    Notes:
        - The configuration is skipped when the key ``"page_config_set"`` is
          already present in ``st.session_state``.
        - This function does not set that key itself, so unless other code
          sets it, ``st.set_page_config()`` runs on every call.

    Example:
        >>> config_homepage("My Custom App")
    """
    # Guard clause: some other part of the app may have flagged the page as
    # already configured.
    if "page_config_set" in st.session_state:
        return

    st.set_page_config(
        page_title=page_title,
        layout=PAGE_LAYOUT,
        initial_sidebar_state="collapsed",
    )
def upload_file(
    file_types: Union[str, List[str]] = "pdf",
    label: str = "📤 Upload a file",
    help_text: str = "Upload your file for processing.",
    allow_multiple: bool = False,
):
    """
    Render a Streamlit file uploader followed by a "Submit" button.

    Args:
        file_types (str or list): Allowed file type(s), e.g., "pdf" or ["pdf", "docx"].
        label (str): Label displayed above the uploader.
        help_text (str): Tooltip help text.
        allow_multiple (bool): Allow multiple file uploads.

    Returns:
        Whatever ``st.file_uploader`` returns for the current run: a single
        file object, or a list of file objects when ``allow_multiple`` is True.

    Side effects:
        When the "Submit" button is pressed, the current uploader value is
        stored in ``st.session_state.pdf_file``.
    """
    # Normalise a single extension to a list so `type` always receives a list.
    if isinstance(file_types, str):
        file_types = [file_types]

    uploaded_files = st.file_uploader(
        label=label,
        type=file_types,
        help=help_text,
        accept_multiple_files=allow_multiple,
    )

    # Persist the selection only on an explicit submit; the value is still
    # returned to the caller on every run.
    if st.button("Submit"):
        st.session_state.pdf_file = uploaded_files

    return uploaded_files
def export_results_to_excel(results: dict, sheet_name: str, filename: str = "output.xlsx") -> BytesIO:
    """
    Flatten a result dictionary and export it as an Excel sheet.

    The data is written twice: once to a workbook under the local 'data/'
    folder (rows are appended below existing content when the workbook
    already exists) and once to an in-memory workbook that is returned
    for download. The in-memory workbook holds only the rows from this call.

    Args:
        results (dict): The data to export; nested keys are joined with '_'.
        sheet_name (str): The sheet name to write to.
        filename (str): The Excel file name (with or without '.xlsx').

    Returns:
        BytesIO: In-memory Excel file, positioned at its start.
    """
    try:
        frame = pd.json_normalize(results, sep='_')
        frame.replace({None: "", "NULL": ""}, inplace=True)
    except Exception as e:
        # Best effort: export a one-row sheet describing the failure instead
        # of raising to the caller.
        frame = pd.DataFrame([{"error": f"Could not parse result: {str(e)}"}])

    # Make sure the name carries the .xlsx extension and the folder exists.
    if not filename.endswith(".xlsx"):
        filename = f"{filename}.xlsx"
    os.makedirs("data", exist_ok=True)
    target_path = os.path.join("data", filename)

    if not os.path.exists(target_path):
        # First export for this file: create the workbook with a header row.
        frame.to_excel(target_path, index=False, engine="openpyxl", sheet_name=sheet_name)
    else:
        with pd.ExcelWriter(target_path, engine="openpyxl", mode="a", if_sheet_exists="overlay") as xl_writer:
            workbook = xl_writer.book
            # Continue below the last used row of an existing sheet; a sheet
            # that does not exist yet starts at the top.
            first_free_row = workbook[sheet_name].max_row if sheet_name in workbook.sheetnames else 0
            frame.to_excel(
                xl_writer,
                sheet_name=sheet_name,
                index=False,
                header=first_free_row == 0,  # header only when starting a fresh sheet
                startrow=first_free_row,
            )

    # Build the downloadable copy in memory.
    buffer = BytesIO()
    with pd.ExcelWriter(buffer, engine="openpyxl") as xl_writer:
        frame.to_excel(xl_writer, index=False, sheet_name=sheet_name)
    buffer.seek(0)
    return buffer