Spaces:
Sleeping
Sleeping
import streamlit as st | |
from typing import Union, List | |
import pandas as pd | |
from io import BytesIO | |
import json | |
import os | |
from openpyxl import load_workbook | |
from application.utils import logger | |
# Module-wide logger instance. Note: this rebinds the name `logger`, which
# until this line referred to the imported `application.utils.logger` module.
logger = logger.get_logger()

# Defaults consumed by config_homepage().
PAGE_TITLE = "PDF Extractor"
PAGE_LAYOUT = "wide"

# Disabled page settings (icon, help/bug links, "About" text). They are only
# referenced from commented-out arguments in config_homepage().
# PAGE_ICON = "src/frontend/images/page_icon.jpg"
# GITHUB_LINK = "https://github.com/Vela-Test1993/yuvabe-care-companion-ai"
# ABOUT_US = "An AI-powered assistant for personalized healthcare guidance."
def config_homepage(page_title=PAGE_TITLE):
    """
    Apply the Streamlit page configuration for the app.

    Calls `st.set_page_config()` with the given title, the module-level
    PAGE_LAYOUT and a collapsed sidebar. The call is skipped when the key
    "page_config_set" is already present in `st.session_state`.

    Args:
        page_title (str): Title passed to `st.set_page_config`
            (default is PAGE_TITLE).

    Notes:
        - This function never stores "page_config_set" in the session state
          itself, so the skip only happens if some other code sets that key.
        - The page icon and the custom menu items (help / bug report / about)
          are currently disabled and therefore not passed to Streamlit.

    Example:
        >>> config_homepage("My Custom App")
    """
    # Guard clause: some other code has marked the page as already configured.
    if "page_config_set" in st.session_state:
        return

    page_settings = {
        "page_title": page_title,
        "layout": PAGE_LAYOUT,
        "initial_sidebar_state": "collapsed",
    }
    st.set_page_config(**page_settings)
def upload_file(
    file_types: Union[str, List[str]] = "pdf",
    label: str = "π€ Upload a file",
    help_text: str = "Upload your file for processing.",
    allow_multiple: bool = True,
):
    """
    Render a Streamlit file uploader followed by a "Submit" button.

    Args:
        file_types (str or list): Allowed file type(s), e.g., "pdf" or ["pdf", "docx"].
            A single string is wrapped into a one-element list.
        label (str): Label displayed above the uploader.
        help_text (str): Tooltip help text.
        allow_multiple (bool): Allow multiple file uploads.

    Returns:
        Uploaded file(s): Whatever `st.file_uploader` returned (a single file
        object or a list of file objects). The value is returned whether or
        not "Submit" was pressed.

    Side effects:
        When the "Submit" button is pressed, the current uploader value is
        stored in `st.session_state.pdf_file`.
    """
    # NOTE(review): the leading characters of the default `label` look like a
    # mis-encoded emoji - confirm the intended text against the original file.
    allowed_types = [file_types] if isinstance(file_types, str) else file_types

    selection = st.file_uploader(
        label=label,
        type=allowed_types,
        help=help_text,
        accept_multiple_files=allow_multiple,
    )

    submitted = st.button("Submit")
    if submitted:
        st.session_state.pdf_file = selection

    return selection
def export_results_to_excel(
    results: dict,
    sheet_name: str,
    filename: str = "output.xlsx",
    column: Union[str, None] = None,
) -> Union[BytesIO, None]:
    """
    Export one entry of a results dictionary to Excel.

    The mapping found at ``results[column]`` is turned into a two-column
    table (``column`` / "Value"). That table is written to ``data/<filename>``
    and also returned as an in-memory workbook for download.

    If the file on disk already exists, the table is added to it: on a new
    sheet when ``sheet_name`` is absent, otherwise to the right of the columns
    already used on that sheet (existing cells are kept).

    Args:
        results (dict): The data to export.
        sheet_name (str): The sheet name to write to.
        filename (str): The Excel file name (with or without '.xlsx').
        column (str): Key of ``results`` whose value (a dict) is exported; it
            is also used as the header of the first column. A missing key is
            treated as an empty dict.

    Returns:
        BytesIO: In-memory Excel file for Streamlit download, containing only
        the table produced by this call. ``None`` when ``results`` is empty,
        when ``results[column]`` is not a dict, or when any error occurs
        (the error is logged, not raised).
    """
    try:
        if not results:
            logger.error("Results object is None or empty.")
            return None

        if not filename.endswith(".xlsx"):
            filename = f"{filename}.xlsx"

        data = results.get(column, {})
        logger.info(f"Exporting data for column '{column}' to {filename}")
        if not isinstance(data, dict):
            logger.error(f"Expected dictionary for column '{column}', but got {type(data)}")
            return None

        # Materialise the items view so DataFrame receives a plain list of pairs.
        df = pd.DataFrame(list(data.items()), columns=[column, "Value"])
        df = df.fillna(0)

        os.makedirs("data", exist_ok=True)
        physical_path = os.path.join("data", filename)

        if os.path.exists(physical_path):
            start_column = _next_free_column(physical_path, sheet_name)
            # 'overlay' writes into the existing sheet without clearing it.
            with pd.ExcelWriter(
                physical_path, engine="openpyxl", mode="a", if_sheet_exists="overlay"
            ) as writer:
                df.to_excel(
                    writer,
                    sheet_name=sheet_name,
                    index=False,
                    header=True,
                    startrow=0,
                    startcol=start_column,
                )
        else:
            with pd.ExcelWriter(physical_path, engine="openpyxl", mode="w") as writer:
                df.to_excel(writer, sheet_name=sheet_name, index=False, header=True, startrow=0)

        # Separate in-memory copy for the download button (new data only).
        output_stream = BytesIO()
        with pd.ExcelWriter(output_stream, engine="openpyxl") as writer:
            df.to_excel(writer, sheet_name=sheet_name, index=False)
        output_stream.seek(0)

        logger.info(f"Data exported to {filename} successfully.")
        return output_stream
    except Exception as e:
        # Deliberate best-effort boundary: callers expect None on failure.
        logger.error(f"Error creating Excel export: {e}")
        return None


def _next_free_column(workbook_path: str, sheet_name: str) -> int:
    """Return the 0-based column where new data should start on ``sheet_name``."""
    book = load_workbook(workbook_path)
    if sheet_name not in book.sheetnames:
        return 0

    sheet = book[sheet_name]
    # openpyxl reports max_row == max_column == 1 even for a blank sheet, so
    # check cell A1 to avoid leaving column A empty in that case.
    sheet_is_blank = (
        sheet.max_row == 1
        and sheet.max_column == 1
        and sheet.cell(row=1, column=1).value is None
    )
    if sheet_is_blank:
        return 0

    # max_column is 1-based, so it equals the 0-based index of the first
    # unused column.
    return sheet.max_column