# Spaces: VelaTest / PDFExtractor (Sleeping)
# File size: 5,201 Bytes

import streamlit as st
from typing import Union, List
import pandas as pd
from io import BytesIO
import json
import os
from openpyxl import load_workbook
from application.utils import logger

# Rebinds the imported name `logger` to whatever `get_logger()` returns; from
# here on, `logger` refers to that returned object, not to the import.
logger = logger.get_logger()

# Page-level settings consumed by `config_homepage`.
PAGE_TITLE = "PDF Extractor"
PAGE_LAYOUT = "wide"
# The constants below are disabled; nothing in the code visible here reads them.
# PAGE_ICON = "src/frontend/images/page_icon.jpg"
# GITHUB_LINK = "https://github.com/Vela-Test1993/yuvabe-care-companion-ai"
# ABOUT_US = "An AI-powered assistant for personalized healthcare guidance."


def config_homepage(page_title=PAGE_TITLE):
    """Apply the page-level Streamlit configuration for the app.

    Calls ``st.set_page_config`` with the given browser-tab title, the
    module-level ``PAGE_LAYOUT`` and a collapsed sidebar. The call is skipped
    when the key ``"page_config_set"`` is present in ``st.session_state``.
    This function never sets that key itself, so unless other code does,
    the configuration is applied on every invocation.

    No page icon and no custom menu items are configured.

    Args:
        page_title (str): Title shown on the browser tab. Defaults to
            ``PAGE_TITLE``.

    Example:
        >>> config_homepage("My Custom App")
    """
    already_configured = "page_config_set" in st.session_state
    if already_configured:
        return

    page_settings = {
        "page_title": page_title,
        "layout": PAGE_LAYOUT,
        "initial_sidebar_state": "collapsed",
    }
    st.set_page_config(**page_settings)

def upload_file(
    file_types: Union[str, List[str]] = "pdf",
    label: str = "📤 Upload a file",
    help_text: str = "Upload your file for processing.",
    allow_multiple: bool = True,
):
    """Render a file uploader followed by a "Submit" button.

    Args:
        file_types (str or list): Allowed extension(s), e.g. "pdf" or
            ["pdf", "docx"]. A single string is wrapped in a list before
            being passed to the uploader.
        label (str): Label displayed above the uploader.
        help_text (str): Tooltip help text.
        allow_multiple (bool): Whether several files may be selected.

    Returns:
        The value returned by ``st.file_uploader`` when the "Submit" button
        is pressed during this run; that value is also stored in
        ``st.session_state.pdf_file``. ``None`` when the button is not
        pressed.
    """
    allowed_types = [file_types] if isinstance(file_types, str) else file_types

    # The uploader is rendered before the button so it appears above it.
    selection = st.file_uploader(
        label=label,
        type=allowed_types,
        help=help_text,
        accept_multiple_files=allow_multiple,
    )

    if not st.button("Submit"):
        return None

    st.session_state.pdf_file = selection
    return selection
    
def _next_free_column(workbook_path: str, sheet_name: str) -> int:
    """Return the 0-based column at which new data should start on *sheet_name*.

    Returns 0 when the workbook has no sheet with that name. Otherwise returns
    the sheet's ``max_column``: openpyxl reports it as the 1-based index of the
    last column, which equals the 0-based index of the first column after it.
    """
    book = load_workbook(workbook_path)
    if sheet_name not in book.sheetnames:
        return 0
    return book[sheet_name].max_column


def export_results_to_excel(
    results: dict,
    sheet_name: str,
    filename: str = "output.xlsx",
    column: Union[str, None] = None,
) -> Union[BytesIO, None]:
    """Export one entry of *results* to Excel, on disk and in memory.

    The dictionary found at ``results[column]`` is turned into a two-column
    table: its keys go into a column named after *column*, its values into a
    column named "Value" (missing values are replaced by 0).

    The table is written to ``data/<filename>``:

    - If the file does not exist, it is created and the table is written at
      the top-left of *sheet_name*.
    - If the file exists, the table is added to *sheet_name* starting at row 0
      in the first column to the right of the sheet's existing content (or at
      column 0 of a new sheet when *sheet_name* is not in the workbook yet).

    Independently of the file on disk, a fresh in-memory workbook containing
    only this table is returned for download.

    Args:
        results (dict): Mapping that holds the data to export under *column*.
        sheet_name (str): The sheet name to write to.
        filename (str): The Excel file name (with or without '.xlsx').
        column (str | None): Key to look up in *results*; also used as the
            header of the first output column. When the key is absent, an
            empty table is exported.

    Returns:
        BytesIO | None: In-memory Excel file positioned at offset 0, or
        ``None`` when *results* is empty, when ``results[column]`` is not a
        dictionary, or when any error occurs (the error is logged).
    """
    try:
        if not results:
            logger.error("Results object is None or empty.")
            return None

        if not filename.endswith(".xlsx"):
            filename = f"{filename}.xlsx"
        data = results.get(column, {})

        logger.info(f"Exporting data for column '{column}' to {filename}")

        if not isinstance(data, dict):
            logger.error(f"Expected dictionary for column '{column}', but got {type(data)}")
            return None

        df = pd.DataFrame(list(data.items()), columns=[column, "Value"])
        df = df.fillna(0)

        os.makedirs("data", exist_ok=True)
        physical_path = os.path.join("data", filename)

        if os.path.exists(physical_path):
            # Append mode with 'overlay' writes into the existing sheet
            # without deleting what is already there.
            start_column = _next_free_column(physical_path, sheet_name)
            writer_options = {"mode": "a", "if_sheet_exists": "overlay"}
        else:
            # `if_sheet_exists` is only accepted in append mode, so it is
            # omitted when creating a new file.
            start_column = 0
            writer_options = {"mode": "w"}

        with pd.ExcelWriter(physical_path, engine="openpyxl", **writer_options) as writer:
            df.to_excel(
                writer,
                sheet_name=sheet_name,
                index=False,
                header=True,
                startrow=0,
                startcol=start_column,
            )

        # The download copy only ever contains the current table.
        output_stream = BytesIO()
        with pd.ExcelWriter(output_stream, engine="openpyxl") as writer:
            df.to_excel(writer, sheet_name=sheet_name, index=False)

        output_stream.seek(0)
        logger.info(f"Data exported to {filename} successfully.")
        return output_stream

    except Exception as e:
        # Best-effort export: callers receive None instead of an exception.
        logger.error(f"Error creating Excel export: {e}")
        return None