File size: 4,489 Bytes
f7d4608
 
d1ca23a
 
 
 
f7d4608
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d1ca23a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import streamlit as st
from typing import Union, List
import pandas as pd
from io import BytesIO
import json
import os
from application.utils import logger

# Rebinds the name `logger` (imported from application.utils above) to the
# object returned by its get_logger(); after this line the originally imported
# object is no longer reachable under that name.
logger = logger.get_logger()

# Defaults handed to st.set_page_config() by config_homepage().
PAGE_TITLE = "PDF Extractor"
PAGE_LAYOUT = "wide"
# PAGE_ICON = "src/frontend/images/page_icon.jpg"
# GITHUB_LINK = "https://github.com/Vela-Test1993/yuvabe-care-companion-ai"
# ABOUT_US = "An AI-powered assistant for personalized healthcare guidance."


def config_homepage(page_title=PAGE_TITLE):
    """
    Apply the page-level Streamlit configuration for the app.

    Calls `st.set_page_config()` with the given title, the module-wide
    PAGE_LAYOUT, and a collapsed sidebar. The call is skipped when the key
    "page_config_set" is already present in `st.session_state`.

    Args:
        page_title (str): Title shown on the browser tab (defaults to PAGE_TITLE).

    Returns:
        None

    Example:
        >>> config_homepage("My Custom App")
    """
    # NOTE(review): this function never stores "page_config_set" itself (the
    # assignment was commented out in an earlier revision), so unless other
    # code sets that key the configuration is applied on every call.
    if "page_config_set" in st.session_state:
        return

    # Page icon and custom menu items are intentionally not passed here.
    st.set_page_config(
        page_title=page_title,
        layout=PAGE_LAYOUT,
        initial_sidebar_state="collapsed",
    )

def upload_file(
    file_types: Union[str, List[str]] = "pdf",
    label: str = "📤 Upload a file",
    help_text: str = "Upload your file for processing.",
    allow_multiple: bool = False,
):
    """
    Render a Streamlit file uploader followed by a "Submit" button.

    Args:
        file_types (str or list): Allowed file type(s), e.g. "pdf" or ["pdf", "docx"].
            A single string is wrapped into a one-element list.
        label (str): Label displayed above the uploader.
        help_text (str): Tooltip help text.
        allow_multiple (bool): Forwarded as `accept_multiple_files`.

    Returns:
        Whatever `st.file_uploader` returned, but only when the "Submit"
        button reports a click; in that case the same value is also stored in
        `st.session_state.pdf_file`. Otherwise None is returned and the
        session state is left untouched.
    """
    allowed_types = [file_types] if isinstance(file_types, str) else file_types

    selection = st.file_uploader(
        label=label,
        type=allowed_types,
        help=help_text,
        accept_multiple_files=allow_multiple,
    )

    # Nothing is handed back (or persisted) until the user confirms.
    if not st.button("Submit"):
        return None

    st.session_state.pdf_file = selection
    return selection

def export_results_to_excel(results: dict, sheet_name: str, filename: str = "output.xlsx") -> BytesIO:
    """
    Flatten `results` into a table, persist it under 'data/', and return it as Excel bytes.

    The input is flattened with `pd.json_normalize` (nested keys joined by
    '_'), and None / "NULL" values are replaced by empty strings. If that
    step raises, a one-row table with a single "error" column is exported
    instead of propagating the exception.

    The table is then written to `data/<filename>`:
    - if the file does not exist, it is created with `sheet_name`;
    - if it exists, the rows are written below the last used row of
      `sheet_name` (without a header), or to a new sheet with a header when
      the workbook has no such sheet.

    Args:
        results (dict): The data to export.
        sheet_name (str): The sheet name to write to.
        filename (str): The Excel file name; '.xlsx' is appended when missing.

    Returns:
        BytesIO: In-memory workbook, rewound to position 0, containing only
        the rows produced by this call (not the accumulated on-disk content).
    """
    try:
        frame = pd.json_normalize(results, sep='_')
        frame.replace({None: "", "NULL": ""}, inplace=True)
    except Exception as e:
        # Best effort: export the failure reason rather than raising.
        frame = pd.DataFrame([{"error": f"Could not parse result: {str(e)}"}])

    # Resolve the on-disk target inside the 'data' folder.
    workbook_name = filename if filename.endswith(".xlsx") else f"{filename}.xlsx"
    os.makedirs("data", exist_ok=True)
    workbook_path = os.path.join("data", workbook_name)

    if not os.path.exists(workbook_path):
        # First export: create the workbook from scratch.
        frame.to_excel(workbook_path, index=False, engine="openpyxl", sheet_name=sheet_name)
    else:
        with pd.ExcelWriter(workbook_path, engine="openpyxl", mode="a", if_sheet_exists="overlay") as writer:
            workbook = writer.book
            # Row 0 means "sheet is new": only then is a header row written.
            first_free_row = workbook[sheet_name].max_row if sheet_name in workbook.sheetnames else 0
            # NOTE(review): rows appended without a header rely on this call
            # producing the same column order as the existing sheet — confirm
            # that callers always pass results with a stable shape.
            frame.to_excel(
                writer,
                sheet_name=sheet_name,
                index=False,
                header=first_free_row == 0,
                startrow=first_free_row,
            )

    # Separate in-memory copy for the download button.
    buffer = BytesIO()
    with pd.ExcelWriter(buffer, engine="openpyxl") as writer:
        frame.to_excel(writer, index=False, sheet_name=sheet_name)
    buffer.seek(0)

    return buffer