# NOTE: extraction residue from a web file viewer, not part of the program:
# status "Spaces: Sleeping", "File size: 4,489 Bytes",
# revisions f7d4608 / d1ca23a, and a line-number gutter running 1-128.
import streamlit as st
from typing import Union, List
import pandas as pd
from io import BytesIO
import json
import os
from application.utils import logger
# Module-level logger obtained from the project's logging helper.
# NOTE(review): this rebinds the name `logger`, so the imported
# `application.utils.logger` module is no longer reachable under that name
# in the rest of this file.
logger = logger.get_logger()

# Defaults used by config_homepage() when it calls st.set_page_config().
PAGE_TITLE = "PDF Extractor"
PAGE_LAYOUT = "wide"

# Page settings that are currently disabled (icon, menu links, about text).
# PAGE_ICON = "src/frontend/images/page_icon.jpg"
# GITHUB_LINK = "https://github.com/Vela-Test1993/yuvabe-care-companion-ai"
# ABOUT_US = "An AI-powered assistant for personalized healthcare guidance."
def config_homepage(page_title: str = PAGE_TITLE) -> None:
    """
    Apply the Streamlit page configuration for the app.

    Calls `st.set_page_config()` with the given title, the `PAGE_LAYOUT`
    layout and a collapsed sidebar. No page icon and no custom menu items
    are configured.

    Args:
        page_title (str): The title displayed on the browser tab
            (default is PAGE_TITLE).

    Note:
        The call is skipped when the key "page_config_set" is present in
        `st.session_state`. This function never sets that key itself, so
        unless other code sets it, the configuration is applied on every
        call.

    Example:
        >>> config_homepage("My Custom App")
    """
    if "page_config_set" not in st.session_state:
        st.set_page_config(
            page_title=page_title,
            layout=PAGE_LAYOUT,
            initial_sidebar_state="collapsed",
        )
        # NOTE(review): the flag is deliberately not written here (the
        # assignment `st.session_state.page_config_set = True` was disabled).
        # Presumably the page config has to be re-applied on every script
        # run - confirm before re-enabling a once-only guard.
def upload_file(
    file_types: Union[str, List[str]] = "pdf",
    label: str = "📤 Upload a file",
    help_text: str = "Upload your file for processing.",
    allow_multiple: bool = False,
):
    """
    Render a file uploader followed by a "Submit" button.

    Args:
        file_types (str or list): Accepted file type(s); a single string such
            as "pdf" is wrapped into a one-element list.
        label (str): Text shown above the uploader.
        help_text (str): Tooltip help text for the uploader.
        allow_multiple (bool): Whether several files may be selected at once.

    Returns:
        The value returned by `st.file_uploader` for this run. The same value
        is also stored in `st.session_state.pdf_file` when the "Submit"
        button reports a click.
    """
    accepted_types = [file_types] if isinstance(file_types, str) else file_types

    selection = st.file_uploader(
        label=label,
        type=accepted_types,
        help=help_text,
        accept_multiple_files=allow_multiple,
    )

    submitted = st.button("Submit")
    if submitted:
        # Keep the current selection available to later script runs.
        st.session_state.pdf_file = selection

    return selection
def _next_free_row(book, sheet_name: str) -> int:
    """Return the 0-based row at which new rows should be written to a sheet."""
    if sheet_name not in book.sheetnames:
        return 0
    sheet = book[sheet_name]
    # openpyxl reports max_row == 1 and max_column == 1 even for a sheet with
    # no cells, so a blank sheet has to be told apart from a one-row sheet;
    # otherwise the append would skip the header and leave row 1 empty.
    is_blank = (
        sheet.max_row == 1
        and sheet.max_column == 1
        and sheet.cell(row=1, column=1).value is None
    )
    return 0 if is_blank else sheet.max_row


def export_results_to_excel(results: dict, sheet_name: str, filename: str = "output.xlsx") -> BytesIO:
    """
    Converts a dictionary result into a formatted Excel file.

    The flattened result is appended to `data/<filename>` (the `data/` folder
    and the file are created when missing) and is also written to an
    in-memory workbook returned for download. The returned workbook contains
    only the rows produced by this call, not the accumulated file on disk.

    Args:
        results (dict): The data to export; nested keys are flattened with
            "_" as the separator.
        sheet_name (str): The sheet name to write to.
        filename (str): The Excel file name (with or without '.xlsx').

    Returns:
        BytesIO: In-memory Excel file for Streamlit download, positioned at
        the start of the stream.
    """
    try:
        df = pd.json_normalize(results, sep='_')
        df.replace({None: "", "NULL": ""}, inplace=True)
    except Exception as e:
        # Best effort: an unparsable result is exported as a one-cell error
        # row instead of aborting the export.
        df = pd.DataFrame([{"error": f"Could not parse result: {str(e)}"}])

    # Ensure correct file extension and path
    filename = filename if filename.endswith(".xlsx") else f"{filename}.xlsx"
    full_path = os.path.join("data", filename)
    os.makedirs("data", exist_ok=True)  # Ensure the folder exists

    # Save to physical file
    if os.path.exists(full_path):
        with pd.ExcelWriter(full_path, engine="openpyxl", mode="a", if_sheet_exists="overlay") as writer:
            start_row = _next_free_row(writer.book, sheet_name)
            # The header is written only when starting at the top of a sheet.
            df.to_excel(
                writer,
                sheet_name=sheet_name,
                index=False,
                header=start_row == 0,
                startrow=start_row,
            )
    else:
        df.to_excel(full_path, index=False, engine="openpyxl", sheet_name=sheet_name)

    # Prepare in-memory Excel for download
    output_stream = BytesIO()
    with pd.ExcelWriter(output_stream, engine="openpyxl") as writer:
        df.to_excel(writer, index=False, sheet_name=sheet_name)
    output_stream.seek(0)
    return output_stream
|