Spaces:
Sleeping
Sleeping
File size: 5,201 Bytes
f7d4608 d1ca23a 22481bd f7d4608 22481bd f7d4608 22481bd f7d4608 d1ca23a 22481bd d1ca23a 22481bd d1ca23a 22481bd d1ca23a 22481bd d1ca23a 22481bd d1ca23a 22481bd d1ca23a 22481bd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
import streamlit as st
from typing import Union, List
import pandas as pd
from io import BytesIO
import json
import os
from openpyxl import load_workbook
from application.utils import logger
# Rebinds the name `logger` from the imported `application.utils.logger`
# module to the object returned by its `get_logger()`; after this line the
# module itself is no longer reachable under that name in this file.
logger = logger.get_logger()

# Values handed to `st.set_page_config()` by `config_homepage()`.
PAGE_TITLE = "PDF Extractor"
PAGE_LAYOUT = "wide"

# Currently disabled settings; the matching `page_icon` / `menu_items`
# arguments in `config_homepage()` are commented out as well.
# PAGE_ICON = "src/frontend/images/page_icon.jpg"
# GITHUB_LINK = "https://github.com/Vela-Test1993/yuvabe-care-companion-ai"
# ABOUT_US = "An AI-powered assistant for personalized healthcare guidance."
def config_homepage(page_title=PAGE_TITLE):
    """
    Apply the Streamlit page configuration for the app.

    Calls `st.set_page_config()` with the given title, the `PAGE_LAYOUT`
    layout and a collapsed sidebar, unless the key "page_config_set" is
    already present in `st.session_state`.

    Args:
        page_title (str): The title displayed on the browser tab
            (default is PAGE_TITLE).

    Notes:
        - This function does not store "page_config_set" itself (that
          assignment is commented out below), so the guard only takes
          effect if the key is set somewhere else.
        - The page icon and the custom menu items are currently disabled.

    Example:
        >>> config_homepage("My Custom App")
    """
    # Guard clause: skip configuration when the marker key is present.
    if "page_config_set" in st.session_state:
        return

    page_settings = {
        "page_title": page_title,
        # "page_icon": PAGE_ICON,
        "layout": PAGE_LAYOUT,
        "initial_sidebar_state": "collapsed",
        # "menu_items": {
        #     "Get help": GITHUB_LINK,
        #     "Report a bug": GITHUB_LINK,
        #     "About": ABOUT_US,
        # },
    }
    st.set_page_config(**page_settings)
    # st.session_state.page_config_set = True
def upload_file(
    file_types: Union[str, List[str]] = "pdf",
    label: str = "📤 Upload a file",
    help_text: str = "Upload your file for processing.",
    allow_multiple: bool = True,
):
    """
    Render a Streamlit file uploader followed by a "Submit" button.

    When the "Submit" button reports a click, the uploader's current value is
    stored in `st.session_state.pdf_file`. The uploader's value is returned
    in every case, whether or not the button was clicked.

    Args:
        file_types (str or list): Allowed file type(s), e.g., "pdf" or
            ["pdf", "docx"]. A single string is wrapped in a list.
        label (str): Label passed to the uploader widget.
        help_text (str): Help text passed to the uploader widget.
        allow_multiple (bool): Forwarded as `accept_multiple_files`.

    Returns:
        Whatever `st.file_uploader` returned for this run.
    """
    # The widget receives a list of types, so normalise a lone string.
    allowed_types = [file_types] if isinstance(file_types, str) else file_types

    selection = st.file_uploader(
        label=label,
        type=allowed_types,
        help=help_text,
        accept_multiple_files=allow_multiple,
    )

    submitted = st.button("Submit")
    if submitted:
        st.session_state.pdf_file = selection

    return selection
def _next_free_column(workbook_path: str, sheet_name: str) -> int:
    """Return the `startcol` to use when adding a table to an existing workbook.

    Gives the sheet's `max_column` when `sheet_name` exists in the workbook,
    and 0 when the workbook has no sheet with that name.
    """
    book = load_workbook(workbook_path)
    if sheet_name not in book.sheetnames:
        return 0
    # openpyxl reports max_column as a 1-based index, while pandas' `startcol`
    # is 0-based, so this value addresses the column right after the last one
    # openpyxl counts as used.
    return book[sheet_name].max_column


def export_results_to_excel(
    results: dict,
    sheet_name: str,
    filename: str = "output.xlsx",
    column: Union[str, None] = None,
) -> Union[BytesIO, None]:
    """
    Export one entry of a results dictionary to Excel.

    The entry `results[column]` must be a dictionary; it is turned into a
    two-column table (`column`, "Value") with missing values replaced by 0.
    The table is written to `data/<filename>` (the `data/` folder is created
    if needed):

    - if the file does not exist, a new workbook is created;
    - if it exists, the table is overlaid onto `sheet_name` starting at row 0
      and at the column returned by `_next_free_column`, i.e. beside any
      data already in that sheet.

    Independently of the file on disk, an in-memory workbook containing only
    the table built by this call is returned for download.

    Args:
        results (dict): The data to export.
        sheet_name (str): The sheet name to write to.
        filename (str): The Excel file name (with or without '.xlsx').
        column (str | None): Key of `results` whose dictionary value is
            exported; also used as the header of the first column. With the
            default `None`, `results.get(None, {})` is used, which gives an
            empty table unless `results` has a `None` key.

    Returns:
        BytesIO | None: In-memory Excel file for Streamlit download, or
        `None` when `results` is empty, the selected entry is not a
        dictionary, or any error occurs (the error is logged).
    """
    try:
        if not results:
            logger.error("Results object is None or empty.")
            return None

        filename = filename if filename.endswith(".xlsx") else f"{filename}.xlsx"
        data = results.get(column, {})
        logger.info(f"Exporting data for column '{column}' to {filename}")

        if not isinstance(data, dict):
            logger.error(f"Expected dictionary for column '{column}', but got {type(data)}")
            return None

        df = pd.DataFrame(data.items(), columns=[column, "Value"])
        df = df.fillna(0)

        os.makedirs("data", exist_ok=True)
        physical_path = os.path.join("data", filename)

        if os.path.exists(physical_path):
            # Append mode with 'overlay' writes into the existing sheet
            # instead of replacing it.
            start_column = _next_free_column(physical_path, sheet_name)
            with pd.ExcelWriter(physical_path, engine='openpyxl', mode='a', if_sheet_exists='overlay') as writer:
                df.to_excel(writer, sheet_name=sheet_name, index=False, header=True, startrow=0, startcol=start_column)
        else:
            with pd.ExcelWriter(physical_path, engine='openpyxl', mode='w') as writer:
                df.to_excel(writer, sheet_name=sheet_name, index=False, header=True, startrow=0)

        # The download stream is built separately and holds only this table,
        # not the accumulated contents of the file on disk.
        output_stream = BytesIO()
        with pd.ExcelWriter(output_stream, engine='openpyxl') as writer:
            df.to_excel(writer, sheet_name=sheet_name, index=False)
        output_stream.seek(0)

        logger.info(f"Data exported to {filename} successfully.")
        return output_stream

    except Exception as e:
        # Best-effort export: callers receive None rather than an exception.
        logger.error(f"Error creating Excel export: {e}")
        return None