Spaces:

VelaTest
/

PDFExtractor

Sleeping

PDFExtractor / application /services /streamlit_function.py

Vela

modified gemini service module add API call file handling

d1ca23a 3 months ago

4.49 kB

	import streamlit as st
	from typing import Union, List
	import pandas as pd
	from io import BytesIO
	import json
	import os
	from application.utils import logger

	# Rebind `logger` from the imported `application.utils.logger` module to the
	# object returned by its `get_logger()`. After this line the module itself is
	# no longer reachable under this name.
	logger = logger.get_logger()

	# Defaults consumed by `config_homepage()`: the browser-tab title and the
	# value passed as `layout` to `st.set_page_config()`.
	PAGE_TITLE = "PDF Extractor"
	PAGE_LAYOUT = "wide"
	# Disabled settings: these are referenced only from commented-out arguments
	# in `config_homepage()`, so they are currently not defined at runtime.
	# PAGE_ICON = "src/frontend/images/page_icon.jpg"
	# GITHUB_LINK = "https://github.com/Vela-Test1993/yuvabe-care-companion-ai"
	# ABOUT_US = "An AI-powered assistant for personalized healthcare guidance."


	def config_homepage(page_title=PAGE_TITLE):
	    """
	    Apply the page-level Streamlit settings for the app.

	    Calls `st.set_page_config()` with the given title, the module-wide
	    `PAGE_LAYOUT`, and a collapsed sidebar. The call is skipped when the key
	    "page_config_set" is already present in `st.session_state`.

	    Args:
	        page_title (str): Title shown on the browser tab (defaults to PAGE_TITLE).

	    Note:
	        This function never stores "page_config_set" itself (the assignment
	        is commented out), so unless other code sets that key the
	        configuration is applied on every call. The page icon and the custom
	        menu items are likewise disabled.

	    Example:
	        >>> config_homepage("My Custom App")
	    """
	    # Guard clause: nothing to do when the flag is already in the session.
	    if "page_config_set" in st.session_state:
	        return

	    page_settings = {
	        "page_title": page_title,
	        "layout": PAGE_LAYOUT,
	        "initial_sidebar_state": "collapsed",
	        # Disabled options, kept for reference:
	        # "page_icon": PAGE_ICON,
	        # "menu_items": {
	        #     "Get help": GITHUB_LINK,
	        #     "Report a bug": GITHUB_LINK,
	        #     "About": ABOUT_US,
	        # },
	    }
	    st.set_page_config(**page_settings)
	    # st.session_state.page_config_set = True

	def upload_file(
	    file_types: Union[str, List[str]] = "pdf",
	    label: str = "📤 Upload a file",
	    help_text: str = "Upload your file for processing.",
	    allow_multiple: bool = False,
	):
	    """
	    Render a Streamlit file uploader followed by a "Submit" button.

	    Args:
	        file_types (str or list): Allowed extension(s), e.g. "pdf" or
	            ["pdf", "docx"]. A single string is wrapped in a list.
	        label (str): Text shown above the uploader.
	        help_text (str): Tooltip text for the uploader.
	        allow_multiple (bool): Whether several files may be selected.

	    Returns:
	        Whatever `st.file_uploader` returned for this run.

	    Side effects:
	        When the "Submit" button is pressed, the uploader's value is stored
	        in `st.session_state.pdf_file`.
	    """
	    # The uploader is always given a list of extensions.
	    accepted_types = [file_types] if isinstance(file_types, str) else file_types

	    uploaded_files = st.file_uploader(
	        label=label,
	        type=accepted_types,
	        help=help_text,
	        accept_multiple_files=allow_multiple,
	    )

	    submitted = st.button("Submit")
	    if submitted:
	        st.session_state.pdf_file = uploaded_files

	    # NOTE(review): the original indentation was lost; the return is assumed
	    # to be unconditional, matching the documented return value — confirm.
	    return uploaded_files

	def export_results_to_excel(results: dict, sheet_name: str, filename: str = "output.xlsx") -> BytesIO:
	    """
	    Write `results` to an Excel workbook on disk and return a downloadable copy.

	    The data is flattened with `pd.json_normalize` (nested keys joined by
	    "_"), and `None` / "NULL" values are replaced by empty strings. If that
	    step fails, a one-row frame with an "error" column is exported instead.

	    On disk, the workbook lives at `data/<filename>`: it is created when
	    missing; otherwise the rows are appended below the existing content of
	    `sheet_name` (or written with a header to a new sheet of that name).
	    The returned stream holds only the rows from this call.

	    Args:
	        results (dict): The data to export.
	        sheet_name (str): The sheet name to write to.
	        filename (str): The Excel file name (with or without '.xlsx').

	    Returns:
	        BytesIO: In-memory Excel file, rewound to position 0.
	    """
	    try:
	        frame = pd.json_normalize(results, sep="_").replace({None: "", "NULL": ""})
	    except Exception as e:
	        # Best effort: export the failure message rather than raising.
	        frame = pd.DataFrame([{"error": f"Could not parse result: {str(e)}"}])

	    # Normalise the file name and make sure the target folder exists.
	    if not filename.endswith(".xlsx"):
	        filename = f"{filename}.xlsx"
	    os.makedirs("data", exist_ok=True)
	    target_path = os.path.join("data", filename)

	    # Persist to the physical workbook: create it, or append to it.
	    if not os.path.exists(target_path):
	        frame.to_excel(target_path, index=False, engine="openpyxl", sheet_name=sheet_name)
	    else:
	        with pd.ExcelWriter(target_path, engine="openpyxl", mode="a", if_sheet_exists="overlay") as writer:
	            workbook = writer.book
	            # Continue below the last used row of an existing sheet; a new
	            # sheet starts at the top and therefore gets the header row.
	            first_free_row = workbook[sheet_name].max_row if sheet_name in workbook.sheetnames else 0
	            frame.to_excel(
	                writer,
	                sheet_name=sheet_name,
	                index=False,
	                header=first_free_row == 0,
	                startrow=first_free_row,
	            )

	    # Build the in-memory copy handed back for download.
	    buffer = BytesIO()
	    with pd.ExcelWriter(buffer, engine="openpyxl") as writer:
	        frame.to_excel(writer, index=False, sheet_name=sheet_name)
	    buffer.seek(0)

	    return buffer