PDFExtractor / test.py
Vela
Added multiple file upload functionality
22481bd
import os
import pandas as pd
from io import BytesIO
from openpyxl import load_workbook
from application.utils import logger
logger = logger.get_logger()
def export_results_to_excel(results: dict, sheet_name: str, filename: str = "output.xlsx", column: str = None) -> BytesIO:
try:
if not results:
logger.error("Results object is None or empty.")
return None
filename = filename if filename.endswith(".xlsx") else f"{filename}.xlsx"
data = results.get(column, {})
logger.info(f"Exporting data for column '{column}' to {filename}")
if not isinstance(data, dict):
logger.error(f"Expected dictionary for column '{column}', but got {type(data)}")
return None
df = pd.DataFrame(data.items(), columns=[column, "Value"])
df.fillna(0, inplace=True)
os.makedirs("data", exist_ok=True)
physical_path = os.path.join("data", filename)
file_exists = os.path.exists(physical_path)
start_row = 0
start_column = 0
if file_exists:
book = load_workbook(physical_path)
if sheet_name in book.sheetnames:
sheet = book[sheet_name]
start_row = sheet.max_row
start_column = sheet.max_column
else:
start_row = 0
if file_exists:
with pd.ExcelWriter(physical_path, engine='openpyxl', mode='a', if_sheet_exists='overlay') as writer:
df.to_excel(writer, sheet_name=sheet_name, index=False, header=True, startrow=0, startcol=start_column)
else:
with pd.ExcelWriter(physical_path, engine='openpyxl', mode='w') as writer:
df.to_excel(writer, sheet_name=sheet_name, index=False, header=True, startrow=0)
output_stream = BytesIO()
with pd.ExcelWriter(output_stream, engine='openpyxl') as writer:
df.to_excel(writer, sheet_name=sheet_name, index=False)
output_stream.seek(0)
logger.info(f"Data exported to {filename} successfully.")
return output_stream
except Exception as e:
logger.error(f"Error creating Excel export: {e}")
return None
# export_results_to_excel(zalando_data, "Zalando Data","test", "Greenhouse Gas (GHG) Protocol Parameters")