# Streamlit page: ESG report comparison dashboard.
#
# Lets the user pick up to three companies and one report year, then shows one
# tab per metric category with a comparison table for every metric in it.
# Cells that hold no usable value are shaded red, all others green.

import streamlit as st
import pandas as pd

from src.utils import streamlit_function
from src.utils.logger import get_logger
from src.services.mongo_db_service import retrieve_documents
from src.utils.common_functions import prepare_comparison_df

logger = get_logger()

streamlit_function.config_homepage()
st.title("📊 ESG Report Comparison Dashboard")

# Metric category (tab title) -> metric keys rendered inside that tab.
METRIC_OPTIONS = {
    "Report Metadata": ["report_metadata"],
    "Environmental Parameters": [
        "Emissions",
        "Energy Consumption",
        "Water Withdrawal",
        "Water Discharge",
        "Waste Generation",
        "Waste Disposal",
        "Waste Recovery",
    ],
    "Social Parameters": [
        "Human Rights Training Coverage",
        "LTIFR",
        "Other Safety Incidents",
        "Health & Safety Training Coverage",
        "Grievances Reported",
        "Third-party Assessment Coverage",
        "CSR Beneficiaries",
        "Female Wage Share",
        "Wages by Location",
        "Well-being Cost",
        "Worker Well-being Coverage",
        "Employee Well-being Coverage",
        "Turnover Count",
        "Workforce Gender Diversity",
    ],
    "Governance Parameters": [
        "Non-compliance Instances",
        "Disciplinary Actions",
        "Consumer Complaints",
        "Customer Data Breaches",
        "Governance Diversity",
        "Purchase Concentration",
        "Sales Concentration",
        "Related Party Transactions",
    ],
    "Materiality": ["material_topics"],
}

ESG_EXTRACTOR_COLLECTION = "esg_report_extracts"

# Sentinel shown as the first select-box entry until a real year is chosen.
YEAR_PLACEHOLDER = "-- Select Year --"

# Stripped string forms that count as "no value" when shading cells.
_MISSING_MARKERS = frozenset({"", "nan", "None", "Not Available", "N/A"})
_MISSING_CELL_CSS = "background-color: #ffe6e6"
_PRESENT_CELL_CSS = "background-color: #e6ffe6"


def get_all_years(docs) -> list:
    """Return every report year found in *docs*, newest first.

    A document contributes the keys of its ``"esg_reports"`` entry, and only
    when that entry is a dict; other documents are ignored.
    """
    years = set()
    for doc in docs:
        reports = doc.get("esg_reports") if "esg_reports" in doc else None
        if isinstance(reports, dict):
            years.update(reports.keys())
    return sorted(years, reverse=True)


def _is_missing(value) -> bool:
    """Return True when a table cell holds no usable value."""
    # pd.isna() answers element-wise for list/array input, and the truth value
    # of such an array is ambiguous (raises ValueError), so only ask it about
    # scalars.
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return True
    return str(value).strip() in _MISSING_MARKERS


def _cell_background(value) -> str:
    """Return the CSS background rule for one cell."""
    return _MISSING_CELL_CSS if _is_missing(value) else _PRESENT_CELL_CSS


def highlight_missing_values(df):
    """Return a Styler for *df* with missing cells red and the rest green.

    A cell is missing when it is a scalar NA value or when its stripped string
    form is one of the placeholder markers ("", "nan", "None",
    "Not Available", "N/A"). List-like cells are judged by their string form
    only, so they no longer raise while the table is being styled.
    """
    return df.style.map(_cell_background)


def extract_company_name_from_doc(doc, default_name):
    """Return the ``company_legal_name`` from *doc*'s ``report_metadata``.

    Falls back to *default_name* when the metadata entry is absent, is not a
    dict (for example ``None``), or has no ``company_legal_name`` key.
    """
    metadata = doc.get("report_metadata")
    if not isinstance(metadata, dict):
        return default_name
    return metadata.get("company_legal_name", default_name)


# NOTE(review): company_docs is iterated more than once below, which assumes
# retrieve_documents returns a re-iterable collection (e.g. a list) — confirm.
company_docs = retrieve_documents(collection_name=ESG_EXTRACTOR_COLLECTION)

# The document "_id" is what the user picks from in the company selector.
available_company_data = [doc["_id"] for doc in company_docs if "_id" in doc]

selected_companies = st.multiselect(
    "Select up to 3 companies",
    options=available_company_data,
    max_selections=3,
)

if selected_companies:
    all_years = get_all_years(company_docs)
    selected_year = st.selectbox(
        "Select a report year (applies to all selected companies)",
        options=[YEAR_PLACEHOLDER] + all_years,
        key="common_year",
    )

    if selected_year != YEAR_PLACEHOLDER:
        tabs = st.tabs(list(METRIC_OPTIONS))
        # One tab per category, in METRIC_OPTIONS order.
        for tab, (category, metric_keys) in zip(tabs, METRIC_OPTIONS.items()):
            with tab:
                st.subheader(category)
                for metric in metric_keys:
                    st.markdown(f"### {metric}")
                    comparison_df = prepare_comparison_df(
                        selected_companies, selected_year, metric, company_docs
                    )
                    # A None result is treated as "no data for this metric".
                    if comparison_df is not None:
                        st.dataframe(
                            highlight_missing_values(comparison_df),
                            use_container_width=True,
                        )
                    else:
                        st.warning(
                            f"No data found for **{metric}** in {selected_year}"
                        )
    else:
        st.info("Please select a year to view report comparisons.")
else:
    st.info("Please select at least one company to continue.")