|
import re
|
|
import pandas as pd
|
|
|
|
def _extract_final_value(val):
    """Collapse a ``numeric_value``/``measurement_unit`` dict into display text.

    Returns ``"<numeric> <unit>"`` when both are present, ``"<numeric>"`` when
    the unit is missing or empty, and ``None`` when *val* is not a dict or has
    no ``numeric_value`` (the caller then treats it as a nested container).
    """
    if not isinstance(val, dict):
        return None
    numeric = val.get("numeric_value")
    if numeric is None:
        return None
    unit = val.get("measurement_unit")
    return f"{numeric} {unit}".strip() if unit else str(numeric)


def _flatten_metric(data, parent_key=""):
    """Flatten nested metric data into a ``{row label: display string}`` dict.

    Keys are turned into labels by replacing underscores with spaces and
    title-casing; nested levels are joined with ``" - "``. ``None`` leaves
    become ``"Not Available"``; every other leaf is passed through ``str``.
    """
    flat = {}
    if isinstance(data, dict):
        for key, val in data.items():
            # str() so that non-string keys (e.g. ints) do not break .replace().
            label = str(key).replace("_", " ").title()
            full_key = f"{parent_key} - {label}" if parent_key else label
            if isinstance(val, dict):
                extracted = _extract_final_value(val)
                if extracted is not None:
                    flat[full_key] = extracted
                else:
                    # Not a numeric/unit leaf: descend and prefix child labels.
                    flat.update(_flatten_metric(val, full_key))
            else:
                flat[full_key] = str(val) if val is not None else "Not Available"
    elif parent_key:
        # Scalar passed in directly together with a label.
        flat[parent_key] = str(data) if data is not None else "Not Available"
    return flat


def prepare_comparison_df(selected_companies, selected_year, metric_key, company_docs):
    """Prepare a wide-format comparison DataFrame for the selected companies and metric.

    Args:
        selected_companies: Iterable of company ids; each is matched against
            the ``"_id"`` of the entries in ``company_docs`` (first match wins).
        selected_year: Key used as-is to look up a report inside each
            document's ``"esg_reports"`` mapping.
        metric_key: Key of the metric inside the selected report.
        company_docs: Re-iterable collection of company documents (it is
            scanned once per selected company).

    Returns:
        A DataFrame indexed by metric label (index name ``"Metric"``) with one
        column per company that produced at least one value; gaps are filled
        with ``"Not Available"``. Returns ``None`` when no company yields data.
    """
    rows = {}
    for company_id in selected_companies:
        doc = next((d for d in company_docs if d["_id"] == company_id), None)
        if not doc or "esg_reports" not in doc:
            continue

        # A null "esg_reports" or a null report for the year means "no data",
        # not an error.
        report = (doc["esg_reports"] or {}).get(selected_year) or {}
        metric_data = report.get(metric_key)

        for key, val in _flatten_metric(metric_data).items():
            rows.setdefault(key, {})[company_id] = val

    if not rows:
        return None

    # rows maps metric -> {company: value}; transpose so metrics become the index.
    df = pd.DataFrame(rows).T
    df.index.name = "Metric"
    return df.fillna("Not Available")
|
|
|