File size: 2,408 Bytes
5d4ad83 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
import re
import pandas as pd
def _format_measurement(value):
    """Return a display string for a measurement dict, or ``None``.

    A measurement dict is a dict whose ``"numeric_value"`` entry is not
    ``None``. Its ``"measurement_unit"`` entry is appended after a space when
    it is truthy; otherwise only the number is rendered.
    """
    if not isinstance(value, dict):
        return None
    numeric = value.get("numeric_value")
    if numeric is None:
        return None
    unit = value.get("measurement_unit")
    return f"{numeric} {unit}".strip() if unit else str(numeric)


def _flatten_metric(data, parent_label=""):
    """Flatten a nested metric dict into ``{row label: display string}``.

    Keys are title-cased with underscores turned into spaces, and nested
    levels are joined with ``" - "``. Measurement dicts collapse into a single
    ``"<number> <unit>"`` cell; ``None`` leaves become ``"Not Available"``.
    """
    flat = {}
    for key, value in data.items():
        label = str(key).replace("_", " ").title()
        if parent_label:
            label = f"{parent_label} - {label}"

        if isinstance(value, dict):
            formatted = _format_measurement(value)
            if formatted is not None:
                flat[label] = formatted
            else:
                # Not a measurement: descend and prefix children with this label.
                flat.update(_flatten_metric(value, label))
        else:
            flat[label] = "Not Available" if value is None else str(value)
    return flat


def prepare_comparison_df(selected_companies, selected_year, metric_key, company_docs):
    """Prepare a wide-format comparison DataFrame for one metric.

    Args:
        selected_companies: Iterable of company ids; each id is matched
            against the ``"_id"`` entry of the documents in ``company_docs``.
        selected_year: Key used to look up a report inside a document's
            ``"esg_reports"`` mapping.
        metric_key: Key of the metric inside the selected report.
        company_docs: Re-iterable collection of company documents (dicts).
            The first document whose ``"_id"`` equals the company id is used.

    Returns:
        A ``pandas.DataFrame`` indexed by metric label (index name
        ``"Metric"``) with one column per company that has data, and
        ``"Not Available"`` in every cell a company has no value for.
        Returns ``None`` when no selected company has any data for the metric.
    """
    # Row label used when the metric is stored as a bare value, not a dict.
    metric_label = str(metric_key).replace("_", " ").title()

    rows = {}
    for company_id in selected_companies:
        doc = next((d for d in company_docs if d["_id"] == company_id), None)
        if not doc:
            continue

        # A document may carry "esg_reports": None, or a year mapped to None;
        # treat both as "no data" instead of failing on `.get`.
        reports = doc.get("esg_reports")
        if not isinstance(reports, dict):
            continue
        report = reports.get(selected_year)
        if not isinstance(report, dict):
            continue

        metric_data = report.get(metric_key)
        if metric_data is None:
            continue

        if isinstance(metric_data, dict):
            flattened = _flatten_metric(metric_data)
        else:
            # A scalar metric has no sub-keys to derive labels from, so it is
            # reported under the metric's own title-cased name.
            flattened = {metric_label: str(metric_data)}

        for label, value in flattened.items():
            rows.setdefault(label, {})[company_id] = value

    if not rows:
        return None

    # `rows` maps metric label -> {company: value}; transposing puts metrics
    # on the index and companies on the columns.
    df = pd.DataFrame(rows).T
    df.index.name = "Metric"
    return df.fillna("Not Available")
|