import re

import pandas as pd

# Placeholder shown wherever a company has no value for a metric row.
_NOT_AVAILABLE = "Not Available"


def _humanize(key):
    """Turn a snake_case key into a Title Case label ("scope_1" -> "Scope 1")."""
    return str(key).replace("_", " ").title()


def _extract_final_value(val):
    """Return "<numeric> <unit>" for a dict carrying "numeric_value", else None.

    The unit is appended only when "measurement_unit" is truthy; a numeric
    value of 0 is a real value and is kept.
    """
    if not isinstance(val, dict):
        return None
    numeric = val.get("numeric_value")
    if numeric is None:
        return None
    unit = val.get("measurement_unit")
    return f"{numeric} {unit}".strip() if unit else str(numeric)


def _flatten_metric(data, parent_key=""):
    """Flatten a nested metric dict into {"Parent - Child": "value"} strings.

    Nested dicts that carry a "numeric_value" collapse to a single formatted
    string; other nested dicts are descended into, joining labels with " - ".
    Non-dict leaves are stringified, with None rendered as "Not Available".
    """
    flat = {}
    for key, val in data.items():
        label = _humanize(key)
        full_key = f"{parent_key} - {label}" if parent_key else label
        if isinstance(val, dict):
            extracted = _extract_final_value(val)
            if extracted is not None:
                flat[full_key] = extracted
            else:
                flat.update(_flatten_metric(val, full_key))
        else:
            flat[full_key] = _NOT_AVAILABLE if val is None else str(val)
    return flat


def prepare_comparison_df(selected_companies, selected_year, metric_key, company_docs):
    """
    Prepare a wide-format comparison DataFrame for the selected companies and metric.

    Args:
        selected_companies: Iterable of company ids; each id that has data
            becomes a column.
        selected_year: Key into each document's "esg_reports" mapping.
        metric_key: Key of the metric inside the selected year's report.
        company_docs: Iterable of dict documents with an "_id" and, optionally,
            an "esg_reports" mapping. May be a one-shot iterable; it is
            consumed exactly once.

    Returns:
        A DataFrame indexed by metric label (index name "Metric") with one
        column per company that had data, missing cells filled with
        "Not Available"; or None when no selected company has any data.
    """
    # Index the documents once. setdefault keeps the first document for a
    # given id, matching a first-match linear search, and a single pass also
    # works when company_docs is a generator or cursor.
    docs_by_id = {}
    for doc in company_docs:
        if isinstance(doc, dict) and "_id" in doc:
            docs_by_id.setdefault(doc["_id"], doc)

    metric_label = _humanize(metric_key)
    rows = {}

    for company_id in selected_companies:
        doc = docs_by_id.get(company_id)
        if not doc:
            continue

        # "or {}" tolerates keys that are present but hold None.
        reports = doc.get("esg_reports") or {}
        report = reports.get(selected_year) or {}
        metric_data = report.get(metric_key)
        if metric_data is None:
            continue

        if isinstance(metric_data, dict):
            flattened = _flatten_metric(metric_data)
        else:
            # A scalar metric has no sub-keys; label its single row with the
            # metric name instead of dropping the value.
            flattened = {metric_label: str(metric_data)}

        for key, val in flattened.items():
            rows.setdefault(key, {})[company_id] = val

    if not rows:
        return None

    # rows maps metric -> {company: value}; transpose so metrics are the index.
    df = pd.DataFrame(rows).T
    df.index.name = "Metric"
    df = df.fillna(_NOT_AVAILABLE)
    return df