sss / src /app_utils.py
reddgr's picture
company tab v0.1
2c93b94
raw
history blame
4.99 kB
import pandas as pd
from typing import Tuple
import re
# CSS color name that styler_negative_red applies to negative values.
_NEG_COLOR = "red"
def format_large_number(n, decimals=2):
    """Abbreviate a large number with a T (1e12), B (1e9) or M (1e6) suffix.

    The magnitude (absolute value) of *n* selects the suffix, so negative
    amounts are abbreviated the same way as positive ones and keep their sign.

    Args:
        n: Number to format.
        decimals: Number of decimal places shown in the abbreviated form.

    Returns:
        A string such as ``'1.50 M'`` or ``'-2.50 B'``; values whose magnitude
        is below one million are returned unchanged via ``str(n)``.
    """
    magnitude = abs(n)
    # Ordered from largest to smallest so the first match is the right unit.
    for threshold, suffix in ((1e12, "T"), (1e9, "B"), (1e6, "M")):
        if magnitude >= threshold:
            return f"{n / threshold:.{decimals}f} {suffix}"
    return str(n)
def format_results(df: pd.DataFrame, rename_columns: dict) -> pd.DataFrame:
    """Turn the numeric columns of *df* into display strings and rename columns.

    Only columns that are present are formatted; missing values become ``"-"``.
    The input frame is left untouched: formatting is applied to a copy.

    Args:
        df: Frame holding the raw values, using the original column names.
        rename_columns: Mapping from original column names to display names,
            applied after formatting.

    Returns:
        A new DataFrame with formatted values and renamed columns.
    """
    # Work on a copy so the caller's frame keeps its numeric values; otherwise
    # a second call on the same frame would try to format strings as numbers.
    df = df.copy()

    # Index on a 0-100 scale
    if "ind_sust" in df.columns:
        df["ind_sust"] = df["ind_sust"].apply(lambda x: "-" if pd.isna(x) else int(round(x * 100, 0)))

    # 1 decimal place
    for col in ["trailingPE", "beta"]:
        if col in df.columns:
            df[col] = df[col].apply(lambda x: "-" if pd.isna(x) else f"{x:.1f}")

    # 2 decimal places
    if "Search dist." in df.columns:
        df["Search dist."] = df["Search dist."].apply(lambda n: "-" if pd.isna(n) else f"{n:.2f}")

    # Large monetary amounts
    if "marketCap" in df.columns:
        df["marketCap"] = df["marketCap"].apply(lambda n: "-" if pd.isna(n) else format_large_number(n, 1))

    # Percentages with 1 decimal place (source holds a fraction); zero is shown as "-"
    for col in ["ret_365", "revenueGrowth"]:
        if col in df.columns:
            df[col] = df[col].apply(lambda x: "-" if pd.isna(x) or x == 0 else f"{(x * 100):.1f}%")

    # Percentages with 1 decimal place (source already holds a percentage figure)
    for col in ["dividendYield"]:
        if col in df.columns:
            df[col] = df[col].apply(lambda x: "-" if pd.isna(x) else f"{round(x, 1)}%")

    # Volatility; zero is shown as "-"
    if "vol_365" in df.columns:
        df["vol_365"] = df["vol_365"].apply(lambda x: "-" if pd.isna(x) or x == 0 else f"{x:.4f}")

    # Return the frame with its columns renamed for display
    return df.rename(columns=rename_columns)
def random_ticker(df: pd.DataFrame) -> str:
    """Return one value drawn at random from the ``ticker`` column of *df*."""
    tickers = df["ticker"]
    drawn = tickers.sample(n=1)
    return drawn.values[0]
def styler_negative_red(df: pd.DataFrame, cols: list[str] | None = None):
    """Return a Styler that colors negative values in *cols*.

    Cell values are converted to text, stripped of spaces, ``%``, ``,`` and the
    ``T``/``M``/``B`` letters, and parsed as a float; cells that parse to a
    negative number get the ``_NEG_COLOR`` CSS color. Cells that cannot be
    parsed are left unstyled.

    Args:
        df: Frame to style.
        cols: Columns to inspect. Columns absent from *df* are ignored; when
            omitted or empty, every column of *df* is inspected.

    Returns:
        The Styler of *df* with the element-wise style function registered.
    """
    cols = [c for c in (cols or df.columns) if c in df.columns]

    def _style(v):
        try:
            num = float(re.sub(r"[ %,TMB]", "", str(v)))
        except ValueError:
            # Not a number (for example the "-" placeholder): no styling.
            return ""
        return f"color:{_NEG_COLOR}" if num < 0 else ""

    styler = df.style
    # pandas 2.1 renamed Styler.applymap to Styler.map and deprecated the old
    # name; prefer the new one and fall back for older pandas versions.
    elementwise = getattr(styler, "map", None) or styler.applymap
    return elementwise(_style, subset=cols)
def get_company_info(
    maestro: pd.DataFrame,
    ticker: str,
    rename_columns: dict
) -> Tuple[str, str, pd.DataFrame]:
    """Return the name, business summary and a details table for *ticker*.

    Args:
        maestro: Frame with one row per company and a ``ticker`` column.
        ticker: Ticker to look up; the first matching row is used.
        rename_columns: Mapping from original column names to display names.

    Returns:
        A ``(name, summary, details)`` tuple. ``name`` comes from the
        ``security`` column (falls back to *ticker* when that column is
        missing), ``summary`` from ``longBusinessSummary`` (empty string when
        missing), and ``details`` is a two-column ``Field``/``Value`` frame
        with every other column of the row, using display names. When no row
        matches, returns ``(ticker, "No data available.", empty DataFrame)``.
    """
    company = maestro[maestro["ticker"] == ticker]
    if company.empty:
        return ticker, "No data available.", pd.DataFrame()

    # Extract name and summary
    name = company["security"].iloc[0] if "security" in company.columns else ticker
    summary = company["longBusinessSummary"].iloc[0] if "longBusinessSummary" in company.columns else ""

    # Build the details table, keeping the original column names alongside the
    # display names so no reverse lookup through rename_columns is needed
    # (a reverse lookup is ambiguous when two keys share a display name).
    details = company.drop(columns=["longBusinessSummary"], errors="ignore").iloc[0]
    source_fields = details.index.tolist()
    display_fields = [rename_columns.get(col, col) for col in source_fields]
    df = pd.DataFrame({
        "Field": display_fields,
        "Value": details.values.tolist()
    })
    value_col = df.columns.get_loc("Value")

    # Classify the numeric, non-missing values: "_norm" fields are rounded to
    # 3 decimals in place, the rest are collected for format_results.
    # NOTE(review): NumPy integer scalars are not `int` instances, so values
    # from integer-typed columns may be skipped here - confirm this is intended.
    to_format = {}  # row position -> original column name
    for row, (shown, value) in enumerate(zip(display_fields, df["Value"].tolist())):
        if not isinstance(value, (int, float)) or pd.isna(value):
            continue
        if shown.endswith("_norm"):
            df.iloc[row, value_col] = round(value, 3)
        else:
            to_format[row] = source_fields[row]

    if to_format:
        # Single-row frame under the original names, as format_results expects
        temp_df = pd.DataFrame(
            [[df.iloc[row, value_col] for row in to_format]],
            columns=list(to_format.values()),
        )
        formatted = format_results(temp_df, rename_columns).iloc[0]
        # Put the formatted values back into the details table
        for row, source_field in to_format.items():
            df.iloc[row, value_col] = formatted[rename_columns.get(source_field, source_field)]

    return name, summary, df