EduardoPacheco's picture
Utilities to run app
f2b8171
raw
history blame
4.7 kB
from __future__ import annotations
import re
from typing import Optional, Union
import numpy as np
import plotly.graph_objects as go
from sklearn.ensemble import GradientBoostingRegressor
class DataGenerator:
def __init__(self, formula_str: str, x_range: list, n_samples: int, seed: int) -> None:
self.formula_str = formula_str
self.x_range = x_range
self.n_samples = n_samples
self.seed = seed
self.rng = np.random.RandomState(seed)
@property
def X(self) -> np.array:
self.rng = np.random.RandomState(42)
X = np.atleast_2d(self.rng.uniform(*self.x_range, size=self.n_samples)).T
return X
@property
def y_raw(self) -> np.array:
y_raw = self._eval_formula()
return y_raw.ravel()
@property
def y(self) -> np.array:
sigma = 0.5 + self.X.ravel() / 10
noise = self.rng.lognormal(sigma=sigma) - np.exp(sigma**2 / 2)
return self.y_raw + noise
def _eval_formula(self) -> np.array:
function_map = {
'sin': "np.sin",
'cos': "np.cos",
'tan': "np.tan",
'exp': "np.exp",
'log': "np.log",
'sqrt': "np.sqrt",
'abs': "np.abs",
}
# Replace "x" in the formula string with "x_values"
_formula_str = re.sub(r'\bx\b', '(self.X)', self.formula_str)
# Replace any function calls in the formula string with the appropriate function object
_formula_str = re.sub(r'(\w+)\(([^)]*)\)', lambda m: f'{function_map[m.group(1)]}({m.group(2)})', _formula_str)
# Evaluate the formula using the updated string and return the result
return eval(_formula_str)
class GradientBoostingCoverage:
def __init__(self, lower: float, upper: float, **kwargs) -> None:
self.lower = lower
self.upper = upper
self.kwargs = kwargs
self.models = self._build_models()
@property
def expected_coverage(self) -> float:
return self.upper - self.lower
def _build_models(self) -> dict[str, GradientBoostingRegressor]:
models = {}
for name, alpha in [("lower", self.lower), ("upper", self.upper)]:
models[f"{name}"] = GradientBoostingRegressor(loss="quantile", alpha=alpha, **self.kwargs)
return models
def fit(self, X: np.ndarray, y: np.array) -> None:
for model in self.models.values():
model.fit(X, y)
def predict(self, X: np.ndarray) -> tuple[np.array, np.array]:
lower = self.models["lower"].predict(X)
upper = self.models["upper"].predict(X)
return lower, upper
def coverage_fraction(self, X: np.ndarray, y: np.array) -> float:
y_low, y_high = self.predict(X)
return np.mean(np.logical_and(y >= y_low, y <= y_high))
def fit_gradientboosting(X, y, **kwargs) -> GradientBoostingRegressor:
model = GradientBoostingRegressor(**kwargs)
model.fit(X, y)
return model
def plot_interval(
xx: np.array,
X_test: np.array,
y_test: np.array,
y_upper: np.array,
y_lower: np.array,
y_med: np.array,
y_mean: np.array,
formula_str: Optional[str]=None,
interval: Optional[Union[int, str]]=None,
) -> go.Figure:
# Using plotly to plot an interval
fig = go.Figure()
fig.add_trace(
go.Scatter(
x=xx.ravel(),
y=y_upper,
fill=None,
mode="lines",
line_color="rgba(255,255,0,0)",
name=""
)
)
fig.add_trace(
go.Scatter(
x=xx.ravel(),
y=y_lower,
fill="tonexty",
mode="lines",
line_color="rgba(255,255,0,0)",
name=f"Predicted Interval"
)
)
fig.add_trace(
go.Scatter(
x=xx.ravel(),
y=y_med,
mode="lines",
line_color="red",
name='Predicted Median',
)
)
fig.add_trace(
go.Scatter(
x=xx.ravel(),
y=y_mean,
mode="lines",
name='Predicted Mean',
line=dict(color='red', dash='dash')
)
)
fig.add_trace(
go.Scatter(
x=X_test.ravel(),
y=y_test,
mode="markers",
marker_color="blue",
name="Test Observations",
marker=dict(size=5, line=dict(width=2, color="DarkSlateGrey"))
)
)
fig.update_layout(
title=f"Predicted {interval}% Interval",
xaxis_title="x",
yaxis_title="f(x)" if not formula_str else formula_str,
height=600
)
return fig