import gradio as gr
import numpy as np
import matplotlib
import matplotlib.lines as mlines
import matplotlib.pyplot as plt
from sklearn.svm import OneClassSVM
from sklearn.linear_model import SGDOneClassSVM
from sklearn.kernel_approximation import Nystroem
from sklearn.pipeline import make_pipeline

font = {"weight": "normal", "size": 15}
matplotlib.rc("font", **font)

random_state = 42
rng = np.random.default_rng(random_state)
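
# Train data: two dense clusters of regular observations around (2, 2) and (-2, -2).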
X = 0.3 * rng.random((500, 2))
X_train = np.r_[X + 2, X - 2]
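
# New regular observations drawn from the same distribution as the train data.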
X = 0.3 * rng.random((20, 2))
X_test = np.r_[X + 2, X - 2]
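
# New abnormal observations drawn uniformly over the whole plane.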
X_outliers = rng.uniform(low=-4, high=4, size=(20, 2))
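
# Grid over which both decision functions are evaluated for the contour plots.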
xx, yy = np.meshgrid(np.linspace(-4.5, 4.5, 50), np.linspace(-4.5, 4.5, 50))

md_description = """
# One-Class SVM versus One-Class SVM using Stochastic Gradient Descent

This example shows how to approximate the solution of [sklearn.svm.OneClassSVM](https://scikit-learn.org/stable/modules/generated/sklearn.svm.OneClassSVM.html) in the case of an RBF kernel with [sklearn.linear_model.SGDOneClassSVM](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.SGDOneClassSVM.html), a Stochastic Gradient Descent (SGD) version of the One-Class SVM. A kernel approximation (Nystroem) is first used in order to apply SGDOneClassSVM, which implements a linear One-Class SVM using SGD.

Use the sliders to set `nu`, an upper bound on the fraction of training errors and a lower bound on the fraction of support vectors, and `gamma`, the RBF kernel coefficient: both models are refitted and their learned frontiers redrawn.
"""


def make_plots(nu, gamma):
    # Fit the exact kernelized One-Class SVM and count prediction errors on the
    # train set, the new regular observations and the new abnormal observations.
    clf = OneClassSVM(gamma=gamma, kernel="rbf", nu=nu)
    clf.fit(X_train)
    y_pred_train = clf.predict(X_train)
    y_pred_test = clf.predict(X_test)
    y_pred_outliers = clf.predict(X_outliers)
    n_error_train = y_pred_train[y_pred_train == -1].size
    n_error_test = y_pred_test[y_pred_test == -1].size
    n_error_outliers = y_pred_outliers[y_pred_outliers == 1].size

    Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
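
    # Approximate, online version: a Nystroem kernel approximation followed by
    # a linear One-Class SVM trained with stochastic gradient descent (SGD).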
    transform = Nystroem(gamma=gamma, random_state=random_state)
    clf_sgd = SGDOneClassSVM(
        nu=nu, shuffle=True, fit_intercept=True, random_state=random_state, tol=1e-4
    )
    pipe_sgd = make_pipeline(transform, clf_sgd)
    pipe_sgd.fit(X_train)
    y_pred_train_sgd = pipe_sgd.predict(X_train)
    y_pred_test_sgd = pipe_sgd.predict(X_test)
    y_pred_outliers_sgd = pipe_sgd.predict(X_outliers)
    n_error_train_sgd = y_pred_train_sgd[y_pred_train_sgd == -1].size
    n_error_test_sgd = y_pred_test_sgd[y_pred_test_sgd == -1].size
    n_error_outliers_sgd = y_pred_outliers_sgd[y_pred_outliers_sgd == 1].size

    Z_sgd = pipe_sgd.decision_function(np.c_[xx.ravel(), yy.ravel()])
    Z_sgd = Z_sgd.reshape(xx.shape)
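
    # Each figure shows the filled contours of one decision function, the zero
    # level set (the learned frontier) and the three groups of observations.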
    def make_fig_1():
        fig = plt.figure(figsize=(9, 6))
        ax = fig.add_subplot(111)

        ax.set_title("One-Class SVM")
        ax.contourf(xx, yy, Z, levels=np.linspace(Z.min(), 0, 7), cmap=plt.cm.PuBu)
        ax.contour(xx, yy, Z, levels=[0], linewidths=2, colors="darkred")
        ax.contourf(xx, yy, Z, levels=[0, Z.max()], colors="palevioletred")

        s = 20
        b1 = ax.scatter(X_train[:, 0], X_train[:, 1], c="white", s=s, edgecolors="k")
        b2 = ax.scatter(X_test[:, 0], X_test[:, 1], c="blueviolet", s=s, edgecolors="k")
        c = ax.scatter(X_outliers[:, 0], X_outliers[:, 1], c="gold", s=s, edgecolors="k")
        ax.axis("tight")
        ax.set_xlim((-4.5, 4.5))
        ax.set_ylim((-4.5, 4.5))
        # A proxy line stands in for the frontier contour in the legend.
        ax.legend(
            [mlines.Line2D([], [], color="darkred"), b1, b2, c],
            [
                "learned frontier",
                "training observations",
                "new regular observations",
                "new abnormal observations",
            ],
            loc="upper left",
        )
        ax.set_xlabel(
            "error train: %d/%d; errors novel regular: %d/%d; errors novel abnormal: %d/%d"
            % (
                n_error_train,
                X_train.shape[0],
                n_error_test,
                X_test.shape[0],
                n_error_outliers,
                X_outliers.shape[0],
            )
        )

        return fig

    def make_fig_2():
        fig = plt.figure(figsize=(9, 6))
        ax = fig.add_subplot(111)

        ax.set_title("Online One-Class SVM")
        ax.contourf(xx, yy, Z_sgd, levels=np.linspace(Z_sgd.min(), 0, 7), cmap=plt.cm.PuBu)
        ax.contour(xx, yy, Z_sgd, levels=[0], linewidths=2, colors="darkred")
        ax.contourf(xx, yy, Z_sgd, levels=[0, Z_sgd.max()], colors="palevioletred")

        s = 20
        b1 = ax.scatter(X_train[:, 0], X_train[:, 1], c="white", s=s, edgecolors="k")
        b2 = ax.scatter(X_test[:, 0], X_test[:, 1], c="blueviolet", s=s, edgecolors="k")
        c = ax.scatter(X_outliers[:, 0], X_outliers[:, 1], c="gold", s=s, edgecolors="k")
        ax.axis("tight")
        ax.set_xlim((-4.5, 4.5))
        ax.set_ylim((-4.5, 4.5))
        ax.legend(
            [mlines.Line2D([], [], color="darkred"), b1, b2, c],
            [
                "learned frontier",
                "training observations",
                "new regular observations",
                "new abnormal observations",
            ],
            loc="upper left",
        )
        ax.set_xlabel(
            "error train: %d/%d; errors novel regular: %d/%d; errors novel abnormal: %d/%d"
            % (
                n_error_train_sgd,
                X_train.shape[0],
                n_error_test_sgd,
                X_test.shape[0],
                n_error_outliers_sgd,
                X_outliers.shape[0],
            )
        )

        return fig

    # Build both figures and refresh the reproduction snippet shown below the plots.
    return make_fig_1(), make_fig_2(), make_example(nu, gamma)


def make_example(nu, gamma):
    return f"""
With the following code you can reproduce this example with the current values of the sliders and the same data in a notebook:

```python
import numpy as np
from sklearn.svm import OneClassSVM
from sklearn.linear_model import SGDOneClassSVM
from sklearn.kernel_approximation import Nystroem
from sklearn.pipeline import make_pipeline

rng = np.random.default_rng(42)

X = 0.3 * rng.random((500, 2))
X_train = np.r_[X + 2, X - 2]
X = 0.3 * rng.random((20, 2))
X_test = np.r_[X + 2, X - 2]
X_outliers = rng.uniform(low=-4, high=4, size=(20, 2))

clf = OneClassSVM(gamma={gamma}, kernel="rbf", nu={nu})
clf.fit(X_train)

pipe_sgd = make_pipeline(
    Nystroem(gamma={gamma}, random_state=42),
    SGDOneClassSVM(nu={nu}, shuffle=True, fit_intercept=True, random_state=42, tol=1e-4),
)
pipe_sgd.fit(X_train)

for name, model in [("One-Class SVM", clf), ("Online One-Class SVM", pipe_sgd)]:
    print(name)
    print("  errors train:", (model.predict(X_train) == -1).sum(), "/", X_train.shape[0])
    print("  errors novel regular:", (model.predict(X_test) == -1).sum(), "/", X_test.shape[0])
    print("  errors novel abnormal:", (model.predict(X_outliers) == 1).sum(), "/", X_outliers.shape[0])
```
"""
with gr.Blocks() as demo:
    with gr.Row():
        gr.Markdown(md_description)
    with gr.Row():
        slider_nu = gr.Slider(minimum=0.01, maximum=1, label="Nu", step=0.025, value=0.05)
        slider_gamma = gr.Slider(minimum=0.1, maximum=3, label="Gamma", step=0.1, value=2.0)
        button = gr.Button("Generate")
    with gr.Row():
        plot1 = gr.Plot(label="One-Class SVM")
    with gr.Row():
        plot2 = gr.Plot(label="Online One-Class SVM")
    with gr.Row():
        example = gr.Markdown(make_example(slider_nu.value, slider_gamma.value))

    slider_nu.change(fn=make_plots,
                     inputs=[slider_nu, slider_gamma],
                     outputs=[plot1, plot2, example])
    slider_gamma.change(fn=make_plots,
                        inputs=[slider_nu, slider_gamma],
                        outputs=[plot1, plot2, example])
    button.click(make_plots, inputs=[slider_nu, slider_gamma], outputs=[plot1, plot2, example])

demo.launch()