import gradio as gr
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from sklearn.svm import OneClassSVM
from sklearn.linear_model import SGDOneClassSVM
from sklearn.kernel_approximation import Nystroem
from sklearn.pipeline import make_pipeline
font = {"weight": "normal", "size": 15}
matplotlib.rc("font", **font)
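# Fix the random seed so every run of the demo uses the same data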
random_state = 42
rng = np.random.default_rng(random_state)
# Generate train data
X = 0.3 * rng.random((500, 2))
X_train = np.r_[X + 2, X - 2]
# Generate some regular novel observations
X = 0.3 * rng.random((20, 2))
X_test = np.r_[X + 2, X - 2]
# Generate some abnormal novel observations
X_outliers = rng.uniform(low=-4, high=4, size=(20, 2))
xx, yy = np.meshgrid(np.linspace(-4.5, 4.5, 50), np.linspace(-4.5, 4.5, 50))
# OCSVM hyperparameters
# nu = 0.05
# gamma = 2.0
md_description = """
# One-Class SVM versus One-Class SVM using Stochastic Gradient Descent

This demo compares the kernelized [One-Class SVM](https://scikit-learn.org/stable/modules/svm.html) with an online One-Class SVM trained with stochastic gradient descent (`SGDOneClassSVM`) on top of a Nystroem kernel approximation. Both models learn a frontier around the training observations; points outside that frontier are flagged as abnormal.

Use the sliders to change `nu` (roughly, the tolerated fraction of training errors) and `gamma` (the RBF kernel coefficient) and compare the learned frontiers and error counts of the two models.
"""
def make_regression(nu, gamma):
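# Fit the kernelized One-Class SVM with an RBF kernel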
clf = OneClassSVM(gamma=gamma, kernel="rbf", nu=nu)
clf.fit(X_train)
y_pred_train = clf.predict(X_train)
y_pred_test = clf.predict(X_test)
y_pred_outliers = clf.predict(X_outliers)
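# Count errors: training/regular points predicted as outliers (-1) and outliers predicted as inliers (+1)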
n_error_train = y_pred_train[y_pred_train == -1].size
n_error_test = y_pred_test[y_pred_test == -1].size
n_error_outliers = y_pred_outliers[y_pred_outliers == 1].size
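# Evaluate the decision function on the grid to draw the learned frontier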
Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
# Fit the One-Class SVM using a kernel approximation and SGD
transform = Nystroem(gamma=gamma, random_state=random_state)
clf_sgd = SGDOneClassSVM(
nu=nu, shuffle=True, fit_intercept=True, random_state=random_state, tol=1e-4
)
pipe_sgd = make_pipeline(transform, clf_sgd)
pipe_sgd.fit(X_train)
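# Predict with the online model and count its errors the same way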
y_pred_train_sgd = pipe_sgd.predict(X_train)
y_pred_test_sgd = pipe_sgd.predict(X_test)
y_pred_outliers_sgd = pipe_sgd.predict(X_outliers)
n_error_train_sgd = y_pred_train_sgd[y_pred_train_sgd == -1].size
n_error_test_sgd = y_pred_test_sgd[y_pred_test_sgd == -1].size
n_error_outliers_sgd = y_pred_outliers_sgd[y_pred_outliers_sgd == 1].size
Z_sgd = pipe_sgd.decision_function(np.c_[xx.ravel(), yy.ravel()])
Z_sgd = Z_sgd.reshape(xx.shape)
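# First figure: the kernelized One-Class SVM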
def make_fig_1():
# plot the level sets of the decision function
fig = plt.figure(figsize=(9, 6))
ax = fig.add_subplot(111)
ax.set_title("One Class SVM")
ax.contourf(xx, yy, Z, levels=np.linspace(Z.min(), 0, 7), cmap=plt.cm.PuBu)
a = ax.contour(xx, yy, Z, levels=[0], linewidths=2, colors="darkred")
ax.contourf(xx, yy, Z, levels=[0, Z.max()], colors="palevioletred")
s = 20
b1 = ax.scatter(X_train[:, 0], X_train[:, 1], c="white", s=s, edgecolors="k")
b2 = ax.scatter(X_test[:, 0], X_test[:, 1], c="blueviolet", s=s, edgecolors="k")
c = ax.scatter(X_outliers[:, 0], X_outliers[:, 1], c="gold", s=s, edgecolors="k")
ax.axis("tight")
ax.set_xlim((-4.5, 4.5))
ax.set_ylim((-4.5, 4.5))
ax.legend(
[a.collections[0], b1, b2, c],
[
"learned frontier",
"training observations",
"new regular observations",
"new abnormal observations",
],
loc="upper left",
)
ax.set_xlabel(
"error train: %d/%d; errors novel regular: %d/%d; errors novel abnormal: %d/%d"
% (
n_error_train,
X_train.shape[0],
n_error_test,
X_test.shape[0],
n_error_outliers,
X_outliers.shape[0],
)
)
return fig
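# Second figure: the online One-Class SVM trained with SGD on Nystroem features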
def make_fig_2():
fig = plt.figure(figsize=(9, 6))
ax = fig.add_subplot(111)
ax.set_title("Online One-Class SVM2")
ax.contourf(xx, yy, Z_sgd, levels=np.linspace(Z_sgd.min(), 0, 7), cmap=plt.cm.PuBu)
a = ax.contour(xx, yy, Z_sgd, levels=[0], linewidths=2, colors="darkred")
ax.contourf(xx, yy, Z_sgd, levels=[0, Z_sgd.max()], colors="palevioletred")
s = 20
b1 = ax.scatter(X_train[:, 0], X_train[:, 1], c="white", s=s, edgecolors="k")
b2 = ax.scatter(X_test[:, 0], X_test[:, 1], c="blueviolet", s=s, edgecolors="k")
c = ax.scatter(X_outliers[:, 0], X_outliers[:, 1], c="gold", s=s, edgecolors="k")
ax.axis("tight")
ax.set_xlim((-4.5, 4.5))
ax.set_ylim((-4.5, 4.5))
ax.legend(
[a.collections[0], b1, b2, c],
[
"learned frontier",
"training observations",
"new regular observations",
"new abnormal observations",
],
loc="upper left",
)
ax.set_xlabel(
"error train: %d/%d; errors novel regular: %d/%d; errors novel abnormal: %d/%d"
% (
n_error_train_sgd,
X_train.shape[0],
n_error_test_sgd,
X_test.shape[0],
n_error_outliers_sgd,
X_outliers.shape[0],
)
)
return fig
return make_fig_1(), make_fig_2()
def make_example(nu, gamma):
return f"""
With the following code you can reproduce this example with the current values of the sliders and the same data in a notebook:

```python
import numpy as np
from sklearn.svm import OneClassSVM
from sklearn.linear_model import SGDOneClassSVM
from sklearn.kernel_approximation import Nystroem
from sklearn.pipeline import make_pipeline

rng = np.random.default_rng(42)

# Generate train data
X = 0.3 * rng.random((500, 2))
X_train = np.r_[X + 2, X - 2]
# Generate some regular novel observations
X = 0.3 * rng.random((20, 2))
X_test = np.r_[X + 2, X - 2]
# Generate some abnormal novel observations
X_outliers = rng.uniform(low=-4, high=4, size=(20, 2))

# Kernelized One-Class SVM
clf = OneClassSVM(kernel="rbf", nu={nu}, gamma={gamma})
clf.fit(X_train)

# Online One-Class SVM with a Nystroem kernel approximation
pipe_sgd = make_pipeline(
    Nystroem(gamma={gamma}, random_state=42),
    SGDOneClassSVM(nu={nu}, shuffle=True, fit_intercept=True, random_state=42, tol=1e-4),
)
pipe_sgd.fit(X_train)

print("errors train:", (clf.predict(X_train) == -1).sum(), "/", X_train.shape[0])
print("errors novel regular:", (clf.predict(X_test) == -1).sum(), "/", X_test.shape[0])
print("errors novel abnormal:", (clf.predict(X_outliers) == 1).sum(), "/", X_outliers.shape[0])
```
"""
with gr.Blocks() as demo:
with gr.Row():
gr.Markdown(md_description)
with gr.Row():
# with gr.Column():
slider_nu = gr.Slider(minimum=0.01, maximum=1, label='Nu', step=0.025, value=0.05)
slider_gamma = gr.Slider(minimum=0.1, maximum=3, label='Gamma', step=0.1, value=2.0)
button = gr.Button("Generate")
with gr.Row():
plot1 = gr.Plot(label='One-Class SVM')
with gr.Row():
plot2 = gr.Plot(label='Online One-Class SVM (SGD)')
with gr.Row():
example = gr.Markdown(make_example(slider_nu.value, slider_gamma.value))
slider_nu.change(fn=make_regression,
inputs=[slider_nu, slider_gamma],
outputs=[plot1, plot2])
slider_gamma.change(fn=make_regression,
inputs=[slider_nu, slider_gamma],
outputs=[plot1, plot2])
button.click(make_regression, inputs=[slider_nu, slider_gamma], outputs=[plot1, plot2])
demo.launch()