# Spaces:

# sklearn-docs
# /

# sklearn-ocsvm-vs-sgdocsvm

# Sleeping

# File size: 9,279 Bytes

# 1e5728b

import gradio as gr
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from sklearn.svm import OneClassSVM
from sklearn.linear_model import SGDOneClassSVM
from sklearn.kernel_approximation import Nystroem
from sklearn.pipeline import make_pipeline

# Global Matplotlib font settings applied to every figure drawn by this demo.
font = dict(weight="normal", size=15)
matplotlib.rc("font", **font)

# A single seeded generator drives all synthetic data. The three draws below
# must stay in this order so the generated samples remain reproducible.
random_state = 42
rng = np.random.default_rng(random_state)

# Training data: 500 points in [0, 0.3)^2, duplicated into two clusters
# shifted by +2 and -2 along both axes.
X = rng.random((500, 2)) * 0.3
X_train = np.concatenate([X + 2, X - 2], axis=0)

# Regular novel observations: 20 fresh points placed in the same two clusters.
X = rng.random((20, 2)) * 0.3
X_test = np.concatenate([X + 2, X - 2], axis=0)

# Abnormal novel observations: 20 points drawn uniformly from [-4, 4)^2.
X_outliers = rng.uniform(-4, 4, (20, 2))

# 50 x 50 evaluation grid over [-4.5, 4.5]^2 used to draw decision functions.
xx, yy = np.meshgrid(np.linspace(-4.5, 4.5, 50), np.linspace(-4.5, 4.5, 50))

# OCSVM hyperparameters are not fixed here; for reference, the values
# previously hard-coded at this point were nu = 0.05 and gamma = 2.0.

# Markdown introduction rendered at the top of the demo page. It describes
# what this app actually shows: a kernelized One-Class SVM next to its
# SGD-based approximation.
md_description = """
# One-Class SVM versus One-Class SVM using Stochastic Gradient Descent

This demo shows how to approximate the solution of [OneClassSVM](https://scikit-learn.org/stable/modules/generated/sklearn.svm.OneClassSVM.html) with an RBF kernel using [SGDOneClassSVM](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.SGDOneClassSVM.html), a Stochastic Gradient Descent (SGD) version of the One-Class SVM. Because SGDOneClassSVM is a linear model, a [Nystroem](https://scikit-learn.org/stable/modules/generated/sklearn.kernel_approximation.Nystroem.html) kernel approximation is applied first.

Use the sliders to change `nu` and `gamma`, then compare the learned frontier and the number of errors of both models on the same toy dataset. The goal is not to illustrate the computational benefits of the approximation, but to show that both approaches give similar results.
"""


def _count_labelled(predictions, label):
    """Return how many entries of ``predictions`` are equal to ``label``."""
    return int((predictions == label).sum())


def _plot_decision_function(title, Z, n_error_train, n_error_test, n_error_outliers):
    """Draw one decision-function figure over the module-level grid and data.

    Parameters
    ----------
    title : str
        Axes title.
    Z : ndarray
        Decision-function values with the same shape as ``xx``.
    n_error_train, n_error_test, n_error_outliers : int
        Error counts reported in the x-axis label.

    Returns
    -------
    The Matplotlib figure that was drawn.
    """
    # Local import: a proxy artist for the legend entry of the frontier.
    from matplotlib.lines import Line2D

    fig = plt.figure(figsize=(9, 6))
    ax = fig.add_subplot(111)
    ax.set_title(title)

    z_min, z_max = Z.min(), Z.max()
    # contourf requires strictly increasing levels, so each filled region is
    # only drawn when the decision function takes values on that side of zero.
    if z_min < 0:
        ax.contourf(xx, yy, Z, levels=np.linspace(z_min, 0, 7), cmap=plt.cm.PuBu)
    ax.contour(xx, yy, Z, levels=[0], linewidths=2, colors="darkred")
    if z_max > 0:
        ax.contourf(xx, yy, Z, levels=[0, z_max], colors="palevioletred")

    s = 20
    b1 = ax.scatter(X_train[:, 0], X_train[:, 1], c="white", s=s, edgecolors="k")
    b2 = ax.scatter(X_test[:, 0], X_test[:, 1], c="blueviolet", s=s, edgecolors="k")
    c = ax.scatter(X_outliers[:, 0], X_outliers[:, 1], c="gold", s=s, edgecolors="k")
    ax.axis("tight")
    ax.set_xlim((-4.5, 4.5))
    ax.set_ylim((-4.5, 4.5))

    # ``ContourSet.collections`` is deprecated in recent Matplotlib releases,
    # so the frontier legend entry is a proxy line with the same styling.
    frontier = Line2D([], [], color="darkred", linewidth=2)
    ax.legend(
        [frontier, b1, b2, c],
        [
            "learned frontier",
            "training observations",
            "new regular observations",
            "new abnormal observations",
        ],
        loc="upper left",
    )
    ax.set_xlabel(
        "error train: %d/%d; errors novel regular: %d/%d; errors novel abnormal: %d/%d"
        % (
            n_error_train,
            X_train.shape[0],
            n_error_test,
            X_test.shape[0],
            n_error_outliers,
            X_outliers.shape[0],
        )
    )

    # Unregister the figure from pyplot so repeated callbacks do not pile up
    # open figures; the returned Figure object can still be rendered.
    plt.close(fig)
    return fig


def _fit_and_plot(title, model):
    """Fit ``model`` on ``X_train`` and plot its decision function.

    Training and regular novel points predicted as -1 count as errors, as do
    abnormal novel points predicted as +1.
    """
    model.fit(X_train)
    grid_points = np.c_[xx.ravel(), yy.ravel()]
    Z = model.decision_function(grid_points).reshape(xx.shape)
    return _plot_decision_function(
        title,
        Z,
        n_error_train=_count_labelled(model.predict(X_train), -1),
        n_error_test=_count_labelled(model.predict(X_test), -1),
        n_error_outliers=_count_labelled(model.predict(X_outliers), 1),
    )


def make_regression(nu: float, gamma: float):
    """Fit both One-Class SVM variants and return one figure for each.

    Despite its name, this function performs no regression: it fits a
    kernelized ``OneClassSVM`` and a ``Nystroem`` + ``SGDOneClassSVM``
    pipeline on the module-level ``X_train`` and plots their decision
    functions.

    Parameters
    ----------
    nu : float
        ``nu`` parameter passed to both ``OneClassSVM`` and ``SGDOneClassSVM``.
    gamma : float
        RBF ``gamma`` passed to ``OneClassSVM`` and to ``Nystroem``.

    Returns
    -------
    tuple
        ``(figure for OneClassSVM, figure for the SGD pipeline)``.
    """
    clf = OneClassSVM(gamma=gamma, kernel="rbf", nu=nu)

    # One-Class SVM using a kernel approximation and SGD.
    transform = Nystroem(gamma=gamma, random_state=random_state)
    clf_sgd = SGDOneClassSVM(
        nu=nu, shuffle=True, fit_intercept=True, random_state=random_state, tol=1e-4
    )
    pipe_sgd = make_pipeline(transform, clf_sgd)

    # The first figure must be the kernelized model; previously the SGD
    # figure was returned twice.
    fig_ocsvm = _fit_and_plot("One Class SVM", clf)
    fig_sgd = _fit_and_plot("Online One-Class SVM", pipe_sgd)
    return fig_ocsvm, fig_sgd

# def make_figure():
#     fig = plt.figure(figsize=(9, 6))

#     plt.title("One Class SVM")
#     plt.contourf(xx, yy, Z, levels=np.linspace(Z.min(), 0, 7), cmap=plt.cm.PuBu)
#     a = plt.contour(xx, yy, Z, levels=[0], linewidths=2, colors="darkred")
#     plt.contourf(xx, yy, Z, levels=[0, Z.max()], colors="palevioletred")

#     s = 20
#     b1 = plt.scatter(X_train[:, 0], X_train[:, 1], c="white", s=s, edgecolors="k")
#     b2 = plt.scatter(X_test[:, 0], X_test[:, 1], c="blueviolet", s=s, edgecolors="k")
#     c = plt.scatter(X_outliers[:, 0], X_outliers[:, 1], c="gold", s=s, edgecolors="k")
#     plt.axis("tight")
#     plt.xlim((-4.5, 4.5))
#     plt.ylim((-4.5, 4.5))
#     plt.legend(
#         [a.collections[0], b1, b2, c],
#         [
#             "learned frontier",
#             "training observations",
#             "new regular observations",
#             "new abnormal observations",
#         ],
#         loc="upper left",
#     )
#     plt.xlabel(
#         "error train: %d/%d; errors novel regular: %d/%d; errors novel abnormal: %d/%d"
#         % (
#             n_error_train,
#             X_train.shape[0],
#             n_error_test,
#             X_test.shape[0],
#             n_error_outliers,
#             X_outliers.shape[0],
#         )
#     )
#     plt.show()
    

def make_example(model_1_depth, model_2_depth):
    """Return Markdown containing a snippet that reproduces this demo.

    The parameter names are kept for backward compatibility. The call in
    this file passes the ``nu`` slider value first and the ``gamma`` slider
    value second, and the generated snippet uses them as such.

    Parameters
    ----------
    model_1_depth : float
        Value interpolated as ``nu`` in the generated snippet.
    model_2_depth : float
        Value interpolated as ``gamma`` in the generated snippet.

    Returns
    -------
    str
        Markdown text with a fenced Python code block.
    """
    nu, gamma = model_1_depth, model_2_depth
    return f"""
    With the following code you can reproduce this example with the current values of the sliders and the same data in a notebook:

    ```python
    import numpy as np
    import matplotlib.pyplot as plt
    from sklearn.svm import OneClassSVM
    from sklearn.linear_model import SGDOneClassSVM
    from sklearn.kernel_approximation import Nystroem
    from sklearn.pipeline import make_pipeline

    random_state = 42
    rng = np.random.default_rng(random_state)

    # Generate train data
    X = 0.3 * rng.random((500, 2))
    X_train = np.r_[X + 2, X - 2]
    # Generate some regular novel observations
    X = 0.3 * rng.random((20, 2))
    X_test = np.r_[X + 2, X - 2]
    # Generate some abnormal novel observations
    X_outliers = rng.uniform(low=-4, high=4, size=(20, 2))

    xx, yy = np.meshgrid(np.linspace(-4.5, 4.5, 50), np.linspace(-4.5, 4.5, 50))
    grid = np.c_[xx.ravel(), yy.ravel()]

    # Hyperparameters
    nu = {nu}
    gamma = {gamma}

    # Kernelized One-Class SVM
    clf = OneClassSVM(gamma=gamma, kernel="rbf", nu=nu)
    clf.fit(X_train)

    # One-Class SVM using a kernel approximation and SGD
    pipe_sgd = make_pipeline(
        Nystroem(gamma=gamma, random_state=random_state),
        SGDOneClassSVM(
            nu=nu, shuffle=True, fit_intercept=True, random_state=random_state, tol=1e-4
        ),
    )
    pipe_sgd.fit(X_train)

    fig, axes = plt.subplots(1, 2, figsize=(18, 6))
    models = [("One Class SVM", clf), ("Online One-Class SVM", pipe_sgd)]
    for ax, (title, model) in zip(axes, models):
        n_error_train = (model.predict(X_train) == -1).sum()
        n_error_test = (model.predict(X_test) == -1).sum()
        n_error_outliers = (model.predict(X_outliers) == 1).sum()
        Z = model.decision_function(grid).reshape(xx.shape)

        ax.set_title(title)
        ax.contourf(xx, yy, Z, levels=np.linspace(Z.min(), 0, 7), cmap=plt.cm.PuBu)
        ax.contour(xx, yy, Z, levels=[0], linewidths=2, colors="darkred")
        ax.contourf(xx, yy, Z, levels=[0, Z.max()], colors="palevioletred")
        ax.plot([], [], color="darkred", linewidth=2, label="learned frontier")
        ax.scatter(X_train[:, 0], X_train[:, 1], c="white", s=20, edgecolors="k", label="training observations")
        ax.scatter(X_test[:, 0], X_test[:, 1], c="blueviolet", s=20, edgecolors="k", label="new regular observations")
        ax.scatter(X_outliers[:, 0], X_outliers[:, 1], c="gold", s=20, edgecolors="k", label="new abnormal observations")
        ax.set_xlim((-4.5, 4.5))
        ax.set_ylim((-4.5, 4.5))
        ax.legend(loc="upper left")
        ax.set_xlabel(
            "error train: %d/%d; errors novel regular: %d/%d; errors novel abnormal: %d/%d"
            % (
                n_error_train,
                X_train.shape[0],
                n_error_test,
                X_test.shape[0],
                n_error_outliers,
                X_outliers.shape[0],
            )
        )
    plt.show()
    ```
    """

# Gradio UI: description, two hyperparameter sliders with a button, the two
# plots, and a Markdown snippet for reproducing the example.
with gr.Blocks() as demo:
    with gr.Row():
        gr.Markdown(md_description)
    with gr.Row():
        slider_nu = gr.Slider(minimum=0.01, maximum=1, label='Nu', step=0.025, value=0.05)
        slider_gamma = gr.Slider(minimum=0.1, maximum=3, label='Gamma', step=0.1, value=2.0)
        button = gr.Button("Generate")
    with gr.Row():
        plot1 = gr.Plot(label='Output')
    with gr.Row():
        plot2 = gr.Plot(label='Output')
    with gr.Row():
        example = gr.Markdown(make_example(slider_nu.value, slider_gamma.value))

    hyperparameters = [slider_nu, slider_gamma]
    plots = [plot1, plot2]
    for slider in hyperparameters:
        # Redraw both plots whenever either slider moves.
        slider.change(fn=make_regression, inputs=hyperparameters, outputs=plots)
        # Keep the snippet in sync with the sliders; it was previously
        # rendered once with the initial values and never refreshed.
        slider.change(fn=make_example, inputs=hyperparameters, outputs=[example])
    button.click(make_regression, inputs=hyperparameters, outputs=plots)

demo.launch()