import gradio as gr
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from sklearn.kernel_approximation import Nystroem
from sklearn.linear_model import SGDOneClassSVM
from sklearn.pipeline import make_pipeline
from sklearn.svm import OneClassSVM

font = {"weight": "normal", "size": 15}
matplotlib.rc("font", **font)

random_state = 42
rng = np.random.default_rng(random_state)

# Generate train data
X = 0.3 * rng.random((500, 2))
X_train = np.r_[X + 2, X - 2]
# Generate some regular novel observations
X = 0.3 * rng.random((20, 2))
X_test = np.r_[X + 2, X - 2]
# Generate some abnormal novel observations
X_outliers = rng.uniform(low=-4, high=4, size=(20, 2))

# Grid on which the decision functions are evaluated
xx, yy = np.meshgrid(np.linspace(-4.5, 4.5, 50), np.linspace(-4.5, 4.5, 50))

# Default OCSVM hyperparameters (exposed through the sliders below)
# nu = 0.05
# gamma = 2.0

md_description = """
# One-Class SVM versus One-Class SVM using Stochastic Gradient Descent

This example shows how to approximate the solution of
[sklearn.svm.OneClassSVM](https://scikit-learn.org/stable/modules/generated/sklearn.svm.OneClassSVM.html)
with an RBF kernel by combining a Nystroem kernel approximation with
[sklearn.linear_model.SGDOneClassSVM](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.SGDOneClassSVM.html),
which implements a linear One-Class SVM fitted with Stochastic Gradient Descent (SGD).
Use the sliders to change the `nu` and `gamma` hyperparameters and compare the
learned frontiers and error counts of the two models.
"""


def make_regression(nu, gamma):
    # Fit the exact (kernelized) One-Class SVM
    clf = OneClassSVM(gamma=gamma, kernel="rbf", nu=nu)
    clf.fit(X_train)
    y_pred_train = clf.predict(X_train)
    y_pred_test = clf.predict(X_test)
    y_pred_outliers = clf.predict(X_outliers)
    n_error_train = y_pred_train[y_pred_train == -1].size
    n_error_test = y_pred_test[y_pred_test == -1].size
    n_error_outliers = y_pred_outliers[y_pred_outliers == 1].size
    Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    # Fit the One-Class SVM using a kernel approximation and SGD
    transform = Nystroem(gamma=gamma, random_state=random_state)
    clf_sgd = SGDOneClassSVM(
        nu=nu, shuffle=True, fit_intercept=True, random_state=random_state, tol=1e-4
    )
    pipe_sgd = make_pipeline(transform, clf_sgd)
    pipe_sgd.fit(X_train)
    y_pred_train_sgd = pipe_sgd.predict(X_train)
    y_pred_test_sgd = pipe_sgd.predict(X_test)
    y_pred_outliers_sgd = pipe_sgd.predict(X_outliers)
    n_error_train_sgd = y_pred_train_sgd[y_pred_train_sgd == -1].size
    n_error_test_sgd = y_pred_test_sgd[y_pred_test_sgd == -1].size
    n_error_outliers_sgd = y_pred_outliers_sgd[y_pred_outliers_sgd == 1].size
    Z_sgd = pipe_sgd.decision_function(np.c_[xx.ravel(), yy.ravel()])
    Z_sgd = Z_sgd.reshape(xx.shape)

    def make_fig_1():
        # Plot the level sets of the exact One-Class SVM decision function
        fig = plt.figure(figsize=(9, 6))
        ax = fig.add_subplot(111)
        ax.set_title("One-Class SVM")
        ax.contourf(xx, yy, Z, levels=np.linspace(Z.min(), 0, 7), cmap=plt.cm.PuBu)
        a = ax.contour(xx, yy, Z, levels=[0], linewidths=2, colors="darkred")
        ax.contourf(xx, yy, Z, levels=[0, Z.max()], colors="palevioletred")
        s = 20
        b1 = ax.scatter(X_train[:, 0], X_train[:, 1], c="white", s=s, edgecolors="k")
        b2 = ax.scatter(X_test[:, 0], X_test[:, 1], c="blueviolet", s=s, edgecolors="k")
        c = ax.scatter(X_outliers[:, 0], X_outliers[:, 1], c="gold", s=s, edgecolors="k")
        ax.axis("tight")
        ax.set_xlim((-4.5, 4.5))
        ax.set_ylim((-4.5, 4.5))
        ax.legend(
            [a.collections[0], b1, b2, c],
            [
                "learned frontier",
                "training observations",
                "new regular observations",
                "new abnormal observations",
            ],
            loc="upper left",
        )
        ax.set_xlabel(
            "error train: %d/%d; errors novel regular: %d/%d; errors novel abnormal: %d/%d"
            % (
                n_error_train,
                X_train.shape[0],
                n_error_test,
                X_test.shape[0],
                n_error_outliers,
                X_outliers.shape[0],
            )
        )
        return fig

    def make_fig_2():
        # Plot the level sets of the SGD (online) One-Class SVM decision function
        fig = plt.figure(figsize=(9, 6))
        ax = fig.add_subplot(111)
        ax.set_title("Online One-Class SVM")
        ax.contourf(xx, yy, Z_sgd, levels=np.linspace(Z_sgd.min(), 0, 7), cmap=plt.cm.PuBu)
        a = ax.contour(xx, yy, Z_sgd, levels=[0], linewidths=2, colors="darkred")
        ax.contourf(xx, yy, Z_sgd, levels=[0, Z_sgd.max()], colors="palevioletred")
        s = 20
        b1 = ax.scatter(X_train[:, 0], X_train[:, 1], c="white", s=s, edgecolors="k")
        b2 = ax.scatter(X_test[:, 0], X_test[:, 1], c="blueviolet", s=s, edgecolors="k")
        c = ax.scatter(X_outliers[:, 0], X_outliers[:, 1], c="gold", s=s, edgecolors="k")
        ax.axis("tight")
        ax.set_xlim((-4.5, 4.5))
        ax.set_ylim((-4.5, 4.5))
        ax.legend(
            [a.collections[0], b1, b2, c],
            [
                "learned frontier",
                "training observations",
                "new regular observations",
                "new abnormal observations",
            ],
            loc="upper left",
        )
        ax.set_xlabel(
            "error train: %d/%d; errors novel regular: %d/%d; errors novel abnormal: %d/%d"
            % (
                n_error_train_sgd,
                X_train.shape[0],
                n_error_test_sgd,
                X_test.shape[0],
                n_error_outliers_sgd,
                X_outliers.shape[0],
            )
        )
        return fig

    return make_fig_1(), make_fig_2()


def make_example(nu, gamma):
    return f"""
With the following code you can reproduce this example with the current values of the sliders and the same data in a notebook:

```python
import numpy as np
from sklearn.kernel_approximation import Nystroem
from sklearn.linear_model import SGDOneClassSVM
from sklearn.pipeline import make_pipeline
from sklearn.svm import OneClassSVM

random_state = 42
rng = np.random.default_rng(random_state)

# Generate train data and regular / abnormal novel observations
X = 0.3 * rng.random((500, 2))
X_train = np.r_[X + 2, X - 2]
X = 0.3 * rng.random((20, 2))
X_test = np.r_[X + 2, X - 2]
X_outliers = rng.uniform(low=-4, high=4, size=(20, 2))

# Exact One-Class SVM with an RBF kernel
clf = OneClassSVM(gamma={gamma}, kernel="rbf", nu={nu})
clf.fit(X_train)

# One-Class SVM using a kernel approximation and SGD
pipe_sgd = make_pipeline(
    Nystroem(gamma={gamma}, random_state=random_state),
    SGDOneClassSVM(nu={nu}, shuffle=True, fit_intercept=True,
                   random_state=random_state, tol=1e-4),
)
pipe_sgd.fit(X_train)

for name, model in [("One-Class SVM", clf), ("Online One-Class SVM", pipe_sgd)]:
    print(
        name,
        "errors train:", (model.predict(X_train) == -1).sum(),
        "errors novel regular:", (model.predict(X_test) == -1).sum(),
        "errors novel abnormal:", (model.predict(X_outliers) == 1).sum(),
    )
```
"""
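# The SGD-based model is the "online" variant: unlike OneClassSVM, a
# SGDOneClassSVM can also be updated incrementally via partial_fit. The helper
# below is a minimal sketch of that (it is not used by the demo); the helper
# name, the batch size, and fitting the Nystroem feature map on the first
# batch only are illustrative assumptions.
def fit_sgd_ocsvm_online(X, nu=0.05, gamma=2.0, batch_size=200):
    transform = Nystroem(gamma=gamma, random_state=random_state)
    clf = SGDOneClassSVM(nu=nu, random_state=random_state)
    # Fit the kernel map once on the first batch, then stream the rest.
    clf.partial_fit(transform.fit_transform(X[:batch_size]))
    for start in range(batch_size, X.shape[0], batch_size):
        clf.partial_fit(transform.transform(X[start:start + batch_size]))
    return transform, clf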
with gr.Blocks() as demo:
    with gr.Row():
        gr.Markdown(md_description)
    with gr.Row():
        slider_nu = gr.Slider(minimum=0.01, maximum=1, label="Nu", step=0.025, value=0.05)
        slider_gamma = gr.Slider(minimum=0.1, maximum=3, label="Gamma", step=0.1, value=2.0)
        button = gr.Button("Generate")
    with gr.Row():
        plot1 = gr.Plot(label="One-Class SVM")
    with gr.Row():
        plot2 = gr.Plot(label="Online One-Class SVM")
    with gr.Row():
        example = gr.Markdown(make_example(slider_nu.value, slider_gamma.value))

    slider_nu.change(fn=make_regression, inputs=[slider_nu, slider_gamma], outputs=[plot1, plot2])
    slider_gamma.change(fn=make_regression, inputs=[slider_nu, slider_gamma], outputs=[plot1, plot2])
    button.click(make_regression, inputs=[slider_nu, slider_gamma], outputs=[plot1, plot2])

demo.launch()