import gradio as gr
import numpy as np
import matplotlib
import matplotlib.lines as mlines
import matplotlib.pyplot as plt
from sklearn.svm import OneClassSVM
from sklearn.linear_model import SGDOneClassSVM
from sklearn.kernel_approximation import Nystroem
from sklearn.pipeline import make_pipeline

font = {"weight": "normal", "size": 15}
matplotlib.rc("font", **font)

random_state = 42
rng = np.random.default_rng(random_state)
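
# Train data: two dense clusters of regular observations around (2, 2) and (-2, -2).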
X = 0.3 * rng.random((500, 2))
X_train = np.r_[X + 2, X - 2]
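
# New regular observations drawn from the same distribution as the train data.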
X = 0.3 * rng.random((20, 2))
X_test = np.r_[X + 2, X - 2]
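
# New abnormal observations drawn uniformly over the whole plane.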
X_outliers = rng.uniform(low=-4, high=4, size=(20, 2))
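
# Grid over which both decision functions are evaluated for the contour plots.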
xx, yy = np.meshgrid(np.linspace(-4.5, 4.5, 50), np.linspace(-4.5, 4.5, 50))

md_description = """
# One-Class SVM versus One-Class SVM using Stochastic Gradient Descent

This example shows how to approximate the solution of [sklearn.svm.OneClassSVM](https://scikit-learn.org/stable/modules/generated/sklearn.svm.OneClassSVM.html) in the case of an RBF kernel with [sklearn.linear_model.SGDOneClassSVM](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.SGDOneClassSVM.html), a Stochastic Gradient Descent (SGD) version of the One-Class SVM. A kernel approximation (Nystroem) is first used in order to apply SGDOneClassSVM, which implements a linear One-Class SVM using SGD.

Use the sliders to set `nu`, an upper bound on the fraction of training errors and a lower bound on the fraction of support vectors, and `gamma`, the RBF kernel coefficient: both models are refitted and their learned frontiers redrawn.
"""


def make_plots(nu, gamma):
    # Fit the exact kernelized One-Class SVM and count prediction errors on the
    # train set, the new regular observations and the new abnormal observations.
    clf = OneClassSVM(gamma=gamma, kernel="rbf", nu=nu)
    clf.fit(X_train)
    y_pred_train = clf.predict(X_train)
    y_pred_test = clf.predict(X_test)
    y_pred_outliers = clf.predict(X_outliers)
    n_error_train = y_pred_train[y_pred_train == -1].size
    n_error_test = y_pred_test[y_pred_test == -1].size
    n_error_outliers = y_pred_outliers[y_pred_outliers == 1].size

    Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
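
    # Approximate, online version: a Nystroem kernel approximation followed by
    # a linear One-Class SVM trained with stochastic gradient descent (SGD).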
    transform = Nystroem(gamma=gamma, random_state=random_state)
    clf_sgd = SGDOneClassSVM(
        nu=nu, shuffle=True, fit_intercept=True, random_state=random_state, tol=1e-4
    )
    pipe_sgd = make_pipeline(transform, clf_sgd)
    pipe_sgd.fit(X_train)
    y_pred_train_sgd = pipe_sgd.predict(X_train)
    y_pred_test_sgd = pipe_sgd.predict(X_test)
    y_pred_outliers_sgd = pipe_sgd.predict(X_outliers)
    n_error_train_sgd = y_pred_train_sgd[y_pred_train_sgd == -1].size
    n_error_test_sgd = y_pred_test_sgd[y_pred_test_sgd == -1].size
    n_error_outliers_sgd = y_pred_outliers_sgd[y_pred_outliers_sgd == 1].size

    Z_sgd = pipe_sgd.decision_function(np.c_[xx.ravel(), yy.ravel()])
    Z_sgd = Z_sgd.reshape(xx.shape)
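
    # Each figure shows the filled contours of one decision function, the zero
    # level set (the learned frontier) and the three groups of observations.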
    def make_fig_1():
        fig = plt.figure(figsize=(9, 6))
        ax = fig.add_subplot(111)

        ax.set_title("One-Class SVM")
        ax.contourf(xx, yy, Z, levels=np.linspace(Z.min(), 0, 7), cmap=plt.cm.PuBu)
        ax.contour(xx, yy, Z, levels=[0], linewidths=2, colors="darkred")
        ax.contourf(xx, yy, Z, levels=[0, Z.max()], colors="palevioletred")

        s = 20
        b1 = ax.scatter(X_train[:, 0], X_train[:, 1], c="white", s=s, edgecolors="k")
        b2 = ax.scatter(X_test[:, 0], X_test[:, 1], c="blueviolet", s=s, edgecolors="k")
        c = ax.scatter(X_outliers[:, 0], X_outliers[:, 1], c="gold", s=s, edgecolors="k")
        ax.axis("tight")
        ax.set_xlim((-4.5, 4.5))
        ax.set_ylim((-4.5, 4.5))
        # A proxy line stands in for the frontier contour in the legend.
        ax.legend(
            [mlines.Line2D([], [], color="darkred"), b1, b2, c],
            [
                "learned frontier",
                "training observations",
                "new regular observations",
                "new abnormal observations",
            ],
            loc="upper left",
        )
        ax.set_xlabel(
            "error train: %d/%d; errors novel regular: %d/%d; errors novel abnormal: %d/%d"
            % (
                n_error_train,
                X_train.shape[0],
                n_error_test,
                X_test.shape[0],
                n_error_outliers,
                X_outliers.shape[0],
            )
        )

        return fig

    def make_fig_2():
        fig = plt.figure(figsize=(9, 6))
        ax = fig.add_subplot(111)

        ax.set_title("Online One-Class SVM")
        ax.contourf(xx, yy, Z_sgd, levels=np.linspace(Z_sgd.min(), 0, 7), cmap=plt.cm.PuBu)
        ax.contour(xx, yy, Z_sgd, levels=[0], linewidths=2, colors="darkred")
        ax.contourf(xx, yy, Z_sgd, levels=[0, Z_sgd.max()], colors="palevioletred")

        s = 20
        b1 = ax.scatter(X_train[:, 0], X_train[:, 1], c="white", s=s, edgecolors="k")
        b2 = ax.scatter(X_test[:, 0], X_test[:, 1], c="blueviolet", s=s, edgecolors="k")
        c = ax.scatter(X_outliers[:, 0], X_outliers[:, 1], c="gold", s=s, edgecolors="k")
        ax.axis("tight")
        ax.set_xlim((-4.5, 4.5))
        ax.set_ylim((-4.5, 4.5))
        ax.legend(
            [mlines.Line2D([], [], color="darkred"), b1, b2, c],
            [
                "learned frontier",
                "training observations",
                "new regular observations",
                "new abnormal observations",
            ],
            loc="upper left",
        )
        ax.set_xlabel(
            "error train: %d/%d; errors novel regular: %d/%d; errors novel abnormal: %d/%d"
            % (
                n_error_train_sgd,
                X_train.shape[0],
                n_error_test_sgd,
                X_test.shape[0],
                n_error_outliers_sgd,
                X_outliers.shape[0],
            )
        )

        return fig

    # Build both figures and refresh the reproduction snippet shown below the plots.
    return make_fig_1(), make_fig_2(), make_example(nu, gamma)


def make_example(nu, gamma):
    return f"""
With the following code you can reproduce this example with the current values of the sliders and the same data in a notebook:

```python
import numpy as np
from sklearn.svm import OneClassSVM
from sklearn.linear_model import SGDOneClassSVM
from sklearn.kernel_approximation import Nystroem
from sklearn.pipeline import make_pipeline

rng = np.random.default_rng(42)

X = 0.3 * rng.random((500, 2))
X_train = np.r_[X + 2, X - 2]
X = 0.3 * rng.random((20, 2))
X_test = np.r_[X + 2, X - 2]
X_outliers = rng.uniform(low=-4, high=4, size=(20, 2))

clf = OneClassSVM(gamma={gamma}, kernel="rbf", nu={nu})
clf.fit(X_train)

pipe_sgd = make_pipeline(
    Nystroem(gamma={gamma}, random_state=42),
    SGDOneClassSVM(nu={nu}, shuffle=True, fit_intercept=True, random_state=42, tol=1e-4),
)
pipe_sgd.fit(X_train)

for name, model in [("One-Class SVM", clf), ("Online One-Class SVM", pipe_sgd)]:
    print(name)
    print("  errors train:", (model.predict(X_train) == -1).sum(), "/", X_train.shape[0])
    print("  errors novel regular:", (model.predict(X_test) == -1).sum(), "/", X_test.shape[0])
    print("  errors novel abnormal:", (model.predict(X_outliers) == 1).sum(), "/", X_outliers.shape[0])
```
"""
with gr.Blocks() as demo:
    with gr.Row():
        gr.Markdown(md_description)
    with gr.Row():
        slider_nu = gr.Slider(minimum=0.01, maximum=1, label="Nu", step=0.025, value=0.05)
        slider_gamma = gr.Slider(minimum=0.1, maximum=3, label="Gamma", step=0.1, value=2.0)
        button = gr.Button("Generate")
    with gr.Row():
        plot1 = gr.Plot(label="One-Class SVM")
    with gr.Row():
        plot2 = gr.Plot(label="Online One-Class SVM")
    with gr.Row():
        example = gr.Markdown(make_example(slider_nu.value, slider_gamma.value))

    slider_nu.change(fn=make_plots,
                     inputs=[slider_nu, slider_gamma],
                     outputs=[plot1, plot2, example])
    slider_gamma.change(fn=make_plots,
                        inputs=[slider_nu, slider_gamma],
                        outputs=[plot1, plot2, example])
    button.click(make_plots, inputs=[slider_nu, slider_gamma], outputs=[plot1, plot2, example])

demo.launch()