Spaces:

sklearn-docs
/

underfit_vs_overfit

Runtime error

App Files Files Community

underfit_vs_overfit / app.py

bvk1ng

Initial Commit

571eb00 about 2 years ago

raw

history blame contribute delete

2.96 kB

	import numpy as np
	import gradio as gr
	import matplotlib.pyplot as plt

	from sklearn.pipeline import Pipeline
	from sklearn.preprocessing import PolynomialFeatures
	from sklearn.linear_model import LinearRegression
	from sklearn.model_selection import cross_val_score

	# Select matplotlib's non-interactive Agg backend so figures are rendered
	# off-screen to a raster buffer and no GUI/display is required.
	plt.switch_backend("agg")

	def true_fn(X):
	    """
	    Evaluate the noise-free target function ``cos(1.5 * pi * X)``.

	    :param X: a scalar or any array-like (list, tuple, ndarray) of inputs.

	    :returns: the element-wise cosine values, with the same shape as ``X``.
	    """
	    # np.asarray lets plain Python sequences through; multiplying a list by
	    # a float would otherwise raise TypeError.
	    return np.cos(1.5 * np.pi * np.asarray(X))


	def modelData(n_samples: int, degree: int, cv: int) -> "plt.Figure":
	    """
	    Demonstrate underfitting vs overfitting by fitting a polynomial
	    Linear Regression to noisy samples of ``true_fn`` and plotting it.

	    :param n_samples: the number of samples required in the data.
	    :param degree: the number of degrees for the polynomial features.
	    :param cv: the number of cross-validation folds; must be at least 2
	        and no larger than ``n_samples``.

	    :returns: the matplotlib figure showing the fitted model, the true
	        function and the samples, titled with the cross-validated MSE.
	    :raises ValueError: if ``cv`` is smaller than 2 or larger than
	        ``n_samples``.
	    """
	    # Callers such as UI sliders may hand over integral values as floats;
	    # NumPy's sampling and PolynomialFeatures both need real integers.
	    n_samples, degree, cv = int(n_samples), int(degree), int(cv)

	    # Fail early with a readable message instead of an error raised deep
	    # inside the cross-validation splitter.
	    if cv < 2:
	        raise ValueError(f"cv must be at least 2 for cross-validation, got {cv}")
	    if cv > n_samples:
	        raise ValueError(
	            f"cv ({cv}) cannot be greater than n_samples ({n_samples})"
	        )

	    X = np.sort(np.random.rand(n_samples))
	    y = true_fn(X) + np.random.randn(n_samples) * .1
	    # scikit-learn expects a 2-D (n_samples, n_features) design matrix.
	    X_col = X[:, np.newaxis]

	    pipeline = Pipeline([
	        ("polynomial_feats", PolynomialFeatures(degree=degree, include_bias=False)),
	        ("lr", LinearRegression()),
	    ])

	    pipeline.fit(X_col, y)
	    # Scores are negated MSE values (higher is better), hence the sign flip
	    # when reporting the mean in the title below.
	    scores = cross_val_score(
	        pipeline, X_col, y, scoring="neg_mean_squared_error", cv=cv
	    )

	    X_test = np.linspace(0, 1, 1000)

	    # Create the figure only once the model work has succeeded, so a failing
	    # fit does not leave an orphaned figure behind.
	    fig, ax = plt.subplots(1, 1, figsize=(24, 15))

	    ax.plot(X_test, pipeline.predict(X_test[:, np.newaxis]), "--", linewidth=2.5, color="#C73E1D", label="Model")
	    ax.plot(X_test, true_fn(X_test), linewidth=2.5, color="#2E86AB", label="True function")
	    ax.scatter(X, y, s=20, alpha=.75, edgecolors="#3B1F2B", label="Samples")
	    ax.set_xlabel("x")
	    ax.set_ylabel("y")
	    ax.set_xticks(())
	    ax.set_yticks(())
	    ax.set_xlim((0, 1))
	    ax.set_ylim((-2, 2))
	    ax.legend(loc="best")
	    ax.set_title(f"Degree : {degree} \n MSE: {-scores.mean():.2e}(+/- {scores.std():.2e})")

	    # Unregister the figure from pyplot's global registry so repeated calls
	    # do not accumulate open figures; the returned Figure object itself
	    # remains usable for rendering.
	    plt.close(fig)

	    return fig


	# Build the Gradio UI: three sliders feed modelData and the resulting
	# matplotlib figure is shown in a Plot component.
	with gr.Blocks() as demo:

	    gr.Markdown("""
	    # Underfitting vs Overfitting

	    This space is a re-implementation of the original scikit-learn docs [Underfitting vs Overfitting](https://scikit-learn.org/stable/auto_examples/model_selection/plot_underfitting_overfitting.html#sphx-glr-auto-examples-model-selection-plot-underfitting-overfitting-py)
	    In this space you can vary the sliders to get a picture of what an underfitted model looks like and what an overfitted model looks like.
	    If you want more details you can always head onto the scikit-learn doc mentioned above.

	    Have fun enjoying the tool 🤗
	    """)

	    n_samples = gr.Slider(30, 10_000, label="n_samples", info="number of samples", step=1, value=100)
	    degree = gr.Slider(1, 20, label="degree", info="the polynomial features degree", step=1, value=4)
	    # Cross-validation needs at least 2 folds; a minimum of 1 would make
	    # every request with cv=1 fail inside cross_val_score.
	    cv = gr.Slider(2, 10, label="cv", info="number of cross-validation to run", step=1, value=5)

	    output = gr.Plot(label="Plot")

	    btn = gr.Button("Show")
	    btn.click(fn=modelData, inputs=[n_samples, degree, cv], outputs=output, api_name="overfitunderfit")

	# NOTE(review): no demo.launch() call is visible in this section of the
	# file — confirm the app is launched elsewhere.