Spaces:
Runtime error
Runtime error
import numpy as np | |
import gradio as gr | |
import matplotlib.pyplot as plt | |
from sklearn.pipeline import Pipeline | |
from sklearn.preprocessing import PolynomialFeatures | |
from sklearn.linear_model import LinearRegression | |
from sklearn.model_selection import cross_val_score | |
plt.switch_backend("agg") | |
def true_fn(X):
    """
    Evaluate the noise-free target function ``cos(1.5 * pi * X)``.

    :param X: a scalar or array-like of input values.
    :returns: the cosine curve evaluated element-wise at ``X``.
    """
    # np.asarray lets plain Python lists be used as well as arrays and scalars
    return np.cos(1.5 * np.pi * np.asarray(X))
def modelData(n_samples: int, degree: int, cv: int) -> "plt.Figure":
    """
    Fit a polynomial regression to noisy cosine samples and plot the result.

    This function demonstrates the principle of overfitting vs underfitting by
    modeling a dataset using Linear Regression on polynomial features.

    :param n_samples: the number of samples required in the data.
    :param degree: the number of degrees for the polynomial features.
    :param cv: the number of cross-validation folds used to score the model
        (must be at least 2).
    :returns: the matplotlib figure
    :raises ValueError: if ``cv`` is smaller than 2.
    """
    # Callers such as UI sliders may pass whole numbers as floats; numpy's
    # sample count and the estimator parameters need real integers.
    n_samples, degree, cv = int(n_samples), int(degree), int(cv)

    # Fail early with a readable message instead of deep inside the
    # cross-validation splitter after the model has already been fitted.
    if cv < 2:
        raise ValueError(f"cv must be at least 2 for cross-validation, got {cv}")

    # Random inputs in [0, 1), sorted, with Gaussian noise added to the target
    X = np.sort(np.random.rand(n_samples))
    y = true_fn(X) + np.random.randn(n_samples) * .1

    fig, ax = plt.subplots(1, 1, figsize=(24, 15))

    poly_feats = PolynomialFeatures(degree=degree, include_bias=False)
    model = LinearRegression()
    pipeline = Pipeline([
        ("polynomial_feats", poly_feats),
        ("lr", model)
    ])

    # Estimators expect a 2-D feature matrix, hence the added axis
    features = X[:, np.newaxis]
    pipeline.fit(features, y)

    # Scores are negated MSE values (higher is better), one per fold
    scores = cross_val_score(
        pipeline, features, y, scoring="neg_mean_squared_error", cv=cv
    )

    # Dense grid over [0, 1] to draw smooth curves
    X_test = np.linspace(0, 1, 1000)
    ax.plot(X_test, pipeline.predict(X_test[:, np.newaxis]), "--", linewidth=2.5, color="#C73E1D", label="Model")
    ax.plot(X_test, true_fn(X_test), linewidth=2.5, color="#2E86AB", label="True function")
    ax.scatter(X, y, s=20, alpha=.75, edgecolors="#3B1F2B", label="Samples")
    ax.set_xlabel("x")
    ax.set_ylabel("y")
    ax.set_xticks(())
    ax.set_yticks(())
    ax.set_xlim((0, 1))
    ax.set_ylim((-2, 2))
    ax.legend(loc="best")
    # Flip the sign back so the title shows a positive mean squared error
    ax.set_title(f"Degree : {degree} \n MSE: {-scores.mean():.2e}(+/- {scores.std():.2e})")

    # Deregister the figure from pyplot's global registry so repeated calls do
    # not accumulate open figures; the Figure object itself stays usable.
    plt.close(fig)
    return fig
# Build the Gradio interface: an intro text, three sliders, a plot and a button.
with gr.Blocks() as demo:
    gr.Markdown("""
# Underfitting vs Overfitting
This space is a re-implementation of the original scikit-learn docs [Underfitting vs Overfitting](https://scikit-learn.org/stable/auto_examples/model_selection/plot_underfitting_overfitting.html#sphx-glr-auto-examples-model-selection-plot-underfitting-overfitting-py)
In this space you can vary the sliders to get a picture of what an **underfitted** model looks like and what an **overfitted** model looks like.
If you want more details you can always head onto the scikit-learn doc mentioned above.
Have fun enjoying the tool 🤗
""")
    # Input controls, passed to modelData in this order
    n_samples = gr.Slider(30, 10_000, label="n_samples", info="number of samples", step=1, value=100)
    degree = gr.Slider(1, 20, label="degree", info="the polynomial features degree", step=1, value=4)
    # Cross-validation needs at least two folds, so the slider starts at 2
    cv = gr.Slider(2, 10, label="cv", info="number of cross-validation to run", step=1, value=5)
    output = gr.Plot(label="Plot")
    btn = gr.Button("Show")
    # Clicking the button regenerates the data, refits the model and redraws the plot
    btn.click(fn=modelData, inputs=[n_samples, degree, cv], outputs=output, api_name="overfitunderfit")