import numpy as np
import gradio as gr
import matplotlib.pyplot as plt
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score
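
# Use a non-interactive matplotlib backend; Gradio renders the returned figure itself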
plt.switch_backend("agg")
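
# Ground-truth function that the noisy samples are drawn from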
def true_fn(X):
    return np.cos(1.5 * np.pi * X)
def modelData(n_samples: int, degree: int, cv: int) -> "plt.Figure":
"""
This function demonstrate the principle of overfitting vs underfitting by
modeling a dataset using Linear Regression.
:param n_samples: the number of samples required in the data.
:param degree: the number of degrees for the polynomial features.
:returns: the matplotlib figures
"""
    X = np.sort(np.random.rand(n_samples))
    y = true_fn(X) + np.random.randn(n_samples) * .1
    fig, ax = plt.subplots(1, 1, figsize=(24, 15))
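    # Expand x into polynomial features of the chosen degree, then fit ordinary least squares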
    poly_feats = PolynomialFeatures(degree=degree, include_bias=False)
    model = LinearRegression()
    pipeline = Pipeline([
        ("polynomial_feats", poly_feats),
        ("lr", model)
    ])
    pipeline.fit(X[:, np.newaxis], y)
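    # Estimate generalization error with k-fold cross-validation
    # (the "neg_mean_squared_error" scorer returns the negated MSE)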
    scores = cross_val_score(
        pipeline, X[:, np.newaxis], y, scoring="neg_mean_squared_error", cv=cv
    )
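    # Evaluate the fitted model on a dense grid over [0, 1] for plotting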
    X_test = np.linspace(0, 1, 1000)
    ax.plot(X_test, pipeline.predict(X_test[:, np.newaxis]), "--", linewidth=2.5, color="#C73E1D", label="Model")
    ax.plot(X_test, true_fn(X_test), linewidth=2.5, color="#2E86AB", label="True function")
    ax.scatter(X, y, s=20, alpha=.75, edgecolors="#3B1F2B", label="Samples")
    ax.set_xlabel("x")
    ax.set_ylabel("y")
    ax.set_xticks(())
    ax.set_yticks(())
    ax.set_xlim((0, 1))
    ax.set_ylim((-2, 2))
    ax.legend(loc="best")
    ax.set_title(f"Degree: {degree}\nMSE: {-scores.mean():.2e} (+/- {scores.std():.2e})")
    return fig
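
# Build the Gradio UI: sliders for the data and model settings, plus a plot output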
with gr.Blocks() as demo:
gr.Markdown("""
# Underfitting vs Overfitting
This space is a re-implementation of the original scikit-learn docs [Underfitting vs Overfitting](https://scikit-learn.org/stable/auto_examples/model_selection/plot_underfitting_overfitting.html#sphx-glr-auto-examples-model-selection-plot-underfitting-overfitting-py)
In this space you can vary the sliders to get a picture of what an **underfitted** model looks like and what an **overfitted** model looks like.
If you want more details you can always head onto the scikit-learn doc mentioned above.
Have fun enjoying the tool 🤗
""")
    n_samples = gr.Slider(30, 10_000, label="n_samples", info="number of samples", step=1, value=100)
    degree = gr.Slider(1, 20, label="degree", info="degree of the polynomial features", step=1, value=4)
    # cross_val_score requires at least 2 folds, so the slider starts at 2
    cv = gr.Slider(2, 10, label="cv", info="number of cross-validation folds", step=1, value=5)
    output = gr.Plot(label="Plot")
    btn = gr.Button("Show")
    btn.click(fn=modelData, inputs=[n_samples, degree, cv], outputs=output, api_name="overfitunderfit")
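
# Launch the interface; without this call the script defines the UI but never serves it
demo.launch()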