Spaces:
Runtime error
Runtime error
File size: 2,957 Bytes
571eb00 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
import numpy as np
import gradio as gr
import matplotlib.pyplot as plt
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score
# Render with the non-interactive Agg backend: figures are only ever drawn
# off-screen and handed to the web UI, so no GUI toolkit/display is needed.
plt.switch_backend("agg")
def true_fn(X):
    """
    Ground-truth function that the noisy samples are drawn around.

    :param X: a scalar or numpy array of input values.
    :returns: ``cos(1.5 * pi * X)``, evaluated element-wise.
    """
    phase = 1.5 * np.pi * X
    return np.cos(phase)
def modelData(n_samples: int, degree: int, cv: int) -> "plt.Figure":
    """
    Demonstrate underfitting vs overfitting with polynomial regression.

    Draws ``n_samples`` random points, evaluates ``true_fn`` on them with
    added Gaussian noise, fits a ``PolynomialFeatures`` + ``LinearRegression``
    pipeline of the requested degree, scores it with cross-validation and
    plots the fitted curve, the true function and the samples.

    :param n_samples: the number of samples required in the data.
    :param degree: the degree of the polynomial features.
    :param cv: the number of cross-validation folds (between 2 and
        ``n_samples``).
    :returns: the matplotlib figure
    :raises ValueError: if ``cv`` is smaller than 2 or larger than
        ``n_samples``.
    """
    # UI sliders may deliver whole numbers as floats (e.g. 100.0); numpy and
    # scikit-learn require real integers for these arguments.
    n_samples, degree, cv = int(n_samples), int(degree), int(cv)

    # Fail early with a readable message instead of an error raised from deep
    # inside scikit-learn after the model has already been fitted.
    if cv < 2:
        raise ValueError(f"cv must be at least 2 folds, got {cv}")
    if cv > n_samples:
        raise ValueError(
            f"cv ({cv}) cannot be greater than n_samples ({n_samples})"
        )

    X = np.sort(np.random.rand(n_samples))
    y = true_fn(X) + np.random.randn(n_samples) * .1
    # scikit-learn expects a 2-D (n_samples, n_features) design matrix.
    X_col = X[:, np.newaxis]

    poly_feats = PolynomialFeatures(degree=degree, include_bias=False)
    model = LinearRegression()
    pipeline = Pipeline([
        ("polynomial_feats", poly_feats),
        ("lr", model),
    ])
    pipeline.fit(X_col, y)

    # Scores are negated MSE values (higher is better), hence the sign flip
    # when the mean is reported in the title below.
    scores = cross_val_score(
        pipeline, X_col, y, scoring="neg_mean_squared_error", cv=cv
    )

    X_test = np.linspace(0, 1, 1000)

    fig, ax = plt.subplots(1, 1, figsize=(24, 15))
    ax.plot(X_test, pipeline.predict(X_test[:, np.newaxis]), "--", linewidth=2.5, color="#C73E1D", label="Model")
    ax.plot(X_test, true_fn(X_test), linewidth=2.5, color="#2E86AB", label="True function")
    ax.scatter(X, y, s=20, alpha=.75, edgecolors="#3B1F2B", label="Samples")
    ax.set_xlabel("x")
    ax.set_ylabel("y")
    ax.set_xticks(())
    ax.set_yticks(())
    ax.set_xlim((0, 1))
    ax.set_ylim((-2, 2))
    ax.legend(loc="best")
    ax.set_title(f"Degree : {degree} \n MSE: {-scores.mean():.2e}(+/- {scores.std():.2e})")

    # pyplot keeps every figure it creates in a global registry until it is
    # closed; in a long-running app that leaks one large figure per call.
    # Closing only unregisters it -- the Figure object is still renderable.
    plt.close(fig)
    return fig
# Build the Gradio UI: an introduction, three sliders feeding modelData, a
# plot area for its figure, and a button that triggers the computation.
with gr.Blocks() as demo:
    # The Markdown text is kept flush-left so the rendered content does not
    # depend on how the component strips leading indentation.
    gr.Markdown("""
# Underfitting vs Overfitting
This space is a re-implementation of the original scikit-learn docs [Underfitting vs Overfitting](https://scikit-learn.org/stable/auto_examples/model_selection/plot_underfitting_overfitting.html#sphx-glr-auto-examples-model-selection-plot-underfitting-overfitting-py)
In this space you can vary the sliders to get a picture of what an **underfitted** model looks like and what an **overfitted** model looks like.
If you want more details you can always head onto the scikit-learn doc mentioned above.
Have fun enjoying the tool 🤗
""")
    n_samples = gr.Slider(30, 10_000, label="n_samples", info="number of samples", step=1, value=100)
    degree = gr.Slider(1, 20, label="degree", info="the polynomial features degree", step=1, value=4)
    # k-fold cross-validation needs at least 2 folds; a minimum of 1 would let
    # the user pick a value that always makes cross_val_score raise.
    cv = gr.Slider(2, 10, label="cv", info="number of cross-validation to run", step=1, value=5)
    output = gr.Plot(label="Plot")
    btn = gr.Button("Show")
    # Slider values are passed positionally as (n_samples, degree, cv).
    btn.click(fn=modelData, inputs=[n_samples, degree, cv], outputs=output, api_name="overfitunderfit")
|