|
""" |
|
Demo is based on https://scikit-learn.org/stable/auto_examples/feature_selection/plot_rfe_with_cross_validation.html |
|
""" |
|
from sklearn.svm import SVC |
|
from sklearn.datasets import load_digits |
|
from sklearn.feature_selection import RFE |
|
import matplotlib.pyplot as plt |
|
|
|
from sklearn.datasets import make_classification |
|
import matplotlib.pyplot as plt |
|
from sklearn.feature_selection import RFECV |
|
from sklearn.model_selection import StratifiedKFold |
|
from sklearn.linear_model import LogisticRegression |
|
|
|
import gradio as gr |
|
|
|
|
|
def create_classification_data(informative, redundant):
    """Build a synthetic 8-class dataset with a controllable feature mix.

    Of the 15 total features, ``informative`` carry real class signal,
    ``redundant`` are linear combinations of the informative ones, and
    the remainder are pure noise.

    Parameters
    ----------
    informative : int
        Number of features that actually separate the classes.
    redundant : int
        Number of features derived as linear combinations of the
        informative features.

    Returns
    -------
    tuple of ndarray
        ``(X, y)`` — feature matrix of shape (500, 15) and label vector.
    """
    # Fixed random_state keeps the demo reproducible across slider changes.
    features, labels = make_classification(
        n_samples=500,
        n_features=15,
        n_informative=informative,
        n_redundant=redundant,
        n_repeated=0,
        n_classes=8,
        n_clusters_per_class=1,
        class_sep=0.8,
        random_state=0,
    )
    return features, labels
|
|
|
|
|
def run_rfecv(informative, redundant):
    """Run recursive feature elimination with CV and plot mean accuracy.

    Generates the synthetic dataset, fits an RFECV selector with a
    logistic-regression estimator under 5-fold stratified CV, and plots
    mean test accuracy (with std-dev error bars) against the number of
    features selected.

    Parameters
    ----------
    informative : int
        Number of informative features in the generated data.
    redundant : int
        Number of redundant features in the generated data.

    Returns
    -------
    matplotlib.figure.Figure
        The accuracy-vs-feature-count plot, suitable for ``gr.Plot``.
    """
    X, y = create_classification_data(informative, redundant)
    min_features_to_select = 1  # RFECV will not eliminate below this count
    clf = LogisticRegression()
    cv = StratifiedKFold(5)
    rfecv = RFECV(
        estimator=clf,
        step=1,
        cv=cv,
        scoring="accuracy",
        min_features_to_select=min_features_to_select,
        n_jobs=2,
    )
    rfecv.fit(X, y)

    print(f"Optimal number of features: {rfecv.n_features_}")

    n_scores = len(rfecv.cv_results_["mean_test_score"])
    # Use the object-oriented API and return the Figure itself. The
    # original created a figure but returned the global `plt` module,
    # leaving an unused local and relying on pyplot's mutable global
    # state (fragile when Gradio serves concurrent requests).
    fig, ax = plt.subplots()
    ax.set_xlabel("Number of features selected")
    ax.set_ylabel("Mean test accuracy")
    ax.errorbar(
        range(min_features_to_select, n_scores + min_features_to_select),
        rfecv.cv_results_["mean_test_score"],
        yerr=rfecv.cv_results_["std_test_score"],
    )
    ax.set_title("\n Recursive Feature Elimination \nwith correlated features")
    return fig
|
|
|
|
|
# Page title; also rendered as the Markdown header inside the app.
title = "Recursive feature elimination with cross-validation"

# Build the Gradio UI: explanatory text, two sliders controlling the
# synthetic dataset, and a plot produced by `run_rfecv` on submit.
with gr.Blocks(title=title) as demo:
    gr.Markdown(f"# {title}")
    gr.Markdown(
        " This example shows feature importance when the data contains both redundant and useless features, using recursive feature elimination. <br>"
        " Dataset: A classification set of 500 data points and 15 features in total <br>"
        " **Features** <br> <br> **Informative features** : Number of features that actually have the signal to differentiate between classes. <br>"
        " **Redundant features** : Number of features that are just random linear combinations of the informative features.<br>"
    )

    gr.Markdown(
        " **Note** Total features - (informative features + redundant features) are useless features. <br>"
    )
    gr.Markdown(
        " A Logistic Regression classifier is used as the estimator to rank features. <br>"
    )

    gr.Markdown(
        " **[Demo is based on sklearn docs](https://scikit-learn.org/stable/auto_examples/feature_selection/plot_rfe_with_cross_validation.html)**"
    )
    with gr.Row():
        informative = gr.Slider(
            minimum=0,
            maximum=10,
            step=1,
            value=3,
            label="Number of Informative features in data",
        )
        redundant = gr.Slider(
            minimum=0,
            maximum=5,
            step=1,
            value=2,
            label="Number of Redundant features in data",
        )

    # Wire the sliders to the RFECV run; the result renders in a Plot.
    btn = gr.Button(value="Submit")
    btn.click(
        run_rfecv,
        inputs=[informative, redundant],
        outputs=gr.Plot(label="RFE with cross validation"),
    )

    gr.Markdown(
        " The plot shows the mean test accuracy for each number of features selected. <br>"
    )
    gr.Markdown(
        " The number of features selected with the highest test accuracy will be approximately equal to the number of informative features. <br>"
    )

demo.launch()
|
|