File size: 3,748 Bytes
f94b872
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
"""
Demo is based on https://scikit-learn.org/stable/auto_examples/feature_selection/plot_rfe_with_cross_validation.html
"""
from sklearn.svm import SVC
from sklearn.datasets import load_digits
from sklearn.feature_selection import RFE
import matplotlib.pyplot as plt

from sklearn.datasets import make_classification
import matplotlib.pyplot as plt
from sklearn.feature_selection import RFECV
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import LogisticRegression

import gradio as gr


def create_classification_data(informative, redundant):
    """Generate a synthetic 8-class dataset: 500 samples, 15 features.

    Args:
        informative: Number of informative features — features that actually
            carry class signal. Must be at least 3, because
            ``make_classification`` requires
            ``n_classes * n_clusters_per_class <= 2 ** n_informative``
            and ``n_classes`` is fixed at 8 here.
        redundant: Number of redundant features (random linear combinations
            of the informative ones).

    Returns:
        Tuple ``(X, y)`` of the feature matrix and class labels.

    Raises:
        ValueError: If ``informative`` is too small to separate 8 classes.
    """
    # Fail early with a clear message instead of a cryptic sklearn error
    # raised from deep inside make_classification.
    if 8 > 2 ** informative:
        raise ValueError(
            "informative must be at least 3 so that 8 classes can be "
            f"generated (got {informative})."
        )
    X, y = make_classification(
        n_samples=500,
        n_features=15,
        n_informative=informative,
        n_redundant=redundant,
        n_repeated=0,
        n_classes=8,
        n_clusters_per_class=1,
        class_sep=0.8,
        random_state=0,  # fixed seed so the demo is reproducible
    )
    return X, y


def run_rfecv(informative, redundant):
    """Run RFECV on a synthetic dataset and plot CV accuracy per feature count.

    Args:
        informative: Number of informative features in the generated data.
        redundant: Number of redundant features in the generated data.

    Returns:
        A matplotlib ``Figure`` with mean test accuracy (and std-dev error
        bars) versus the number of features selected. Gradio's ``Plot``
        output renders a Figure directly.
    """
    X, y = create_classification_data(informative, redundant)
    min_features_to_select = 1  # Minimum number of features to consider
    clf = LogisticRegression()
    cv = StratifiedKFold(5)
    rfecv = RFECV(
        estimator=clf,
        step=1,
        cv=cv,
        scoring="accuracy",
        min_features_to_select=min_features_to_select,
        n_jobs=2,
    )
    rfecv.fit(X, y)

    n_scores = len(rfecv.cv_results_["mean_test_score"])
    # Use the object-oriented API and return the Figure itself rather than
    # drawing through pyplot's global state: in a long-running Gradio app,
    # implicit pyplot figures accumulate across clicks and leak memory.
    fig, ax = plt.subplots()
    ax.set_xlabel("Number of features selected")
    ax.set_ylabel("Mean test accuracy")
    ax.errorbar(
        range(min_features_to_select, n_scores + min_features_to_select),
        rfecv.cv_results_["mean_test_score"],
        yerr=rfecv.cv_results_["std_test_score"],
    )
    ax.set_title("\n Recursive Feature Elimination \nwith correlated features")
    return fig


title = " Recursive feature elimination with cross-validation "

# Top-level Gradio UI: two sliders configure the synthetic dataset, a button
# triggers run_rfecv, and the resulting figure is shown in a Plot output.
with gr.Blocks(title=title) as demo:
    gr.Markdown(f"# {title}")
    gr.Markdown(
        " This example shows feature importance when the data has both redundant and useless features, using Recursive Feature Elimination <br>"
        " Dataset: A classification set of 500 data points and 15 features in total <br>"
        " **Features** <br> <br> **Informative features** : Number of features that actually carry the signal to differentiate between classes. <br>"
        " **Redundant features** : Number of features that are just random linear combinations of informative features.<br>"
    )

    gr.Markdown(
        " **Note** Total features - (informative features + redundant features) are useless features. <br>"
    )
    gr.Markdown(
        " A Logistic Regression classifier is used as the estimator to rank features. <br>"
    )

    gr.Markdown(
        " **[Demo is based on sklearn docs](https://scikit-learn.org/stable/auto_examples/feature_selection/plot_rfe_with_cross_validation.html)**"
    )
    with gr.Row():
        informative = gr.Slider(
            # make_classification needs n_classes (fixed at 8) <= 2**n_informative,
            # so fewer than 3 informative features would crash the demo.
            minimum=3,
            maximum=10,
            step=1,
            value=3,
            label="Number of Informative features in data",
        )
        redundant = gr.Slider(
            minimum=0,
            maximum=5,
            step=1,
            value=2,
            label="Number of Redundant features in data",
        )

    btn = gr.Button(value="Submit")
    btn.click(
        run_rfecv,
        inputs=[informative, redundant],
        outputs=gr.Plot(label="RFE with cross validation"),
    )

    gr.Markdown(
        " The plot shows the mean test accuracy for each number of features selected. <br>"
    )
    gr.Markdown(
        " The number of features selected with the highest test accuracy will be nearly equal to the number of informative features. <br>"
    )

demo.launch()