File size: 3,748 Bytes
f94b872
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
"""
Demo is based on https://scikit-learn.org/stable/auto_examples/feature_selection/plot_rfe_with_cross_validation.html
"""
from sklearn.svm import SVC
from sklearn.datasets import load_digits
from sklearn.feature_selection import RFE
import matplotlib.pyplot as plt

from sklearn.datasets import make_classification
import matplotlib.pyplot as plt
from sklearn.feature_selection import RFECV
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import LogisticRegression

import gradio as gr


def create_classification_data(informative, redundant):
    """Generate a synthetic 8-class dataset: 500 samples, 15 features.

    Args:
        informative: Number of informative features — features that actually
            carry class signal. Must be at least 3, because
            ``make_classification`` requires
            ``n_classes * n_clusters_per_class <= 2 ** n_informative``
            and ``n_classes`` is fixed at 8 here.
        redundant: Number of redundant features (random linear combinations
            of the informative ones).

    Returns:
        Tuple ``(X, y)`` of the feature matrix and class labels.

    Raises:
        ValueError: If ``informative`` is too small to separate 8 classes.
    """
    # Fail early with a clear message instead of a cryptic sklearn error
    # raised from deep inside make_classification.
    if 8 > 2 ** informative:
        raise ValueError(
            "informative must be at least 3 so that 8 classes can be "
            f"generated (got {informative})."
        )
    X, y = make_classification(
        n_samples=500,
        n_features=15,
        n_informative=informative,
        n_redundant=redundant,
        n_repeated=0,
        n_classes=8,
        n_clusters_per_class=1,
        class_sep=0.8,
        random_state=0,  # fixed seed so the demo is reproducible
    )
    return X, y


def run_rfecv(informative, redundant):
    """Run RFECV on a synthetic dataset and plot CV accuracy per feature count.

    Args:
        informative: Number of informative features in the generated data.
        redundant: Number of redundant features in the generated data.

    Returns:
        A matplotlib ``Figure`` with mean test accuracy (and std-dev error
        bars) versus the number of features selected. Gradio's ``Plot``
        output renders a Figure directly.
    """
    X, y = create_classification_data(informative, redundant)
    min_features_to_select = 1  # Minimum number of features to consider
    clf = LogisticRegression()
    cv = StratifiedKFold(5)
    rfecv = RFECV(
        estimator=clf,
        step=1,
        cv=cv,
        scoring="accuracy",
        min_features_to_select=min_features_to_select,
        n_jobs=2,
    )
    rfecv.fit(X, y)

    n_scores = len(rfecv.cv_results_["mean_test_score"])
    # Use the object-oriented API and return the Figure itself rather than
    # drawing through pyplot's global state: in a long-running Gradio app,
    # implicit pyplot figures accumulate across clicks and leak memory.
    fig, ax = plt.subplots()
    ax.set_xlabel("Number of features selected")
    ax.set_ylabel("Mean test accuracy")
    ax.errorbar(
        range(min_features_to_select, n_scores + min_features_to_select),
        rfecv.cv_results_["mean_test_score"],
        yerr=rfecv.cv_results_["std_test_score"],
    )
    ax.set_title("\n Recursive Feature Elimination \nwith correlated features")
    return fig


title = " Recursive feature elimination with cross-validation "

# Top-level Gradio UI: two sliders configure the synthetic dataset, a button
# triggers run_rfecv, and the resulting figure is shown in a Plot output.
with gr.Blocks(title=title) as demo:
    gr.Markdown(f"# {title}")
    gr.Markdown(
        " This example shows feature importance when the data has both redundant and useless features, using Recursive Feature Elimination <br>"
        " Dataset: A classification set of 500 data points and 15 features in total <br>"
        " **Features** <br> <br> **Informative features** : Number of features that actually carry the signal to differentiate between classes. <br>"
        " **Redundant features** : Number of features that are just random linear combinations of informative features.<br>"
    )

    gr.Markdown(
        " **Note** Total features - (informative features + redundant features) are useless features. <br>"
    )
    gr.Markdown(
        " A Logistic Regression classifier is used as the estimator to rank features. <br>"
    )

    gr.Markdown(
        " **[Demo is based on sklearn docs](https://scikit-learn.org/stable/auto_examples/feature_selection/plot_rfe_with_cross_validation.html)**"
    )
    with gr.Row():
        informative = gr.Slider(
            # make_classification needs n_classes (fixed at 8) <= 2**n_informative,
            # so fewer than 3 informative features would crash the demo.
            minimum=3,
            maximum=10,
            step=1,
            value=3,
            label="Number of Informative features in data",
        )
        redundant = gr.Slider(
            minimum=0,
            maximum=5,
            step=1,
            value=2,
            label="Number of Redundant features in data",
        )

    btn = gr.Button(value="Submit")
    btn.click(
        run_rfecv,
        inputs=[informative, redundant],
        outputs=gr.Plot(label="RFE with cross validation"),
    )

    gr.Markdown(
        " The plot shows the mean test accuracy for each number of features selected. <br>"
    )
    gr.Markdown(
        " The number of features selected with the highest test accuracy will be nearly equal to the number of informative features. <br>"
    )

demo.launch()