tushifire committed on
Commit
f94b872
·
1 Parent(s): c1e274f

Initial Commit

Browse files
Files changed (2) hide show
  1. app.py +113 -0
  2. requirements.txt +1 -0
app.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Demo is based on https://scikit-learn.org/stable/auto_examples/feature_selection/plot_rfe_with_cross_validation.html
3
+ """
4
+ from sklearn.svm import SVC
5
+ from sklearn.datasets import load_digits
6
+ from sklearn.feature_selection import RFE
7
+ import matplotlib.pyplot as plt
8
+
9
+ from sklearn.datasets import make_classification
10
+ import matplotlib.pyplot as plt
11
+ from sklearn.feature_selection import RFECV
12
+ from sklearn.model_selection import StratifiedKFold
13
+ from sklearn.linear_model import LogisticRegression
14
+
15
+ import gradio as gr
16
+
17
+
18
def create_classification_data(informative, redundant):
    """Generate the synthetic classification dataset used by the demo.

    The dataset always has 500 samples, 15 features and 8 classes (one
    cluster per class, ``class_sep=0.8``) and is generated with
    ``random_state=0``. Only the split between informative and redundant
    features is configurable.

    Args:
        informative: Value forwarded as ``n_informative``.
        redundant: Value forwarded as ``n_redundant``.

    Returns:
        The ``(X, y)`` pair produced by ``make_classification``.
    """
    # Fixed settings shared by every run of the demo.
    dataset_options = {
        "n_samples": 500,
        "n_features": 15,
        "n_repeated": 0,
        "n_classes": 8,
        "n_clusters_per_class": 1,
        "class_sep": 0.8,
        "random_state": 0,
    }
    features, labels = make_classification(
        n_informative=informative,
        n_redundant=redundant,
        **dataset_options,
    )
    return features, labels
31
+
32
+
33
def run_rfecv(informative, redundant):
    """Run RFECV on the synthetic dataset and plot accuracy per feature count.

    A ``LogisticRegression`` estimator is wrapped in ``RFECV`` with a
    5-fold ``StratifiedKFold`` and ``"accuracy"`` scoring, removing one
    feature per step. The optimal number of features is printed to stdout.

    Args:
        informative: Number of informative features, forwarded to
            ``create_classification_data``.
        redundant: Number of redundant features, forwarded to
            ``create_classification_data``.

    Returns:
        A ``matplotlib.figure.Figure`` plotting ``mean_test_score`` against
        the number of features selected, with ``std_test_score`` as error
        bars.
    """
    # Local import keeps this block self-contained; matplotlib is already a
    # dependency of this file.
    from matplotlib.figure import Figure

    X, y = create_classification_data(informative, redundant)
    min_features_to_select = 1  # Minimum number of features to consider
    rfecv = RFECV(
        estimator=LogisticRegression(),
        step=1,
        cv=StratifiedKFold(5),
        scoring="accuracy",
        min_features_to_select=min_features_to_select,
        n_jobs=2,
    )
    rfecv.fit(X, y)

    print(f"Optimal number of features: {rfecv.n_features_}")
    mean_scores = rfecv.cv_results_["mean_test_score"]
    n_scores = len(mean_scores)

    # Build a standalone Figure instead of going through pyplot: figures made
    # with plt.figure() stay registered in pyplot until closed, so every
    # button click would leak one, and pyplot's "current figure" is global
    # state shared by concurrent requests.
    fig = Figure()
    ax = fig.subplots()
    ax.set_xlabel("Number of features selected")
    ax.set_ylabel("Mean test accuracy")
    ax.errorbar(
        range(min_features_to_select, n_scores + min_features_to_select),
        mean_scores,
        yerr=rfecv.cv_results_["std_test_score"],
    )
    ax.set_title("\n Recursive Feature Elimination \nwith correlated features")
    return fig
60
+
61
+
62
# Page title, reused for the browser tab and the top-level heading.
title = " Recursive feature elimination with cross-validation "

# Gradio UI: explanatory text, two sliders controlling the synthetic dataset,
# and a button that runs RFECV and renders the resulting plot.
with gr.Blocks(title=title) as demo:
    gr.Markdown(f"# {title}")
    gr.Markdown(
        " This example the feature importnace when features have both redundant and useless features using Recursive feature elimination <br>"
        " Dataset: A classification set of 500 data points and 15 features in total <br>"
        " **Features** <br> <br> **Informative features** : Number of features that actually having the signal to differentiate between classes. <br>"
        " **Redundant features** : Number of feature which are just some random linear combinations of informative features.<br>"
    )

    gr.Markdown(
        " **Note** Total features - (informative features + redundant features) are Useless features. <br>"
    )
    gr.Markdown(
        " Logistic Regression classifier is used as estimator to rank features. <br>"
    )

    gr.Markdown(
        " **[Demo is based on sklearn docs](https://scikit-learn.org/stable/auto_examples/feature_selection/plot_rfe_with_cross_validation.html)**"
    )
    with gr.Row():
        # make_classification requires
        # n_classes * n_clusters_per_class <= 2 ** n_informative. The dataset
        # is generated with 8 classes and 1 cluster per class, so fewer than
        # 3 informative features raises a ValueError; the slider therefore
        # starts at 3.
        informative = gr.Slider(
            minimum=3,
            maximum=10,
            step=1,
            value=3,
            label="Number of Informative features in data",
        )
        # informative (max 10) + redundant (max 5) never exceeds the 15 total
        # features of the dataset.
        redundant = gr.Slider(
            minimum=0,
            maximum=5,
            step=1,
            value=2,
            label="Number of Redundant features in data",
        )

    btn = gr.Button(value="Submit")
    # Clicking the button recomputes RFECV with the current slider values and
    # shows the returned plot.
    btn.click(
        run_rfecv,
        inputs=[informative, redundant],
        outputs=gr.Plot(label="RFE with cross validation"),
    )

    gr.Markdown(
        " Plot demonstrate mean test accuracy for the corresponding feature selected . <br>"
    )
    gr.Markdown(
        " Number of features selected with highest test accuracy will be nearly ~ equal to informative features . <br>"
    )

demo.launch()
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ scikit-learn==1.2.1