# Hugging Face Spaces file header (author: tushifire, "Initial Commit", rev f94b872, 3.75 kB)
"""
Demo is based on https://scikit-learn.org/stable/auto_examples/feature_selection/plot_rfe_with_cross_validation.html
"""
from sklearn.svm import SVC
from sklearn.datasets import load_digits
from sklearn.feature_selection import RFE
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
import matplotlib.pyplot as plt
from sklearn.feature_selection import RFECV
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import LogisticRegression
import gradio as gr
def create_classification_data(informative, redundant):
    """Build a synthetic 8-class dataset with a controlled feature mix.

    The dataset has 500 samples and 15 features total; ``informative`` of
    them carry real class signal and ``redundant`` are linear combinations
    of the informative ones (the rest are pure noise). ``random_state=0``
    makes the data reproducible across calls.

    Args:
        informative: number of informative features.
        redundant: number of redundant (linearly dependent) features.

    Returns:
        Tuple ``(X, y)`` of feature matrix and class labels.
    """
    return make_classification(
        n_samples=500,
        n_features=15,
        n_informative=informative,
        n_redundant=redundant,
        n_repeated=0,
        n_classes=8,
        n_clusters_per_class=1,
        class_sep=0.8,
        random_state=0,
    )
def run_rfecv(informative, redundant):
    """Run recursive feature elimination with cross-validation and plot scores.

    Generates a synthetic dataset, fits an RFECV selector (logistic
    regression estimator, 5-fold stratified CV, accuracy scoring), and
    plots mean test accuracy (+/- std as error bars) against the number
    of features selected.

    Args:
        informative: number of informative features in the generated data.
        redundant: number of redundant features in the generated data.

    Returns:
        matplotlib.figure.Figure: the error-bar plot, suitable for
        rendering in a ``gr.Plot`` component.
    """
    X, y = create_classification_data(informative, redundant)
    min_features_to_select = 1  # Minimum number of features to consider
    clf = LogisticRegression()
    cv = StratifiedKFold(5)
    rfecv = RFECV(
        estimator=clf,
        step=1,
        cv=cv,
        scoring="accuracy",
        min_features_to_select=min_features_to_select,
        n_jobs=2,
    )
    rfecv.fit(X, y)
    n_scores = len(rfecv.cv_results_["mean_test_score"])
    # Draw on an explicit Figure/Axes instead of the global pyplot state so
    # repeated invocations (e.g. multiple button clicks) don't interfere.
    fig, ax = plt.subplots()
    ax.set_xlabel("Number of features selected")
    ax.set_ylabel("Mean test accuracy")
    ax.errorbar(
        range(min_features_to_select, n_scores + min_features_to_select),
        rfecv.cv_results_["mean_test_score"],
        yerr=rfecv.cv_results_["std_test_score"],
    )
    ax.set_title("\n Recursive Feature Elimination \nwith correlated features")
    # Return the Figure itself (not the plt module) — the concrete object
    # Gradio's Plot component expects, and robust to other plots being made.
    return fig
title = " Recursive feature elimination with cross-validation "
# Build the Gradio UI: explanatory markdown, two sliders controlling the
# synthetic dataset's feature mix, and a button that renders the RFECV plot.
with gr.Blocks(title=title) as demo:
    gr.Markdown(f"# {title}")
    gr.Markdown(
        " This example demonstrates feature importance when the data has both redundant and useless features, using recursive feature elimination. <br>"
        " Dataset: A classification set of 500 data points and 15 features in total <br>"
        " **Features** <br> <br> **Informative features** : Number of features that actually have the signal to differentiate between classes. <br>"
        " **Redundant features** : Number of features which are just random linear combinations of informative features.<br>"
    )
    gr.Markdown(
        " **Note** Total features - (informative features + redundant features) are useless features. <br>"
    )
    gr.Markdown(
        " A Logistic Regression classifier is used as the estimator to rank features. <br>"
    )
    gr.Markdown(
        " **[Demo is based on sklearn docs](https://scikit-learn.org/stable/auto_examples/feature_selection/plot_rfe_with_cross_validation.html)**"
    )
    with gr.Row():
        # Sliders feed create_classification_data via run_rfecv; ranges keep
        # informative + redundant within the 15 total features.
        informative = gr.Slider(
            minimum=0,
            maximum=10,
            step=1,
            value=3,
            label="Number of Informative features in data",
        )
        redundant = gr.Slider(
            minimum=0,
            maximum=5,
            step=1,
            value=2,
            label="Number of Redundant features in data",
        )
    btn = gr.Button(value="Submit")
    btn.click(
        run_rfecv,
        inputs=[informative, redundant],
        outputs=gr.Plot(label="RFE with cross validation"),
    )
    gr.Markdown(
        " The plot shows the mean test accuracy for each number of features selected. <br>"
    )
    gr.Markdown(
        " The number of features selected with the highest test accuracy will be approximately equal to the number of informative features. <br>"
    )
demo.launch()