# Hugging Face Spaces file header (author: tushifire, "Initial Commit", rev f94b872, 3.75 kB)
"""
Demo is based on https://scikit-learn.org/stable/auto_examples/feature_selection/plot_rfe_with_cross_validation.html
"""
from sklearn.svm import SVC
from sklearn.datasets import load_digits
from sklearn.feature_selection import RFE
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
import matplotlib.pyplot as plt
from sklearn.feature_selection import RFECV
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import LogisticRegression
import gradio as gr
def create_classification_data(informative, redundant):
    """Build a synthetic 8-class dataset with a controlled feature mix.

    The dataset has 500 samples and 15 features total; ``informative`` of
    them carry real class signal and ``redundant`` are linear combinations
    of the informative ones (the rest are pure noise). ``random_state=0``
    makes the data reproducible across calls.

    Args:
        informative: number of informative features.
        redundant: number of redundant (linearly dependent) features.

    Returns:
        Tuple ``(X, y)`` of feature matrix and class labels.
    """
    return make_classification(
        n_samples=500,
        n_features=15,
        n_informative=informative,
        n_redundant=redundant,
        n_repeated=0,
        n_classes=8,
        n_clusters_per_class=1,
        class_sep=0.8,
        random_state=0,
    )
def run_rfecv(informative, redundant):
    """Run recursive feature elimination with cross-validation and plot scores.

    Generates a synthetic dataset, fits an RFECV selector (logistic
    regression estimator, 5-fold stratified CV, accuracy scoring), and
    plots mean test accuracy (+/- std as error bars) against the number
    of features selected.

    Args:
        informative: number of informative features in the generated data.
        redundant: number of redundant features in the generated data.

    Returns:
        matplotlib.figure.Figure: the error-bar plot, suitable for
        rendering in a ``gr.Plot`` component.
    """
    X, y = create_classification_data(informative, redundant)
    min_features_to_select = 1  # Minimum number of features to consider
    clf = LogisticRegression()
    cv = StratifiedKFold(5)
    rfecv = RFECV(
        estimator=clf,
        step=1,
        cv=cv,
        scoring="accuracy",
        min_features_to_select=min_features_to_select,
        n_jobs=2,
    )
    rfecv.fit(X, y)
    n_scores = len(rfecv.cv_results_["mean_test_score"])
    # Draw on an explicit Figure/Axes instead of the global pyplot state so
    # repeated invocations (e.g. multiple button clicks) don't interfere.
    fig, ax = plt.subplots()
    ax.set_xlabel("Number of features selected")
    ax.set_ylabel("Mean test accuracy")
    ax.errorbar(
        range(min_features_to_select, n_scores + min_features_to_select),
        rfecv.cv_results_["mean_test_score"],
        yerr=rfecv.cv_results_["std_test_score"],
    )
    ax.set_title("\n Recursive Feature Elimination \nwith correlated features")
    # Return the Figure itself (not the plt module) — the concrete object
    # Gradio's Plot component expects, and robust to other plots being made.
    return fig
title = " Recursive feature elimination with cross-validation "
# Build the Gradio UI: explanatory markdown, two sliders controlling the
# synthetic dataset's feature mix, and a button that renders the RFECV plot.
with gr.Blocks(title=title) as demo:
    gr.Markdown(f"# {title}")
    gr.Markdown(
        " This example demonstrates feature importance when the data has both redundant and useless features, using recursive feature elimination. <br>"
        " Dataset: A classification set of 500 data points and 15 features in total <br>"
        " **Features** <br> <br> **Informative features** : Number of features that actually have the signal to differentiate between classes. <br>"
        " **Redundant features** : Number of features which are just random linear combinations of informative features.<br>"
    )
    gr.Markdown(
        " **Note** Total features - (informative features + redundant features) are useless features. <br>"
    )
    gr.Markdown(
        " A Logistic Regression classifier is used as the estimator to rank features. <br>"
    )
    gr.Markdown(
        " **[Demo is based on sklearn docs](https://scikit-learn.org/stable/auto_examples/feature_selection/plot_rfe_with_cross_validation.html)**"
    )
    with gr.Row():
        # Sliders feed create_classification_data via run_rfecv; ranges keep
        # informative + redundant within the 15 total features.
        informative = gr.Slider(
            minimum=0,
            maximum=10,
            step=1,
            value=3,
            label="Number of Informative features in data",
        )
        redundant = gr.Slider(
            minimum=0,
            maximum=5,
            step=1,
            value=2,
            label="Number of Redundant features in data",
        )
    btn = gr.Button(value="Submit")
    btn.click(
        run_rfecv,
        inputs=[informative, redundant],
        outputs=gr.Plot(label="RFE with cross validation"),
    )
    gr.Markdown(
        " The plot shows the mean test accuracy for each number of features selected. <br>"
    )
    gr.Markdown(
        " The number of features selected with the highest test accuracy will be approximately equal to the number of informative features. <br>"
    )
demo.launch()