Skip to content

Commit de814cf

Browse files
committed
Pushing the docs to dev/ for branch: main, commit 9e09e4d6f434649e1d56e7262680e02ecc1e6aa2
1 parent bd3b1b7 commit de814cf

File tree

1,302 files changed

+6321
-5739
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,302 files changed

+6321
-5739
lines changed

dev/.buildinfo

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
# Sphinx build info version 1
22
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
3-
config: ddf766342de275afbccede952d4a0601
3+
config: 28b7307cedb45eb961cbd8228e144f2a
44
tags: 645f666f9bcd5a90fca523b33c5a78b7
Binary file not shown.

dev/_downloads/53e76f761ef04e8d06fa5757554513b0/plot_select_from_model_diabetes.py

+51-3
Original file line numberDiff line numberDiff line change
@@ -122,9 +122,6 @@
122122
print(f"Done in {toc_bwd - tic_bwd:.3f}s")
123123

124124
# %%
125-
# Discussion
126-
# ----------
127-
#
128125
# Interestingly, forward and backward selection have selected the same set of
129126
# features. In general, this isn't the case and the two methods would lead to
130127
# different results.
@@ -145,3 +142,54 @@
145142
# attribute. The forward SFS is faster than the backward SFS because it only
146143
# needs to perform `n_features_to_select = 2` iterations, while the backward
147144
# SFS needs to perform `n_features - n_features_to_select = 8` iterations.
145+
#
146+
# Using negative tolerance values
147+
# -------------------------------
148+
#
149+
# :class:`~sklearn.feature_selection.SequentialFeatureSelector` can be used
150+
# to remove features present in the dataset and return a
151+
# smaller subset of the original features with `direction="backward"`
152+
# and a negative value of `tol`.
153+
#
154+
# We begin by loading the Breast Cancer dataset, consisting of 30 different
155+
# features and 569 samples.
156+
import numpy as np
157+
158+
from sklearn.datasets import load_breast_cancer
159+
160+
breast_cancer_data = load_breast_cancer()
161+
X, y = breast_cancer_data.data, breast_cancer_data.target
162+
feature_names = np.array(breast_cancer_data.feature_names)
163+
print(breast_cancer_data.DESCR)
164+
165+
# %%
166+
# We will make use of the :class:`~sklearn.linear_model.LogisticRegression`
167+
# estimator with :class:`~sklearn.feature_selection.SequentialFeatureSelector`
168+
# to perform the feature selection.
169+
from sklearn.linear_model import LogisticRegression
170+
from sklearn.metrics import roc_auc_score
171+
from sklearn.pipeline import make_pipeline
172+
from sklearn.preprocessing import StandardScaler
173+
174+
for tol in [-1e-2, -1e-3, -1e-4]:
175+
start = time()
176+
feature_selector = SequentialFeatureSelector(
177+
LogisticRegression(),
178+
n_features_to_select="auto",
179+
direction="backward",
180+
scoring="roc_auc",
181+
tol=tol,
182+
n_jobs=2,
183+
)
184+
model = make_pipeline(StandardScaler(), feature_selector, LogisticRegression())
185+
model.fit(X, y)
186+
end = time()
187+
print(f"\ntol: {tol}")
188+
print(f"Features selected: {feature_names[model[1].get_support()]}")
189+
print(f"ROC AUC score: {roc_auc_score(y, model.predict_proba(X)[:, 1]):.3f}")
190+
print(f"Done in {end - start:.3f}s")
191+
192+
# %%
193+
# We can see that the number of features selected tends to increase as negative
194+
# values of `tol` approach zero. The time taken for feature selection also
195+
# decreases as the values of `tol` come closer to zero.

0 commit comments

Comments
 (0)