122 | 122 | print(f"Done in {toc_bwd - tic_bwd:.3f}s")
123 | 123 |
124 | 124 | # %%
125 |     | -# Discussion
126 |     | -# ----------
127 |     | -#
128 | 125 | # Interestingly, forward and backward selection have selected the same set of
129 | 126 | # features. In general, this isn't the case and the two methods would lead to
130 | 127 | # different results.

145 | 142 | # attribute. The forward SFS is faster than the backward SFS because it only
146 | 143 | # needs to perform `n_features_to_select = 2` iterations, while the backward
147 | 144 | # SFS needs to perform `n_features - n_features_to_select = 8` iterations.
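    |     | +#
    |     | +# As a rough, illustrative sketch (an aside, not part of the original
    |     | +# example), we can count the candidate models each direction fits per CV
    |     | +# split: forward selection scores every remaining feature at each of its
    |     | +# 2 iterations, backward selection at each of its 8::
    |     | +#
    |     | +#     n_features, n_to_select = 10, 2
    |     | +#     forward_fits = sum(n_features - i for i in range(n_to_select))  # 10 + 9 = 19
    |     | +#     backward_fits = sum(n_features - i for i in range(n_features - n_to_select))  # 10 + ... + 3 = 52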
    | 145 | +#
    | 146 | +# Using negative tolerance values
    | 147 | +# -------------------------------
    | 148 | +#
    | 149 | +# :class:`~sklearn.feature_selection.SequentialFeatureSelector` can also be
    | 150 | +# used to remove features from the dataset and return a smaller subset of
    | 151 | +# the original features by setting `direction="backward"` together with a
    | 152 | +# negative value of `tol`.
    | 153 | +#
    | 154 | +# We begin by loading the Breast Cancer dataset, consisting of 30 different
    | 155 | +# features and 569 samples.
    | 156 | +import numpy as np
    | 157 | +
    | 158 | +from sklearn.datasets import load_breast_cancer
    | 159 | +
    | 160 | +breast_cancer_data = load_breast_cancer()
    | 161 | +X, y = breast_cancer_data.data, breast_cancer_data.target
    | 162 | +feature_names = np.array(breast_cancer_data.feature_names)
    | 163 | +print(breast_cancer_data.DESCR)
    | 164 | +
    | 165 | +# %%
    | 166 | +# We will use the :class:`~sklearn.linear_model.LogisticRegression`
    | 167 | +# estimator with :class:`~sklearn.feature_selection.SequentialFeatureSelector`
    | 168 | +# to perform the feature selection.
    | 169 | +from sklearn.linear_model import LogisticRegression
    | 170 | +from sklearn.metrics import roc_auc_score
    | 171 | +from sklearn.pipeline import make_pipeline
    | 172 | +from sklearn.preprocessing import StandardScaler
    | 173 | +
    | 174 | +for tol in [-1e-2, -1e-3, -1e-4]:
    | 175 | +    start = time()
    | 176 | +    feature_selector = SequentialFeatureSelector(
    | 177 | +        LogisticRegression(),
    | 178 | +        n_features_to_select="auto",
    | 179 | +        direction="backward",
    | 180 | +        scoring="roc_auc",
    | 181 | +        tol=tol,
    | 182 | +        n_jobs=2,
    | 183 | +    )
    | 184 | +    model = make_pipeline(StandardScaler(), feature_selector, LogisticRegression())
    | 185 | +    model.fit(X, y)
    | 186 | +    end = time()
    | 187 | +    print(f"\ntol: {tol}")
    | 188 | +    print(f"Features selected: {feature_names[model[1].get_support()]}")
    | 189 | +    print(f"ROC AUC score: {roc_auc_score(y, model.predict_proba(X)[:, 1]):.3f}")
    | 190 | +    print(f"Done in {end - start:.3f}s")
    | 191 | +
    | 192 | +# %%
    | 193 | +# We can see that the number of features selected tends to increase as
    | 194 | +# negative values of `tol` approach zero. The time taken for feature
    | 195 | +# selection also decreases as `tol` gets closer to zero.
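    | 196 | +
    | 197 | +# %%
    | 198 | +# As an illustrative extension (a sketch, not part of the original example):
    | 199 | +# the ROC AUC above is computed on the same data used to fit the model, so it
    | 200 | +# is an optimistic estimate. Cross-validating the full pipeline for a single
    | 201 | +# choice of `tol` gives a less biased picture.
    | 202 | +from sklearn.model_selection import cross_val_score
    | 203 | +
    | 204 | +pipeline = make_pipeline(
    | 205 | +    StandardScaler(),
    | 206 | +    SequentialFeatureSelector(
    | 207 | +        LogisticRegression(),
    | 208 | +        n_features_to_select="auto",
    | 209 | +        direction="backward",
    | 210 | +        scoring="roc_auc",
    | 211 | +        tol=-1e-3,
    | 212 | +        n_jobs=2,
    | 213 | +    ),
    | 214 | +    LogisticRegression(),
    | 215 | +)
    | 216 | +cv_scores = cross_val_score(pipeline, X, y, scoring="roc_auc", cv=5, n_jobs=2)
    | 217 | +print(f"Mean CV ROC AUC: {cv_scores.mean():.3f} +/- {cv_scores.std():.3f}")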