Commit 726e913

Pushing the docs to dev/ for branch: main, commit b98dc797c480b1b9495f918e201d45ee07f29feb
1 parent 4978024 commit 726e913

1,580 files changed: +6501 / -6417 lines changed

6 binary files changed (contents not shown).

dev/_downloads/067cd5d39b097d2c49dd98f563dac13a/plot_iterative_imputer_variants_comparison.ipynb (+1 / -1)
@@ -15,7 +15,7 @@
  },
  "outputs": [],
  "source": [
- "# Authors: The scikit-learn developers\n# SPDX-License-Identifier: BSD-3-Clause\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport pandas as pd\n\nfrom sklearn.datasets import fetch_california_housing\nfrom sklearn.ensemble import RandomForestRegressor\n\n# To use this experimental feature, we need to explicitly ask for it:\nfrom sklearn.experimental import enable_iterative_imputer # noqa\nfrom sklearn.impute import IterativeImputer, SimpleImputer\nfrom sklearn.kernel_approximation import Nystroem\nfrom sklearn.linear_model import BayesianRidge, Ridge\nfrom sklearn.model_selection import cross_val_score\nfrom sklearn.neighbors import KNeighborsRegressor\nfrom sklearn.pipeline import make_pipeline\n\nN_SPLITS = 5\n\nrng = np.random.RandomState(0)\n\nX_full, y_full = fetch_california_housing(return_X_y=True)\n# ~2k samples is enough for the purpose of the example.\n# Remove the following two lines for a slower run with different error bars.\nX_full = X_full[::10]\ny_full = y_full[::10]\nn_samples, n_features = X_full.shape\n\n# Estimate the score on the entire dataset, with no missing values\nbr_estimator = BayesianRidge()\nscore_full_data = pd.DataFrame(\n cross_val_score(\n br_estimator, X_full, y_full, scoring=\"neg_mean_squared_error\", cv=N_SPLITS\n ),\n columns=[\"Full Data\"],\n)\n\n# Add a single missing value to each row\nX_missing = X_full.copy()\ny_missing = y_full\nmissing_samples = np.arange(n_samples)\nmissing_features = rng.choice(n_features, n_samples, replace=True)\nX_missing[missing_samples, missing_features] = np.nan\n\n# Estimate the score after imputation (mean and median strategies)\nscore_simple_imputer = pd.DataFrame()\nfor strategy in (\"mean\", \"median\"):\n estimator = make_pipeline(\n SimpleImputer(missing_values=np.nan, strategy=strategy), br_estimator\n )\n score_simple_imputer[strategy] = cross_val_score(\n estimator, X_missing, y_missing, scoring=\"neg_mean_squared_error\", cv=N_SPLITS\n )\n\n# Estimate the score after iterative imputation of the missing values\n# with different estimators\nestimators = [\n BayesianRidge(),\n RandomForestRegressor(\n # We tuned the hyperparameters of the RandomForestRegressor to get a good\n # enough predictive performance for a restricted execution time.\n n_estimators=4,\n max_depth=10,\n bootstrap=True,\n max_samples=0.5,\n n_jobs=2,\n random_state=0,\n ),\n make_pipeline(\n Nystroem(kernel=\"polynomial\", degree=2, random_state=0), Ridge(alpha=1e3)\n ),\n KNeighborsRegressor(n_neighbors=15),\n]\nscore_iterative_imputer = pd.DataFrame()\n# iterative imputer is sensible to the tolerance and\n# dependent on the estimator used internally.\n# we tuned the tolerance to keep this example run with limited computational\n# resources while not changing the results too much compared to keeping the\n# stricter default value for the tolerance parameter.\ntolerances = (1e-3, 1e-1, 1e-1, 1e-2)\nfor impute_estimator, tol in zip(estimators, tolerances):\n estimator = make_pipeline(\n IterativeImputer(\n random_state=0, estimator=impute_estimator, max_iter=25, tol=tol\n ),\n br_estimator,\n )\n score_iterative_imputer[impute_estimator.__class__.__name__] = cross_val_score(\n estimator, X_missing, y_missing, scoring=\"neg_mean_squared_error\", cv=N_SPLITS\n )\n\nscores = pd.concat(\n [score_full_data, score_simple_imputer, score_iterative_imputer],\n keys=[\"Original\", \"SimpleImputer\", \"IterativeImputer\"],\n axis=1,\n)\n\n# plot california housing results\nfig, ax = plt.subplots(figsize=(13, 6))\nmeans = scores.mean()\nerrors = scores.std()\nmeans.plot.barh(xerr=errors, ax=ax)\nax.set_title(\"California Housing Regression with Different Imputation Methods\")\nax.set_xlabel(\"MSE (smaller is better)\")\nax.set_yticks(np.arange(means.shape[0]))\nax.set_yticklabels([\" w/ \".join(label) for label in means.index.tolist()])\nplt.tight_layout(pad=1)\nplt.show()"
+ "# Authors: The scikit-learn developers\n# SPDX-License-Identifier: BSD-3-Clause\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport pandas as pd\n\nfrom sklearn.datasets import fetch_california_housing\nfrom sklearn.ensemble import RandomForestRegressor\n\n# To use this experimental feature, we need to explicitly ask for it:\nfrom sklearn.experimental import enable_iterative_imputer # noqa: F401\nfrom sklearn.impute import IterativeImputer, SimpleImputer\nfrom sklearn.kernel_approximation import Nystroem\nfrom sklearn.linear_model import BayesianRidge, Ridge\nfrom sklearn.model_selection import cross_val_score\nfrom sklearn.neighbors import KNeighborsRegressor\nfrom sklearn.pipeline import make_pipeline\n\nN_SPLITS = 5\n\nrng = np.random.RandomState(0)\n\nX_full, y_full = fetch_california_housing(return_X_y=True)\n# ~2k samples is enough for the purpose of the example.\n# Remove the following two lines for a slower run with different error bars.\nX_full = X_full[::10]\ny_full = y_full[::10]\nn_samples, n_features = X_full.shape\n\n# Estimate the score on the entire dataset, with no missing values\nbr_estimator = BayesianRidge()\nscore_full_data = pd.DataFrame(\n cross_val_score(\n br_estimator, X_full, y_full, scoring=\"neg_mean_squared_error\", cv=N_SPLITS\n ),\n columns=[\"Full Data\"],\n)\n\n# Add a single missing value to each row\nX_missing = X_full.copy()\ny_missing = y_full\nmissing_samples = np.arange(n_samples)\nmissing_features = rng.choice(n_features, n_samples, replace=True)\nX_missing[missing_samples, missing_features] = np.nan\n\n# Estimate the score after imputation (mean and median strategies)\nscore_simple_imputer = pd.DataFrame()\nfor strategy in (\"mean\", \"median\"):\n estimator = make_pipeline(\n SimpleImputer(missing_values=np.nan, strategy=strategy), br_estimator\n )\n score_simple_imputer[strategy] = cross_val_score(\n estimator, X_missing, y_missing, scoring=\"neg_mean_squared_error\", cv=N_SPLITS\n )\n\n# Estimate the score after iterative imputation of the missing values\n# with different estimators\nestimators = [\n BayesianRidge(),\n RandomForestRegressor(\n # We tuned the hyperparameters of the RandomForestRegressor to get a good\n # enough predictive performance for a restricted execution time.\n n_estimators=4,\n max_depth=10,\n bootstrap=True,\n max_samples=0.5,\n n_jobs=2,\n random_state=0,\n ),\n make_pipeline(\n Nystroem(kernel=\"polynomial\", degree=2, random_state=0), Ridge(alpha=1e3)\n ),\n KNeighborsRegressor(n_neighbors=15),\n]\nscore_iterative_imputer = pd.DataFrame()\n# iterative imputer is sensible to the tolerance and\n# dependent on the estimator used internally.\n# we tuned the tolerance to keep this example run with limited computational\n# resources while not changing the results too much compared to keeping the\n# stricter default value for the tolerance parameter.\ntolerances = (1e-3, 1e-1, 1e-1, 1e-2)\nfor impute_estimator, tol in zip(estimators, tolerances):\n estimator = make_pipeline(\n IterativeImputer(\n random_state=0, estimator=impute_estimator, max_iter=25, tol=tol\n ),\n br_estimator,\n )\n score_iterative_imputer[impute_estimator.__class__.__name__] = cross_val_score(\n estimator, X_missing, y_missing, scoring=\"neg_mean_squared_error\", cv=N_SPLITS\n )\n\nscores = pd.concat(\n [score_full_data, score_simple_imputer, score_iterative_imputer],\n keys=[\"Original\", \"SimpleImputer\", \"IterativeImputer\"],\n axis=1,\n)\n\n# plot california housing results\nfig, ax = plt.subplots(figsize=(13, 6))\nmeans = scores.mean()\nerrors = scores.std()\nmeans.plot.barh(xerr=errors, ax=ax)\nax.set_title(\"California Housing Regression with Different Imputation Methods\")\nax.set_xlabel(\"MSE (smaller is better)\")\nax.set_yticks(np.arange(means.shape[0]))\nax.set_yticklabels([\" w/ \".join(label) for label in means.index.tolist()])\nplt.tight_layout(pad=1)\nplt.show()"
  ]
 }
],
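The only change in this cell's source is the lint directive on the experimental import: the bare `# noqa` becomes the scoped `# noqa: F401` ("imported but unused"). A minimal, self-contained sketch of the enabling-import pattern it refers to; the toy array and default imputer settings are illustrative assumptions, not taken from the example above.

import numpy as np

# IterativeImputer is still experimental, so the enabling module must be
# imported first; the imported name is never used directly, hence F401.
from sklearn.experimental import enable_iterative_imputer  # noqa: F401
from sklearn.impute import IterativeImputer

X = np.array([[1.0, 2.0], [3.0, np.nan], [np.nan, 6.0]])
imputer = IterativeImputer(random_state=0)
print(imputer.fit_transform(X))  # NaN entries replaced by model-based estimates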
1 binary file changed (contents not shown).

dev/_downloads/0785ea6d45bde062e5beedda88131215/plot_release_highlights_1_3_0.ipynb (+5 / -5)
@@ -29,7 +29,7 @@
  },
  "outputs": [],
  "source": [
- "import numpy as np\nfrom sklearn.cluster import HDBSCAN\nfrom sklearn.datasets import load_digits\nfrom sklearn.metrics import v_measure_score\n\nX, true_labels = load_digits(return_X_y=True)\nprint(f\"number of digits: {len(np.unique(true_labels))}\")\n\nhdbscan = HDBSCAN(min_cluster_size=15).fit(X)\nnon_noisy_labels = hdbscan.labels_[hdbscan.labels_ != -1]\nprint(f\"number of clusters found: {len(np.unique(non_noisy_labels))}\")\n\nprint(v_measure_score(true_labels[hdbscan.labels_ != -1], non_noisy_labels))"
+ "import numpy as np\n\nfrom sklearn.cluster import HDBSCAN\nfrom sklearn.datasets import load_digits\nfrom sklearn.metrics import v_measure_score\n\nX, true_labels = load_digits(return_X_y=True)\nprint(f\"number of digits: {len(np.unique(true_labels))}\")\n\nhdbscan = HDBSCAN(min_cluster_size=15).fit(X)\nnon_noisy_labels = hdbscan.labels_[hdbscan.labels_ != -1]\nprint(f\"number of clusters found: {len(np.unique(non_noisy_labels))}\")\n\nprint(v_measure_score(true_labels[hdbscan.labels_ != -1], non_noisy_labels))"
  ]
 },
 {
@@ -47,7 +47,7 @@
  },
  "outputs": [],
  "source": [
- "import numpy as np\nfrom sklearn.preprocessing import TargetEncoder\n\nX = np.array([[\"cat\"] * 30 + [\"dog\"] * 20 + [\"snake\"] * 38], dtype=object).T\ny = [90.3] * 30 + [20.4] * 20 + [21.2] * 38\n\nenc = TargetEncoder(random_state=0)\nX_trans = enc.fit_transform(X, y)\n\nenc.encodings_"
+ "import numpy as np\n\nfrom sklearn.preprocessing import TargetEncoder\n\nX = np.array([[\"cat\"] * 30 + [\"dog\"] * 20 + [\"snake\"] * 38], dtype=object).T\ny = [90.3] * 30 + [20.4] * 20 + [21.2] * 38\n\nenc = TargetEncoder(random_state=0)\nX_trans = enc.fit_transform(X, y)\n\nenc.encodings_"
  ]
 },
 {
@@ -65,7 +65,7 @@
  },
  "outputs": [],
  "source": [
- "import numpy as np\nfrom sklearn.tree import DecisionTreeClassifier\n\nX = np.array([0, 1, 6, np.nan]).reshape(-1, 1)\ny = [0, 0, 1, 1]\n\ntree = DecisionTreeClassifier(random_state=0).fit(X, y)\ntree.predict(X)"
+ "import numpy as np\n\nfrom sklearn.tree import DecisionTreeClassifier\n\nX = np.array([0, 1, 6, np.nan]).reshape(-1, 1)\ny = [0, 0, 1, 1]\n\ntree = DecisionTreeClassifier(random_state=0).fit(X, y)\ntree.predict(X)"
  ]
 },
 {
@@ -101,7 +101,7 @@
  },
  "outputs": [],
  "source": [
- "import numpy as np\nfrom sklearn.model_selection import cross_val_score\nfrom sklearn.datasets import make_low_rank_matrix\nfrom sklearn.ensemble import HistGradientBoostingRegressor\n\nn_samples, n_features = 500, 10\nrng = np.random.RandomState(0)\nX = make_low_rank_matrix(n_samples, n_features, random_state=rng)\ncoef = rng.uniform(low=-10, high=20, size=n_features)\ny = rng.gamma(shape=2, scale=np.exp(X @ coef) / 2)\ngbdt = HistGradientBoostingRegressor(loss=\"gamma\")\ncross_val_score(gbdt, X, y).mean()"
+ "import numpy as np\n\nfrom sklearn.datasets import make_low_rank_matrix\nfrom sklearn.ensemble import HistGradientBoostingRegressor\nfrom sklearn.model_selection import cross_val_score\n\nn_samples, n_features = 500, 10\nrng = np.random.RandomState(0)\nX = make_low_rank_matrix(n_samples, n_features, random_state=rng)\ncoef = rng.uniform(low=-10, high=20, size=n_features)\ny = rng.gamma(shape=2, scale=np.exp(X @ coef) / 2)\ngbdt = HistGradientBoostingRegressor(loss=\"gamma\")\ncross_val_score(gbdt, X, y).mean()"
  ]
 },
 {
@@ -119,7 +119,7 @@
  },
  "outputs": [],
  "source": [
- "from sklearn.preprocessing import OrdinalEncoder\nimport numpy as np\n\nX = np.array(\n [[\"dog\"] * 5 + [\"cat\"] * 20 + [\"rabbit\"] * 10 + [\"snake\"] * 3], dtype=object\n).T\nenc = OrdinalEncoder(min_frequency=6).fit(X)\nenc.infrequent_categories_"
+ "import numpy as np\n\nfrom sklearn.preprocessing import OrdinalEncoder\n\nX = np.array(\n [[\"dog\"] * 5 + [\"cat\"] * 20 + [\"rabbit\"] * 10 + [\"snake\"] * 3], dtype=object\n).T\nenc = OrdinalEncoder(min_frequency=6).fit(X)\nenc.infrequent_categories_"
  ]
 }
],
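All five cells in this notebook change the same way: `import numpy as np` is separated from the scikit-learn imports by a blank line, and the scikit-learn imports are sorted alphabetically. A minimal sketch of that grouping convention follows; the module choices and the final print are illustrative only, and the exact lint tooling enforcing the order is an assumption.

# General third-party imports first, then scikit-learn's own imports,
# alphabetized within each group and separated by a blank line.
import numpy as np

from sklearn.datasets import load_digits
from sklearn.preprocessing import StandardScaler

# The grouping only affects the import block; the code below is unchanged.
X, y = load_digits(return_X_y=True)
print(np.unique(y).size, StandardScaler().fit_transform(X).shape)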
9 binary files changed (contents not shown).

dev/_downloads/138e7c706c17949c3098ff8074b03ce7/plot_release_highlights_1_2_0.py (+6 / -4)
@@ -1,4 +1,4 @@
-# ruff: noqa
+# ruff: noqa: CPY001, E501
 """
 =======================================
 Release Highlights for scikit-learn 1.2
@@ -31,9 +31,10 @@
 # (some examples) <https://youtu.be/5bCg8VfX2x8>`__.

 import numpy as np
-from sklearn.datasets import load_iris
-from sklearn.preprocessing import StandardScaler, KBinsDiscretizer
+
 from sklearn.compose import ColumnTransformer
+from sklearn.datasets import load_iris
+from sklearn.preprocessing import KBinsDiscretizer, StandardScaler

 X, y = load_iris(as_frame=True, return_X_y=True)
 sepal_cols = ["sepal length (cm)", "sepal width (cm)"]
@@ -78,6 +79,7 @@
 # :class:`~metrics.PredictionErrorDisplay` provides a way to analyze regression
 # models in a qualitative manner.
 import matplotlib.pyplot as plt
+
 from sklearn.metrics import PredictionErrorDisplay

 fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(12, 5))
@@ -109,8 +111,8 @@
 X = X.select_dtypes(["number", "category"]).drop(columns=["body"])

 # %%
-from sklearn.preprocessing import OrdinalEncoder
 from sklearn.pipeline import make_pipeline
+from sklearn.preprocessing import OrdinalEncoder

 categorical_features = ["pclass", "sex", "embarked"]
 model = make_pipeline(
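The last hunk stops at `model = make_pipeline(`, so the rest of the pipeline is not visible here. A hedged sketch of how such a pipeline could be completed is shown below; the encoder options and the downstream HistGradientBoostingClassifier are assumptions for illustration, not taken from the example file.

from sklearn.compose import ColumnTransformer
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OrdinalEncoder

categorical_features = ["pclass", "sex", "embarked"]

# Encode only the categorical columns, pass numeric columns through, and feed
# the result to a gradient-boosted tree model (the estimator here is assumed).
model = make_pipeline(
    ColumnTransformer(
        [
            (
                "categorical",
                OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1),
                categorical_features,
            )
        ],
        remainder="passthrough",
    ),
    HistGradientBoostingClassifier(random_state=0),
)
print(model)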
7 binary files changed (contents not shown).

dev/_downloads/1b3f17ff0f112d5b77cbdb90f1c17046/plot_set_output.py (+1 / -1)
@@ -10,7 +10,7 @@
 the `set_output` method or globally by setting `set_config(transform_output="pandas")`.
 For details, see
 `SLEP018 <https://scikit-learn-enhancement-proposals.readthedocs.io/en/latest/slep018/proposal.html>`__.
-""" # noqa
+""" # noqa: CPY001

 # %%
 # First, we load the iris dataset as a DataFrame to demonstrate the `set_output` API.
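The docstring touched by this hunk points to the two ways SLEP018 exposes pandas output. A minimal runnable sketch of both follows; the StandardScaler transformer is an illustrative choice, not taken from this file.

from sklearn import set_config
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler

X, _ = load_iris(as_frame=True, return_X_y=True)

# Per-estimator: ask a single transformer for DataFrame output.
scaler = StandardScaler().set_output(transform="pandas")
print(type(scaler.fit_transform(X)))  # pandas DataFrame

# Global: every transformer returns a DataFrame until the setting is reset.
set_config(transform_output="pandas")
print(type(StandardScaler().fit_transform(X)))  # pandas DataFrame
set_config(transform_output="default")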
13 binary files changed (contents not shown).

dev/_downloads/23fb33f64b3c23edf25165a3a4f04237/plot_successive_halving_iterations.ipynb (+1 / -1)
@@ -15,7 +15,7 @@
  },
  "outputs": [],
  "source": [
- "# Authors: The scikit-learn developers\n# SPDX-License-Identifier: BSD-3-Clause\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport pandas as pd\nfrom scipy.stats import randint\n\nfrom sklearn import datasets\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.experimental import enable_halving_search_cv # noqa\nfrom sklearn.model_selection import HalvingRandomSearchCV"
+ "# Authors: The scikit-learn developers\n# SPDX-License-Identifier: BSD-3-Clause\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport pandas as pd\nfrom scipy.stats import randint\n\nfrom sklearn import datasets\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.experimental import enable_halving_search_cv # noqa: F401\nfrom sklearn.model_selection import HalvingRandomSearchCV"
  ]
 },
 {
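Here again the change is the scoped `# noqa: F401` on the experimental enabling import. A minimal sketch of how the imports in that cell fit together is below; the synthetic data and the parameter distributions are assumptions, not taken from the example.

from scipy.stats import randint

from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier

# Successive halving is experimental; importing this module enables it.
from sklearn.experimental import enable_halving_search_cv  # noqa: F401
from sklearn.model_selection import HalvingRandomSearchCV

X, y = datasets.make_classification(n_samples=400, random_state=0)
param_distributions = {
    "max_depth": [3, None],
    "min_samples_split": randint(2, 11),
}
search = HalvingRandomSearchCV(
    RandomForestClassifier(n_estimators=10, random_state=0),
    param_distributions,
    factor=2,
    random_state=0,
).fit(X, y)
print(search.best_params_)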
12 binary files changed (contents not shown).
