|
26 | 26 | },
|
27 | 27 | "outputs": [],
|
28 | 28 | "source": [
|
29 |
| - "# Author: Pedro Morales <[email protected]>\n#\n# License: BSD 3 clause\n\nimport numpy as np\n\nfrom sklearn.compose import ColumnTransformer\nfrom sklearn.datasets import fetch_openml\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.impute import SimpleImputer\nfrom sklearn.preprocessing import StandardScaler, OneHotEncoder\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.model_selection import train_test_split, GridSearchCV\n\nnp.random.seed(0)\n\n# Load data from https://www.openml.org/d/40945\nX, y = fetch_openml(\"titanic\", version=1, as_frame=True, return_X_y=True)\n\n# Alternatively X and y can be obtained directly from the frame attribute:\n# X = titanic.frame.drop('survived', axis=1)\n# y = titanic.frame['survived']" |
| 29 | + "# Author: Pedro Morales <[email protected]>\n#\n# License: BSD 3 clause" |
| 30 | + ] |
| 31 | + }, |
| 32 | + { |
| 33 | + "cell_type": "code", |
| 34 | + "execution_count": null, |
| 35 | + "metadata": { |
| 36 | + "collapsed": false |
| 37 | + }, |
| 38 | + "outputs": [], |
| 39 | + "source": [ |
| 40 | + "import numpy as np\n\nfrom sklearn.compose import ColumnTransformer\nfrom sklearn.datasets import fetch_openml\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.impute import SimpleImputer\nfrom sklearn.preprocessing import StandardScaler, OneHotEncoder\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.model_selection import train_test_split, GridSearchCV\n\nnp.random.seed(0)" |
| 41 | + ] |
| 42 | + }, |
| 43 | + { |
| 44 | + "cell_type": "markdown", |
| 45 | + "metadata": {}, |
| 46 | + "source": [ |
| 47 | + "Load data from https://www.openml.org/d/40945\n\n" |
| 48 | + ] |
| 49 | + }, |
| 50 | + { |
| 51 | + "cell_type": "code", |
| 52 | + "execution_count": null, |
| 53 | + "metadata": { |
| 54 | + "collapsed": false |
| 55 | + }, |
| 56 | + "outputs": [], |
| 57 | + "source": [ |
| 58 | + "X, y = fetch_openml(\"titanic\", version=1, as_frame=True, return_X_y=True)\n\n# Alternatively X and y can be obtained directly from the frame attribute:\n# X = titanic.frame.drop('survived', axis=1)\n# y = titanic.frame['survived']" |
| 59 | + ] |
| 60 | + }, |
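The commented alternative in the cell above splits features and target out of the OpenML ``frame`` attribute with ``drop`` and column selection. A minimal sketch of that pattern on a hypothetical stand-in frame (no network access; the values are illustrative, not the real Titanic data):

```python
import pandas as pd

# Hypothetical stand-in for titanic.frame: feature columns plus the target.
frame = pd.DataFrame(
    {
        "age": [22.0, 35.0, 58.0],
        "fare": [7.25, 8.05, 26.55],
        "survived": [0, 1, 1],
    }
)

# Same split as the commented lines above: drop the target to get X,
# select it to get y.
X = frame.drop("survived", axis=1)
y = frame["survived"]

print(list(X.columns))  # ['age', 'fare']
print(y.tolist())  # [0, 1, 1]
```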
| 61 | + { |
| 62 | + "cell_type": "markdown", |
| 63 | + "metadata": {}, |
| 64 | + "source": [ |
| 65 | + "Use ``ColumnTransformer`` by selecting columns by name\n\nWe will train our classifier with the following features:\n\nNumeric features:\n\n* ``age``: float;\n* ``fare``: float.\n\nCategorical features:\n\n* ``embarked``: categories encoded as strings ``{'C', 'S', 'Q'}``;\n* ``sex``: categories encoded as strings ``{'female', 'male'}``;\n* ``pclass``: ordinal integers ``{1, 2, 3}``.\n\nWe create the preprocessing pipelines for both numeric and categorical data.\nNote that ``pclass`` could be treated as either a categorical or a numeric\nfeature.\n\n" |
| 66 | + ] |
| 67 | + }, |
| 68 | + { |
| 69 | + "cell_type": "code", |
| 70 | + "execution_count": null, |
| 71 | + "metadata": { |
| 72 | + "collapsed": false |
| 73 | + }, |
| 74 | + "outputs": [], |
| 75 | + "source": [ |
| 76 | + "numeric_features = [\"age\", \"fare\"]\nnumeric_transformer = Pipeline(\n steps=[(\"imputer\", SimpleImputer(strategy=\"median\")), (\"scaler\", StandardScaler())]\n)\n\ncategorical_features = [\"embarked\", \"sex\", \"pclass\"]\ncategorical_transformer = OneHotEncoder(handle_unknown=\"ignore\")\n\npreprocessor = ColumnTransformer(\n transformers=[\n (\"num\", numeric_transformer, numeric_features),\n (\"cat\", categorical_transformer, categorical_features),\n ]\n)" |
30 | 77 | ]
|
31 | 78 | },
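The preprocessor defined in the cell above can be exercised on its own before wiring it into a pipeline. A runnable sketch on a tiny synthetic frame (the rows are made up; the column names match the example): the two numeric columns are imputed and scaled, while the categorical columns are one-hot encoded into 3 + 2 + 3 indicator columns.

```python
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Synthetic stand-in rows for the Titanic columns used in the example.
df = pd.DataFrame(
    {
        "age": [22.0, np.nan, 35.0, 58.0],  # NaN exercises the imputer
        "fare": [7.25, 71.28, 8.05, 26.55],
        "embarked": ["S", "C", "S", "Q"],
        "sex": ["male", "female", "male", "female"],
        "pclass": [3, 1, 3, 2],
    }
)

numeric_transformer = Pipeline(
    steps=[("imputer", SimpleImputer(strategy="median")), ("scaler", StandardScaler())]
)
categorical_transformer = OneHotEncoder(handle_unknown="ignore")

preprocessor = ColumnTransformer(
    transformers=[
        ("num", numeric_transformer, ["age", "fare"]),
        ("cat", categorical_transformer, ["embarked", "sex", "pclass"]),
    ]
)

Xt = preprocessor.fit_transform(df)
# 2 scaled numeric columns + 3 (embarked) + 2 (sex) + 3 (pclass) one-hot columns
print(Xt.shape)  # (4, 10)
```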
|
32 | 79 | {
|
33 | 80 | "cell_type": "markdown",
|
34 | 81 | "metadata": {},
|
35 | 82 | "source": [
|
36 |
| - "## Use ``ColumnTransformer`` by selecting column by names\n We will train our classifier with the following features:\n\n Numeric Features:\n\n * ``age``: float;\n * ``fare``: float.\n\n Categorical Features:\n\n * ``embarked``: categories encoded as strings ``{'C', 'S', 'Q'}``;\n * ``sex``: categories encoded as strings ``{'female', 'male'}``;\n * ``pclass``: ordinal integers ``{1, 2, 3}``.\n\n We create the preprocessing pipelines for both numeric and categorical data.\n Note that ``pclass`` could either be treated as a categorical or numeric\n feature.\n\n" |
| 83 | + "Append the classifier to the preprocessing pipeline.\nNow we have a full prediction pipeline.\n\n" |
37 | 84 | ]
|
38 | 85 | },
|
39 | 86 | {
|
|
44 | 91 | },
|
45 | 92 | "outputs": [],
|
46 | 93 | "source": [
|
47 |
| - "numeric_features = [\"age\", \"fare\"]\nnumeric_transformer = Pipeline(\n steps=[(\"imputer\", SimpleImputer(strategy=\"median\")), (\"scaler\", StandardScaler())]\n)\n\ncategorical_features = [\"embarked\", \"sex\", \"pclass\"]\ncategorical_transformer = OneHotEncoder(handle_unknown=\"ignore\")\n\npreprocessor = ColumnTransformer(\n transformers=[\n (\"num\", numeric_transformer, numeric_features),\n (\"cat\", categorical_transformer, categorical_features),\n ]\n)\n\n# Append classifier to preprocessing pipeline.\n# Now we have a full prediction pipeline.\nclf = Pipeline(\n steps=[(\"preprocessor\", preprocessor), (\"classifier\", LogisticRegression())]\n)\n\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n\nclf.fit(X_train, y_train)\nprint(\"model score: %.3f\" % clf.score(X_test, y_test))" |
| 94 | + "clf = Pipeline(\n steps=[(\"preprocessor\", preprocessor), (\"classifier\", LogisticRegression())]\n)\n\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n\nclf.fit(X_train, y_train)\nprint(\"model score: %.3f\" % clf.score(X_test, y_test))" |
48 | 95 | ]
|
49 | 96 | },
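The same fit/score pattern can be checked without downloading the Titanic data. A sketch using the bundled iris dataset instead (so the preprocessing reduces to scaling; the pipeline structure and the `score` call are the same):

```python
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Bundled dataset, purely numeric, so a scaler stands in for the preprocessor.
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

clf = Pipeline(
    steps=[("scaler", StandardScaler()), ("classifier", LogisticRegression())]
)
clf.fit(X_train, y_train)
score = clf.score(X_test, y_test)
print("model score: %.3f" % score)
```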
|
50 | 97 | {
|
51 | 98 | "cell_type": "markdown",
|
52 | 99 | "metadata": {},
|
53 | 100 | "source": [
|
54 |
| - "## HTML representation of ``Pipeline`` (display diagram)\n When the ``Pipeline`` is printed out in a jupyter notebook an HTML\n representation of the estimator is displayed as follows:\n\n" |
| 101 | + "HTML representation of ``Pipeline`` (display diagram)\n\nWhen the ``Pipeline`` is printed out in a Jupyter notebook, an HTML\nrepresentation of the estimator is displayed:\n\n" |
55 | 102 | ]
|
56 | 103 | },
|
57 | 104 | {
|
|
62 | 109 | },
|
63 | 110 | "outputs": [],
|
64 | 111 | "source": [
|
65 |
| - "from sklearn import set_config\n\nset_config(display=\"diagram\")\nclf" |
| 112 | + "clf" |
66 | 113 | ]
|
67 | 114 | },
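Outside a notebook, the same HTML diagram can be produced programmatically with `sklearn.utils.estimator_html_repr`. A small sketch (the two-step pipeline here is illustrative):

```python
from sklearn import set_config
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.utils import estimator_html_repr

# Ask scikit-learn for the diagram representation when estimators are displayed.
set_config(display="diagram")

clf = Pipeline(
    steps=[("scaler", StandardScaler()), ("classifier", LogisticRegression())]
)

# The HTML string a notebook would render as the interactive diagram.
html = estimator_html_repr(clf)
print("Pipeline" in html)  # True
```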
|
68 | 115 | {
|
69 | 116 | "cell_type": "markdown",
|
70 | 117 | "metadata": {},
|
71 | 118 | "source": [
|
72 |
| - "## Use ``ColumnTransformer`` by selecting column by data types\n When dealing with a cleaned dataset, the preprocessing can be automatic by\n using the data types of the column to decide whether to treat a column as a\n numerical or categorical feature.\n :func:`sklearn.compose.make_column_selector` gives this possibility.\n First, let's only select a subset of columns to simplify our\n example.\n\n" |
| 119 | + "Use ``ColumnTransformer`` by selecting columns by data type\n\nWhen dealing with a cleaned dataset, the preprocessing can be made automatic\nby using the data types of the columns to decide whether to treat a column as\na numerical or categorical feature.\n:func:`sklearn.compose.make_column_selector` gives this possibility.\nFirst, let's only select a subset of columns to simplify our\nexample.\n\n" |
73 | 120 | ]
|
74 | 121 | },
|
75 | 122 | {
|
|
123 | 170 | },
|
124 | 171 | "outputs": [],
|
125 | 172 | "source": [
|
126 |
| - "from sklearn.compose import make_column_selector as selector\n\npreprocessor = ColumnTransformer(\n transformers=[\n (\"num\", numeric_transformer, selector(dtype_exclude=\"category\")),\n (\"cat\", categorical_transformer, selector(dtype_include=\"category\")),\n ]\n)\nclf = Pipeline(\n steps=[(\"preprocessor\", preprocessor), (\"classifier\", LogisticRegression())]\n)\n\n\nclf.fit(X_train, y_train)\nprint(\"model score: %.3f\" % clf.score(X_test, y_test))" |
| 173 | + "from sklearn.compose import make_column_selector as selector\n\npreprocessor = ColumnTransformer(\n transformers=[\n (\"num\", numeric_transformer, selector(dtype_exclude=\"category\")),\n (\"cat\", categorical_transformer, selector(dtype_include=\"category\")),\n ]\n)\nclf = Pipeline(\n steps=[(\"preprocessor\", preprocessor), (\"classifier\", LogisticRegression())]\n)\n\n\nclf.fit(X_train, y_train)\nprint(\"model score: %.3f\" % clf.score(X_test, y_test))\nclf" |
127 | 174 | ]
|
128 | 175 | },
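A `make_column_selector` instance is itself callable on a DataFrame and returns the matching column names, which is what `ColumnTransformer` uses under the hood. A minimal sketch on a hypothetical frame with explicit `category` dtypes:

```python
import pandas as pd
from sklearn.compose import make_column_selector as selector

# Hypothetical frame: two float columns and two category-dtype columns.
df = pd.DataFrame(
    {
        "age": [22.0, 35.0],
        "fare": [7.25, 8.05],
        "embarked": pd.Series(["S", "C"], dtype="category"),
        "sex": pd.Series(["male", "female"], dtype="category"),
    }
)

# Calling a selector on the frame returns the selected column names.
num_cols = selector(dtype_exclude="category")(df)
cat_cols = selector(dtype_include="category")(df)
print(num_cols)  # ['age', 'fare']
print(cat_cols)  # ['embarked', 'sex']
```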
|
129 | 176 | {
|
|
159 | 206 | "cell_type": "markdown",
|
160 | 207 | "metadata": {},
|
161 | 208 | "source": [
|
162 |
| - "## Using the prediction pipeline in a grid search\n Grid search can also be performed on the different preprocessing steps\n defined in the ``ColumnTransformer`` object, together with the classifier's\n hyperparameters as part of the ``Pipeline``.\n We will search for both the imputer strategy of the numeric preprocessing\n and the regularization parameter of the logistic regression using\n :class:`~sklearn.model_selection.GridSearchCV`.\n\n" |
| 209 | + "Using the prediction pipeline in a grid search\n\nGrid search can also be performed on the different preprocessing steps\ndefined in the ``ColumnTransformer`` object, together with the classifier's\nhyperparameters as part of the ``Pipeline``.\nWe will search for both the imputer strategy of the numeric preprocessing\nand the regularization parameter of the logistic regression using\n:class:`~sklearn.model_selection.GridSearchCV`.\n\n" |
163 | 210 | ]
|
164 | 211 | },
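Nested parameters are addressed with double-underscore names: pipeline step, then the `ColumnTransformer` transformer name, then the sub-step, then the parameter (e.g. `preprocessor__num__imputer__strategy`). A runnable sketch on synthetic numeric data (the columns and values are made up, not the Titanic dataset):

```python
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Synthetic frame with missing values so the imputer strategy matters.
rng = np.random.RandomState(0)
X = pd.DataFrame(
    {"age": rng.normal(40, 10, 40), "fare": rng.normal(30, 5, 40)}
)
X.loc[::7, "age"] = np.nan
# Deterministic binary target: above/below the median fare.
y = (X["fare"] > X["fare"].median()).astype(int).to_numpy()

numeric_transformer = Pipeline(
    steps=[("imputer", SimpleImputer(strategy="median")), ("scaler", StandardScaler())]
)
preprocessor = ColumnTransformer(
    transformers=[("num", numeric_transformer, ["age", "fare"])]
)
clf = Pipeline(
    steps=[("preprocessor", preprocessor), ("classifier", LogisticRegression())]
)

# step__transformer__substep__param names reach into the nested objects.
param_grid = {
    "preprocessor__num__imputer__strategy": ["mean", "median"],
    "classifier__C": [0.1, 1.0, 10.0],
}
grid_search = GridSearchCV(clf, param_grid, cv=3)
grid_search.fit(X, y)
print(grid_search.best_params_)
```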
|
165 | 212 | {
|
|