<!DOCTYPE html>


<html lang="en" data-content_root="./" >

  <head>
    <meta charset="utf-8" />
    <!-- Single viewport declaration (the generator emitted it twice on this line) -->
    <meta name="viewport" content="width=device-width, initial-scale=1" />
<!-- Open Graph and description metadata. og:url uses the same host as the canonical link. -->
<meta property="og:title" content="6. Dataset transformations" />
<meta property="og:type" content="website" />
<meta property="og:url" content="https://scikit-learn.org/stable/data_transforms.html" />
<meta property="og:site_name" content="scikit-learn" />
<meta property="og:description" content="scikit-learn provides a library of transformers, which may clean (see Preprocessing data), reduce (see Unsupervised dimensionality reduction), expand (see Kernel Approximation) or generate (see Fea..." />
<meta property="og:image" content="https://scikit-learn.org/stable/_static/scikit-learn-logo-small.png" />
<meta property="og:image:alt" content="scikit-learn" />
<meta name="description" content="scikit-learn provides a library of transformers, which may clean (see Preprocessing data), reduce (see Unsupervised dimensionality reduction), expand (see Kernel Approximation) or generate (see Fea..." />

    <title>6. Dataset transformations &#8212; scikit-learn 1.6.1 documentation</title>
  
  
  
  <script data-cfasync="false">
    // Copy the saved color mode and theme onto <html> as data-mode / data-theme.
    // Reading localStorage can throw (e.g. a SecurityError when storage access
    // is blocked), so fall back to the empty defaults instead of aborting.
    (function () {
      var root = document.documentElement;
      var savedMode = "";
      var savedTheme = "";
      try {
        savedMode = localStorage.getItem("mode") || "";
        savedTheme = localStorage.getItem("theme") || "";
      } catch (err) {
        // Storage unavailable: keep the empty defaults.
      }
      root.dataset.mode = savedMode;
      root.dataset.theme = savedTheme;
    })();
  </script>
  <!--
    This gives us a CSS class whose elements are hidden only when JavaScript is disabled.
  -->
  <noscript>
    <!-- Applied only when scripting is off: hide everything marked as JS-only. -->
    <style>
      .pst-js-only {
        display: none !important;
      }
    </style>
  </noscript>
  
  <!-- Loaded before other Sphinx assets -->
  <link href="_static/styles/theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link href="_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />

    <link rel="stylesheet" type="text/css" href="_static/pygments.css?v=a746c00c" />
    <link rel="stylesheet" type="text/css" href="_static/copybutton.css?v=76b2166b" />
    <link rel="stylesheet" type="text/css" href="_static/plot_directive.css" />
    <link rel="stylesheet" type="text/css" href="https://fonts.googleapis.com/css?family=Vibur" />
    <link rel="stylesheet" type="text/css" href="_static/jupyterlite_sphinx.css?v=e3ca86de" />
    <link rel="stylesheet" type="text/css" href="_static/sg_gallery.css?v=d2d258e8" />
    <link rel="stylesheet" type="text/css" href="_static/sg_gallery-binder.css?v=f4aeca0c" />
    <link rel="stylesheet" type="text/css" href="_static/sg_gallery-dataframe.css?v=2082cf3c" />
    <link rel="stylesheet" type="text/css" href="_static/sg_gallery-rendered-html.css?v=1277b6f3" />
    <link rel="stylesheet" type="text/css" href="_static/sphinx-design.min.css?v=95c83b7e" />
    <link rel="stylesheet" type="text/css" href="_static/styles/colors.css?v=cc94ab7d" />
    <link rel="stylesheet" type="text/css" href="_static/styles/custom.css?v=d67e4bb0" />
  
  <!-- So that users can add custom icons -->
  <script src="_static/scripts/fontawesome.js?digest=8878045cc6db502f8baf"></script>
  <!-- Pre-loaded scripts that we'll load fully later -->
  <link rel="preload" as="script" href="_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />

    <script src="_static/documentation_options.js?v=d6a008b6"></script>
    <script src="_static/doctools.js?v=9a2dae69"></script>
    <script src="_static/sphinx_highlight.js?v=dc90522c"></script>
    <script src="_static/clipboard.min.js?v=a7894cd8"></script>
    <script src="_static/copybutton.js?v=97f0b27d"></script>
    <script src="_static/jupyterlite_sphinx.js?v=d6bdf5f8"></script>
    <script src="_static/design-tabs.js?v=f930bc37"></script>
    <script data-domain="scikit-learn.org" defer="defer" src="https://views.scientific-python.org/js/script.js"></script>
    <script async="async" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-chtml.js"></script>
    <script>DOCUMENTATION_OPTIONS.pagename = 'data_transforms';</script>
    <script>
        // Attach the theme and version-switcher settings to the global
        // DOCUMENTATION_OPTIONS object (defined by documentation_options.js).
        Object.assign(DOCUMENTATION_OPTIONS, {
            theme_version: '0.16.1',
            theme_switcher_json_url: 'https://scikit-learn.org/dev/_static/versions.json',
            theme_switcher_version_match: '1.6.1',
            show_version_warning_banner: true
        });
    </script>
    <script src="_static/scripts/dropdown.js?v=e2048168"></script>
    <script src="_static/scripts/version-switcher.js?v=a6dd8357"></script>
    <script src="_static/scripts/sg_plotly_resize.js?v=eeb41cab"></script>
    <link rel="canonical" href="https://scikit-learn.org/stable/data_transforms.html" />
    <link rel="icon" href="_static/favicon.ico"/>
    <link rel="author" title="About these documents" href="about.html" />
    <link rel="search" title="Search" href="search.html" />
    <link rel="next" title="6.1. Pipelines and composite estimators" href="modules/compose.html" />
    <link rel="prev" title="5. Visualizations" href="visualizations.html" />
  <!-- Viewport is already declared near the top of <head>; only the docsearch metadata remains here. -->
  <meta name="docsearch:language" content="en"/>
  <meta name="docsearch:version" content="1.6" />
  </head>
  
  
  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">

  
  
  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
  
  <div id="pst-scroll-pixel-helper"></div>
  
  <!-- Back-to-top button; the arrow icon is decorative, so it is hidden from assistive technology. -->
  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
    <i class="fa-solid fa-arrow-up" aria-hidden="true"></i>Back to top</button>

  
  <!-- Search dialog: given an accessible name; the magnifier icon is decorative. -->
  <dialog id="pst-search-dialog" aria-label="Search">
    <form class="bd-search d-flex align-items-center"
          action="search.html"
          method="get">
      <i class="fa-solid fa-magnifying-glass" aria-hidden="true"></i>
      <input type="search"
             class="form-control"
             name="q"
             placeholder="Search the docs ..."
             aria-label="Search the docs ..."
             autocomplete="off"
             autocorrect="off"
             autocapitalize="off"
             spellcheck="false"/>
      <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
    </form>
  </dialog>

  <div class="pst-async-banner-revealer d-none">
  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
</div>

  
    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
<div class="bd-header__inner bd-page-width">
  <!-- Primary sidebar toggle: explicit button type; the bars icon is decorative. -->
  <button type="button" class="pst-navbar-icon sidebar-toggle primary-toggle" aria-label="Site navigation">
    <span class="fa-solid fa-bars" aria-hidden="true"></span>
  </button>
  
  
  <div class=" navbar-header-items__start">
    
      <div class="navbar-item">

  
    
  

<a class="navbar-brand logo" href="index.html">
  
  
  
  
  
    
    
      
    
    
    <img src="_static/scikit-learn-logo-small.png" class="logo__image only-light" alt="scikit-learn homepage"/>
    <img src="_static/scikit-learn-logo-small.png" class="logo__image only-dark pst-js-only" alt="scikit-learn homepage"/>
  
  
</a></div>
    
  </div>
  
  <div class=" navbar-header-items">
    
    <div class="me-auto navbar-header-items__center">
      
        <div class="navbar-item">
<nav>
  <ul class="bd-navbar-elements navbar-nav">
    
<li class="nav-item ">
  <a class="nav-link nav-internal" href="install.html">
    Install
  </a>
</li>


<li class="nav-item current active">
  <a class="nav-link nav-internal" href="user_guide.html">
    User Guide
  </a>
</li>


<li class="nav-item ">
  <a class="nav-link nav-internal" href="api/index.html">
    API
  </a>
</li>


<li class="nav-item ">
  <a class="nav-link nav-internal" href="auto_examples/index.html">
    Examples
  </a>
</li>


<li class="nav-item ">
  <a class="nav-link nav-external" href="https://blog.scikit-learn.org/">
    Community
  </a>
</li>

            <li class="nav-item dropdown">
                <button class="btn dropdown-toggle nav-item" type="button"
                data-bs-toggle="dropdown" aria-expanded="false"
                aria-controls="pst-nav-more-links">
                    More
                </button>
                <ul id="pst-nav-more-links" class="dropdown-menu">
                    
<li class=" ">
  <a class="nav-link dropdown-item nav-internal" href="getting_started.html">
    Getting Started
  </a>
</li>


<li class=" ">
  <a class="nav-link dropdown-item nav-internal" href="whats_new.html">
    Release History
  </a>
</li>


<li class=" ">
  <a class="nav-link dropdown-item nav-internal" href="glossary.html">
    Glossary
  </a>
</li>


<li class=" ">
  <a class="nav-link dropdown-item nav-external" href="https://scikit-learn.org/dev/developers/index.html">
    Development
  </a>
</li>


<li class=" ">
  <a class="nav-link dropdown-item nav-internal" href="faq.html">
    FAQ
  </a>
</li>


<li class=" ">
  <a class="nav-link dropdown-item nav-internal" href="support.html">
    Support
  </a>
</li>


<li class=" ">
  <a class="nav-link dropdown-item nav-internal" href="related_projects.html">
    Related Projects
  </a>
</li>


<li class=" ">
  <a class="nav-link dropdown-item nav-internal" href="roadmap.html">
    Roadmap
  </a>
</li>


<li class=" ">
  <a class="nav-link dropdown-item nav-internal" href="governance.html">
    Governance
  </a>
</li>


<li class=" ">
  <a class="nav-link dropdown-item nav-internal" href="about.html">
    About us
  </a>
</li>

                </ul>
            </li>
            
  </ul>
</nav></div>
      
    </div>
    
    
    <div class="navbar-header-items__end">
      
        <div class="navbar-item navbar-persistent--container">
          

<!-- Search button (header): explicit button type; the icon is decorative because aria-label names the control. -->
<button type="button" class="btn btn-sm pst-navbar-icon search-button search-button__button pst-js-only" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
    <i class="fa-solid fa-magnifying-glass fa-lg" aria-hidden="true"></i>
</button>
        </div>
      
      
        <div class="navbar-item">
<!-- Color-mode switch (header): one icon per mode (light / dark / auto). -->
<button type="button" class="btn btn-sm nav-link pst-navbar-icon theme-switch-button pst-js-only" aria-label="Color mode" data-bs-title="Color mode" data-bs-placement="bottom" data-bs-toggle="tooltip">
  <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light" title="Light"></i>
  <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark" title="Dark"></i>
  <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto" title="System Settings"></i>
</button></div>
      
        <div class="navbar-item"><ul class="navbar-icon-links"
    aria-label="Icon Links">
        <li class="nav-item">
          
          
          
          
          
          
          
          
          <a href="https://github.com/scikit-learn/scikit-learn" title="GitHub" class="nav-link pst-navbar-icon" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i>
            <span class="sr-only">GitHub</span></a>
        </li>
</ul></div>
      
        <div class="navbar-item">
<div class="version-switcher__container dropdown pst-js-only">
  <button id="pst-version-switcher-button-2"
    type="button"
    class="version-switcher__button btn btn-sm dropdown-toggle"
    data-bs-toggle="dropdown"
    aria-haspopup="listbox"
    aria-controls="pst-version-switcher-list-2"
    aria-label="Version switcher list"
  >
    Choose version  <!-- this text may get changed later by javascript -->
    <span class="caret"></span>
  </button>
  <div id="pst-version-switcher-list-2"
    class="version-switcher__menu dropdown-menu list-group-flush py-0"
    role="listbox" aria-labelledby="pst-version-switcher-button-2">
    <!-- dropdown will be populated by javascript on page load -->
  </div>
</div></div>
      
    </div>
    
  </div>
  
  
    <div class="navbar-persistent--mobile">

<!-- Search button (mobile header): explicit button type; the icon is decorative because aria-label names the control. -->
<button type="button" class="btn btn-sm pst-navbar-icon search-button search-button__button pst-js-only" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
    <i class="fa-solid fa-magnifying-glass fa-lg" aria-hidden="true"></i>
</button>
    </div>
  

  
    <!-- Secondary ("On this page") sidebar toggle: explicit button type; the icon is decorative. -->
    <button type="button" class="pst-navbar-icon sidebar-toggle secondary-toggle" aria-label="On this page">
      <span class="fa-solid fa-outdent" aria-hidden="true"></span>
    </button>
  
</div>

    </header>
  

  <div class="bd-container">
    <div class="bd-container__inner bd-page-width">
      
      
      
      <dialog id="pst-primary-sidebar-modal"></dialog>
      <div id="pst-primary-sidebar" class="bd-sidebar-primary bd-sidebar">
        

  
  <div class="sidebar-header-items sidebar-primary__section">
    
    
      <div class="sidebar-header-items__center">
        
          
          
            <div class="navbar-item">
<nav>
  <ul class="bd-navbar-elements navbar-nav">
    
<li class="nav-item ">
  <a class="nav-link nav-internal" href="install.html">
    Install
  </a>
</li>


<li class="nav-item current active">
  <a class="nav-link nav-internal" href="user_guide.html">
    User Guide
  </a>
</li>


<li class="nav-item ">
  <a class="nav-link nav-internal" href="api/index.html">
    API
  </a>
</li>


<li class="nav-item ">
  <a class="nav-link nav-internal" href="auto_examples/index.html">
    Examples
  </a>
</li>


<li class="nav-item ">
  <a class="nav-link nav-external" href="https://blog.scikit-learn.org/">
    Community
  </a>
</li>


<li class="nav-item ">
  <a class="nav-link nav-internal" href="getting_started.html">
    Getting Started
  </a>
</li>


<li class="nav-item ">
  <a class="nav-link nav-internal" href="whats_new.html">
    Release History
  </a>
</li>


<li class="nav-item ">
  <a class="nav-link nav-internal" href="glossary.html">
    Glossary
  </a>
</li>


<li class="nav-item ">
  <a class="nav-link nav-external" href="https://scikit-learn.org/dev/developers/index.html">
    Development
  </a>
</li>


<li class="nav-item ">
  <a class="nav-link nav-internal" href="faq.html">
    FAQ
  </a>
</li>


<li class="nav-item ">
  <a class="nav-link nav-internal" href="support.html">
    Support
  </a>
</li>


<li class="nav-item ">
  <a class="nav-link nav-internal" href="related_projects.html">
    Related Projects
  </a>
</li>


<li class="nav-item ">
  <a class="nav-link nav-internal" href="roadmap.html">
    Roadmap
  </a>
</li>


<li class="nav-item ">
  <a class="nav-link nav-internal" href="governance.html">
    Governance
  </a>
</li>


<li class="nav-item ">
  <a class="nav-link nav-internal" href="about.html">
    About us
  </a>
</li>

  </ul>
</nav></div>
          
        
      </div>
    
    
    
      <div class="sidebar-header-items__end">
        
          <div class="navbar-item">
<!-- Color-mode switch (sidebar copy): one icon per mode (light / dark / auto). -->
<button type="button" class="btn btn-sm nav-link pst-navbar-icon theme-switch-button pst-js-only" aria-label="Color mode" data-bs-title="Color mode" data-bs-placement="bottom" data-bs-toggle="tooltip">
  <i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light" title="Light"></i>
  <i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark" title="Dark"></i>
  <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto" title="System Settings"></i>
</button></div>
        
          <div class="navbar-item"><ul class="navbar-icon-links"
    aria-label="Icon Links">
        <li class="nav-item">
          
          
          
          
          
          
          
          
          <a href="https://github.com/scikit-learn/scikit-learn" title="GitHub" class="nav-link pst-navbar-icon" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i>
            <span class="sr-only">GitHub</span></a>
        </li>
</ul></div>
        
          <div class="navbar-item">
<div class="version-switcher__container dropdown pst-js-only">
  <button id="pst-version-switcher-button-3"
    type="button"
    class="version-switcher__button btn btn-sm dropdown-toggle"
    data-bs-toggle="dropdown"
    aria-haspopup="listbox"
    aria-controls="pst-version-switcher-list-3"
    aria-label="Version switcher list"
  >
    Choose version  <!-- this text may get changed later by javascript -->
    <span class="caret"></span>
  </button>
  <div id="pst-version-switcher-list-3"
    class="version-switcher__menu dropdown-menu list-group-flush py-0"
    role="listbox" aria-labelledby="pst-version-switcher-button-3">
    <!-- dropdown will be populated by javascript on page load -->
  </div>
</div></div>
        
      </div>
    
  </div>
  
    <div class="sidebar-primary-items__start sidebar-primary__section">
        <div class="sidebar-primary-item">
<nav class="bd-docs-nav bd-links"
     aria-label="Section Navigation">
  <p class="bd-links__title" role="heading" aria-level="1">Section Navigation</p>
  <div class="bd-toc-item navbar-nav"><ul class="current nav bd-sidenav">
<li class="toctree-l1 has-children"><a class="reference internal" href="supervised_learning.html">1. Supervised learning</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
<li class="toctree-l2"><a class="reference internal" href="modules/linear_model.html">1.1. Linear Models</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/lda_qda.html">1.2. Linear and Quadratic Discriminant Analysis</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/kernel_ridge.html">1.3. Kernel ridge regression</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/svm.html">1.4. Support Vector Machines</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/sgd.html">1.5. Stochastic Gradient Descent</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/neighbors.html">1.6. Nearest Neighbors</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/gaussian_process.html">1.7. Gaussian Processes</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/cross_decomposition.html">1.8. Cross decomposition</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/naive_bayes.html">1.9. Naive Bayes</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/tree.html">1.10. Decision Trees</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/ensemble.html">1.11. Ensembles: Gradient boosting, random forests, bagging, voting, stacking</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/multiclass.html">1.12. Multiclass and multioutput algorithms</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/feature_selection.html">1.13. Feature selection</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/semi_supervised.html">1.14. Semi-supervised learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/isotonic.html">1.15. Isotonic regression</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/calibration.html">1.16. Probability calibration</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/neural_networks_supervised.html">1.17. Neural network models (supervised)</a></li>
</ul>
</details></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="unsupervised_learning.html">2. Unsupervised learning</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
<li class="toctree-l2"><a class="reference internal" href="modules/mixture.html">2.1. Gaussian mixture models</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/manifold.html">2.2. Manifold learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/clustering.html">2.3. Clustering</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/biclustering.html">2.4. Biclustering</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/decomposition.html">2.5. Decomposing signals in components (matrix factorization problems)</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/covariance.html">2.6. Covariance estimation</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/outlier_detection.html">2.7. Novelty and Outlier Detection</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/density.html">2.8. Density Estimation</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/neural_networks_unsupervised.html">2.9. Neural network models (unsupervised)</a></li>
</ul>
</details></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="model_selection.html">3. Model selection and evaluation</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
<li class="toctree-l2"><a class="reference internal" href="modules/cross_validation.html">3.1. Cross-validation: evaluating estimator performance</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/grid_search.html">3.2. Tuning the hyper-parameters of an estimator</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/classification_threshold.html">3.3. Tuning the decision threshold for class prediction</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/model_evaluation.html">3.4. Metrics and scoring: quantifying the quality of predictions</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/learning_curve.html">3.5. Validation curves: plotting scores to evaluate models</a></li>
</ul>
</details></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="inspection.html">4. Inspection</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
<li class="toctree-l2"><a class="reference internal" href="modules/partial_dependence.html">4.1. Partial Dependence and Individual Conditional Expectation plots</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/permutation_importance.html">4.2. Permutation feature importance</a></li>
</ul>
</details></li>
<li class="toctree-l1"><a class="reference internal" href="visualizations.html">5. Visualizations</a></li>
<li class="toctree-l1 current active has-children"><a class="current reference internal" href="#">6. Dataset transformations</a><details open="open"><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
<li class="toctree-l2"><a class="reference internal" href="modules/compose.html">6.1. Pipelines and composite estimators</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/feature_extraction.html">6.2. Feature extraction</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/preprocessing.html">6.3. Preprocessing data</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/impute.html">6.4. Imputation of missing values</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/unsupervised_reduction.html">6.5. Unsupervised dimensionality reduction</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/random_projection.html">6.6. Random Projection</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/kernel_approximation.html">6.7. Kernel Approximation</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/metrics.html">6.8. Pairwise metrics, Affinities and Kernels</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/preprocessing_targets.html">6.9. Transforming the prediction target (<code class="docutils literal notranslate"><span class="pre">y</span></code>)</a></li>
</ul>
</details></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="datasets.html">7. Dataset loading utilities</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
<li class="toctree-l2"><a class="reference internal" href="datasets/toy_dataset.html">7.1. Toy datasets</a></li>
<li class="toctree-l2"><a class="reference internal" href="datasets/real_world.html">7.2. Real world datasets</a></li>
<li class="toctree-l2"><a class="reference internal" href="datasets/sample_generators.html">7.3. Generated datasets</a></li>
<li class="toctree-l2"><a class="reference internal" href="datasets/loading_other_datasets.html">7.4. Loading other datasets</a></li>
</ul>
</details></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="computing.html">8. Computing with scikit-learn</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
<li class="toctree-l2"><a class="reference internal" href="computing/scaling_strategies.html">8.1. Strategies to scale computationally: bigger data</a></li>
<li class="toctree-l2"><a class="reference internal" href="computing/computational_performance.html">8.2. Computational Performance</a></li>
<li class="toctree-l2"><a class="reference internal" href="computing/parallelism.html">8.3. Parallelism, resource management, and configuration</a></li>
</ul>
</details></li>
<li class="toctree-l1"><a class="reference internal" href="model_persistence.html">9. Model persistence</a></li>
<li class="toctree-l1"><a class="reference internal" href="common_pitfalls.html">10. Common pitfalls and recommended practices</a></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="dispatching.html">11. Dispatching</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
<li class="toctree-l2"><a class="reference internal" href="modules/array_api.html">11.1. Array API support (experimental)</a></li>
</ul>
</details></li>
<li class="toctree-l1"><a class="reference internal" href="machine_learning_map.html">12. Choosing the right estimator</a></li>
<li class="toctree-l1"><a class="reference internal" href="presentations.html">13. External Resources, Videos and Talks</a></li>
</ul>
</div>
</nav></div>
    </div>
  
  
  <div class="sidebar-primary-items__end sidebar-primary__section">
  </div>


      </div>
      
      <main id="main-content" class="bd-main" role="main">
        
        
          <div class="bd-content">
            <div class="bd-article-container">
              
              <div class="bd-header-article d-print-none">
<div class="header-article-items header-article__inner">
  
    <div class="header-article-items__start">
      
        <div class="header-article-item">

<!-- Breadcrumb trail: Home > User Guide > current page. The home icon is decorative; the link is named by aria-label. -->
<nav aria-label="Breadcrumb" class="d-print-none">
  <ul class="bd-breadcrumbs">
    <li class="breadcrumb-item breadcrumb-home">
      <a href="index.html" class="nav-link" aria-label="Home">
        <i class="fa-solid fa-home" aria-hidden="true"></i>
      </a>
    </li>
    <li class="breadcrumb-item"><a href="user_guide.html" class="nav-link">User Guide</a></li>
    <li class="breadcrumb-item active" aria-current="page"><span class="ellipsis"><span class="section-number">6. </span>Dataset transformations</span></li>
  </ul>
</nav>
</div>
      
    </div>
  
  
</div>
</div>
              
              
              
                
<div id="searchbox"></div>
                <article class="bd-article">
                  
  <section id="dataset-transformations">
<span id="data-transforms"></span><h1><span class="section-number">6. </span>Dataset transformations<a class="headerlink" href="#dataset-transformations" title="Link to this heading">#</a></h1>
<p>scikit-learn provides a library of transformers, which may clean (see
<a class="reference internal" href="modules/preprocessing.html#preprocessing"><span class="std std-ref">Preprocessing data</span></a>), reduce (see <a class="reference internal" href="modules/unsupervised_reduction.html#data-reduction"><span class="std std-ref">Unsupervised dimensionality reduction</span></a>), expand (see
<a class="reference internal" href="modules/kernel_approximation.html#kernel-approximation"><span class="std std-ref">Kernel Approximation</span></a>) or generate (see <a class="reference internal" href="modules/feature_extraction.html#feature-extraction"><span class="std std-ref">Feature extraction</span></a>)
feature representations.</p>
<p>Like other estimators, these are represented by classes with a <code class="docutils literal notranslate"><span class="pre">fit</span></code> method,
which learns model parameters (e.g. mean and standard deviation for
normalization) from a training set, and a <code class="docutils literal notranslate"><span class="pre">transform</span></code> method which applies
this transformation model to unseen data. <code class="docutils literal notranslate"><span class="pre">fit_transform</span></code> may be more
convenient and efficient for modelling and transforming the training data
simultaneously.</p>
<p>Combining such transformers, either in parallel or in series, is covered in
<a class="reference internal" href="modules/compose.html#combining-estimators"><span class="std std-ref">Pipelines and composite estimators</span></a>. <a class="reference internal" href="modules/metrics.html#metrics"><span class="std std-ref">Pairwise metrics, Affinities and Kernels</span></a> covers transforming feature
spaces into affinity matrices, while <a class="reference internal" href="modules/preprocessing_targets.html#preprocessing-targets"><span class="std std-ref">Transforming the prediction target (y)</span></a> considers
transformations of the target space (e.g. categorical labels) for use in
scikit-learn.</p>
<!--
  Chapter table of contents (sections 6.1 to 6.9).
  Each "toctree-l1" item links to one sub-page under modules/ and nests a
  list of "toctree-l2" links to the section anchors inside that sub-page.
  The markup appears to be Sphinx toctree output; presumably it is
  regenerated on each docs build, so edit the .rst source instead (confirm).
-->
<div class="toctree-wrapper compound">
<ul>
<li class="toctree-l1"><a class="reference internal" href="modules/compose.html">6.1. Pipelines and composite estimators</a><ul>
<li class="toctree-l2"><a class="reference internal" href="modules/compose.html#pipeline-chaining-estimators">6.1.1. Pipeline: chaining estimators</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/compose.html#transforming-target-in-regression">6.1.2. Transforming target in regression</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/compose.html#featureunion-composite-feature-spaces">6.1.3. FeatureUnion: composite feature spaces</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/compose.html#columntransformer-for-heterogeneous-data">6.1.4. ColumnTransformer for heterogeneous data</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/compose.html#visualizing-composite-estimators">6.1.5. Visualizing Composite Estimators</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="modules/feature_extraction.html">6.2. Feature extraction</a><ul>
<li class="toctree-l2"><a class="reference internal" href="modules/feature_extraction.html#loading-features-from-dicts">6.2.1. Loading features from dicts</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/feature_extraction.html#feature-hashing">6.2.2. Feature hashing</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/feature_extraction.html#text-feature-extraction">6.2.3. Text feature extraction</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/feature_extraction.html#image-feature-extraction">6.2.4. Image feature extraction</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="modules/preprocessing.html">6.3. Preprocessing data</a><ul>
<li class="toctree-l2"><a class="reference internal" href="modules/preprocessing.html#standardization-or-mean-removal-and-variance-scaling">6.3.1. Standardization, or mean removal and variance scaling</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/preprocessing.html#non-linear-transformation">6.3.2. Non-linear transformation</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/preprocessing.html#normalization">6.3.3. Normalization</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/preprocessing.html#encoding-categorical-features">6.3.4. Encoding categorical features</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/preprocessing.html#discretization">6.3.5. Discretization</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/preprocessing.html#imputation-of-missing-values">6.3.6. Imputation of missing values</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/preprocessing.html#generating-polynomial-features">6.3.7. Generating polynomial features</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/preprocessing.html#custom-transformers">6.3.8. Custom transformers</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="modules/impute.html">6.4. Imputation of missing values</a><ul>
<li class="toctree-l2"><a class="reference internal" href="modules/impute.html#univariate-vs-multivariate-imputation">6.4.1. Univariate vs. Multivariate Imputation</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/impute.html#univariate-feature-imputation">6.4.2. Univariate feature imputation</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/impute.html#multivariate-feature-imputation">6.4.3. Multivariate feature imputation</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/impute.html#nearest-neighbors-imputation">6.4.4. Nearest neighbors imputation</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/impute.html#keeping-the-number-of-features-constant">6.4.5. Keeping the number of features constant</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/impute.html#marking-imputed-values">6.4.6. Marking imputed values</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/impute.html#estimators-that-handle-nan-values">6.4.7. Estimators that handle NaN values</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="modules/unsupervised_reduction.html">6.5. Unsupervised dimensionality reduction</a><ul>
<li class="toctree-l2"><a class="reference internal" href="modules/unsupervised_reduction.html#pca-principal-component-analysis">6.5.1. PCA: principal component analysis</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/unsupervised_reduction.html#random-projections">6.5.2. Random projections</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/unsupervised_reduction.html#feature-agglomeration">6.5.3. Feature agglomeration</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="modules/random_projection.html">6.6. Random Projection</a><ul>
<li class="toctree-l2"><a class="reference internal" href="modules/random_projection.html#the-johnson-lindenstrauss-lemma">6.6.1. The Johnson-Lindenstrauss lemma</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/random_projection.html#gaussian-random-projection">6.6.2. Gaussian random projection</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/random_projection.html#sparse-random-projection">6.6.3. Sparse random projection</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/random_projection.html#inverse-transform">6.6.4. Inverse Transform</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="modules/kernel_approximation.html">6.7. Kernel Approximation</a><ul>
<li class="toctree-l2"><a class="reference internal" href="modules/kernel_approximation.html#nystroem-method-for-kernel-approximation">6.7.1. Nystroem Method for Kernel Approximation</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/kernel_approximation.html#radial-basis-function-kernel">6.7.2. Radial Basis Function Kernel</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/kernel_approximation.html#additive-chi-squared-kernel">6.7.3. Additive Chi Squared Kernel</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/kernel_approximation.html#skewed-chi-squared-kernel">6.7.4. Skewed Chi Squared Kernel</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/kernel_approximation.html#polynomial-kernel-approximation-via-tensor-sketch">6.7.5. Polynomial Kernel Approximation via Tensor Sketch</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/kernel_approximation.html#mathematical-details">6.7.6. Mathematical Details</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="modules/metrics.html">6.8. Pairwise metrics, Affinities and Kernels</a><ul>
<li class="toctree-l2"><a class="reference internal" href="modules/metrics.html#cosine-similarity">6.8.1. Cosine similarity</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/metrics.html#linear-kernel">6.8.2. Linear kernel</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/metrics.html#polynomial-kernel">6.8.3. Polynomial kernel</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/metrics.html#sigmoid-kernel">6.8.4. Sigmoid kernel</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/metrics.html#rbf-kernel">6.8.5. RBF kernel</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/metrics.html#laplacian-kernel">6.8.6. Laplacian kernel</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/metrics.html#chi-squared-kernel">6.8.7. Chi-squared kernel</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="modules/preprocessing_targets.html">6.9. Transforming the prediction target (<code class="docutils literal notranslate"><span class="pre">y</span></code>)</a><ul>
<li class="toctree-l2"><a class="reference internal" href="modules/preprocessing_targets.html#label-binarization">6.9.1. Label binarization</a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/preprocessing_targets.html#label-encoding">6.9.2. Label encoding</a></li>
</ul>
</li>
</ul>
</div>
</section>


                </article>
              
              
              
                <!--
                  Article footer: previous/next page links.
                  "left-prev" points at chapter 5 (visualizations.html) and
                  "right-next" at section 6.1 (modules/compose.html).
                -->
                <footer class="bd-footer-article">
                  <div class="footer-article-items footer-article__inner">
                    <div class="footer-article-item">
                      <div class="prev-next-area">
                        <a class="left-prev"
                           href="visualizations.html"
                           title="previous page">
                          <!-- Decorative chevron: hidden from assistive technology so only
                               the text inside .prev-next-info is announced for this link. -->
                          <i class="fa-solid fa-angle-left" aria-hidden="true"></i>
                          <div class="prev-next-info">
                            <p class="prev-next-subtitle">previous</p>
                            <p class="prev-next-title"><span class="section-number">5. </span>Visualizations</p>
                          </div>
                        </a>
                        <a class="right-next"
                           href="modules/compose.html"
                           title="next page">
                          <div class="prev-next-info">
                            <p class="prev-next-subtitle">next</p>
                            <p class="prev-next-title"><span class="section-number">6.1. </span>Pipelines and composite estimators</p>
                          </div>
                          <!-- Decorative chevron, same treatment as the "previous" link. -->
                          <i class="fa-solid fa-angle-right" aria-hidden="true"></i>
                        </a>
                      </div>
                    </div>
                  </div>
                </footer>
              
              
              
            </div>
            
            
              
                <!--
                  Secondary sidebar.
                  The <dialog> below is empty in the static markup.
                  NOTE(review): presumably a theme script fills or opens it to show
                  the sidebar as a modal on narrow viewports; confirm against the
                  theme's JavaScript.
                -->
                <dialog id="pst-secondary-sidebar-modal"></dialog>
                <div id="pst-secondary-sidebar" class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">


  <!-- "This Page" menu: a single link to the page's reStructuredText source
       (_sources/data_transforms.rst.txt), marked rel="nofollow". -->
  <div class="sidebar-secondary-item">
  <div role="note" aria-label="source link">
    <h3>This Page</h3>
    <ul class="this-page-menu">
      <li><a href="_sources/data_transforms.rst.txt"
            rel="nofollow">Show Source</a></li>
    </ul>
   </div></div>

</div></div>
              
            
          </div>
          <footer class="bd-footer-content">
            
          </footer>
        
      </main>
    </div>
  </div>
  
  <!--
    Theme scripts (Bootstrap, then pydata-sphinx-theme).
    Both carry the "defer" attribute, so they are fetched without blocking
    HTML parsing and run in document order once parsing has finished.
    The "digest" query string is a cache-busting fingerprint of the asset.
  -->
  <script defer src="_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>

  <footer class="bd-footer">
    <!-- Site-wide footer: one start-aligned item holding the copyright notice. -->
    <div class="bd-footer__inner bd-page-width">
      <div class="footer-items__start">
        <div class="footer-item">
          <p class="copyright">
            © Copyright 2007 - 2025, scikit-learn developers (BSD License).
            <br>
          </p>
        </div>
      </div>
    </div>
  </footer>
  </body>
</html>