{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "execution_count": 2,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "dMUdSvjnKO3n",
        "outputId": "311e3f59-df16-4346-87cf-569faf947fdd"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "array([1, 1, 2, 6])"
            ]
          },
          "metadata": {},
          "execution_count": 2
        }
      ],
      "source": [
        "from sklearn.preprocessing import LabelEncoder\n",
        "le = LabelEncoder()\n",
        "le.fit([1, 2, 2, 6])\n",
        "le.classes_\n",
        "le.transform([1, 1, 2, 6])\n",
        "le.inverse_transform([0, 0, 1, 2])"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.preprocessing import LabelEncoder\n",
        "\n",
        "le = LabelEncoder()\n",
        "print(\"LabelEncoder initialized.\")\n",
        "\n",
        "le.fit([1, 2, 2, 6])\n",
        "print(\"Fitted classes:\", le.classes_)\n",
        "\n",
        "print(\"Transformed values:\", le.transform([1, 1, 2, 6]))\n",
        "\n",
        "print(\"Inverse transformed values:\", le.inverse_transform([0, 0, 1, 2]))"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "ztNCqmqHKRrF",
        "outputId": "63dd3d43-2626-4ec2-844d-e6cab4aaee1f"
      },
      "execution_count": 3,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "LabelEncoder initialized.\n",
            "Fitted classes: [1 2 6]\n",
            "Transformed values: [0 0 1 2]\n",
            "Inverse transformed values: [1 1 2 6]\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "QrmOo4pkKRtz"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.preprocessing import LabelEncoder\n",
        "import pandas as pd\n",
        "\n",
        "# Sample dataset\n",
        "data = pd.DataFrame({\n",
        "    'Fruit': ['Apple', 'Banana', 'Orange', 'Apple', 'Orange', 'Banana'],\n",
        "    'Price': [1.2, 0.5, 0.8, 1.3, 0.9, 0.6]\n",
        "})\n",
        "\n",
        "# Initialize and fit LabelEncoder\n",
        "le = LabelEncoder()\n",
        "data['Fruit_Encoded'] = le.fit_transform(data['Fruit'])\n",
        "\n",
        "print(data)\n",
        "print(\"Category Mapping:\", le.classes_)\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "9oLyNGZVKRxs",
        "outputId": "9f9a6461-c5c3-40ab-c465-21727f1259dd"
      },
      "execution_count": 4,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "    Fruit  Price  Fruit_Encoded\n",
            "0   Apple    1.2              0\n",
            "1  Banana    0.5              1\n",
            "2  Orange    0.8              2\n",
            "3   Apple    1.3              0\n",
            "4  Orange    0.9              2\n",
            "5  Banana    0.6              1\n",
            "Category Mapping: ['Apple' 'Banana' 'Orange']\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "data['Fruit_Encoded_Pandas'] = data['Fruit'].astype('category').cat.codes\n",
        "print(data)\n",
        "print(\"Category Mapping:\", dict(enumerate(data['Fruit'].astype('category').cat.categories)))\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Zs3ex8hJO3B-",
        "outputId": "8483d3eb-852b-4417-b4d5-ab0550c205a3"
      },
      "execution_count": 5,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "    Fruit  Price  Fruit_Encoded  Fruit_Encoded_Pandas\n",
            "0   Apple    1.2              0                     0\n",
            "1  Banana    0.5              1                     1\n",
            "2  Orange    0.8              2                     2\n",
            "3   Apple    1.3              0                     0\n",
            "4  Orange    0.9              2                     2\n",
            "5  Banana    0.6              1                     1\n",
            "Category Mapping: {0: 'Apple', 1: 'Banana', 2: 'Orange'}\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "data = pd.DataFrame({\n",
        "    'Satisfaction': ['Low', 'High', 'Medium', 'Low', 'High'],\n",
        "    'Score': [3, 8, 5, 2, 9]\n",
        "})\n",
        "\n",
        "satisfaction_order = {'Low': 0, 'Medium': 1, 'High': 2}\n",
        "data['Satisfaction_Encoded'] = data['Satisfaction'].map(satisfaction_order)\n",
        "\n",
        "print(data)\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "RRBziEq0O3FD",
        "outputId": "95806b62-a9a7-4cfd-b6dc-9183b582aee5"
      },
      "execution_count": 6,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "  Satisfaction  Score  Satisfaction_Encoded\n",
            "0          Low      3                     0\n",
            "1         High      8                     2\n",
            "2       Medium      5                     1\n",
            "3          Low      2                     0\n",
            "4         High      9                     2\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.pipeline import Pipeline\n",
        "from sklearn.tree import DecisionTreeClassifier\n",
        "\n",
        "pipeline = Pipeline([\n",
        "    ('label_encoding', LabelEncoder()),\n",
        "    ('model', DecisionTreeClassifier())\n",
        "])\n"
      ],
      "metadata": {
        "id": "WTF5vOMJO3HV"
      },
      "execution_count": 7,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "train = pd.DataFrame({'City': ['Delhi', 'Mumbai', 'Chennai', 'Delhi']})\n",
        "test = pd.DataFrame({'City': ['Mumbai', 'Kolkata']})\n",
        "\n",
        "le = LabelEncoder()\n",
        "train['City_Encoded'] = le.fit_transform(train['City'])\n",
        "\n",
        "# Handle unseen category safely\n",
        "test['City_Encoded'] = test['City'].apply(lambda x: le.transform([x])[0] if x in le.classes_ else -1)\n",
        "\n",
        "print(train)\n",
        "print(test)\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "GurrRKK_QPni",
        "outputId": "c60554ce-18d1-462c-8c99-1c3fe04df0b9"
      },
      "execution_count": 8,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "      City  City_Encoded\n",
            "0    Delhi             1\n",
            "1   Mumbai             2\n",
            "2  Chennai             0\n",
            "3    Delhi             1\n",
            "      City  City_Encoded\n",
            "0   Mumbai             2\n",
            "1  Kolkata            -1\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "dzJ1L-H_QPql"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}