{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "source": [
        "import numpy as np\n",
        "import pandas as pd\n",
        "import matplotlib.pyplot as plt\n",
        "from sklearn.model_selection import train_test_split\n",
        "\n",
        "data = pd.read_csv(\"data.csv\")\n",
        "print (data.head)\n",
        "data.info()\n",
        "\n",
        "\n",
        "# Preprocess data\n",
        "data.drop(['Unnamed: 32', 'id'], axis=1, inplace=True)\n",
        "data['diagnosis'] = data['diagnosis'].map({'M': 1, 'B': 0})\n",
        "\n",
        "y = data['diagnosis'].values\n",
        "x_data = data.drop(['diagnosis'], axis=1)\n",
        "\n",
        "# Normalize features\n",
        "x = (x_data - x_data.min()) / (x_data.max() - x_data.min())\n",
        "\n",
        "from sklearn.model_selection import train_test_split\n",
        "x_train, x_test, y_train, y_test = train_test_split(\n",
        "    x, y, test_size = 0.15, random_state = 42)\n",
        "\n",
        "x_train = x_train.T\n",
        "x_test = x_test.T\n",
        "y_train = y_train.T\n",
        "y_test = y_test.T\n",
        "\n",
        "print(\"x train: \", x_train.shape)\n",
        "print(\"x test: \", x_test.shape)\n",
        "print(\"y train: \", y_train.shape)\n",
        "print(\"y test: \", y_test.shape)\n",
        "\n",
        "\n",
        "def initialize_weights_and_bias(dimension):\n",
        "    w = np.random.randn(dimension, 1) * 0.01  # Initialize with small random values\n",
        "    b = 0.0\n",
        "    return w, b\n",
        "\n",
        "def sigmoid(z):\n",
        "    return 1 / (1 + np.exp(-z))\n",
        "\n",
        "def forward_backward_propagation(w, b, x_train, y_train):\n",
        "    m = x_train.shape[1]\n",
        "    z = np.dot(w.T, x_train) + b\n",
        "    y_head = sigmoid(z)\n",
        "\n",
        "    # Calculate cost\n",
        "    cost = (-1/m) * np.sum(y_train * np.log(y_head) + (1 - y_train) * np.log(1 - y_head))\n",
        "\n",
        "    # Backward propagation\n",
        "    derivative_weight = (1/m) * np.dot(x_train, (y_head - y_train).T)\n",
        "    derivative_bias = (1/m) * np.sum(y_head - y_train)\n",
        "\n",
        "    gradients = {\"derivative_weight\": derivative_weight, \"derivative_bias\": derivative_bias}\n",
        "    return cost, gradients\n",
        "\n",
        "def update(w, b, x_train, y_train, learning_rate, num_iterations):\n",
        "    costs = []\n",
        "    gradients = {}  # Store gradients\n",
        "    for i in range(num_iterations):\n",
        "        cost, grad = forward_backward_propagation(w, b, x_train, y_train)\n",
        "        w -= learning_rate * grad[\"derivative_weight\"]\n",
        "        b -= learning_rate * grad[\"derivative_bias\"]\n",
        "\n",
        "        if i % 100 == 0:\n",
        "            costs.append(cost)\n",
        "            print(f\"Cost after iteration {i}: {cost}\")\n",
        "\n",
        "    parameters = {\"weight\": w, \"bias\": b}\n",
        "    return parameters, gradients, costs\n",
        "\n",
        "\n",
        "def predict(w, b, x_test):\n",
        "    m = x_test.shape[1]\n",
        "    y_prediction = np.zeros((1, m))\n",
        "    z = sigmoid(np.dot(w.T, x_test) + b)\n",
        "\n",
        "    for i in range(z.shape[1]):\n",
        "        y_prediction[0, i] = 1 if z[0, i] > 0.5 else 0\n",
        "\n",
        "    return y_prediction\n",
        "\n",
        "def logistic_regression(x_train, y_train, x_test, y_test, learning_rate=0.01, num_iterations=1000):\n",
        "    dimension = x_train.shape[0]\n",
        "    w, b = initialize_weights_and_bias(dimension)\n",
        "    parameters, gradients, costs = update(w, b, x_train, y_train, learning_rate, num_iterations)\n",
        "\n",
        "    y_prediction_test = predict(parameters[\"weight\"], parameters[\"bias\"], x_test)\n",
        "    y_prediction_train = predict(parameters[\"weight\"], parameters[\"bias\"], x_train)\n",
        "\n",
        "    print(f\"Train accuracy: {100 - np.mean(np.abs(y_prediction_train - y_train)) * 100}%\")\n",
        "    print(f\"Test accuracy: {100 - np.mean(np.abs(y_prediction_test - y_test)) * 100}%\")\n",
        "\n",
        "# Run logistic regression\n",
        "logistic_regression(x_train, y_train, x_test, y_test, learning_rate=0.01, num_iterations=1000)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "7_PZ6lqtX3pB",
        "outputId": "4b6f8628-2896-4253-a2c2-26a43cb02dcb"
      },
      "execution_count": 4,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "<bound method NDFrame.head of            id diagnosis  radius_mean  texture_mean  perimeter_mean  area_mean  \\\n",
            "0      842302         M        17.99         10.38          122.80     1001.0   \n",
            "1      842517         M        20.57         17.77          132.90     1326.0   \n",
            "2    84300903         M        19.69         21.25          130.00     1203.0   \n",
            "3    84348301         M        11.42         20.38           77.58      386.1   \n",
            "4    84358402         M        20.29         14.34          135.10     1297.0   \n",
            "..        ...       ...          ...           ...             ...        ...   \n",
            "564    926424         M        21.56         22.39          142.00     1479.0   \n",
            "565    926682         M        20.13         28.25          131.20     1261.0   \n",
            "566    926954         M        16.60         28.08          108.30      858.1   \n",
            "567    927241         M        20.60         29.33          140.10     1265.0   \n",
            "568     92751         B         7.76         24.54           47.92      181.0   \n",
            "\n",
            "     smoothness_mean  compactness_mean  concavity_mean  concave points_mean  \\\n",
            "0            0.11840           0.27760         0.30010              0.14710   \n",
            "1            0.08474           0.07864         0.08690              0.07017   \n",
            "2            0.10960           0.15990         0.19740              0.12790   \n",
            "3            0.14250           0.28390         0.24140              0.10520   \n",
            "4            0.10030           0.13280         0.19800              0.10430   \n",
            "..               ...               ...             ...                  ...   \n",
            "564          0.11100           0.11590         0.24390              0.13890   \n",
            "565          0.09780           0.10340         0.14400              0.09791   \n",
            "566          0.08455           0.10230         0.09251              0.05302   \n",
            "567          0.11780           0.27700         0.35140              0.15200   \n",
            "568          0.05263           0.04362         0.00000              0.00000   \n",
            "\n",
            "     ...  texture_worst  perimeter_worst  area_worst  smoothness_worst  \\\n",
            "0    ...          17.33           184.60      2019.0           0.16220   \n",
            "1    ...          23.41           158.80      1956.0           0.12380   \n",
            "2    ...          25.53           152.50      1709.0           0.14440   \n",
            "3    ...          26.50            98.87       567.7           0.20980   \n",
            "4    ...          16.67           152.20      1575.0           0.13740   \n",
            "..   ...            ...              ...         ...               ...   \n",
            "564  ...          26.40           166.10      2027.0           0.14100   \n",
            "565  ...          38.25           155.00      1731.0           0.11660   \n",
            "566  ...          34.12           126.70      1124.0           0.11390   \n",
            "567  ...          39.42           184.60      1821.0           0.16500   \n",
            "568  ...          30.37            59.16       268.6           0.08996   \n",
            "\n",
            "     compactness_worst  concavity_worst  concave points_worst  symmetry_worst  \\\n",
            "0              0.66560           0.7119                0.2654          0.4601   \n",
            "1              0.18660           0.2416                0.1860          0.2750   \n",
            "2              0.42450           0.4504                0.2430          0.3613   \n",
            "3              0.86630           0.6869                0.2575          0.6638   \n",
            "4              0.20500           0.4000                0.1625          0.2364   \n",
            "..                 ...              ...                   ...             ...   \n",
            "564            0.21130           0.4107                0.2216          0.2060   \n",
            "565            0.19220           0.3215                0.1628          0.2572   \n",
            "566            0.30940           0.3403                0.1418          0.2218   \n",
            "567            0.86810           0.9387                0.2650          0.4087   \n",
            "568            0.06444           0.0000                0.0000          0.2871   \n",
            "\n",
            "     fractal_dimension_worst  Unnamed: 32  \n",
            "0                    0.11890          NaN  \n",
            "1                    0.08902          NaN  \n",
            "2                    0.08758          NaN  \n",
            "3                    0.17300          NaN  \n",
            "4                    0.07678          NaN  \n",
            "..                       ...          ...  \n",
            "564                  0.07115          NaN  \n",
            "565                  0.06637          NaN  \n",
            "566                  0.07820          NaN  \n",
            "567                  0.12400          NaN  \n",
            "568                  0.07039          NaN  \n",
            "\n",
            "[569 rows x 33 columns]>\n",
            "<class 'pandas.core.frame.DataFrame'>\n",
            "RangeIndex: 569 entries, 0 to 568\n",
            "Data columns (total 33 columns):\n",
            " #   Column                   Non-Null Count  Dtype  \n",
            "---  ------                   --------------  -----  \n",
            " 0   id                       569 non-null    int64  \n",
            " 1   diagnosis                569 non-null    object \n",
            " 2   radius_mean              569 non-null    float64\n",
            " 3   texture_mean             569 non-null    float64\n",
            " 4   perimeter_mean           569 non-null    float64\n",
            " 5   area_mean                569 non-null    float64\n",
            " 6   smoothness_mean          569 non-null    float64\n",
            " 7   compactness_mean         569 non-null    float64\n",
            " 8   concavity_mean           569 non-null    float64\n",
            " 9   concave points_mean      569 non-null    float64\n",
            " 10  symmetry_mean            569 non-null    float64\n",
            " 11  fractal_dimension_mean   569 non-null    float64\n",
            " 12  radius_se                569 non-null    float64\n",
            " 13  texture_se               569 non-null    float64\n",
            " 14  perimeter_se             569 non-null    float64\n",
            " 15  area_se                  569 non-null    float64\n",
            " 16  smoothness_se            569 non-null    float64\n",
            " 17  compactness_se           569 non-null    float64\n",
            " 18  concavity_se             569 non-null    float64\n",
            " 19  concave points_se        569 non-null    float64\n",
            " 20  symmetry_se              569 non-null    float64\n",
            " 21  fractal_dimension_se     569 non-null    float64\n",
            " 22  radius_worst             569 non-null    float64\n",
            " 23  texture_worst            569 non-null    float64\n",
            " 24  perimeter_worst          569 non-null    float64\n",
            " 25  area_worst               569 non-null    float64\n",
            " 26  smoothness_worst         569 non-null    float64\n",
            " 27  compactness_worst        569 non-null    float64\n",
            " 28  concavity_worst          569 non-null    float64\n",
            " 29  concave points_worst     569 non-null    float64\n",
            " 30  symmetry_worst           569 non-null    float64\n",
            " 31  fractal_dimension_worst  569 non-null    float64\n",
            " 32  Unnamed: 32              0 non-null      float64\n",
            "dtypes: float64(31), int64(1), object(1)\n",
            "memory usage: 146.8+ KB\n",
            "x train:  (30, 483)\n",
            "x test:  (30, 86)\n",
            "y train:  (483,)\n",
            "y test:  (86,)\n",
            "Cost after iteration 0: 0.6928136132424735\n",
            "Cost after iteration 100: 0.6650819203054067\n",
            "Cost after iteration 200: 0.6406057807006481\n",
            "Cost after iteration 300: 0.6182622214830192\n",
            "Cost after iteration 400: 0.5976893069647797\n",
            "Cost after iteration 500: 0.5786892834063715\n",
            "Cost after iteration 600: 0.5611076230947329\n",
            "Cost after iteration 700: 0.5448087404195209\n",
            "Cost after iteration 800: 0.529670875172974\n",
            "Cost after iteration 900: 0.5155845404270566\n",
            "Train accuracy: 90.6832298136646%\n",
            "Test accuracy: 88.37209302325581%\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "sWcy1yQqaVC3"
      },
      "execution_count": 4,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "GAjYhEzbjWli"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}