
Commit 1817a85

Pushing the docs to dev/ for branch: main, commit 8c2272ee9a921cf4e332bcb9ccf51e5d6834fbe3
1 parent 73d7416 commit 1817a85

File tree: 2,072 files changed, +20510 -19734 lines


dev/.buildinfo (+1 -1)

@@ -1,4 +1,4 @@
 # Sphinx build info version 1
 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
-config: a7402a9c00db06ccbd715f91dd91d1c4
+config: 57e4b1dae32e35e9b88e1320022ed446
 tags: 645f666f9bcd5a90fca523b33c5a78b7
12 binary files not shown.

dev/_downloads/0b39f715b5e32f01df3d212b6d822b82/plot_calibration.py (-1)

@@ -35,7 +35,6 @@
 from sklearn.model_selection import train_test_split
 
 n_samples = 50000
-n_bins = 3  # use 3 bins for calibration_curve as we have 3 clusters here
 
 # Generate 3 blobs with 2 classes where the second blob contains
 # half positive samples and half negative samples. Probability in this
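
The deleted `n_bins` constant was apparently no longer used by the script. For context, a minimal sketch of the call it once parameterized, `sklearn.calibration.calibration_curve` (the train/test split comes from the example; the classifier choice below is an assumption, not part of this commit):

    from sklearn.calibration import calibration_curve
    from sklearn.naive_bayes import GaussianNB

    # Compare the mean predicted probability in each bin with the observed
    # fraction of positives, using 3 bins to match the 3 blob clusters.
    clf = GaussianNB().fit(X_train, y_train)
    prob_pos = clf.predict_proba(X_test)[:, 1]
    frac_pos, mean_pred = calibration_curve(y_test, prob_pos, n_bins=3)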
Binary file not shown.

dev/_downloads/0c15970ac17183d2bf864a9563081aeb/plot_calibration.ipynb (+1 -1)

@@ -33,7 +33,7 @@
 },
 "outputs": [],
 "source": [
-"import numpy as np\n\nfrom sklearn.datasets import make_blobs\nfrom sklearn.model_selection import train_test_split\n\nn_samples = 50000\nn_bins = 3  # use 3 bins for calibration_curve as we have 3 clusters here\n\n# Generate 3 blobs with 2 classes where the second blob contains\n# half positive samples and half negative samples. Probability in this\n# blob is therefore 0.5.\ncenters = [(-5, -5), (0, 0), (5, 5)]\nX, y = make_blobs(n_samples=n_samples, centers=centers, shuffle=False, random_state=42)\n\ny[: n_samples // 2] = 0\ny[n_samples // 2 :] = 1\nsample_weight = np.random.RandomState(42).rand(y.shape[0])\n\n# split train, test for calibration\nX_train, X_test, y_train, y_test, sw_train, sw_test = train_test_split(\n    X, y, sample_weight, test_size=0.9, random_state=42\n)"
+"import numpy as np\n\nfrom sklearn.datasets import make_blobs\nfrom sklearn.model_selection import train_test_split\n\nn_samples = 50000\n\n# Generate 3 blobs with 2 classes where the second blob contains\n# half positive samples and half negative samples. Probability in this\n# blob is therefore 0.5.\ncenters = [(-5, -5), (0, 0), (5, 5)]\nX, y = make_blobs(n_samples=n_samples, centers=centers, shuffle=False, random_state=42)\n\ny[: n_samples // 2] = 0\ny[n_samples // 2 :] = 1\nsample_weight = np.random.RandomState(42).rand(y.shape[0])\n\n# split train, test for calibration\nX_train, X_test, y_train, y_test, sw_train, sw_test = train_test_split(\n    X, y, sample_weight, test_size=0.9, random_state=42\n)"
 ]
},
{
2 binary files not shown.

dev/_downloads/133f2198d3ab792c75b39a63b0a99872/plot_cost_sensitive_learning.ipynb (+1 -1)

@@ -148,7 +148,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"In addition, the original research [1]_ defines a custom business metric. We\ncall a \"business metric\" any metric function that aims at quantifying how the\npredictions (correct or wrong) might impact the business value of deploying a\ngiven machine learning model in a specific application context. For our\ncredit prediction task, the authors provide a custom cost-matrix which\nencodes that classifying a a \"bad\" credit as \"good\" is 5 times more costly on\naverage than the opposite: it is less costly for the financing institution to\nnot grant a credit to a potential customer that will not default (and\ntherefore miss a good customer that would have otherwise both reimbursed the\ncredit and payed interests) than to grant a credit to a customer that will\ndefault.\n\nWe define a python function that weight the confusion matrix and return the\noverall cost.\n\n"
+"In addition, the original research [1]_ defines a custom business metric. We\ncall a \"business metric\" any metric function that aims at quantifying how the\npredictions (correct or wrong) might impact the business value of deploying a\ngiven machine learning model in a specific application context. For our\ncredit prediction task, the authors provide a custom cost-matrix which\nencodes that classifying a \"bad\" credit as \"good\" is 5 times more costly on\naverage than the opposite: it is less costly for the financing institution to\nnot grant a credit to a potential customer that will not default (and\ntherefore miss a good customer that would have otherwise both reimbursed the\ncredit and paid interests) than to grant a credit to a customer that will\ndefault.\n\nWe define a python function that weights the confusion matrix and returns the\noverall cost.\n\n"
 ]
},
{
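
The cost matrix this cell describes can be made concrete with a short sketch. The function name, label order, and wiring below are illustrative assumptions, not the notebook's actual implementation:

    import numpy as np
    from sklearn.metrics import confusion_matrix

    def credit_cost(y_true, y_pred):
        # Rows = true class, columns = predicted class, in `labels` order.
        cm = confusion_matrix(y_true, y_pred, labels=["bad", "good"])
        # Per-cell cost: a "bad" credit predicted "good" costs 5, the
        # opposite mistake costs 1, correct predictions cost nothing.
        cost_matrix = np.array([[0, 5],
                                [1, 0]])
        return (cm * cost_matrix).sum()  # lower is better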
4 binary files not shown.

dev/_downloads/16260993c16a6d249d6df4cb11cf8174/plot_theilsen.ipynb (+1 -1)

@@ -33,7 +33,7 @@
 },
 "outputs": [],
 "source": [
-"np.random.seed(0)\nn_samples = 200\n# Linear model y = 3*x + N(2, 0.1**2)\nx = np.random.randn(n_samples)\nw = 3.0\nc = 2.0\nnoise = 0.1 * np.random.randn(n_samples)\ny = w * x + c + noise\n# 10% outliers\ny[-20:] += -20 * x[-20:]\nX = x[:, np.newaxis]\n\nplt.scatter(x, y, color=\"indigo\", marker=\"x\", s=40)\nline_x = np.array([-3, 3])\nfor name, estimator in estimators:\n    t0 = time.time()\n    estimator.fit(X, y)\n    elapsed_time = time.time() - t0\n    y_pred = estimator.predict(line_x.reshape(2, 1))\n    plt.plot(\n        line_x,\n        y_pred,\n        color=colors[name],\n        linewidth=lw,\n        label=\"%s (fit time: %.2fs)\" % (name, elapsed_time),\n    )\n\nplt.axis(\"tight\")\nplt.legend(loc=\"upper left\")\n_ = plt.title(\"Corrupt y\")"
+"np.random.seed(0)\nn_samples = 200\n# Linear model y = 3*x + N(2, 0.1**2)\nx = np.random.randn(n_samples)\nw = 3.0\nc = 2.0\nnoise = 0.1 * np.random.randn(n_samples)\ny = w * x + c + noise\n# 10% outliers\ny[-20:] += -20 * x[-20:]\nX = x[:, np.newaxis]\n\nplt.scatter(x, y, color=\"indigo\", marker=\"x\", s=40)\nline_x = np.array([-3, 3])\nfor name, estimator in estimators:\n    t0 = time.time()\n    estimator.fit(X, y)\n    elapsed_time = time.time() - t0\n    y_pred = estimator.predict(line_x.reshape(2, 1))\n    plt.plot(\n        line_x,\n        y_pred,\n        color=colors[name],\n        linewidth=lw,\n        label=\"%s (fit time: %.2fs)\" % (name, elapsed_time),\n    )\n\nplt.axis(\"tight\")\nplt.legend(loc=\"upper right\")\n_ = plt.title(\"Corrupt y\")"
 ]
},
{
68 binary files not shown.

dev/_downloads/55189006cedb95a2fc6bf8c216dab8f0/plot_robust_vs_empirical_covariance.py (+2 -1)

@@ -183,6 +183,7 @@
 plt.title("Influence of outliers on the covariance estimation")
 plt.xlabel("Amount of contamination (%)")
 plt.ylabel("RMSE")
-plt.legend(loc="upper center", prop=font_prop)
+plt.legend(loc="center", prop=font_prop)
 
+plt.tight_layout()
 plt.show()
12 binary files not shown.

dev/_downloads/6953689dfdc5dd401dda89604bbdaefb/plot_time_series_lagged_features.ipynb (+1 -1)

@@ -33,7 +33,7 @@
 },
 "outputs": [],
 "source": [
-"import numpy as np\nimport polars as pl\n\nfrom sklearn.datasets import fetch_file\n\npl.Config.set_fmt_str_lengths(20)\n\nbike_sharing_data_file = fetch_file(\n    \"https://openml1.win.tue.nl/datasets/0004/44063/dataset_44063.pq\",\n    sha256=\"d120af76829af0d256338dc6dd4be5df4fd1f35bf3a283cab66a51c1c6abd06a\",\n)\nbike_sharing_data_file"
+"import numpy as np\nimport polars as pl\n\nfrom sklearn.datasets import fetch_file\n\npl.Config.set_fmt_str_lengths(20)\n\nbike_sharing_data_file = fetch_file(\n    # Original file was hosted at:\n    # https://openml1.win.tue.nl/datasets/0004/44063/dataset_44063.pq\n    # but is no longer reachable.\n    # TODO: switch to https://data.openml.org/datasets/0004/44063/dataset_44063.pq\n    # once possible.\n    \"https://github.com/scikit-learn/examples-data/raw/refs/heads/master/bike-sharing-demand/dataset_44063.pq\",\n    sha256=\"d120af76829af0d256338dc6dd4be5df4fd1f35bf3a283cab66a51c1c6abd06a\",\n)\nbike_sharing_data_file"
 ]
},
{
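
Note that the sha256 digest is unchanged even though the URL moved: `fetch_file` verifies the downloaded bytes, so the new mirror must serve a byte-identical file. A standard-library sketch of reproducing that integrity check by hand (the helper name is mine):

    import hashlib

    def sha256_of(path, chunk_size=1 << 20):
        # Stream the file in chunks so large downloads do not load into memory.
        digest = hashlib.sha256()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(chunk_size), b""):
                digest.update(chunk)
        return digest.hexdigest()

    # sha256_of(bike_sharing_data_file) should equal the pinned digest:
    # d120af76829af0d256338dc6dd4be5df4fd1f35bf3a283cab66a51c1c6abd06a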
11 binary files not shown.

dev/_downloads/764d061a261a2e06ad21ec9133361b2d/plot_precision_recall.ipynb (+1 -1)

@@ -4,7 +4,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"\n# Precision-Recall\n\nExample of Precision-Recall metric to evaluate classifier output quality.\n\nPrecision-Recall is a useful measure of success of prediction when the\nclasses are very imbalanced. In information retrieval, precision is a\nmeasure of the fraction of relevant items among actually returned items while recall\nis a measure of the fraction of items that were returned among all items that should\nhave been returned. 'Relevancy' here refers to items that are\npostively labeled, i.e., true positives and false negatives.\n\nPrecision ($P$) is defined as the number of true positives ($T_p$)\nover the number of true positives plus the number of false positives\n($F_p$).\n\n\\begin{align}P = \\frac{T_p}{T_p+F_p}\\end{align}\n\nRecall ($R$) is defined as the number of true positives ($T_p$)\nover the number of true positives plus the number of false negatives\n($F_n$).\n\n\\begin{align}R = \\frac{T_p}{T_p + F_n}\\end{align}\n\nThe precision-recall curve shows the tradeoff between precision and\nrecall for different thresholds. A high area under the curve represents\nboth high recall and high precision. High precision is achieved by having\nfew false positives in the returned results, and high recall is achieved by\nhaving few false negatives in the relevant results.\nHigh scores for both show that the classifier is returning\naccurate results (high precision), as well as returning a majority of all relevant\nresults (high recall).\n\nA system with high recall but low precision returns most of the relevant items, but\nthe proportion of returned results that are incorrectly labeled is high. A\nsystem with high precision but low recall is just the opposite, returning very\nfew of the relevant items, but most of its predicted labels are correct when compared\nto the actual labels. An ideal system with high precision and high recall will\nreturn most of the relevant items, with most results labeled correctly.\n\nThe definition of precision ($\\frac{T_p}{T_p + F_p}$) shows that lowering\nthe threshold of a classifier may increase the denominator, by increasing the\nnumber of results returned. If the threshold was previously set too high, the\nnew results may all be true positives, which will increase precision. If the\nprevious threshold was about right or too low, further lowering the threshold\nwill introduce false positives, decreasing precision.\n\nRecall is defined as $\\frac{T_p}{T_p+F_n}$, where $T_p+F_n$ does\nnot depend on the classifier threshold. Changing the classifier threshold can only\nchange the numerator, $T_p$. Lowering the classifier\nthreshold may increase recall, by increasing the number of true positive\nresults. It is also possible that lowering the threshold may leave recall\nunchanged, while the precision fluctuates. Thus, precision does not necessarily\ndecrease with recall.\n\nThe relationship between recall and precision can be observed in the\nstairstep area of the plot - at the edges of these steps a small change\nin the threshold considerably reduces precision, with only a minor gain in\nrecall.\n\n**Average precision** (AP) summarizes such a plot as the weighted mean of\nprecisions achieved at each threshold, with the increase in recall from the\nprevious threshold used as the weight:\n\n$\\text{AP} = \\sum_n (R_n - R_{n-1}) P_n$\n\nwhere $P_n$ and $R_n$ are the precision and recall at the\nnth threshold. A pair $(R_k, P_k)$ is referred to as an\n*operating point*.\n\nAP and the trapezoidal area under the operating points\n(:func:`sklearn.metrics.auc`) are common ways to summarize a precision-recall\ncurve that lead to different results. Read more in the\n`User Guide <precision_recall_f_measure_metrics>`.\n\nPrecision-recall curves are typically used in binary classification to study\nthe output of a classifier. In order to extend the precision-recall curve and\naverage precision to multi-class or multi-label classification, it is necessary\nto binarize the output. One curve can be drawn per label, but one can also draw\na precision-recall curve by considering each element of the label indicator\nmatrix as a binary prediction (`micro-averaging <average>`).\n\n<div class=\"alert alert-info\"><h4>Note</h4><p>See also :func:`sklearn.metrics.average_precision_score`,\n :func:`sklearn.metrics.recall_score`,\n :func:`sklearn.metrics.precision_score`,\n :func:`sklearn.metrics.f1_score`</p></div>\n"
+"\n# Precision-Recall\n\nExample of Precision-Recall metric to evaluate classifier output quality.\n\nPrecision-Recall is a useful measure of success of prediction when the\nclasses are very imbalanced. In information retrieval, precision is a\nmeasure of the fraction of relevant items among actually returned items while recall\nis a measure of the fraction of items that were returned among all items that should\nhave been returned. 'Relevancy' here refers to items that are\npositively labeled, i.e., true positives and false negatives.\n\nPrecision ($P$) is defined as the number of true positives ($T_p$)\nover the number of true positives plus the number of false positives\n($F_p$).\n\n\\begin{align}P = \\frac{T_p}{T_p+F_p}\\end{align}\n\nRecall ($R$) is defined as the number of true positives ($T_p$)\nover the number of true positives plus the number of false negatives\n($F_n$).\n\n\\begin{align}R = \\frac{T_p}{T_p + F_n}\\end{align}\n\nThe precision-recall curve shows the tradeoff between precision and\nrecall for different thresholds. A high area under the curve represents\nboth high recall and high precision. High precision is achieved by having\nfew false positives in the returned results, and high recall is achieved by\nhaving few false negatives in the relevant results.\nHigh scores for both show that the classifier is returning\naccurate results (high precision), as well as returning a majority of all relevant\nresults (high recall).\n\nA system with high recall but low precision returns most of the relevant items, but\nthe proportion of returned results that are incorrectly labeled is high. A\nsystem with high precision but low recall is just the opposite, returning very\nfew of the relevant items, but most of its predicted labels are correct when compared\nto the actual labels. An ideal system with high precision and high recall will\nreturn most of the relevant items, with most results labeled correctly.\n\nThe definition of precision ($\\frac{T_p}{T_p + F_p}$) shows that lowering\nthe threshold of a classifier may increase the denominator, by increasing the\nnumber of results returned. If the threshold was previously set too high, the\nnew results may all be true positives, which will increase precision. If the\nprevious threshold was about right or too low, further lowering the threshold\nwill introduce false positives, decreasing precision.\n\nRecall is defined as $\\frac{T_p}{T_p+F_n}$, where $T_p+F_n$ does\nnot depend on the classifier threshold. Changing the classifier threshold can only\nchange the numerator, $T_p$. Lowering the classifier\nthreshold may increase recall, by increasing the number of true positive\nresults. It is also possible that lowering the threshold may leave recall\nunchanged, while the precision fluctuates. Thus, precision does not necessarily\ndecrease with recall.\n\nThe relationship between recall and precision can be observed in the\nstairstep area of the plot - at the edges of these steps a small change\nin the threshold considerably reduces precision, with only a minor gain in\nrecall.\n\n**Average precision** (AP) summarizes such a plot as the weighted mean of\nprecisions achieved at each threshold, with the increase in recall from the\nprevious threshold used as the weight:\n\n$\\text{AP} = \\sum_n (R_n - R_{n-1}) P_n$\n\nwhere $P_n$ and $R_n$ are the precision and recall at the\nnth threshold. A pair $(R_k, P_k)$ is referred to as an\n*operating point*.\n\nAP and the trapezoidal area under the operating points\n(:func:`sklearn.metrics.auc`) are common ways to summarize a precision-recall\ncurve that lead to different results. Read more in the\n`User Guide <precision_recall_f_measure_metrics>`.\n\nPrecision-recall curves are typically used in binary classification to study\nthe output of a classifier. In order to extend the precision-recall curve and\naverage precision to multi-class or multi-label classification, it is necessary\nto binarize the output. One curve can be drawn per label, but one can also draw\na precision-recall curve by considering each element of the label indicator\nmatrix as a binary prediction (`micro-averaging <average>`).\n\n<div class=\"alert alert-info\"><h4>Note</h4><p>See also :func:`sklearn.metrics.average_precision_score`,\n :func:`sklearn.metrics.recall_score`,\n :func:`sklearn.metrics.precision_score`,\n :func:`sklearn.metrics.f1_score`</p></div>\n"
 ]
},
{
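
The AP definition quoted in this cell is easy to sanity-check numerically. A hedged sketch with toy labels and scores (mine, not the example's data):

    from sklearn.metrics import average_precision_score, precision_recall_curve

    y_true = [0, 0, 1, 1, 1]
    y_score = [0.1, 0.4, 0.35, 0.8, 0.7]

    # Operating points (R_n, P_n) across all decision thresholds ...
    precision, recall, thresholds = precision_recall_curve(y_true, y_score)
    # ... and their recall-weighted mean: AP = sum_n (R_n - R_{n-1}) * P_n
    ap = average_precision_score(y_true, y_score)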
12 binary files not shown.

dev/_downloads/862c762bdafef83db4c2c9e803ed6295/plot_release_highlights_1_6_0.ipynb (+1 -1)

@@ -68,7 +68,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"## Download any dataset from the web\n\nThe function :func:`datasets.fetch_file` allows downloading a file from any given URL.\nThis convenience function provides built-in local disk caching, sha256 digest\nintegrity check and an automated retry mechanism on network error.\n\nThe goal is to provide the same convenience and reliability as dataset fetchers while\ngiving the flexibility to work with data from arbitrary online sources and file\nformats.\n\nThe dowloaded file can then be loaded with generic or domain specific functions such\nas `pandas.read_csv`, `pandas.read_parquet`, etc.\n\n"
+"## Download any dataset from the web\n\nThe function :func:`datasets.fetch_file` allows downloading a file from any given URL.\nThis convenience function provides built-in local disk caching, sha256 digest\nintegrity check and an automated retry mechanism on network error.\n\nThe goal is to provide the same convenience and reliability as dataset fetchers while\ngiving the flexibility to work with data from arbitrary online sources and file\nformats.\n\nThe downloaded file can then be loaded with generic or domain specific functions such\nas `pandas.read_csv`, `pandas.read_parquet`, etc.\n\n"
 ]
},
{
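
A typical end-to-end use of the pattern this cell describes, sketched with the URL and digest borrowed from the time-series example touched elsewhere in this commit (`pandas.read_parquet` is one of the loaders the text mentions):

    import pandas as pd
    from sklearn.datasets import fetch_file

    # Downloads once, caches locally, verifies the sha256 digest, and
    # retries on transient network errors; returns the local file path.
    path = fetch_file(
        "https://github.com/scikit-learn/examples-data/raw/refs/heads/master/bike-sharing-demand/dataset_44063.pq",
        sha256="d120af76829af0d256338dc6dd4be5df4fd1f35bf3a283cab66a51c1c6abd06a",
    )
    df = pd.read_parquet(path)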
Binary file not shown.

dev/_downloads/883c6b4b0cc369a10ddb336c09a9e4dd/plot_likelihood_ratios.py (+2 -2)

@@ -61,7 +61,7 @@ class proportion than the target application.
 
 estimator = LogisticRegression().fit(X_train, y_train)
 y_pred = estimator.predict(X_test)
-pos_LR, neg_LR = class_likelihood_ratios(y_test, y_pred)
+pos_LR, neg_LR = class_likelihood_ratios(y_test, y_pred, replace_undefined_by=1.0)
 print(f"LR+: {pos_LR:.3f}")
 
 # %%
@@ -81,7 +81,7 @@ class proportion than the target application.
 
 def scoring(estimator, X, y):
     y_pred = estimator.predict(X)
-    pos_lr, neg_lr = class_likelihood_ratios(y, y_pred, raise_warning=False)
+    pos_lr, neg_lr = class_likelihood_ratios(y, y_pred, replace_undefined_by=1.0)
     return {"positive_likelihood_ratio": pos_lr, "negative_likelihood_ratio": neg_lr}
 
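Both call sites swap the old `raise_warning=False` flag for `replace_undefined_by=1.0`. A toy sketch (my values, not from the commit) of the case the new parameter handles: LR+ = sensitivity / (1 - specificity) divides by zero whenever there are no false positives, and the substitute value is then returned instead of NaN:

    from sklearn.metrics import class_likelihood_ratios

    y_true = [1, 1, 0, 0]
    y_pred = [1, 0, 0, 0]  # one hit, one miss, no false positives
    # The false positive rate is 0, so LR+ is undefined and comes back as
    # the substitute 1.0; LR- = (1 - sensitivity) / specificity = 0.5.
    pos_lr, neg_lr = class_likelihood_ratios(y_true, y_pred, replace_undefined_by=1.0)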

10 binary files not shown.

dev/_downloads/98161c8b335acb98de356229c1005819/plot_precision_recall.py (+1 -1)

@@ -10,7 +10,7 @@
 measure of the fraction of relevant items among actually returned items while recall
 is a measure of the fraction of items that were returned among all items that should
 have been returned. 'Relevancy' here refers to items that are
-postively labeled, i.e., true positives and false negatives.
+positively labeled, i.e., true positives and false negatives.
 
 Precision (:math:`P`) is defined as the number of true positives (:math:`T_p`)
 over the number of true positives plus the number of false positives

dev/_downloads/983937feb1a5f82dab78ed11a2af1217/plot_time_series_lagged_features.py (+6 -1)

@@ -40,7 +40,12 @@
 pl.Config.set_fmt_str_lengths(20)
 
 bike_sharing_data_file = fetch_file(
-    "https://openml1.win.tue.nl/datasets/0004/44063/dataset_44063.pq",
+    # Original file was hosted at:
+    # https://openml1.win.tue.nl/datasets/0004/44063/dataset_44063.pq
+    # but is no longer reachable.
+    # TODO: switch to https://data.openml.org/datasets/0004/44063/dataset_44063.pq
+    # once possible.
+    "https://github.com/scikit-learn/examples-data/raw/refs/heads/master/bike-sharing-demand/dataset_44063.pq",
     sha256="d120af76829af0d256338dc6dd4be5df4fd1f35bf3a283cab66a51c1c6abd06a",
 )
 bike_sharing_data_file
3 binary files not shown.

dev/_downloads/9ca7cbe47e4cace7242fe4c5c43dfa52/plot_cost_sensitive_learning.py (+1 -1)

@@ -141,7 +141,7 @@ def fpr_score(y, y_pred, neg_label, pos_label):
 # average than the opposite: it is less costly for the financing institution to
 # not grant a credit to a potential customer that will not default (and
 # therefore miss a good customer that would have otherwise both reimbursed the
-# credit and payed interests) than to grant a credit to a customer that will
+# credit and paid interests) than to grant a credit to a customer that will
 # default.
 #
 # We define a python function that weights the confusion matrix and returns the
6 binary files not shown.
