import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import learning_curve
from sklearn.model_selection import ShuffleSplit
from matplotlib import pyplot as plt

# Synthetic data set: y = sqrt(x) plus uniform noise drawn from [-0.1, 0.1).
n_dots = 200
x = np.linspace(0, 1, n_dots)
y = np.sqrt(x) + 0.2 * np.random.rand(n_dots) - 0.1
# Reshape both arrays into single-column 2-D arrays (n_dots rows, 1 column).
x = x.reshape(-1, 1)
y = y.reshape(-1, 1)


def polynomial_mode(degree=1):
    """Build a polynomial-regression pipeline.

    Args:
        degree: Degree of the polynomial features generated from the input.

    Returns:
        An unfitted ``Pipeline`` that expands the input with
        ``PolynomialFeatures`` (without the bias column) and then fits a
        ``LinearRegression`` on the expanded features.
    """
    polynomial_features = PolynomialFeatures(degree=degree, include_bias=False)
    linear_regression = LinearRegression()
    pipeline = Pipeline([
        ("polynomial_features", polynomial_features),
        ("linear_regression", linear_regression),
    ])
    return pipeline


def plot_learning_curve(estimator, title, x, y, ylim=None, cv=None, n_jobs=1,
                        train_sizes=np.linspace(.1, 1.0, 5)):
    """Draw the learning curve of ``estimator`` on the current axes.

    Args:
        estimator: Estimator handed to ``learning_curve``.
        title: Title of the plot.
        x: Training samples.
        y: Target values.
        ylim: Optional ``(low, high)`` limits for the y axis; left untouched
            when ``None``.
        cv: Cross-validation strategy forwarded to ``learning_curve``.
        n_jobs: Number of parallel jobs forwarded to ``learning_curve``.
        train_sizes: Rule describing how the number of training samples
            grows along the curve; forwarded to ``learning_curve``.

    Returns:
        The ``matplotlib.pyplot`` module, so the caller can keep drawing or
        call ``show()``.
    """
    # Title of the plot.
    plt.title(title)
    if ylim is not None:
        plt.ylim(ylim)
    plt.xlabel("Training examples")
    plt.ylabel("Score")

    train_sizes, train_scores, test_scores = learning_curve(
        estimator, x, y, cv=cv, n_jobs=n_jobs, train_sizes=train_sizes)
    # Aggregate along axis 1 so there is one mean/std per training-set size.
    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
    plt.grid()

    # fill_between shades the band of one standard deviation above and below
    # the mean score.
    plt.fill_between(train_sizes,
                     train_scores_mean - train_scores_std,
                     train_scores_mean + train_scores_std,
                     alpha=0.1, color='red')
    plt.fill_between(train_sizes,
                     test_scores_mean - test_scores_std,
                     test_scores_mean + test_scores_std,
                     alpha=0.1, color='g')
    plt.plot(train_sizes, train_scores_mean, 'o-', color='r',
             label="Training score")
    plt.plot(train_sizes, test_scores_mean, 'o-', color='g',
             label="Cross-validation score")
    plt.legend(loc="best")
    return plt


# 10 random 80/20 train/test splits with a fixed seed.
cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=0)
titles = ['Learning Curves (Under Fitting)',
          'Learning Curves',
          'Learning Curves (Over Fitting)']
degrees = [1, 3, 10]
# Draw one learning curve per polynomial degree, side by side in one figure.
plt.figure(figsize=(18, 4), dpi=200)
for position, (degree, title) in enumerate(zip(degrees, titles), start=1):
    # One row, one column per degree; subplot positions are 1-based.
    plt.subplot(1, len(degrees), position)
    plot_learning_curve(polynomial_mode(degree), title, x, y,
                        ylim=(0.75, 1.01), cv=cv)
# Show the figure once, after every subplot has been drawn.
plt.show()
# sklearn -- learning curve
# Latest recommended article published on 2024-03-15 20:19:38