import numpy as np
from sklearn import datasets
boston = datasets.load_boston()
X = boston.data
y = boston.target
X = X[y<50.0]
y = y[y<50.0]
X.shape
(490, 13)
y.shape
(490,)
使用自己实现的LinearRegression进行多元线性回归房价预测
from playML.model_selection import train_test_split
# 1.数据预处理
X_train, X_test, y_train, y_test = train_test_split(X, y, seed=666)
X_train.shape
(392, 13)
# 2.模型训练
from playML.LinearRegression import LinearRegression
reg = LinearRegression()
reg.fit_normal(X_train, y_train)
LinearRegression()
reg.coefficient_
array([-1.18919477e-01, 3.63991462e-02, -3.56494193e-02, 5.66737830e-02,
-1.16195486e+01, 3.42022185e+00, -2.31470282e-02, -1.19509560e+00,
2.59339091e-01, -1.40112724e-02, -8.36521175e-01, 7.92283639e-03,
-3.81966137e-01])
reg.coefficient_.T
array([-1.18919477e-01, 3.63991462e-02, -3.56494193e-02, 5.66737830e-02,
-1.16195486e+01, 3.42022185e+00, -2.31470282e-02, -1.19509560e+00,
2.59339091e-01, -1.40112724e-02, -8.36521175e-01, 7.92283639e-03,
-3.81966137e-01])
reg.interception_
34.16143549624022
# 4.模型预测
reg.predict(X_test)
array([18.08047724, 25.52374702, 12.93068154, 32.89616169, 24.17956679,
2.67010028, 26.64700396, 32.23851244, 13.96168643, 24.04280799,
14.93247906, 10.58513734, 30.28710828, 16.2782111 , 23.67817017,
25.64047759, 18.67821777, 24.02076592, 28.77437534, 26.93946254,
12.81354434, 27.22770353, 26.0804716 , 23.41900039, 20.79727917,
31.96786535, 14.90862058, 20.954883 , 12.92314457, 29.80207127,
35.28684545, 5.03624207, 13.10143242, 35.54317123, 15.98890703,
21.53597166, 12.47621364, 29.12864349, 27.36022467, 24.05031901,
14.35220626, 23.61433371, 10.90347719, 22.38154099, 18.62937294,
16.37126778, 24.43078261, 33.06293684, 19.19809767, 27.04404675,
18.05674457, 14.85136715, 25.08935314, 16.0884098 , 21.74619772,
16.3194766 , 24.25591698, 11.72935395, 27.92260116, 31.05867941,
20.17444189, 24.9964365 , 25.99734127, 12.14007801, 16.58246637,
27.30690012, 22.26787948, 21.72492458, 31.50402544, 14.03991351,
16.42672344, 24.77534313, 25.18133042, 18.65228238, 17.34412768,
27.90749795, 23.71553798, 14.62906156, 11.22231617, 31.4243847 ,
33.66552044, 17.66375929, 18.69989012, 17.79423031, 25.15668084,
23.66633124, 24.55578753, 26.09123112, 25.49718056, 20.28898727,
24.87506605, 33.48356492, 36.08610386, 23.07558528, 18.79472835,
31.04138456, 35.78577626, 20.84603282])
reg.score(X_test, y_test)
0.8129802602658537
Scikit-learn中的回归问题