from sklearn.datasets import load_iris
from sklearn.datasets import load_boston  # note: removed in scikit-learn 1.2, requires an older version
from sklearn.model_selection import train_test_split
import numpy as np
# Load the datasets and split them into train/test sets
np.random.seed(22)
data1, data2 = load_iris(), load_boston()
xtrain1, xtest1, ytrain1, ytest1 = train_test_split(data1.data, data1.target, train_size=0.7)
xtrain2, xtest2, ytrain2, ytest2 = train_test_split(data2.data, data2.target, train_size=0.7)
# Euclidean distance from one query point to every training sample,
# returned as (distance, target) pairs sorted by ascending distance
def vall(xtest, xtrue, ytrue):
    xtest, xtrue, ytrue = np.array(xtest), np.array(xtrue), np.array(ytrue)
    sque_sum = 0
    for i in range(xtrue.shape[1]):  # accumulate squared differences feature by feature
        sque_sum += (xtrue[:, i] - xtest[i]) ** 2
    sqrt = [[np.sqrt(sque_sum[j])] for j in range(len(xtrue))]
    d = np.concatenate([sqrt, ytrue.reshape([-1, 1])], axis=1)
    d = list(d)
    d.sort(key=lambda x: x[0])  # nearest neighbours first
    return d
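
# Optional aside (not part of the original script): the per-feature loop above can
# be replaced by a vectorised NumPy version using broadcasting. This is only a
# sketch of an equivalent alternative, assuming the same inputs as vall().
def vall_vectorised(xtest, xtrue, ytrue):
    xtest, xtrue, ytrue = np.array(xtest), np.array(xtrue), np.array(ytrue)
    dist = np.linalg.norm(xtrue - xtest, axis=1)    # Euclidean distance to each training row
    order = np.argsort(dist)                        # indices sorted by distance
    return [[dist[j], ytrue[j]] for j in order]     # same (distance, target) layout as vall()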
# k-nearest-neighbour prediction: classification (majority vote) or regression (mean)
def Knn_C_R(xtest, xdata, ydata, classes=True, k=10):
    d = vall(xtest=xtest, xtrue=xdata, ytrue=ydata)
    if classes:
        # classification: count the labels of the k nearest neighbours
        count = {}
        for d_part in d[:k]:
            if d_part[1] not in count:
                count[d_part[1]] = 1
            else:
                count[d_part[1]] += 1
        print('Predicted class:', max(count, key=count.get))
    else:
        # regression: average the targets of the k nearest neighbours
        mean1 = np.mean([i[1] for i in d[:k]])
        print('Predicted value:', mean1)
# Entry point: one classification query (4 iris features) and one regression query (13 Boston features)
if __name__ == '__main__':
    Knn_C_R(xtest=[15, 26, 98, 32], xdata=xtrain1, ydata=ytrain1, classes=True, k=10)
    Knn_C_R(xtest=[5, 8, 6, 7, 2, 3, 5, 6, 3, 5, 5, 9, 6], xdata=xtrain2, ydata=ytrain2, classes=False, k=10)
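
    # Sanity check (an addition, not part of the original script): scikit-learn's
    # built-in estimators fitted on the same splits should give comparable answers
    # for the same queries, assuming the default Euclidean metric.
    from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
    clf = KNeighborsClassifier(n_neighbors=10).fit(xtrain1, ytrain1)
    reg = KNeighborsRegressor(n_neighbors=10).fit(xtrain2, ytrain2)
    print('sklearn class:', clf.predict([[15, 26, 98, 32]])[0])
    print('sklearn value:', reg.predict([[5, 8, 6, 7, 2, 3, 5, 6, 3, 5, 5, 9, 6]])[0])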