本文总结了感知机的基本原理,并给出了python的实现代码。
参考:
- 《机器学习》 周志华
- https://github.com/datawhalechina/pumpkin-book 这个项目补全了周志华《机器学习》中省略的公式推导过程
1、感知机原理
感知机(Perceptron)由两层神经元组成,输入层接收输入,输出层是M-P神经元(又称阈值逻辑单元threshold logic unit),计算公式为:
$$y = f\Big(\sum_{i=1}^{n} w_i x_i - \theta\Big)$$
上式中的激活函数 $f$ 可以选择阶跃函数或者sigmoid函数,$w_i$ 是输入的第 $i$ 个分量和输出神经元之间的连接权值。将前面公式中的阈值 $\theta$ 当作哑节点(dummy node,即固定输入为 $-1$、对应权重为 $w_{n+1}=\theta$),公式可以写成:
$$y = f\Big(\sum_{i=1}^{n+1} w_i x_i\Big)$$
损失函数为:
$$L(\boldsymbol{w}) = \sum_{\boldsymbol{x}_i \in M} (\hat{y}_i - y_i)\,\boldsymbol{w}^{\mathrm{T}}\boldsymbol{x}_i$$
其中 $M$ 是分类错误的样本集,$y_i$ 是样本的真实标记,$\hat{y}_i$ 是感知机的输出。从上式可以看出,当样本分类正确的时候,损失为0;分类错误时损失非负。损失函数的梯度为:
$$\nabla_{\boldsymbol{w}} L = \sum_{\boldsymbol{x}_i \in M} (\hat{y}_i - y_i)\,\boldsymbol{x}_i$$
因此梯度下降更新权重:
$$\boldsymbol{w} \leftarrow \boldsymbol{w} + \eta\,(y_i - \hat{y}_i)\,\boldsymbol{x}_i$$
2、python实现
代码如下
# -*- coding:utf-8 -*-
# author: dtf320
"""单层感知机"""
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
class SinglePerceptron:
    """Single-layer perceptron trained with per-sample (stochastic) updates.

    The threshold is folded into the weight matrix by appending a dummy
    input fixed at -1 to every sample, so the decision rule is
    ``y = step(W . [x, -1])`` and the update is
    ``W <- W + eta * (y - y_hat) * x``.
    """

    def __init__(self, eta=0.1, epoch=100):
        """
        :param eta: learning rate used in the weight update
        :param epoch: number of full passes over the training set
        """
        self.eta = eta
        self.epoch = epoch

    def add_dummy_node(self, X):
        """Append a constant -1 column so the threshold acts like a weight.

        :param X: [n_samples, n_features]
        :return: [n_samples, n_features + 1]
        """
        X = np.asarray(X)
        return np.column_stack((X, -np.ones((X.shape[0], 1))))

    def sgn(self, mat):
        """Element-wise step function: 1 where x > 0, else 0.

        Vectorized replacement of the original per-element ``map`` loop;
        same threshold semantics (x <= 0 maps to 0).

        :param mat: array-like of any shape
        :return: int array with the same shape
        """
        return (np.asarray(mat) > 0).astype(int)

    def fit(self, X, Y):
        """Train the perceptron with the rule ``w <- w + eta*(y - y_hat)*x``.

        :param X: [n_samples, n_features]
        :param Y: [n_samples, dim_output] with 0/1 labels
        """
        self.X = self.add_dummy_node(np.array(X))  # dummy node appended
        self.Y = np.array(Y)
        self.n_samples, self.n_features = self.X.shape
        self.dim_output = self.Y.shape[1]
        # Random initial weights, one row per output neuron.
        self.W1 = np.random.randn(self.dim_output, self.n_features)
        for j in range(self.epoch):
            for i in range(self.n_samples):
                X_ = self.X[i, :].reshape(1, self.n_features)
                Y_ = self.Y[i, :].reshape(self.dim_output, 1)
                Yest = self.sgn(np.dot(self.W1, X_.T))  # [dim_output, 1]
                # BUGFIX: the original used ``Y_.T - Yest`` which broadcasts
                # to [dim_output, dim_output] and breaks for dim_output > 1.
                E_y = Y_ - Yest  # [dim_output, 1]
                delta_w = self.eta * np.dot(E_y, X_)  # [dim_output, n_features]
                self.W1 += delta_w
                print("epoch", j, "iter", i, "weight", self.W1)

    def prediction(self, X_in):
        """Predict 0/1 labels for new samples.

        :param X_in: [n_samples, n_features]
        :return: column vector of 0/1 predictions, shape [-1, 1]
        """
        X_in = self.add_dummy_node(np.array(X_in))
        res = self.sgn(np.dot(self.W1, X_in.T))
        return res.reshape(-1, 1)

    def drawing_edge(self, x_range):
        """Plot the training samples and the learned decision boundary.

        BUGFIX: the original read the module-level globals ``X``/``Y``;
        this version uses the training data stored by ``fit`` so the
        method works outside the demo script. Assumes 2 input features
        (W1 has 3 columns including the dummy weight).

        :param x_range: [x_min, x_max] span for the boundary line
        """
        xs = np.arange(x_range[0], x_range[1], 0.1)
        # Boundary: w0*x + w1*y - w2 = 0  =>  y = (w2 - w0*x) / w1
        ys = (self.W1[0][2] - self.W1[0][0] * xs) / self.W1[0][1]
        plt.plot(xs, ys)
        negative_idx = np.where(self.Y == 0)[0]
        positive_idx = np.where(self.Y == 1)[0]
        for i in negative_idx:
            p1 = plt.scatter(self.X[i, 0], self.X[i, 1], c="r")
        for i in positive_idx:
            p2 = plt.scatter(self.X[i, 0], self.X[i, 1], c="g")
        plt.legend([p1, p2], ["Setosa", "Versicolour"])
        plt.show()
if __name__ == "__main__":
    from sklearn import datasets

    # Anderson's Iris data set: 150 samples, 50 per species
    # (Setosa, Versicolour, Virginica).
    X, Y = datasets.load_iris(return_X_y=True)
    # Keep the first two species only (rows 0-99).  Of the four fields
    # [sepal length, sepal width, petal length, petal width], keep sepal
    # width and petal length; labels become a column vector.
    X = X[:100, [1, 2]]
    Y = Y.reshape(-1, 1)[:100]
    # Stratified split keeps the class proportions in train and test.
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=0.3, stratify=Y
    )
    model = SinglePerceptron(eta=0.1, epoch=10)
    model.fit(X_train, Y_train)
    y_pred = model.prediction(X_test)
    print("*" * 30)
    print("Number of train samples:", len(X_train))
    print("Number of test samples:", len(X_test))
    print("Accuracy:", accuracy_score(Y_test, y_pred))
    model.drawing_edge([1, 5])
结果如下所示:
Number of train samples: 70
Number of test samples: 30
Accuracy: 1.0
图中的蓝色线条是感知机计算出的两类分界线。