import numpy as np
from collections import Counter
class Naive_Bayes(object):
    """Naive Bayes classifier for categorical (discrete) features.

    Prior and class-conditional probabilities are estimated by counting at
    construction time. ``predict`` multiplies the prior of each label by the
    conditional probability of every feature value and returns the label
    with the largest product.

    Attributes:
        dataset: Training samples, each a dict ``{'x': [features...], 'y': label}``.
        dim: Number of features per sample (taken from the first sample).
        lamda: Additive smoothing term. ``0`` gives the maximum-likelihood
            estimate; a value greater than ``0`` gives the Bayesian estimate
            (the prior takes effect).
        prior_posibility: List of ``{'mkey': label, 'value': P(y=label)}``.
        condition_posibility: ``condition_posibility[i]`` is a list with one
            ``{'mkey': label, 'value': [{'mkey': v, 'value': P(x_i=v | y=label)}, ...]}``
            entry per label, in the same label order as ``prior_posibility``.
    """

    # Default training set: 15 samples with two categorical features each
    # and a label in {-1, 1}.
    dataset = [
        {'x': [1, 'S'], 'y': -1}, {'x': [1, 'M'], 'y': -1},
        {'x': [1, 'M'], 'y': 1}, {'x': [1, 'S'], 'y': 1},
        {'x': [1, 'S'], 'y': -1}, {'x': [2, 'S'], 'y': -1},
        {'x': [2, 'M'], 'y': -1}, {'x': [2, 'M'], 'y': 1},
        {'x': [2, 'L'], 'y': 1}, {'x': [2, 'L'], 'y': 1},
        {'x': [3, 'L'], 'y': 1}, {'x': [3, 'M'], 'y': 1},
        {'x': [3, 'M'], 'y': 1}, {'x': [3, 'L'], 'y': 1},
        {'x': [3, 'L'], 'y': -1},
    ]
    dim = 0  # number of features; overwritten per instance in __init__
    # Smoothing term: lamda = 0 is maximum-likelihood estimation,
    # lamda > 0 is Bayesian estimation (the prior takes effect).
    lamda = 0
    # Class-level placeholders only. __init__ always binds fresh lists on the
    # instance, so these shared lists are never mutated.
    prior_posibility = []
    condition_posibility = []

    def __init__(self, dataset=None, lamda=None):
        """Estimate the prior and conditional probability tables.

        Args:
            dataset: Optional list of ``{'x': [...], 'y': label}`` samples.
                When omitted, the class-level default ``dataset`` is used.
            lamda: Optional smoothing term. When omitted, the class-level
                default ``lamda`` is used.

        Raises:
            ValueError: If the training set is empty.
        """
        if dataset is not None:
            self.dataset = dataset
        if lamda is not None:
            self.lamda = lamda
        if len(self.dataset) == 0:
            raise ValueError("dataset must contain at least one sample")

        total_len = len(self.dataset)
        self.dim = len(self.dataset[0]['x'])
        counter_y = Counter(sample['y'] for sample in self.dataset)
        n_labels = len(counter_y)

        # Build a new list on the instance instead of appending to the
        # class-level list, which would leak priors between instances.
        self.prior_posibility = [
            dict(mkey=label,
                 value=(count + self.lamda) / (total_len + n_labels * self.lamda))
            for label, count in counter_y.items()
        ]

        condition = []
        for i in range(self.dim):  # one table per feature dimension
            # Every value feature i takes anywhere in the training set.
            counter_total_x = Counter(sample['x'][i] for sample in self.dataset)
            n_values = len(counter_total_x)
            per_label = []
            for label, label_count in counter_y.items():
                counter_x = Counter(
                    sample['x'][i] for sample in self.dataset if sample['y'] == label
                )
                denominator = label_count + n_values * self.lamda
                value_table = [
                    dict(mkey=value, value=(counter_x[value] + self.lamda) / denominator)
                    for value in counter_total_x
                ]
                per_label.append(dict(mkey=label, value=value_table))
            condition.append(per_label)
        self.condition_posibility = condition

    def predict(self, test_data):
        """Return the most probable label for one sample.

        Args:
            test_data: Sequence of feature values, indexed ``0 .. dim - 1``.

        Returns:
            The label (``'mkey'`` of a prior entry) whose prior times the
            product of matching conditional probabilities is largest. Ties
            resolve to the label that appears first in ``prior_posibility``.
        """
        scores = []
        for label_index, prior in enumerate(self.prior_posibility):
            score = prior['value']
            # Use this instance's own dimension, not a module-level global.
            for i in range(self.dim):
                for entry in self.condition_posibility[i][label_index]['value']:
                    if entry['mkey'] == test_data[i]:
                        score *= entry['value']
                        break
                # NOTE: a feature value never seen in training matches no
                # entry, so that feature is simply left out of the product.
            scores.append(score)
        return self.prior_posibility[np.argmax(scores)]['mkey']
# Demo: fit the classifier on its built-in training set and classify one
# sample. The instance must stay bound to the module-level name ``k``.
k = Naive_Bayes()
posibility = []
test = [2, 'S']
print(
    test,
    "的预测结果为: ",
    k.predict(test),
)
# Result: