2-class pattern classification problem

给定包含了5维信息的训练数据,训练出一个贝叶斯分类器和Naive Bayes分类器对测试数据进行二分类

Bayes Decision Rule

import numpy as np
from scipy.io import loadmat, savemat

# Load the training samples, their labels, and the unlabeled test samples.
data_train = loadmat('data_train.mat')['data_train']
label_train = loadmat('label_train.mat')['label_train'].ravel()  # labels as a 1-D vector
data_test = loadmat('data_test.mat')['data_test']

# Boolean row selectors for the two classes (labelled -1 and 1), built once
# and reused for every statistic below.
is_class_0 = label_train == -1
is_class_1 = label_train == 1
samples_0 = data_train[is_class_0]
samples_1 = data_train[is_class_1]

# Class priors: fraction of training labels that belong to each class.
prior_prob_0 = np.mean(is_class_0)
prior_prob_1 = np.mean(is_class_1)

# Per-feature mean and variance of each class.
# np.var defaults to ddof=0 (divide by N), whereas np.cov below defaults to
# the N-1 normalisation, so diag(cov_k) is not exactly equal to var_k.
mean_0 = samples_0.mean(axis=0)
var_0 = samples_0.var(axis=0)
mean_1 = samples_1.mean(axis=0)
var_1 = samples_1.var(axis=0)

# Full covariance matrix of each class; rowvar=False means columns are features.
cov_0 = np.cov(samples_0, rowvar=False)
cov_1 = np.cov(samples_1, rowvar=False)

# Univariate Gaussian probability density, parameterised by mean and variance.
def gaussian_pdf(x, mean, var):
    """Evaluate the normal density N(mean, var) at ``x``.

    Args:
        x: Point(s) at which to evaluate the density.
        mean: Mean of the distribution.
        var: Variance (not standard deviation) of the distribution.

    Returns:
        The density value(s); scalars and NumPy arrays broadcast as usual.
    """
    normaliser = np.sqrt(2.0 * np.pi * var)
    exponent = -((x - mean) ** 2) / (2 * var)
    return (1.0 / normaliser) * np.exp(exponent)

# Predict
def _log_likelihood(samples, mean, var):
    """Return the log-density of each row under independent per-feature Gaussians.

    The log of the Gaussian density is evaluated analytically instead of as
    ``np.log(pdf)``: for a sample far from the mean the density underflows to
    0.0, its log becomes -inf, and the class comparison degenerates.

    Args:
        samples: 2-D array, one sample per row and one feature per column.
        mean: Per-feature means of the class.
        var: Per-feature variances of the class.

    Returns:
        1-D array holding the summed per-feature log-density of every row.
    """
    return -0.5 * np.sum(np.log(2.0 * np.pi * var) + (samples - mean) ** 2 / var, axis=1)


# Log-posterior (up to a shared constant) of each class for every test sample.
# NOTE(review): only the per-feature variances are used here; cov_0 / cov_1 are
# never consulted, so this rule treats the features as independent.
class_0_prob = np.log(prior_prob_0) + _log_likelihood(data_test, mean_0, var_0)
class_1_prob = np.log(prior_prob_1) + _log_likelihood(data_test, mean_1, var_1)

# Pick the class with the larger posterior; ties go to class 1 (as before).
y_pred = np.where(class_0_prob > class_1_prob, -1, 1)

# Save results
# Predictions are stored as an (n_samples, 1) column in BDR_label_test.mat.
label_test = {'BDR_label_test': y_pred.reshape(-1, 1)}
savemat('BDR_label_test.mat', label_test)

# Sections of the parameter report, in output order:
# (heading text, 2-D array to dump, number format passed to np.savetxt).
report_sections = [
    ("Mean class -1:\n", mean_0.reshape(1, -1), '%s'),
    ("Mean class 1:\n", mean_1.reshape(1, -1), '%s'),
    ("\nVariance class -1:\n", var_0.reshape(1, -1), '%s'),
    ("\nVariance class 1:\n", var_1.reshape(1, -1), '%s'),
    ("\nCovariance class -1:\n", cov_0, '%0.4f'),
    ("\nCovariance class 1:\n", cov_1, '%0.4f'),
]

with open('BDR_parameters.txt', 'w', encoding='utf-8') as f:
    f.write("Model parameters:\n")
    for heading, values, number_format in report_sections:
        f.write(heading)
        np.savetxt(f, values, fmt=number_format)

先统计训练数据中类别为1和-1的先验概率,并计算每个类别各维特征的均值和方差;用高斯分布对每个类别的类条件概率密度建模,然后将待预测数据代入各类别的概率密度函数,与先验概率相乘得到后验概率(代码中取对数后相加),最后把样本归入后验概率最大的类别。

Naive Bayes

from sklearn.naive_bayes import GaussianNB
import numpy as np
from scipy.io import loadmat, savemat

# Load the training samples, their labels, and the unlabeled test samples.
data_train = loadmat('data_train.mat')['data_train']
label_train = loadmat('label_train.mat')['label_train'].ravel()  # labels as a 1-D vector
data_test = loadmat('data_test.mat')['data_test']

# Build the Gaussian naive Bayes model and train it (fit returns the estimator itself).
gnb = GaussianNB().fit(data_train, label_train)

# Classify the test samples.
y_pred = gnb.predict(data_test)

# Show the fitted per-class, per-feature means and variances.
print("Model parameters:")
print("Mean:", gnb.theta_)
print("Variance:", gnb.var_)

# Full covariance matrix of the training samples of each class, keyed by
# 'covariance_class_<label>'. These are only written to the report below.
covariances = {
    f'covariance_class_{class_label}': np.cov(
        data_train[label_train == class_label], rowvar=False
    )
    for class_label in np.unique(label_train)
}

# Dump the fitted model parameters and the covariances to a text report.
with open('NB_parameters.txt', 'w', encoding='utf-8') as f:
    f.write("Model parameters:\n")
    f.write("Mean:\n")
    np.savetxt(f, gnb.theta_, fmt='%s')
    f.write("\nVariance:\n")
    np.savetxt(f, gnb.var_, fmt='%s')
    for section_name, matrix in covariances.items():
        f.write(f"{section_name}:\n")
        np.savetxt(f, matrix, fmt='%0.4f')

# Store the predictions as an (n_samples, 1) column vector so the saved
# variable has one row per test sample.
y_pred_reshaped = y_pred.reshape(-1, 1)

label_test = {'NB_label_test': y_pred_reshaped}
savemat('NB_label_test.mat', label_test)

# Report the file that was actually written.
print("Predicted labels saved to NB_label_test.mat.")

大致方法与上面的Bayes Decision Rule相同,不同的是这里直接调用了sklearn提供的GaussianNB(高斯朴素贝叶斯)。最后输出的预测结果与Bayes Decision Rule相同。导致这一结果的主要原因是教授给的数据中各维特征之间的协方差很小,而Naive Bayes和Bayes Decision Rule之间最大的区别,就是Naive Bayes假设所有特征相互独立、互不影响,从而忽略了特征之间的相关性。因此当特征之间的协方差很小时,这两种方法的分类结果基本一样。(需要注意:上面Bayes Decision Rule的预测代码同样只使用了每一维的均值和方差,并没有用到协方差矩阵cov_0、cov_1,因此两段代码实际上是同一个模型,结果自然一致。)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值