MSC
光谱数据中的多元散射,是由光线在样品中传播时与多种组分或颗粒发生散射造成的。这种散射会使光程变长,并使光在样品中传播时发生偏离和扩散,从而影响光谱信号的强度和形状。
本例的输入文件中,列是样本,行是波段。
#!/usr/bin/env python3.7
# encoding: utf-8
"""
@author: ISR
@contact: 84692429@qq.com
@file:
@time:
@desc:
对采集的光谱数据进行多元散射校正(MSC)。
输入格式为 csv:首行为各次重复测量的名称,首列为波段名称。
"""
import numpy as np
from pandas.core.frame import DataFrame
from sklearn.linear_model import LinearRegression
import pandas as pd
def msc(data):
    """Apply multiplicative scatter correction (MSC) to column-wise spectra.

    Each column is regressed against the mean spectrum with an ordinary
    least-squares line ``y = k * mean + b`` and is then corrected as
    ``(y - b) / k``.

    :param data: 2-D array-like of shape (n_bands, n_measurements); every
        column is one repeated measurement and every row is one band.
    :return: float ndarray of the same shape holding the corrected spectra.
    :raises ValueError: if ``data`` is not two-dimensional.

    If a fitted slope is zero (for example a constant spectrum), the division
    produces inf/NaN values for that column, as the plain NumPy division does.
    """
    spectra = np.asarray(data, dtype=float)
    if spectra.ndim != 2:
        raise ValueError(
            "data must be 2-D with shape (n_bands, n_measurements), "
            "got {0} dimension(s)".format(spectra.ndim)
        )

    # Reference spectrum: the mean over all measurements (the regression x).
    s_mean = spectra.mean(axis=1)
    ref_mean = s_mean.mean()
    ref_centered = s_mean - ref_mean

    # Closed-form least squares for all columns at once:
    #   k = sum((x - x_bar) * (y - y_bar)) / sum((x - x_bar) ** 2)
    #   b = y_bar - k * x_bar
    col_means = spectra.mean(axis=0)
    k = (ref_centered @ (spectra - col_means)) / (ref_centered @ ref_centered)
    b = col_means - k * ref_mean

    # k and b have one entry per column, so they broadcast along the rows.
    return (spectra - b) / k
if __name__ == '__main__':
    # Input CSV: the first row holds the measurement names and the first
    # column holds the band names (header=0, index_col=0).
    txt_path = r"C:\Users\79420\Desktop\test\30比1\30比1.csv"
    df = pd.read_csv(txt_path, header=0, index_col=0, engine="python", encoding="utf-8")
    # Keep the row/column labels so they can be re-attached to the result.
    columns_n = df.columns.values.tolist()
    # Public ``index`` attribute instead of the private ``_stat_axis``.
    row_n = df.index.values.tolist()
    # Report the table size as "<rows>行,<columns>列".
    print("{0}行,{1}列".format(len(row_n),len(columns_n)))
    df_arr = df.values
    msc_arr = msc(df_arr)
    # Write the corrected spectra next to the input, with the original labels.
    txt_path2 = r"C:\Users\79420\Desktop\test\30比1\30比1—b.csv"
    dabai1_arr_data_you = DataFrame(msc_arr, index=row_n, columns=columns_n)
    dabai1_arr_data_you.to_csv(txt_path2)
如果行是样本、列是波段,则使用下面的实现:
def MSC(data):
    """Apply multiplicative scatter correction (MSC) to row-wise spectra.

    Each row is regressed against the mean spectrum with an ordinary
    least-squares line ``y = k * mean + b`` and is then corrected as
    ``(y - b) / k``.

    :param data: raw spectrum data, 2-D array-like of shape
        (n_samples, n_features)
    :return: data after MSC: float ndarray of shape (n_samples, n_features)
    :raises ValueError: if ``data`` is not two-dimensional.

    If a fitted slope is zero (for example a constant spectrum), the division
    produces inf/NaN values for that row, as the plain NumPy division does.
    """
    spectra = np.asarray(data, dtype=float)
    if spectra.ndim != 2:
        raise ValueError(
            "data must be 2-D with shape (n_samples, n_features), "
            "got {0} dimension(s)".format(spectra.ndim)
        )

    # Reference spectrum: the mean over all samples (the regression x).
    mean = spectra.mean(axis=0)
    ref_mean = mean.mean()
    ref_centered = mean - ref_mean

    # Closed-form least squares for all rows at once:
    #   k = sum((x - x_bar) * (y - y_bar)) / sum((x - x_bar) ** 2)
    #   b = y_bar - k * x_bar
    row_means = spectra.mean(axis=1)
    k = ((spectra - row_means[:, None]) @ ref_centered) / (ref_centered @ ref_centered)
    b = row_means - k * ref_mean

    # k and b have one entry per row; add an axis so they broadcast over columns.
    return (spectra - b[:, None]) / k[:, None]