%% TCN-LSTM-Attention multivariate time-series forecasting; requires MATLAB R2023a or later
%% Reset the environment
warning off % suppress warning messages
close all % close all open figure windows
clear % clear all workspace variables
clc % clear the command window
%% Load data
% NOTE(review): xlsread has been deprecated since R2019a in favor of
% readmatrix, but readmatrix treats header rows differently (NaN rows
% instead of skipping), so the original call is kept deliberately.
result = xlsread('data.xlsx');
%% Data layout
% BUG FIX: length(result) returns the LONGEST dimension, which would be the
% column count for a wide matrix; the row count is what is meant here.
num_samples = size(result, 1); % number of time steps (rows)
or_dim = size(result, 2); % number of original feature + output columns
kim = 4; % look-back window: kim historical rows form the predictors
zim = 1; % forecast horizon: predict zim time steps ahead
%% Build the supervised dataset with a sliding window
% Each row of res = [kim history rows flattened (kim*or_dim values),
%                    the full row located kim+zim-1 steps after the window start].
% IMPROVEMENT: preallocate res -- growing it inside the loop reallocates on
% every iteration (accidental O(n^2) in both time and copies).
num_windows = num_samples - kim - zim + 1; % number of usable windows
res = zeros(num_windows, (kim + 1) * or_dim);
for i = 1: num_windows
res(i, :) = [reshape(result(i: i + kim - 1, :), 1, kim * or_dim), result(i + kim + zim - 1, :)];
end
%% Dataset split parameters
outdim = 1; % number of target columns (the last column is the output)
num_size = 0.7; % fraction of the dataset used for training
% BUG FIX: the split must be based on the number of constructed windows
% (rows of res), not the raw row count num_samples. The original
% round(num_size * num_samples) overstates the training share and, for
% large kim/zim, could exceed size(res, 1) and leave no test set.
num_train_s = round(num_size * size(res, 1)); % number of training windows
f_ = size(res, 2) - outdim; % input feature dimension
%% Split into training and test sets (transposed: one sample per column)
P_train = res(1: num_train_s, 1: f_)';
T_train = res(1: num_train_s, f_ + 1: end)';
M = size(P_train, 2); % number of training samples
P_test = res(num_train_s + 1: end, 1: f_)';
T_test = res(num_train_s + 1: end, f_ + 1: end)';
N = size(P_test, 2); % number of test samples
%% Normalize to [0, 1]; statistics are fitted on the training set only
[p_train, ps_input] = mapminmax(P_train, 0, 1);
p_test = mapminmax('apply', P_test, ps_input); % reuse training-set scaling
[t_train, ps_output] = mapminmax(T_train, 0, 1);
t_test = mapminmax('apply', T_test, ps_output); % reuse training-set scaling
%% Wrap each matrix in a 1x1 cell -- the sequence format trainNetwork expects
pc_train = {p_train};
pc_test = {p_test};
tc_train = {t_train};
tc_test = {t_test};
%% Network hyperparameters
numFilters = 16; % number of convolution filters per layer
filterSize = 5; % convolution kernel width
dropoutFactor = 0.2; % spatial dropout probability
numBlocks = 1; % number of TCN residual blocks
numFeatures = f_; % input feature count
%% Input layer, seeded into an empty layer graph
layer = sequenceInputLayer(numFeatures, 'Normalization', 'rescale-symmetric', 'Name', 'input');
lgraph = layerGraph(layer);
% Track the name of the layer the next block should attach to
outputName = layer.Name;
%% Build the network -- TCN residual blocks
for i = 1 : numBlocks
dilationFactor = 2 ^(i - 1); % dilation doubles per block: 1, 2, 4, ...
% Main path of one residual block: two dilated causal convolutions
layers = [
convolution1dLayer(filterSize, numFilters, DilationFactor = dilationFactor, ...
Padding = "causal", Name="conv1_" + i) % dilated causal 1-D convolution
layerNormalizationLayer % layer normalization
spatialDropoutLayer(dropoutFactor) % spatial dropout
convolution1dLayer(filterSize, numFilters, ...
DilationFactor = dilationFactor, Padding = "causal") % second dilated causal convolution
layerNormalizationLayer % layer normalization
reluLayer % ReLU activation
spatialDropoutLayer(dropoutFactor) % spatial dropout
additionLayer(2, Name = "add_" + i)]; % merges main path (in1) with skip path (in2)
lgraph = addLayers(lgraph, layers); % append this block's layers to the graph
lgraph = connectLayers(lgraph, outputName, "conv1_" + i); % feed the previous output into the block
% Skip connection -- the first block needs a 1x1 conv to match channel counts
if i == 1
layer = convolution1dLayer(1, numFilters, Name = "convSkip"); % 1x1 conv projects input to numFilters channels
lgraph = addLayers(lgraph, layer); % add the projection layer to the graph
lgraph = connectLayers(lgraph, outputName, "convSkip"); % block input -> projection
lgraph = connectLayers(lgraph, "convSkip", "add_" + i + "/in2"); % projection -> addition layer, input port 2
else
lgraph = connectLayers(lgraph, outputName, "add_" + i + "/in2"); % identity skip -> addition layer, input port 2
end
% The addition layer becomes the attachment point for the next block
outputName = "add_" + i;
end
%% Network head: LSTM -> self-attention -> fully connected -> regression
% BUG FIX: the original assigned the result to a variable named "lstmLayer",
% shadowing the built-in lstmLayer function for the rest of the session;
% renamed to lstm_out.
lstm_out = lstmLayer(100, 'Name', 'lstm'); % LSTM with 100 hidden units
lgraph = addLayers(lgraph, lstm_out);
lgraph = connectLayers(lgraph, outputName, 'lstm');
% Attention + regression output
layers = [
selfAttentionLayer(numFeatures,numFeatures,"Name","multihead-attention") % self-attention: numFeatures heads, numFeatures key channels
fullyConnectedLayer(1, 'Name', 'fc') % single regression output
regressionLayer]; % mean-squared-error regression loss
lgraph = addLayers(lgraph, layers);
lgraph = connectLayers(lgraph, 'lstm', 'multihead-attention');
%% Apply L2 regularization via per-layer weight decay factors
weightDecay = 0.01; % L2 penalty factor applied to layer weights
% BUG FIX: the original loop modified a local copy of each layer and never
% wrote it back into the graph, so it had no effect; replaceLayer stores the
% edited layer. Also, 'CausalConvolution1DLayer' is not a real class name --
% 1-D convolution layers are nnet.cnn.layer.Convolution1DLayer.
% NOTE(review): LSTM layers expose InputWeightsL2Factor /
% RecurrentWeightsL2Factor rather than WeightL2Factor, so they are
% intentionally not handled by this loop.
numLayers = numel(lgraph.Layers); % number of layers currently in the graph
for i = 1:numLayers
layer = lgraph.Layers(i);
if isa(layer, 'nnet.cnn.layer.Convolution1DLayer') || isa(layer, 'nnet.cnn.layer.FullyConnectedLayer')
layer.WeightLearnRateFactor = 1;
layer.WeightL2Factor = weightDecay;
lgraph = replaceLayer(lgraph, layer.Name, layer); % write the modified layer back
end
end
%% Training options
options = trainingOptions('adam', ... % Adam optimizer
'MaxEpochs', 1000, ... % maximum number of training epochs
'MiniBatchSize',16,... % mini-batch size
'InitialLearnRate', 1e-3, ... % initial learning rate 0.001
'LearnRateSchedule', 'piecewise', ... % piecewise learning-rate decay
'LearnRateDropFactor', 0.1, ... % multiply the rate by 0.1 at each drop
'LearnRateDropPeriod', 500, ... % after 500 epochs the rate becomes 0.001 * 0.1
'Shuffle', 'every-epoch', ... % reshuffle the training data every epoch
'Plots', 'training-progress', ... % show the live training-progress plot
'Verbose', 1); % print progress to the command window
%% Train the network
[net,traininfo]= trainNetwork(pc_train, tc_train, lgraph, options);
%% Inspect the trained network structure
analyzeNetwork(net)
%% Run predictions on both sets (cell-array sequence outputs)
t_sim1 = predict(net, pc_train);
t_sim2 = predict(net, pc_test);
%% Unwrap each 1x1 cell and map predictions back to the original scale
T_sim1 = mapminmax('reverse', double(t_sim1{1}), ps_output);
T_sim2 = mapminmax('reverse', double(t_sim2{1}), ps_output);
%% Root mean squared error
error1 = sqrt(sum((T_sim1 - T_train).^2) ./ M);
error2 = sqrt(sum((T_sim2 - T_test ).^2) ./ N);
%% Evaluation metrics
% R2 (coefficient of determination)
R1 = 1 - norm(T_train - T_sim1)^2 / norm(T_train - mean(T_train))^2;
R2 = 1 - norm(T_test - T_sim2)^2 / norm(T_test - mean(T_test ))^2;
disp(['训练集数据的R2为:', num2str(R1)])
disp(['测试集数据的R2为:', num2str(R2)])
% MAE (mean absolute error)
mae1 = sum(abs(T_sim1 - T_train)) ./ M ;
mae2 = sum(abs(T_sim2 - T_test )) ./ N ;
disp(['训练集数据的MAE为:', num2str(mae1)])
disp(['测试集数据的MAE为:', num2str(mae2)])
%% MAPE (mean absolute percentage error)
% NOTE(review): MAPE is undefined when the target contains zeros --
% division by zero would yield Inf/NaN here; confirm the data has no zeros.
MAPE1 = mean(abs((T_train - T_sim1)./T_train));
MAPE2 = mean(abs((T_test - T_sim2)./T_test));
disp(['训练集数据的MAPE为:', num2str(MAPE1)])
disp(['测试集数据的MAPE为:', num2str(MAPE2)])
% MBE (mean bias error; positive = over-prediction on average)
mbe1 = sum(T_sim1 - T_train) ./ M ;
mbe2 = sum(T_sim2 - T_test ) ./ N ;
disp(['训练集数据的MBE为:', num2str(mbe1)])
disp(['测试集数据的MBE为:', num2str(mbe2)])
% MSE (mean squared error)
mse1 = sum((T_sim1 - T_train).^2)./M;
mse2 = sum((T_sim2 - T_test).^2)./N;
disp(['训练集数据的MSE为:', num2str(mse1)])
disp(['测试集数据的MSE为:', num2str(mse2)])
RMSE1 = sqrt(sumsqr(T_sim1 -