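// I previously wrote a convolution in Python; this is an attempt at the same thing in C++.
// It has not been tested, but it should be adequate for interview purposes.
// The input layout is NHWC and the weight layout is IC, H, W, OC, which differs from PyTorch.
// If you spot any mistakes, corrections are welcome.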
#include<bits/stdc++.h>
using namespace std;
/**
 * @brief Naive direct 2-D convolution (no kernel flip) over nested vectors.
 *
 * Layouts:
 *   - input  : N x H x W x C        (NHWC)
 *   - weight : IC x KH x KW x OC
 *   - bias   : one value per output channel; an empty vector means "no bias"
 *   - output : N x OH x OW x OC, where
 *              OH = (H + 2 * padding - KH) / stride + 1 (integer division),
 *              OW = (W + 2 * padding - KW) / stride + 1.
 *
 * The same zero padding is applied on all four sides of the spatial plane and
 * the same stride is used along both spatial axes.
 */
class myConv2d {
public:
    /// 4-D tensor stored as nested vectors.
    using Tensor4 = std::vector<std::vector<std::vector<std::vector<std::float_t>>>>;

private:
    Tensor4 input;                   // NHWC
    Tensor4 weight;                  // IC, KH, KW, OC
    std::vector<std::float_t> bias;  // per output channel; empty means "no bias"
    int padding = 1;
    int stride = 1;

    /// Throws std::invalid_argument (with message @p what) unless every
    /// sub-vector of @p t has the extents d1 x d2 x d3, i.e. the tensor is
    /// rectangular and safe to index from the sizes read off element [0].
    static void requireShape(const Tensor4& t, std::size_t d1, std::size_t d2,
                             std::size_t d3, const char* what) {
        for (const auto& volume : t) {
            if (volume.size() != d1) throw std::invalid_argument(what);
            for (const auto& plane : volume) {
                if (plane.size() != d2) throw std::invalid_argument(what);
                for (const auto& row : plane) {
                    if (row.size() != d3) throw std::invalid_argument(what);
                }
            }
        }
    }

public:
    /// Creates a layer with no data; forward() then returns an empty tensor.
    myConv2d() = default;

    /**
     * @brief Creates a layer bound to the given tensors.
     * @param input_nhwc   Input tensor, N x H x W x C.
     * @param weight_ihwo  Kernel tensor, IC x KH x KW x OC.
     * @param bias_per_oc  One bias per output channel, or empty for no bias.
     * @param padding_     Zero padding added to each spatial border (>= 0).
     * @param stride_      Step of the sliding window (>= 1).
     *
     * Arguments are validated lazily, when forward() is called.
     */
    myConv2d(Tensor4 input_nhwc, Tensor4 weight_ihwo,
             std::vector<std::float_t> bias_per_oc = {},
             int padding_ = 1, int stride_ = 1)
        : input(std::move(input_nhwc)),
          weight(std::move(weight_ihwo)),
          bias(std::move(bias_per_oc)),
          padding(padding_),
          stride(stride_) {}

    /**
     * @brief Computes the convolution of the stored input with the stored weight.
     * @return Output tensor N x OH x OW x OC; an empty tensor if the input
     *         batch is empty.
     * @throws std::invalid_argument if padding is negative, stride is not
     *         positive, a tensor is ragged, weight has a zero extent, the
     *         channel counts of input and weight differ, bias is non-empty
     *         but its length is not OC, or the kernel does not fit inside
     *         the padded input.
     */
    Tensor4 forward() const {
        using Row = std::vector<std::float_t>;
        using Plane = std::vector<Row>;
        using Volume = std::vector<Plane>;

        if (padding < 0) {
            throw std::invalid_argument("myConv2d: padding must be >= 0");
        }
        if (stride < 1) {
            throw std::invalid_argument("myConv2d: stride must be >= 1");
        }
        if (input.empty()) {
            return {};
        }

        // Read the extents from the first element, guarding each level so an
        // empty inner vector is never indexed.
        const std::size_t batch = input.size();
        const std::size_t in_h = input[0].size();
        const std::size_t in_w = in_h ? input[0][0].size() : 0;
        const std::size_t in_c = in_w ? input[0][0][0].size() : 0;
        requireShape(input, in_h, in_w, in_c,
                     "myConv2d: input is not a rectangular NHWC tensor");

        if (weight.empty() || weight[0].empty() || weight[0][0].empty() ||
            weight[0][0][0].empty()) {
            throw std::invalid_argument(
                "myConv2d: weight must have non-zero IC, KH, KW and OC");
        }
        const std::size_t k_h = weight[0].size();
        const std::size_t k_w = weight[0][0].size();
        const std::size_t out_c = weight[0][0][0].size();
        requireShape(weight, k_h, k_w, out_c,
                     "myConv2d: weight is not a rectangular IC x KH x KW x OC tensor");

        if (weight.size() != in_c) {
            throw std::invalid_argument(
                "myConv2d: weight IC does not match input channel count");
        }
        if (!bias.empty() && bias.size() != out_c) {
            throw std::invalid_argument(
                "myConv2d: bias length does not match weight OC");
        }

        const std::size_t pad = static_cast<std::size_t>(padding);
        const std::size_t step = static_cast<std::size_t>(stride);
        const std::size_t padded_h = in_h + 2 * pad;
        const std::size_t padded_w = in_w + 2 * pad;

        // Checked before subtracting: the extents are unsigned, so a kernel
        // larger than the padded input would otherwise wrap around.
        if (k_h > padded_h || k_w > padded_w) {
            throw std::invalid_argument(
                "myConv2d: kernel is larger than the padded input");
        }

        // Zero-filled copy of the input with a border of `pad` on each side.
        Tensor4 padded(batch, Volume(padded_h, Plane(padded_w, Row(in_c, 0.))));
        for (std::size_t n = 0; n < batch; ++n) {
            for (std::size_t y = 0; y < in_h; ++y) {
                for (std::size_t x = 0; x < in_w; ++x) {
                    padded[n][y + pad][x + pad] = input[n][y][x];
                }
            }
        }

        const std::size_t out_h = (padded_h - k_h) / step + 1;
        const std::size_t out_w = (padded_w - k_w) / step + 1;

        Tensor4 result(batch, Volume(out_h, Plane(out_w, Row(out_c, 0.))));
        for (std::size_t n = 0; n < batch; ++n) {
            for (std::size_t oy = 0; oy < out_h; ++oy) {
                for (std::size_t ox = 0; ox < out_w; ++ox) {
                    for (std::size_t oc = 0; oc < out_c; ++oc) {
                        // Accumulate over the window in (kh, kw, ic) order,
                        // then add the bias last.
                        std::float_t acc = 0.;
                        for (std::size_t kh = 0; kh < k_h; ++kh) {
                            for (std::size_t kw = 0; kw < k_w; ++kw) {
                                const Row& pixel = padded[n][oy * step + kh][ox * step + kw];
                                for (std::size_t ic = 0; ic < in_c; ++ic) {
                                    acc += pixel[ic] * weight[ic][kh][kw][oc];
                                }
                            }
                        }
                        if (!bias.empty()) {
                            acc += bias[oc];
                        }
                        result[n][oy][ox][oc] = acc;
                    }
                }
            }
        }
        return result;
    }
};