Improve the implementation of the convtranspose dilation mechanism
zjhellofss committed Aug 15, 2023
1 parent cc770eb commit 7218e64
Showing 3 changed files with 131 additions and 48 deletions.
52 changes: 35 additions & 17 deletions source/layer/details/convolution.cpp
@@ -52,6 +52,17 @@ ConvolutionLayer::ConvolutionLayer(ConvType conv_type, uint32_t output_channel,
   if (groups != 1) {
     in_channel /= groups;
   }
+
+  CHECK_GT(kernel_h, 0);
+  CHECK_GT(kernel_w, 0);
+  if (conv_type_ == ConvType::OpDeconv && dilation_h > 1) {
+    // kernel size after dilation
+    kernel_h = (kernel_h - 1) * (dilation_h_ - 1) + kernel_h;
+  }
+  if (conv_type_ == ConvType::OpDeconv && dilation_w > 1) {
+    // kernel size after dilation
+    kernel_w = (kernel_w - 1) * (dilation_w_ - 1) + kernel_w;
+  }
   this->InitWeightParam(output_channel, in_channel, kernel_h, kernel_w);
   if (use_bias_) {
     this->InitBiasParam(output_channel, 1, 1, 1);
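
The expansion above is the usual effective-size identity for a dilated kernel: (kernel - 1) * (dilation - 1) + kernel simplifies to dilation * (kernel - 1) + 1. A minimal standalone sketch (not code from this commit) to sanity-check the arithmetic:

#include <cstdint>
#include <iostream>

// Effective size of a kernel of size k under dilation d:
//   (k - 1) * (d - 1) + k  ==  d * (k - 1) + 1
uint32_t EffectiveKernelSize(uint32_t k, uint32_t d) {
  return (k - 1) * (d - 1) + k;
}

int main() {
  std::cout << EffectiveKernelSize(3, 2) << '\n';  // prints 5
  std::cout << EffectiveKernelSize(3, 3) << '\n';  // prints 7
  return 0;
}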
@@ -89,8 +100,15 @@ void ConvolutionLayer::set_weights(const std::vector<float>& weights) {
   CHECK(kernel_count > 0);
   const uint32_t kernel_count_group = kernel_count / groups_;
   const uint32_t kernel_channel = this->weights_.at(0)->channels();
-  const uint32_t kernel_height = this->weights_.at(0)->rows();
-  const uint32_t kernel_width = this->weights_.at(0)->cols();
+  uint32_t kernel_height = this->weights_.at(0)->rows();
+  uint32_t kernel_width = this->weights_.at(0)->cols();
+  if (dilation_h_ > 1) {
+    kernel_height = (kernel_height + dilation_h_ - 1) / dilation_h_;
+  }
+
+  if (dilation_w_ > 1) {
+    kernel_width = (kernel_width + dilation_w_ - 1) / dilation_w_;
+  }
 
   const uint32_t kernel_hw = kernel_height * kernel_width;
   const uint32_t kernel_nhw = kernel_count_group * kernel_hw;
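
The ceil division here inverts the constructor's expansion: the weights tensor now stores the dilation-expanded size d * (k - 1) + 1, and (stored + d - 1) / d recovers the original k exactly, so set_weights can keep reading the dense weight layout. A standalone check under that assumption (hypothetical helper, not commit code):

#include <cstdint>

// With stored = d * (k - 1) + 1, integer ceil-division is exact:
//   (stored + d - 1) / d = (d * (k - 1) + d) / d = k
constexpr uint32_t OriginalKernelSize(uint32_t stored, uint32_t d) {
  return (stored + d - 1) / d;
}
static_assert(OriginalKernelSize(5, 2) == 3, "5 = 2 * (3 - 1) + 1");
static_assert(OriginalKernelSize(7, 3) == 3, "7 = 3 * (3 - 1) + 1");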
@@ -115,9 +133,11 @@ void ConvolutionLayer::set_weights(const std::vector<float>& weights) {
           this->weights_.at(kernel_idx)->slice(ic);
 
       for (uint32_t kw = 0; kw < kernel_width; ++kw) {
-        float* kernel_ptr = kernel_channel_mat.colptr(kw);
+        uint32_t kw_dilation = kw * dilation_w_;
+        float* kernel_ptr = kernel_channel_mat.colptr(kw_dilation);
         for (uint32_t kh = 0; kh < kernel_height; ++kh) {
-          *(kernel_ptr + kh) = sub_weights.at(
+          uint32_t kh_dilation = kh * dilation_h_;
+          *(kernel_ptr + kh_dilation) = sub_weights.at(
               kernel_offset + channel_offset + kh * kernel_width + kw);
         }
       }
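
The kw * dilation_w_ and kh * dilation_h_ offsets scatter the dense weights into the zero-filled expanded buffer, which is what lets the dilated deconvolution reuse the ordinary im2col/col2im path unchanged. A hedged row-major illustration of the same scatter (hypothetical helper; the layer itself writes into Armadillo's column-major matrices):

#include <cstdint>

// Scatter a dense k_h x k_w kernel into a zero-initialized dilated buffer.
// With dilation 2, a 3x3 kernel      becomes the 5x5 pattern
//   a b c                              a 0 b 0 c
//   d e f                              0 0 0 0 0
//   g h i                              d 0 e 0 f
//                                      0 0 0 0 0
//                                      g 0 h 0 i
void ScatterDilated(const float* dense, float* dilated, uint32_t k_h,
                    uint32_t k_w, uint32_t d_h, uint32_t d_w) {
  const uint32_t out_w = (k_w - 1) * d_w + 1;  // expanded row length
  for (uint32_t kh = 0; kh < k_h; ++kh) {
    for (uint32_t kw = 0; kw < k_w; ++kw) {
      dilated[(kh * d_h) * out_w + (kw * d_w)] = dense[kh * k_w + kw];
    }
  }
}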
@@ -159,9 +179,10 @@ InferStatus ConvolutionLayer::Forward(
   }
 
   const uint32_t kernel_count = this->weights_.size();
-  const uint32_t kernel_h = this->weights_.at(0)->rows();
-  const uint32_t kernel_w = this->weights_.at(0)->cols();
   const uint32_t kernel_c = this->weights_.at(0)->channels();
+
+  uint32_t kernel_h = this->weights_.at(0)->rows();
+  uint32_t kernel_w = this->weights_.at(0)->cols();
   const uint32_t row_len = kernel_h * kernel_w;
   CHECK(kernel_h > 0 && kernel_w > 0 && kernel_c > 0)
       << "The size of kernel matrix in the convolution layer should be greater "
@@ -192,13 +213,11 @@ InferStatus ConvolutionLayer::Forward(
     const uint32_t input_c = input->channels();
     const uint32_t input_h = input->rows();
     const uint32_t input_w = input->cols();
-    CHECK(input_h > 0 && input_w > 0);
 
     const uint32_t input_padded_h = input_h + 2 * padding_h_;
     const uint32_t input_padded_w = input_w + 2 * padding_w_;
 
-    CHECK(input_padded_h >= kernel_h && input_padded_w >= kernel_w);
-
+    CHECK(input_h > 0 && input_w > 0);
     const auto [output_h, output_w] = CalcOutputSize(
         conv_type_ == ConvType::OpConv ? input_padded_h : input_h,
         conv_type_ == ConvType::OpConv ? input_padded_w : input_w, kernel_h,
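
For the deconvolution branch, with kernel_h and kernel_w already dilation-expanded by the constructor, the output size CalcOutputSize needs to produce should reduce to the standard ConvTranspose2d shape rule. A worked example under that assumption (hypothetical helper, not the layer's actual CalcOutputSize):

#include <cstdint>

// Assumed ConvTranspose2d shape rule, phrased against the
// dilation-expanded kernel size kernel_eff = d * (k - 1) + 1:
//   out = (in - 1) * stride - 2 * padding + kernel_eff + output_padding
constexpr uint32_t DeconvOutputSize(uint32_t in, uint32_t stride,
                                    uint32_t padding, uint32_t kernel_eff,
                                    uint32_t output_padding) {
  return (in - 1) * stride - 2 * padding + kernel_eff + output_padding;
}

// 2x2 input, 3x3 kernel with dilation 2 (effective size 5), stride 1,
// no padding; plausibly the shape exercised by the sample test below:
static_assert(DeconvOutputSize(2, 1, 0, 5, 0) == 6, "expect a 6x6 output");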
@@ -267,15 +286,12 @@ void ConvolutionLayer::DeconvCol2ImWithBias(
   CHECK(input_h > 0 && input_w > 0);
   CHECK(output_tensor != nullptr && !output_tensor->empty());
 
-  uint32_t size_h = (input_h - 1) * stride_h_ + kernel_h + output_padding_h_;
-  uint32_t size_w = (input_w - 1) * stride_w_ + kernel_w + output_padding_w_;
-  CHECK(size_h > kernel_h && size_w > kernel_w);
   arma::fmat output_padding(output_h + 2 * padding_h_,
                             output_w + 2 * padding_w_);
 
-  uint32_t slide_count_w = (size_w - kernel_w) / stride_w_ + 1;
-  uint32_t slide_count_h = (size_h - kernel_h) / stride_h_ + 1;
-#pragma omp parallel for
+  uint32_t slide_count_w = input_w;
+  uint32_t slide_count_h = input_h;
+
   for (uint32_t index = 0; index < slide_count_w * slide_count_h; ++index) {
     uint32_t x = index / slide_count_h;
     uint32_t y = index % slide_count_h;
@@ -290,13 +306,16 @@ void ConvolutionLayer::DeconvCol2ImWithBias(
     for (uint32_t col = 0; col < gemm_cols; ++col) {
       float* gemm_ptr = gemm_column.colptr(col);
       float* output_ptr = output_padding.colptr(offset_x + col);
-      memcpy(output_ptr + offset_y, gemm_ptr, sizeof(float) * gemm_rows);
+      for (uint32_t row = 0; row < gemm_rows; ++row) {
+        *(output_ptr + offset_y + row) += *(gemm_ptr + row);
+      }
     }
   }
 
   kernel_index = kernel_index + group * kernel_count_group;
   arma::fmat output(output_tensor->matrix_raw_ptr(kernel_index), output_h,
                     output_w, false, true);
+
   output =
       output_padding.submat(padding_h_, padding_w_, output_h + padding_h_ - 1,
                             output_w + padding_w_ - 1);
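
Replacing the memcpy with an element-wise += is the substantive fix here: neighboring deconvolution windows overlap in the output whenever the (dilated) kernel is larger than the stride, so each output cell must accumulate every contribution instead of keeping only the last write. Dropping the #pragma omp parallel for above is consistent with that change, since concurrent += into the shared output_padding buffer would race. A 1-D toy demonstration (hedged sketch, not commit code):

#include <cstdio>

int main() {
  // Input of two ones, kernel {1, 1, 1}, stride 1: correct col2im output is
  // {1, 2, 2, 1}; overwriting instead of accumulating would leave {1, 1, 1, 1}.
  const float kernel[3] = {1.f, 1.f, 1.f};
  const float input[2] = {1.f, 1.f};
  float out[4] = {0.f, 0.f, 0.f, 0.f};
  for (int x = 0; x < 2; ++x) {            // each input position
    for (int k = 0; k < 3; ++k) {
      out[x + k] += input[x] * kernel[k];  // accumulate overlapping writes
    }
  }
  std::printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);  // 1 2 2 1
  return 0;
}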
@@ -647,7 +666,6 @@ ParseParameterAttrStatus ConvolutionLayer::CreateInstance(
     conv_type = ConvType::OpConv;
   } else if (op->type == "nn.ConvTranspose2d") {
     conv_type = ConvType::OpDeconv;
-    CHECK(dilation_h == 1 && dilation_w == 1);
   } else {
     LOG(FATAL) << "Unknown convolution type: " << op->type;
   }
125 changes: 95 additions & 30 deletions test/test_layer/test_deconv.cpp
@@ -116,33 +116,98 @@ TEST(test_layer, deconv_group2) {
   }
 }
 
-//TEST(test_layer, deconv_group_dilation1) {
-//  using namespace kuiper_infer;
-//  RuntimeGraph graph("tmp/resnet/demo_deconv_dpt.pnnx.param",
-//                     "tmp/resnet/demo_deconv_dpt.pnnx.bin");
-//
-//  graph.Build();
-//  const uint32_t batch_size = 1;
-//  std::vector<std::shared_ptr<Tensor<float>>> inputs;
-//
-//  for (int i = 0; i < batch_size; ++i) {
-//    std::shared_ptr<Tensor<float>> input =
-//        std::make_shared<Tensor<float>>(16, 16, 31);
-//    input->Ones();
-//    inputs.push_back(input);
-//  }
-//
-//  graph.set_inputs("pnnx_input_0", inputs);
-//  graph.Forward(false);
-//
-//  std::vector<sftensor> outputs = graph.get_outputs("pnnx_output_0");
-//  arma::fmat real_data =
-//      CSVDataLoader::LoadData("tmp/resnet/test_convtranspose_d.csv");
-//  const auto& outputs_values = outputs.front()->values(true);
-//
-//  for (int i = 0; i < outputs_values.size(); ++i) {
-//    ASSERT_LE(std::abs(real_data.at(i) - outputs_values.at(i)), 2e-6f)
-//        << i << " real: " << real_data.at(i)
-//        << " predict: " << outputs_values.at(i) << " i: " << i;
-//  }
-//}
+TEST(test_layer, deconv_group_dilation1) {
+  using namespace kuiper_infer;
+  RuntimeGraph graph("tmp/resnet/demo_deconv_d_samplept.pnnx.param",
+                     "tmp/resnet/demo_deconv_d_samplept.pnnx.bin");
+
+  graph.Build();
+  const uint32_t batch_size = 1;
+  std::vector<std::shared_ptr<Tensor<float>>> inputs;
+
+  for (int i = 0; i < batch_size; ++i) {
+    std::shared_ptr<Tensor<float>> input =
+        std::make_shared<Tensor<float>>(1, 2, 2);
+    input->at(0, 0, 0) = 1;
+    input->at(0, 1, 1) = 1;
+    input->Show();
+    inputs.push_back(input);
+  }
+
+  graph.set_inputs("pnnx_input_0", inputs);
+  graph.Forward(false);
+
+  std::vector<sftensor> outputs = graph.get_outputs("pnnx_output_0");
+  arma::fmat real_data =
+      CSVDataLoader::LoadData("tmp/resnet/test_convtranspose_d_sample.csv");
+  const auto& outputs_values = outputs.front()->values(true);
+
+  outputs.front()->Show();
+  for (int i = 0; i < outputs_values.size(); ++i) {
+    ASSERT_LE(std::abs(real_data.at(i) - outputs_values.at(i)), 2e-5f)
+        << i << " real: " << real_data.at(i)
+        << " predict: " << outputs_values.at(i) << " i: " << i;
+  }
+}
+
+TEST(test_layer, deconv_group_dilation2) {
+  using namespace kuiper_infer;
+  RuntimeGraph graph("tmp/resnet/demo_deconv_dpt11.pnnx.param",
+                     "tmp/resnet/demo_deconv_dpt11.pnnx.bin");
+
+  graph.Build();
+  const uint32_t batch_size = 1;
+  std::vector<std::shared_ptr<Tensor<float>>> inputs;
+
+  for (int i = 0; i < batch_size; ++i) {
+    std::shared_ptr<Tensor<float>> input =
+        std::make_shared<Tensor<float>>(16, 16, 31);
+    input->Ones();
+    inputs.push_back(input);
+  }
+
+  graph.set_inputs("pnnx_input_0", inputs);
+  graph.Forward(false);
+
+  std::vector<sftensor> outputs = graph.get_outputs("pnnx_output_0");
+  arma::fmat real_data =
+      CSVDataLoader::LoadData("tmp/resnet/test_convtranspose_d.csv");
+  const auto& outputs_values = outputs.front()->values(true);
+
+  for (int i = 0; i < outputs_values.size(); ++i) {
+    ASSERT_LE(std::abs(real_data.at(i) - outputs_values.at(i)), 2e-6f)
+        << i << " real: " << real_data.at(i)
+        << " predict: " << outputs_values.at(i);
+  }
+}
+
+TEST(test_layer, deconv_group_dilation3) {
+  using namespace kuiper_infer;
+  RuntimeGraph graph("tmp/resnet/demo_deconv_dpt31.pnnx.param",
+                     "tmp/resnet/demo_deconv_dpt31.pnnx.bin");
+
+  graph.Build();
+  const uint32_t batch_size = 1;
+  std::vector<std::shared_ptr<Tensor<float>>> inputs;
+
+  for (int i = 0; i < batch_size; ++i) {
+    std::shared_ptr<Tensor<float>> input =
+        std::make_shared<Tensor<float>>(16, 16, 31);
+    input->Ones();
+    inputs.push_back(input);
+  }
+
+  graph.set_inputs("pnnx_input_0", inputs);
+  graph.Forward(false);
+
+  std::vector<sftensor> outputs = graph.get_outputs("pnnx_output_0");
+  arma::fmat real_data =
+      CSVDataLoader::LoadData("tmp/resnet/test_convtranspose_d31.csv");
+  const auto& outputs_values = outputs.front()->values(true);
+
+  for (int i = 0; i < outputs_values.size(); ++i) {
+    ASSERT_LE(std::abs(real_data.at(i) - outputs_values.at(i)), 2e-6f)
+        << i << " real: " << real_data.at(i)
+        << " predict: " << outputs_values.at(i);
+  }
+}
