From 7218e64a5273735754aba9678100fd928ee53b35 Mon Sep 17 00:00:00 2001
From: zjhellofss
Date: Tue, 15 Aug 2023 21:22:27 +0800
Subject: [PATCH] Improve the implementation of the convtranspose dilation
 mechanism
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 source/layer/details/convolution.cpp |  52 +++++++----
 test/test_layer/test_deconv.cpp      | 125 ++++++++++++++++++++-------
 tmp                                  |   2 +-
 3 files changed, 131 insertions(+), 48 deletions(-)

diff --git a/source/layer/details/convolution.cpp b/source/layer/details/convolution.cpp
index 8326c901..a459a59d 100644
--- a/source/layer/details/convolution.cpp
+++ b/source/layer/details/convolution.cpp
@@ -52,6 +52,17 @@ ConvolutionLayer::ConvolutionLayer(ConvType conv_type, uint32_t output_channel,
   if (groups != 1) {
     in_channel /= groups;
   }
+
+  CHECK_GT(kernel_h, 0);
+  CHECK_GT(kernel_w, 0);
+  if (conv_type_ == ConvType::OpDeconv && dilation_h > 1) {
+    // kernel size after dilation
+    kernel_h = (kernel_h - 1) * (dilation_h_ - 1) + kernel_h;
+  }
+  if (conv_type_ == ConvType::OpDeconv && dilation_w > 1) {
+    // kernel size after dilation
+    kernel_w = (kernel_w - 1) * (dilation_w_ - 1) + kernel_w;
+  }
   this->InitWeightParam(output_channel, in_channel, kernel_h, kernel_w);
   if (use_bias_) {
     this->InitBiasParam(output_channel, 1, 1, 1);
@@ -89,8 +100,15 @@ void ConvolutionLayer::set_weights(const std::vector<float>& weights) {
   CHECK(kernel_count > 0);
   const uint32_t kernel_count_group = kernel_count / groups_;
   const uint32_t kernel_channel = this->weights_.at(0)->channels();
-  const uint32_t kernel_height = this->weights_.at(0)->rows();
-  const uint32_t kernel_width = this->weights_.at(0)->cols();
+  uint32_t kernel_height = this->weights_.at(0)->rows();
+  uint32_t kernel_width = this->weights_.at(0)->cols();
+  if (dilation_h_ > 1) {
+    kernel_height = (kernel_height + dilation_h_ - 1) / dilation_h_;
+  }
+
+  if (dilation_w_ > 1) {
+    kernel_width = (kernel_width + dilation_w_ - 1) / dilation_w_;
+  }
 
   const uint32_t kernel_hw = kernel_height * kernel_width;
   const uint32_t kernel_nhw = kernel_count_group * kernel_hw;
@@ -115,9 +133,11 @@ void ConvolutionLayer::set_weights(const std::vector<float>& weights) {
           this->weights_.at(kernel_idx)->slice(ic);
 
       for (uint32_t kw = 0; kw < kernel_width; ++kw) {
-        float* kernel_ptr = kernel_channel_mat.colptr(kw);
+        uint32_t kw_dilation = kw * dilation_w_;
+        float* kernel_ptr = kernel_channel_mat.colptr(kw_dilation);
         for (uint32_t kh = 0; kh < kernel_height; ++kh) {
-          *(kernel_ptr + kh) = sub_weights.at(
+          uint32_t kh_dilation = kh * dilation_h_;
+          *(kernel_ptr + kh_dilation) = sub_weights.at(
               kernel_offset + channel_offset + kh * kernel_width + kw);
         }
       }
@@ -159,9 +179,10 @@ InferStatus ConvolutionLayer::Forward(
   }
 
   const uint32_t kernel_count = this->weights_.size();
-  const uint32_t kernel_h = this->weights_.at(0)->rows();
-  const uint32_t kernel_w = this->weights_.at(0)->cols();
   const uint32_t kernel_c = this->weights_.at(0)->channels();
+
+  uint32_t kernel_h = this->weights_.at(0)->rows();
+  uint32_t kernel_w = this->weights_.at(0)->cols();
   const uint32_t row_len = kernel_h * kernel_w;
   CHECK(kernel_h > 0 && kernel_w > 0 && kernel_c > 0)
       << "The size of kernel matrix in the convolution layer should be greater "
@@ -192,13 +213,11 @@ InferStatus ConvolutionLayer::Forward(
     const uint32_t input_c = input->channels();
     const uint32_t input_h = input->rows();
     const uint32_t input_w = input->cols();
+    CHECK(input_h > 0 && input_w > 0);
 
     const uint32_t input_padded_h = input_h + 2 * padding_h_;
     const uint32_t input_padded_w = input_w + 2 * padding_w_;
-    CHECK(input_padded_h >= kernel_h && input_padded_w >= kernel_w);
-
-    CHECK(input_h > 0 && input_w > 0);
 
     const auto [output_h, output_w] = CalcOutputSize(
         conv_type_ == ConvType ::OpConv ? input_padded_h : input_h,
         conv_type_ == ConvType ::OpConv ? input_padded_w : input_w, kernel_h,
@@ -267,15 +286,12 @@ void ConvolutionLayer::DeconvCol2ImWithBias(
   CHECK(input_h > 0 && input_w > 0);
   CHECK(output_tensor != nullptr && !output_tensor->empty());
 
-  uint32_t size_h = (input_h - 1) * stride_h_ + kernel_h + output_padding_h_;
-  uint32_t size_w = (input_w - 1) * stride_w_ + kernel_w + output_padding_w_;
-  CHECK(size_h > kernel_h && size_w > kernel_w);
 
   arma::fmat output_padding(output_h + 2 * padding_h_,
                             output_w + 2 * padding_w_);
-  uint32_t slide_count_w = (size_w - kernel_w) / stride_w_ + 1;
-  uint32_t slide_count_h = (size_h - kernel_h) / stride_h_ + 1;
-#pragma omp parallel for
+  uint32_t slide_count_w = input_w;
+  uint32_t slide_count_h = input_h;
+
   for (uint32_t index = 0; index < slide_count_w * slide_count_h; ++index) {
     uint32_t x = index / slide_count_h;
     uint32_t y = index % slide_count_h;
@@ -290,13 +306,16 @@ void ConvolutionLayer::DeconvCol2ImWithBias(
     for (uint32_t col = 0; col < gemm_cols; ++col) {
       float* gemm_ptr = gemm_column.colptr(col);
       float* output_ptr = output_padding.colptr(offset_x + col);
-      memcpy(output_ptr + offset_y, gemm_ptr, sizeof(float) * gemm_rows);
+      for (uint32_t row = 0; row < gemm_rows; ++row) {
+        *(output_ptr + offset_y + row) += *(gemm_ptr + row);
+      }
     }
   }
 
   kernel_index = kernel_index + group * kernel_count_group;
   arma::fmat output(output_tensor->matrix_raw_ptr(kernel_index), output_h,
                     output_w, false, true);
+
   output = output_padding.submat(padding_h_, padding_w_,
                                  output_h + padding_h_ - 1,
                                  output_w + padding_w_ - 1);
@@ -647,7 +666,6 @@ ParseParameterAttrStatus ConvolutionLayer::CreateInstance(
     conv_type = ConvType::OpConv;
   } else if (op->type == "nn.ConvTranspose2d") {
     conv_type = ConvType::OpDeconv;
-    CHECK(dilation_h == 1 && dilation_w == 1);
   } else {
     LOG(FATAL) << "Unknown convolution type: " << op->type;
   }
diff --git a/test/test_layer/test_deconv.cpp b/test/test_layer/test_deconv.cpp
index aea5bc0a..68b033f5 100644
--- a/test/test_layer/test_deconv.cpp
+++ b/test/test_layer/test_deconv.cpp
@@ -116,33 +116,98 @@ TEST(test_layer, deconv_group2) {
   }
 }
 
-//TEST(test_layer, deconv_group_dilation1) {
-//  using namespace kuiper_infer;
-//  RuntimeGraph graph("tmp/resnet/demo_deconv_dpt.pnnx.param",
-//                     "tmp/resnet/demo_deconv_dpt.pnnx.bin");
-//
-//  graph.Build();
-//  const uint32_t batch_size = 1;
-//  std::vector<std::shared_ptr<Tensor<float>>> inputs;
-//
-//  for (int i = 0; i < batch_size; ++i) {
-//    std::shared_ptr<Tensor<float>> input =
-//        std::make_shared<Tensor<float>>(16, 16, 31);
-//    input->Ones();
-//    inputs.push_back(input);
-//  }
-//
-//  graph.set_inputs("pnnx_input_0", inputs);
-//  graph.Forward(false);
-//
-//  std::vector<sftensor> outputs = graph.get_outputs("pnnx_output_0");
-//  arma::fmat real_data =
-//      CSVDataLoader::LoadData("tmp/resnet/test_convtranspose_d.csv");
-//  const auto& outputs_values = outputs.front()->values(true);
-//
-//  for (int i = 0; i < outputs_values.size(); ++i) {
-//    ASSERT_LE(std::abs(real_data.at(i) - outputs_values.at(i)), 2e-6f)
-//        << i << " real: " << real_data.at(i)
-//        << " predict: " << outputs_values.at(i) << " i: " << i;
-//  }
-//}
\ No newline at end of file
+TEST(test_layer, deconv_group_dilation1) {
+  using namespace kuiper_infer;
+  RuntimeGraph graph("tmp/resnet/demo_deconv_d_samplept.pnnx.param",
"tmp/resnet/demo_deconv_d_samplept.pnnx.bin"); + + graph.Build(); + const uint32_t batch_size = 1; + std::vector>> inputs; + + for (int i = 0; i < batch_size; ++i) { + std::shared_ptr> input = + std::make_shared>(1, 2, 2); + input->at(0, 0, 0) = 1; + input->at(0, 1, 1) = 1; + input->Show(); + inputs.push_back(input); + } + + graph.set_inputs("pnnx_input_0", inputs); + graph.Forward(false); + + std::vector outputs = graph.get_outputs("pnnx_output_0"); + arma::fmat real_data = + CSVDataLoader::LoadData("tmp/resnet/test_convtranspose_d_sample.csv"); + const auto& outputs_values = outputs.front()->values(true); + + outputs.front()->Show(); + for (int i = 0; i < outputs_values.size(); ++i) { + ASSERT_LE(std::abs(real_data.at(i) - outputs_values.at(i)), 2e-5f) + << i << " real: " << real_data.at(i) + << " predict: " << outputs_values.at(i) << " i: " << i; + } +} + +TEST(test_layer, deconv_group_dilation2) { + using namespace kuiper_infer; + RuntimeGraph graph("tmp/resnet/demo_deconv_dpt11.pnnx.param", + "tmp/resnet/demo_deconv_dpt11.pnnx.bin"); + + graph.Build(); + const uint32_t batch_size = 1; + std::vector>> inputs; + + for (int i = 0; i < batch_size; ++i) { + std::shared_ptr> input = + std::make_shared>(16, 16, 31); + input->Ones(); + inputs.push_back(input); + } + + graph.set_inputs("pnnx_input_0", inputs); + graph.Forward(false); + + std::vector outputs = graph.get_outputs("pnnx_output_0"); + arma::fmat real_data = + CSVDataLoader::LoadData("tmp/resnet/test_convtranspose_d.csv"); + const auto& outputs_values = outputs.front()->values(true); + + for (int i = 0; i < outputs_values.size(); ++i) { + ASSERT_LE(std::abs(real_data.at(i) - outputs_values.at(i)), 2e-6f) + << i << " real: " << real_data.at(i) + << " predict: " << outputs_values.at(i); + } +} + +TEST(test_layer, deconv_group_dilation3) { + using namespace kuiper_infer; + RuntimeGraph graph("tmp/resnet/demo_deconv_dpt31.pnnx.param", + "tmp/resnet/demo_deconv_dpt31.pnnx.bin"); + + graph.Build(); + const uint32_t batch_size = 1; + std::vector>> inputs; + + for (int i = 0; i < batch_size; ++i) { + std::shared_ptr> input = + std::make_shared>(16, 16, 31); + input->Ones(); + inputs.push_back(input); + } + + graph.set_inputs("pnnx_input_0", inputs); + graph.Forward(false); + + std::vector outputs = graph.get_outputs("pnnx_output_0"); + arma::fmat real_data = + CSVDataLoader::LoadData("tmp/resnet/test_convtranspose_d31.csv"); + const auto& outputs_values = outputs.front()->values(true); + + for (int i = 0; i < outputs_values.size(); ++i) { + ASSERT_LE(std::abs(real_data.at(i) - outputs_values.at(i)), 2e-6f) + << i << " real: " << real_data.at(i) + << " predict: " << outputs_values.at(i); + } +} \ No newline at end of file diff --git a/tmp b/tmp index ab936755..84f3ae85 160000 --- a/tmp +++ b/tmp @@ -1 +1 @@ -Subproject commit ab936755fcf8f1db4246d69e1a59180365d28c19 +Subproject commit 84f3ae857901f05743014fc72f3931239e0e8625