Skip to content

Commit

Permalink
Add upsample_aa op series. (#1106)
Browse files Browse the repository at this point in the history
- [x] _upsample_bicubic2d_aa
- [x] _upsample_bicubic2d_aa.out
- [x] _upsample_bicubic2d_aa_backward
- [x] _upsample_bicubic2d_aa_backward.grad_input
- [x] _upsample_bilinear2d_aa
- [x] _upsample_bilinear2d_aa.out
- [x] _upsample_bilinear2d_aa_backward
- [x] _upsample_bilinear2d_aa_backward.grad_input

---------

Co-authored-by: Yutao Xu <yutao.xu@intel.com>
  • Loading branch information
Kanya-Mo and xytintel authored Nov 28, 2024
1 parent 518bea4 commit 43957e2
Show file tree
Hide file tree
Showing 8 changed files with 866 additions and 2 deletions.
109 changes: 109 additions & 0 deletions src/ATen/native/xpu/UpSample.h
Original file line number Diff line number Diff line change
Expand Up @@ -316,4 +316,113 @@ static void upsample_increment_value_bounded(
return {nbatch, channels, output_width};
}

namespace upsample_antialias {

// taken from
// https://github.com/python-pillow/Pillow/blob/6812205f18ca4ef54372e87e1a13ce4a859434df/
// src/libImaging/Resample.c#L20-L29
// Triangle (tent) filter used for anti-aliased bilinear resampling.
// Mirrors Pillow's bilinear_filter:
// https://github.com/python-pillow/Pillow/blob/6812205f18ca4ef54372e87e1a13ce4a859434df/
// src/libImaging/Resample.c#L20-L29
struct BilinearFilterFunctor {
  template <typename accscalar_t>
  accscalar_t operator()(accscalar_t x) const {
    // f(x) = 1 - |x| on [-1, 1], zero elsewhere.
    const accscalar_t d = (x < 0) ? -x : x;
    return (d < 1) ? (1 - d) : static_cast<accscalar_t>(0);
  }

  // Filter support in output pixels (kernel spans `size` input samples).
  static const int size = 2;
};

// taken from
// https://github.com/python-pillow/Pillow/blob/6812205f18ca4ef54372e87e1a13ce4a859434df/
// src/libImaging/Resample.c#L46-L62
// Keys cubic convolution filter (a = -0.5) used for anti-aliased bicubic
// resampling. Mirrors Pillow's bicubic_filter:
// https://github.com/python-pillow/Pillow/blob/6812205f18ca4ef54372e87e1a13ce4a859434df/
// src/libImaging/Resample.c#L46-L62
struct BicubicFilterFunctor {
  template <typename accscalar_t>
  accscalar_t operator()(accscalar_t x) const {
    // https://en.wikipedia.org/wiki/Bicubic_interpolation#Bicubic_convolution_algorithm
    const accscalar_t a = -0.5;
    const accscalar_t d = (x < 0) ? -x : x;
    if (d < 1) {
      // (a+2)|x|^3 - (a+3)|x|^2 + 1, factored via Horner's scheme.
      return ((a + 2) * d - (a + 3)) * d * d + 1;
    }
    if (d < 2) {
      // a(|x|^3 - 5|x|^2 + 8|x| - 4), factored via Horner's scheme.
      return (((d - 5) * d + 8) * d - 4) * a;
    }
    return 0;
  }

  // Filter support in output pixels (kernel spans `size` input samples).
  static const int size = 4;
};

// Computes, for output index `i`, the span of input samples the filter
// covers: `center` is the input-space center of output pixel i, and
// [xmin, xmin + xsize) is that center's support window clamped to
// [0, input_size).
template <typename accscalar_t>
static inline void _compute_weights_span(
    const int i,
    const int input_size,
    const accscalar_t scale,
    const accscalar_t support,
    int& xmin,
    int& xsize,
    accscalar_t& center) {
  // Map the output pixel center (i + 0.5) back into input coordinates.
  center = scale * (i + static_cast<accscalar_t>(0.5));
  const int lo =
      max(static_cast<int>(center - support + static_cast<accscalar_t>(0.5)),
          static_cast<int>(0));
  const int hi =
      min(static_cast<int>(center + support + static_cast<accscalar_t>(0.5)),
          input_size);
  xmin = lo;
  xsize = hi - lo;
}

// Fills wt_ptr[0..interp_size) with normalized filter weights for one
// output pixel. The first `xsize` entries are interp_filter sampled across
// the support window (scaled by 1/scale when downsampling) and normalized
// to sum to 1; the remaining entries are zeroed.
template <typename scalar_t, typename accscalar_t, typename interp_filter_t>
static inline void _compute_weights(
    scalar_t* wt_ptr,
    const accscalar_t scale,
    int interp_size,
    const interp_filter_t& interp_filter,
    accscalar_t xmin_m_center,
    int xsize) {
  // When downsampling (scale >= 1) the kernel is stretched by `scale`,
  // so filter arguments are shrunk by 1/scale; upsampling uses it as-is.
  const accscalar_t invscale = (scale >= 1.0) ? 1.0 / scale : 1.0;
  accscalar_t total_w = 0.0;
  for (int k = 0; k < xsize; k++) {
    // Sample the filter at the (input-space) offset of tap k from center.
    const accscalar_t w = interp_filter(
        (k + xmin_m_center + static_cast<accscalar_t>(0.5)) * invscale);
    wt_ptr[k] = static_cast<scalar_t>(w);
    total_w += w;
  }
  // Normalize so the taps sum to 1 (skip if all weights were zero).
  if (total_w != 0.0) {
    for (int k = 0; k < xsize; k++) {
      wt_ptr[k] /= total_w;
    }
  }
  // Zero-fill the unused tail of the fixed-size weight buffer.
  for (int k = xsize; k < interp_size; k++) {
    wt_ptr[k] = static_cast<scalar_t>(0.0);
  }
}

// Returns the weighted sum of `size` source samples along one dimension:
//   sum_j src[j] * weights[j], accumulated in accscalar_t.
//
// Fix: the locals `t` and `wts` were declared scalar_t while being
// initialized from static_cast<accscalar_t>(...), so each operand was
// immediately truncated back to scalar_t before the multiply — defeating
// the higher-precision accumulation type (e.g. when scalar_t is a
// low-precision type and accscalar_t is float). Both are now accscalar_t,
// so products and the running sum stay in the accumulation type.
template <typename scalar_t, typename accscalar_t>
static inline accscalar_t interpolate_aa_single_dim(
    const scalar_t* src,
    const scalar_t* weights,
    int size) {
  accscalar_t t = static_cast<accscalar_t>(*src);
  accscalar_t wts = static_cast<accscalar_t>(weights[0]);
  accscalar_t output = t * wts;

  int j = 1;
  for (; j < size; j++) {
    wts = static_cast<accscalar_t>(weights[j]);
    t = static_cast<accscalar_t>(*(src + j));
    output += t * wts;
  }
  return output;
}

} // namespace upsample_antialias

} // namespace at::native::xpu
28 changes: 28 additions & 0 deletions src/ATen/native/xpu/UpSampleBicubic2d.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@
#include <ATen/core/Tensor.h>
#include <ATen/native/xpu/UpSample.h>
#include <ATen/native/xpu/sycl/UpSampleBicubic2dKernels.h>
#include <ATen/native/xpu/sycl/UpSampleBilinear2dKernels.h>
#include <comm/RegisterUtils.h>

#include <xpu/ATen/ops/upsample_bicubic2d_backward_native.h>
#include <xpu/ATen/ops/upsample_bicubic2d_native.h>
#include <xpu/ATen/ops/_upsample_bicubic2d_aa_backward_native.h>
#include <xpu/ATen/ops/_upsample_bicubic2d_aa_native.h>
namespace at {
namespace native {
TORCH_IMPL_FUNC(upsample_bicubic2d_out_xpu)
Expand Down Expand Up @@ -37,5 +40,30 @@ TORCH_IMPL_FUNC(upsample_bicubic2d_backward_out_xpu)
scales_h,
scales_w);
}

// Forward of anti-aliased bicubic 2d upsampling on XPU: delegates to the
// SYCL kernel, which writes the resized result into `output`.
// NOTE(review): signature is generated by TORCH_IMPL_FUNC (structured
// kernel); `output` is presumably pre-allocated by the op's meta function —
// confirm against the structured-op declaration.
TORCH_IMPL_FUNC(_upsample_bicubic2d_aa_out_xpu)
(const Tensor& input,
IntArrayRef output_size,
bool align_corners,
std::optional<double> scales_h,
std::optional<double> scales_w,
const Tensor& output) {
xpu::_upsample_bicubic2d_aa_out_kernel(
output, input, output_size, align_corners, scales_h, scales_w);
}

// Backward of anti-aliased bicubic 2d upsampling on XPU: delegates to the
// SYCL kernel, which accumulates gradients into `grad_input`.
// NOTE(review): signature is generated by TORCH_IMPL_FUNC (structured
// kernel).
TORCH_IMPL_FUNC(_upsample_bicubic2d_aa_backward_out_xpu)
(const Tensor& grad_output,
IntArrayRef output_size,
IntArrayRef input_size,
bool align_corners,
std::optional<double> scales_h,
std::optional<double> scales_w,
const Tensor& grad_input) {
// Nondeterministic because of atomicAdd usage
globalContext().alertNotDeterministic("upsample_bicubic2d_aa_backward_out_xpu");
xpu::_upsample_bicubic2d_aa_backward_out_kernel(
grad_input, grad_output, output_size, input_size, align_corners, scales_h, scales_w);
}
} // namespace native
} // namespace at
27 changes: 27 additions & 0 deletions src/ATen/native/xpu/UpSampleBilinear2d.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

#include <xpu/ATen/ops/upsample_bilinear2d_backward_native.h>
#include <xpu/ATen/ops/upsample_bilinear2d_native.h>
#include <xpu/ATen/ops/_upsample_bilinear2d_aa_backward_native.h>
#include <xpu/ATen/ops/_upsample_bilinear2d_aa_native.h>

namespace at {
namespace native {
Expand Down Expand Up @@ -38,5 +40,30 @@ TORCH_IMPL_FUNC(upsample_bilinear2d_backward_out_xpu)
scales_w);
}

// Forward of anti-aliased bilinear 2d upsampling on XPU: delegates to the
// SYCL kernel, which writes the resized result into `output`.
// NOTE(review): signature is generated by TORCH_IMPL_FUNC (structured
// kernel); `output` is presumably pre-allocated by the op's meta function —
// confirm against the structured-op declaration.
TORCH_IMPL_FUNC(_upsample_bilinear2d_aa_out_xpu)
(const Tensor& input,
IntArrayRef output_size,
bool align_corners,
std::optional<double> scales_h,
std::optional<double> scales_w,
const Tensor& output) {
xpu::_upsample_bilinear2d_aa_out_kernel(
output, input, output_size, align_corners, scales_h, scales_w);
}

// Backward of anti-aliased bilinear 2d upsampling on XPU: delegates to the
// SYCL kernel, which accumulates gradients into `grad_input`.
// NOTE(review): signature is generated by TORCH_IMPL_FUNC (structured
// kernel).
TORCH_IMPL_FUNC(_upsample_bilinear2d_aa_backward_out_xpu)
(const Tensor& grad_output,
IntArrayRef output_size,
IntArrayRef input_size,
bool align_corners,
std::optional<double> scales_h,
std::optional<double> scales_w,
const Tensor& grad_input) {
// Nondeterministic because of atomicAdd usage
globalContext().alertNotDeterministic("upsample_bilinear2d_aa_backward_out_xpu");
xpu::_upsample_bilinear2d_aa_backward_out_kernel(
grad_input, grad_output, output_size, input_size, align_corners, scales_h, scales_w);
}

} // namespace native
} // namespace at
2 changes: 0 additions & 2 deletions src/ATen/native/xpu/XPUFallback.template
Original file line number Diff line number Diff line change
Expand Up @@ -189,10 +189,8 @@ TORCH_LIBRARY_IMPL(aten, XPU, m) {
"_thnn_fused_gru_cell",
"_to_sparse_csr",
"triangular_solve.X",
"_upsample_bilinear2d_aa.out",
"_validate_compressed_sparse_indices",
"vdot",
"_upsample_bicubic2d_aa.out",
};
for (auto& op_name : fallback_list) {
m.impl(
Expand Down
Loading

0 comments on commit 43957e2

Please sign in to comment.