Skip to content

Commit

Permalink
Merge pull request #36 from octu0/v1.19.2
Browse files Browse the repository at this point in the history
v1.19.2
  • Loading branch information
octu0 authored Dec 15, 2021
2 parents 2e9a9ad + dd6fe2b commit ec681a0
Show file tree
Hide file tree
Showing 111 changed files with 293 additions and 292 deletions.
4 changes: 2 additions & 2 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
*.out
/Halide-Runtime
/Halide-Runtime*
.git
_benchmark
_benchmark/
testdata/
102 changes: 51 additions & 51 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,57 +19,57 @@ darwin/amd64 Intel(R) Core(TM) i7-8569U CPU @ 2.80GHz

```
src 320x240
BenchmarkJIT/cloneimg : 0.00766ms
BenchmarkJIT/convert_from_argb : 0.02316ms
BenchmarkJIT/convert_from_abgr : 0.04758ms
BenchmarkJIT/convert_from_bgra : 0.03296ms
BenchmarkJIT/convert_from_rabg : 0.03178ms
BenchmarkJIT/convert_from_yuv_420 : 0.02924ms
BenchmarkJIT/convert_from_yuv_444 : 0.02670ms
BenchmarkJIT/convert_to_yuv_420 : 0.32821ms
BenchmarkJIT/convert_to_yuv_444 : 0.12315ms
BenchmarkJIT/rotate0 : 0.00747ms
BenchmarkJIT/rotate90 : 0.02517ms
BenchmarkJIT/rotate180 : 0.00742ms
BenchmarkJIT/rotate270 : 0.04581ms
BenchmarkJIT/crop : 0.05918ms
BenchmarkJIT/scale : 0.13376ms
BenchmarkJIT/scale_box : 0.21878ms
BenchmarkJIT/scale_linear : 0.19792ms
BenchmarkJIT/scale_gaussian : 0.22788ms
BenchmarkJIT/blend_normal : 0.08359ms
BenchmarkJIT/blend_sub : 0.08202ms
BenchmarkJIT/blend_add : 0.08355ms
BenchmarkJIT/blend_diff : 0.08441ms
BenchmarkJIT/grayscale : 0.04577ms
BenchmarkJIT/invert : 0.04085ms
BenchmarkJIT/brightness : 0.04380ms
BenchmarkJIT/gammacorrection : 0.08333ms
BenchmarkJIT/contrast : 0.01454ms
BenchmarkJIT/boxblur : 0.11781ms
BenchmarkJIT/gaussianblur : 0.32183ms
BenchmarkJIT/blockmozaic : 0.27573ms
BenchmarkJIT/erosion : 0.12029ms
BenchmarkJIT/dilation : 0.14710ms
BenchmarkJIT/morphology_open : 0.10283ms
BenchmarkJIT/morphology_close : 0.10364ms
BenchmarkJIT/morphology_gradient : 0.08212ms
BenchmarkJIT/emboss$1 : 0.05245ms
BenchmarkJIT/laplacian : 0.03190ms
BenchmarkJIT/highpass : 0.03757ms
BenchmarkJIT/gradient : 0.03272ms
BenchmarkJIT/edgedetect : 0.02667ms
BenchmarkJIT/sobel : 0.06262ms
BenchmarkJIT/canny : 0.29069ms
BenchmarkJIT/canny_dilate : 0.34596ms
BenchmarkJIT/canny_morphology_open : 0.39083ms
BenchmarkJIT/canny_morphology_close : 0.38398ms
BenchmarkJIT/match_template_sad : 5.67450ms
BenchmarkJIT/match_template_ssd : 4.17133ms
BenchmarkJIT/match_template_ncc : 8.03216ms
BenchmarkJIT/prepared_match_template_ncc : 6.23168ms
BenchmarkJIT/match_template_zncc : 12.00244ms
BenchmarkJIT/prepared_match_template_zncc : 10.88519ms
BenchmarkJIT/cloneimg : 0.00752ms
BenchmarkJIT/convert_from_argb : 0.02369ms
BenchmarkJIT/convert_from_abgr : 0.03711ms
BenchmarkJIT/convert_from_bgra : 0.02472ms
BenchmarkJIT/convert_from_rabg : 0.03139ms
BenchmarkJIT/convert_from_yuv_420 : 0.02957ms
BenchmarkJIT/convert_from_yuv_444 : 0.02586ms
BenchmarkJIT/convert_to_yuv_420 : 0.05634ms
BenchmarkJIT/convert_to_yuv_444 : 0.06963ms
BenchmarkJIT/rotate0 : 0.00739ms
BenchmarkJIT/rotate90 : 0.02580ms
BenchmarkJIT/rotate180 : 0.00746ms
BenchmarkJIT/rotate270 : 0.02557ms
BenchmarkJIT/crop : 0.06071ms
BenchmarkJIT/scale : 0.14003ms
BenchmarkJIT/scale_box : 0.19936ms
BenchmarkJIT/scale_linear : 0.19754ms
BenchmarkJIT/scale_gaussian : 0.22766ms
BenchmarkJIT/blend_normal : 0.08383ms
BenchmarkJIT/blend_sub : 0.08447ms
BenchmarkJIT/blend_add : 0.08394ms
BenchmarkJIT/blend_diff : 0.08423ms
BenchmarkJIT/grayscale : 0.03839ms
BenchmarkJIT/invert : 0.04330ms
BenchmarkJIT/brightness : 0.04931ms
BenchmarkJIT/gammacorrection : 0.08158ms
BenchmarkJIT/contrast : 0.01506ms
BenchmarkJIT/boxblur : 0.12091ms
BenchmarkJIT/gaussianblur : 0.32293ms
BenchmarkJIT/blockmozaic : 0.27398ms
BenchmarkJIT/erosion : 0.12039ms
BenchmarkJIT/dilation : 0.12439ms
BenchmarkJIT/morphology_open : 0.10255ms
BenchmarkJIT/morphology_close : 0.10472ms
BenchmarkJIT/morphology_gradient : 0.08321ms
BenchmarkJIT/emboss$1 : 0.05385ms
BenchmarkJIT/laplacian : 0.03204ms
BenchmarkJIT/highpass : 0.03783ms
BenchmarkJIT/gradient : 0.03303ms
BenchmarkJIT/edgedetect : 0.02638ms
BenchmarkJIT/sobel : 0.06399ms
BenchmarkJIT/canny : 0.29472ms
BenchmarkJIT/canny_dilate : 0.36258ms
BenchmarkJIT/canny_morphology_open : 0.39542ms
BenchmarkJIT/canny_morphology_close : 0.40479ms
BenchmarkJIT/match_template_sad : 6.64854ms
BenchmarkJIT/match_template_ssd : 4.76639ms
BenchmarkJIT/match_template_ncc : 9.37937ms
BenchmarkJIT/prepared_match_template_ncc : 6.85107ms
BenchmarkJIT/match_template_zncc : 13.29085ms
BenchmarkJIT/prepared_match_template_zncc : 12.07535ms
```

## AOT benchmarks
Expand Down
117 changes: 72 additions & 45 deletions blurry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -900,41 +900,11 @@ Func convert_from_yuv_420_fn(Func in_y, Func in_u, Func in_v, Param<int32_t> wid
);
}

// Single-output YUV 4:4:4 conversion: returns one 2-D Func in which the three
// channels of rgb_to_yuv444(in) are stacked vertically along y.
//
//   rows [0, height)            -> channel 0
//   rows [height, 2*height)     -> channel 1
//   rows [2*height, ...)        -> channel 2
//
// input  - source Func; reads outside {0..width, 0..height, 0..3} return 0
//          (BoundaryConditions::constant_exterior).
// width  - source width parameter (used for the boundary region).
// height - source height parameter (boundary region and plane offsets).
//
// Returns the Func "convert_to_yuv_444" producing uint8_t values.
Func convert_to_yuv_444_fn(Func input, Param<int32_t> width, Param<int32_t> height) {
Var x("x"), y("y");
Var xo("xo"), xi("xi");
Var yo("yo"), yi("yi");
Var ti("ti");

Region src_bounds = {{0, width},{0, height},{0, 3}};
Func in = readUI8(BoundaryConditions::constant_exterior(input, 0, src_bounds), "in");

// NOTE(review): y_max_w is never read in this function.
Expr y_max_w = width;
Expr y_max_h = height;
// Row at which the third plane starts (2 * height).
Expr uv_max_h = y_max_h + y_max_h;

Func yuv = rgb_to_yuv444(in, "rgb_to_yuv444");

Func f = Func("convert_to_yuv_444");
// Pick the channel from the output row and rebase y to that plane's origin.
// float_0 is the fallback value; it is defined outside this block.
Expr value = select(
y < y_max_h, yuv(x, y, 0),
y_max_h <= y && y < uv_max_h, yuv(x, (y - y_max_h), 1),
uv_max_h <= y, yuv(x, (y - uv_max_h), 2),
likely(float_0)
);
f(x, y) = cast<uint8_t>(value);

// NOTE(review): Halide's signature is split(old, outer, inner, factor), so
// here `yi` names the outer loop and `yo` the inner one; confirm this naming
// (and compute_at(in, yo) on the returned Func) is what was intended.
f.compute_at(in, yo)
.split(y, yi, yo, 4)
.parallel(yi)
.vectorize(x, 16);
return f;
}

Func convert_to_yuv_420_fn(Func input, Param<int32_t> width, Param<int32_t> height) {
Pipeline convert_to_yuv_420_fn(Func input, Param<int32_t> width, Param<int32_t> height) {
Var x("x"), y("y"), ch("ch");
Var xo("xo"), xi("xi");
Var yo("yo"), yi("yi");
Var ti("ti");

Region src_bounds = {{0, width},{0, height},{0, 3}};
Func in = readUI8(BoundaryConditions::constant_exterior(input, 0, src_bounds), "in");
Expand Down Expand Up @@ -962,20 +932,77 @@ Func convert_to_yuv_420_fn(Func input, Param<int32_t> width, Param<int32_t> heig
yuv(kx + 1, ky + 1, ch)
) / 4.f;

Func f = Func("convert_to_yuv_420");
Expr value = select(
y < y_max_h, yuv(x, y, 0),
y_max_h <= y && y < u_max_h && x < uv_width, yuv444to420(x, y - y_max_h, 1),
u_max_h <= y && y < v_max_h && x < uv_width, yuv444to420(x, y - u_max_h, 2),
likely(float_0)
);
f(x, y) = cast<uint8_t>(value);
Func fn_y = Func("fn_y");
Func fn_u = Func("fn_u");
Func fn_v = Func("fn_v");
fn_y(x, y) = cast<uint8_t>(yuv(x, y, 0));
fn_u(x, y) = cast<uint8_t>(yuv444to420(x, y, 1));
fn_v(x, y) = cast<uint8_t>(yuv444to420(x, y, 2));

f.compute_at(in, yo)
.split(y, yi, yo, 4)
.parallel(yi)
.vectorize(x, 8);
return f;
in.compute_root();
fn_y.compute_at(yuv, ti)
.store_at(yuv, ti)
.tile(x, y, xo, yo, xi, yi, 32, 32)
.fuse(xo, yo, ti)
.parallel(ti)
.vectorize(xi, 32);
fn_u.compute_at(yuv444to420, ti)
.store_at(yuv444to420, ti)
.tile(x, y, xo, yo, xi, yi, 32, 32)
.fuse(xo, yo, ti)
.parallel(ti)
.vectorize(xi, 32);
fn_v.compute_at(yuv444to420, ti)
.store_at(yuv444to420, ti)
.tile(x, y, xo, yo, xi, yi, 32, 32)
.fuse(xo, yo, ti)
.parallel(ti)
.vectorize(xi, 32);
return Pipeline({fn_y, fn_u, fn_v});
}

// Three-output YUV 4:4:4 conversion: each channel of rgb_to_yuv444(in) is
// emitted as its own full-resolution uint8_t plane.
//
// input  - source Func; reads outside {0..width, 0..height, 0..3} return 0
//          (BoundaryConditions::constant_exterior).
// width  - source width parameter (used for the boundary region).
// height - source height parameter (used for the boundary region).
//
// Returns a Pipeline whose outputs are, in order, fn_y (channel 0),
// fn_u (channel 1) and fn_v (channel 2).
Pipeline convert_to_yuv_444_fn(Func input, Param<int32_t> width, Param<int32_t> height) {
    Var x("x"), y("y");
    Var xo("xo"), xi("xi");
    Var yo("yo"), yi("yi");
    Var ti("ti");

    Region src_bounds = {{0, width},{0, height},{0, 3}};
    Func in = readUI8(BoundaryConditions::constant_exterior(input, 0, src_bounds), "in");

    Func yuv = rgb_to_yuv444(in, "rgb_to_yuv444");

    Func fn_y = Func("fn_y");
    Func fn_u = Func("fn_u");
    Func fn_v = Func("fn_v");
    fn_y(x, y) = cast<uint8_t>(yuv(x, y, 0));
    fn_u(x, y) = cast<uint8_t>(yuv(x, y, 1));
    fn_v(x, y) = cast<uint8_t>(yuv(x, y, 2));

    in.compute_root();

    // All three planes share one schedule: 32x32 tiles, tile indices fused
    // into `ti` and run in parallel, inner x vectorized by 32. Keeping it in
    // one place guarantees the planes cannot drift apart.
    // NOTE(review): fn_y/fn_u/fn_v are the pipeline outputs; confirm that
    // compute_at/store_at on outputs is accepted by the Halide version in use.
    auto schedule_plane = [&](Func plane) {
        plane.compute_at(yuv, ti)
            .store_at(yuv, ti)
            .tile(x, y, xo, yo, xi, yi, 32, 32)
            .fuse(xo, yo, ti)
            .parallel(ti)
            .vectorize(xi, 32);
    };
    schedule_plane(fn_y);
    schedule_plane(fn_u);
    schedule_plane(fn_v);

    return Pipeline({fn_y, fn_u, fn_v});
}

Func rotate0_fn(Func input, Param<int32_t> width, Param<int32_t> height) {
Expand Down
8 changes: 4 additions & 4 deletions blurry.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,13 @@ Func convert_from_rabg_fn(Func input, Param<int32_t> width, Param<int32_t> heigh

Func convert_from_bgra_fn(Func input, Param<int32_t> width, Param<int32_t> height);

Func convert_from_yuv_444_fn(Func in_y, Func in_u, Func in_v, Param<int32_t> width, Param<int32_t> height);

Func convert_from_yuv_420_fn(Func in_y, Func in_u, Func in_v, Param<int32_t> width, Param<int32_t> height);

Func convert_to_yuv_444_fn(Func input, Param<int32_t> width, Param<int32_t> height);
Func convert_from_yuv_444_fn(Func in_y, Func in_u, Func in_v, Param<int32_t> width, Param<int32_t> height);

Pipeline convert_to_yuv_420_fn(Func input, Param<int32_t> width, Param<int32_t> height);

Func convert_to_yuv_420_fn(Func input, Param<int32_t> width, Param<int32_t> height);
Pipeline convert_to_yuv_444_fn(Func input, Param<int32_t> width, Param<int32_t> height);

Func rotate0_fn(Func input, Param<int32_t> width, Param<int32_t> height);

Expand Down
43 changes: 33 additions & 10 deletions blurry_benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -233,36 +233,59 @@ int benchmark_convert_from_yuv_444() {
), width, height);
}

int benchmark_convert_to_yuv_444() {
int benchmark_convert_to_yuv_420() {
Buffer<uint8_t> buf_src = load_and_convert_image("./testdata/src.png");
Param<int32_t> _width{"width", buf_src.get()->width()};
Param<int32_t> _height{"height", buf_src.get()->height()};

int32_t y_width = buf_src.get()->width();
int32_t uv_width = buf_src.get()->width();
int32_t uv_width = buf_src.get()->width() / 2;
int32_t y_height = buf_src.get()->height();
int32_t uv_height = buf_src.get()->height();
int32_t uv_height = buf_src.get()->height() / 2;

return jit_benchmark_bounds(convert_to_yuv_444_fn(
Pipeline pipe = convert_to_yuv_420_fn(
wrapFunc(buf_src, "buf_src"),
_width, _height
), y_width, y_height + uv_height + uv_height);
);
pipe.compile_jit(get_jit_target_from_environment());

Buffer<uint8_t> buf_y(y_width, y_height);
Buffer<uint8_t> buf_u(uv_width, uv_height);
Buffer<uint8_t> buf_v(uv_width, uv_height);

double result = benchmark(100, 10, [&]() {
pipe.realize({buf_y, buf_u, buf_v});
});
printf("BenchmarkJIT/%-30s: %-3.5fms\n", "convert_to_yuv_420", result * 1e3);
return 0;
}

int benchmark_convert_to_yuv_420() {
int benchmark_convert_to_yuv_444() {
Buffer<uint8_t> buf_src = load_and_convert_image("./testdata/src.png");
Param<int32_t> _width{"width", buf_src.get()->width()};
Param<int32_t> _height{"height", buf_src.get()->height()};

int32_t y_width = buf_src.get()->width();
int32_t uv_width = buf_src.get()->width() / 2;
int32_t uv_width = buf_src.get()->width();
int32_t y_height = buf_src.get()->height();
int32_t uv_height = buf_src.get()->height() / 2;
int32_t uv_height = buf_src.get()->height();

return jit_benchmark_bounds(convert_to_yuv_420_fn(
Pipeline pipe = convert_to_yuv_444_fn(
wrapFunc(buf_src, "buf_src"),
_width, _height
), y_width, y_height + uv_height + uv_height);
);
pipe.compile_jit(get_jit_target_from_environment());

Buffer<uint8_t> buf_y(y_width, y_height);
Buffer<uint8_t> buf_u(uv_width, uv_height);
Buffer<uint8_t> buf_v(uv_width, uv_height);

double result = benchmark(100, 10, [&]() {
pipe.realize({buf_y, buf_u, buf_v});
});

printf("BenchmarkJIT/%-30s: %-3.5fms\n", "convert_to_yuv_444", result * 1e3);
return 0;
}

int benchmark_rotate0(Buffer<uint8_t> buf_src, Param<int32_t> width, Param<int32_t> height) {
Expand Down
Loading

0 comments on commit ec681a0

Please sign in to comment.