diff --git a/src/backends/torch/torchdataaug.cc b/src/backends/torch/torchdataaug.cc
index fc75230cc..a3778fbac 100644
--- a/src/backends/torch/torchdataaug.cc
+++ b/src/backends/torch/torchdataaug.cc
@@ -24,50 +24,136 @@ namespace dd
 {
+  void write_image_with_bboxes(const cv::Mat &src,
+                               const std::vector<std::vector<float>> &bboxes,
+                               const std::string fpath, int &ii)
+  {
+    cv::Mat src_bb = src.clone();
+    for (size_t bb = 0; bb < bboxes.size(); ++bb)
+      {
+        cv::Rect r(bboxes[bb][0], bboxes[bb][1], bboxes[bb][2] - bboxes[bb][0],
+                   bboxes[bb][3] - bboxes[bb][1]);
+        cv::rectangle(src_bb, r, cv::Scalar(255, 0, 0), 1, 8, 0);
+      }
+    cv::imwrite(fpath + "/test_aug_" + std::to_string(ii) + ".png", src_bb);
+    ++ii;
+  }
+
   void TorchImgRandAugCV::augment(cv::Mat &src)
   {
     // apply augmentation
-    if (_mirror)
-      applyMirror(src);
-    if (_rotate)
-      applyRotate(src);
-    if (_geometry)
-      applyGeometry(src);
-
-    // should be last, in this order
-    if (_cutout > 0.0)
-      applyCutout(src);
-    if (_crop_size > 0)
-      applyCrop(src);
+    applyGeometry(src, _geometry_params);
+    applyCutout(src, _cutout_params);
+
+    // these transforms do affect dimensions
+    applyCrop(src, _crop_params);
+    applyMirror(src);
+    applyRotate(src);
+  }
+
+  void
+  TorchImgRandAugCV::augment_with_bbox(cv::Mat &src,
+                                       std::vector<torch::Tensor> &targets)
+  {
+    torch::Tensor t = targets[0];
+    int nbbox = t.size(0);
+    std::vector<std::vector<float>> bboxes;
+    for (int bb = 0; bb < nbbox; ++bb)
+      {
+        std::vector<float> bbox;
+        for (int d = 0; d < 4; ++d)
+          {
+            bbox.push_back(t[bb][d].item<float>());
+          }
+        bboxes.push_back(bbox); // add (xmin, ymin, xmax, ymax)
+      }
+
+    bool mirror = applyMirror(src);
+    if (mirror)
+      {
+        applyMirrorBBox(bboxes, static_cast<float>(src.cols));
+      }
+    int rot = applyRotate(src);
+    if (rot > 0)
+      {
+        applyRotateBBox(bboxes, static_cast<float>(src.cols),
+                        static_cast<float>(src.rows), rot);
+      }
+    // XXX: no cutout with bboxes (yet)
+    GeometryParams geoparams = _geometry_params;
+    cv::Mat src_c = src.clone();
+    applyGeometry(src_c, geoparams, true);
+    if (!geoparams._lambda.empty())
+      {
+        // geometry on bboxes
+        std::vector<std::vector<float>> bboxes_c = bboxes;
+        applyGeometryBBox(bboxes_c, geoparams, src_c.cols, src_c.rows);
+        if (!bboxes_c.empty()) // some bboxes remain
+          {
+            src = src_c;
+            bboxes = bboxes_c;
+          }
+      }
+
+    // replacing the initial bboxes with the transformed ones
+    nbbox = bboxes.size();
+    for (int bb = 0; bb < nbbox; ++bb)
+      {
+        for (int d = 0; d < 4; ++d)
+          {
+            t[bb][d] = bboxes.at(bb).at(d);
+          }
+      }
   }
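For reference, the round-trip between the [N, 4] bbox target tensor and the nested float vectors used by augment_with_bbox() above reduces to the following minimal sketch. The helper names are illustrative only and are not part of the patch; it assumes a libtorch float tensor of shape [N, 4] holding (xmin, ymin, xmax, ymax) rows.

#include <torch/torch.h>
#include <vector>

// Sketch only: tensor -> nested vectors of (xmin, ymin, xmax, ymax).
std::vector<std::vector<float>> tensor_to_bboxes(const torch::Tensor &t)
{
  std::vector<std::vector<float>> bboxes;
  for (int64_t bb = 0; bb < t.size(0); ++bb)
    bboxes.push_back({ t[bb][0].item<float>(), t[bb][1].item<float>(),
                       t[bb][2].item<float>(), t[bb][3].item<float>() });
  return bboxes;
}

// Sketch only: write the (possibly transformed) boxes back in place,
// same element-wise assignment as the loop closing augment_with_bbox().
void bboxes_to_tensor(const std::vector<std::vector<float>> &bboxes,
                      torch::Tensor &t)
{
  for (size_t bb = 0; bb < bboxes.size(); ++bb)
    for (int d = 0; d < 4; ++d)
      t[bb][d] = bboxes[bb][d];
}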
 
-  void TorchImgRandAugCV::applyMirror(cv::Mat &src)
+  bool TorchImgRandAugCV::applyMirror(cv::Mat &src)
   {
+    if (!_mirror)
+      return false;
+
+    bool mirror = false;
 #pragma omp critical
     {
-      if (_bernouilli(_rnd_gen))
-        {
-          cv::Mat dst;
-          cv::flip(src, dst, 1);
-          src = dst;
-        }
+      mirror = _bernouilli(_rnd_gen);
     }
+    if (mirror)
+      {
+        cv::Mat dst;
+        cv::flip(src, dst, 1);
+        src = dst;
+      }
+    return mirror;
+  }
+
+  void
+  TorchImgRandAugCV::applyMirrorBBox(std::vector<std::vector<float>> &bboxes,
+                                     const float &img_width)
+  {
+    for (size_t i = 0; i < bboxes.size(); ++i)
+      {
+        float xmin = bboxes.at(i)[0];
+        bboxes.at(i)[0] = img_width - bboxes.at(i)[2]; // xmin = width - xmax
+        bboxes.at(i)[2] = img_width - xmin;
+      }
   }
 
-  void TorchImgRandAugCV::applyRotate(cv::Mat &src)
+  int TorchImgRandAugCV::applyRotate(cv::Mat &src)
   {
+    if (!_rotate)
+      return -1;
+
     int rot = 0;
 #pragma omp critical
     {
       rot = _uniform_int_rotate(_rnd_gen);
     }
     if (rot == 0)
-      return;
-    else if (rot == 1) // 90
+      return rot;
+    else if (rot == 1) // 270
       {
         cv::Mat dst;
         cv::transpose(src, dst);
-        cv::flip(dst, src, 1);
+        cv::flip(dst, src, 0);
       }
     else if (rot == 2) // 180
       {
@@ -75,73 +161,137 @@ namespace dd
         cv::flip(src, dst, -1);
         src = dst;
       }
-    else if (rot == 3) // 270
+    else if (rot == 3) // 90
       {
         cv::Mat dst;
         cv::transpose(src, dst);
-        cv::flip(dst, src, 0);
+        cv::flip(dst, src, 1);
       }
+    return rot;
   }
 
-  void TorchImgRandAugCV::applyCrop(cv::Mat &src)
+  void
+  TorchImgRandAugCV::applyRotateBBox(std::vector<std::vector<float>> &bboxes,
+                                     const float &img_width,
+                                     const float &img_height, const int &rot)
   {
+    std::vector<std::vector<float>> nbboxes;
+    for (size_t i = 0; i < bboxes.size(); ++i)
+      {
+        std::vector<float> bbox = bboxes.at(i);
+        std::vector<float> nbox;
+        if (rot == 1) // 90
+          {
+            nbox.push_back(bbox[1]);              // xmin <- ymin
+            nbox.push_back(img_height - bbox[2]); // ymin <- height-xmax
+            nbox.push_back(bbox[3]);              // xmax <- ymax
+            nbox.push_back(img_height - bbox[0]); // ymax <- height-xmin
+          }
+        else if (rot == 2) // 180
+          {
+            nbox.push_back(img_width - bbox[2]);  // xmin <- width-xmax
+            nbox.push_back(img_height - bbox[3]); // ymin <- height-ymax
+            nbox.push_back(img_width - bbox[0]);  // xmax <- width-xmin
+            nbox.push_back(img_height - bbox[1]); // ymax <- height-ymin
+          }
+        else if (rot == 3) // 270
+          {
+            nbox.push_back(img_width - bbox[3]); // xmin <- width-ymax
+            nbox.push_back(bbox[0]);             // ymin <- xmin
+            nbox.push_back(img_width - bbox[1]); // xmax <- width-ymin
+            nbox.push_back(bbox[2]);             // ymax <- xmax
+          }
+        nbboxes.push_back(nbox);
+      }
+    bboxes = nbboxes;
+  }
+
+  void TorchImgRandAugCV::applyCrop(cv::Mat &src, CropParams &cp,
+                                    const bool &store_rparams)
+  {
+    if (cp._crop_size <= 0)
+      return;
+
     int crop_x = 0;
     int crop_y = 0;
 #pragma omp critical
     {
-      crop_x = _uniform_int_crop_x(_rnd_gen);
-      crop_y = _uniform_int_crop_y(_rnd_gen);
+      crop_x = cp._uniform_int_crop_x(_rnd_gen);
+      crop_y = cp._uniform_int_crop_y(_rnd_gen);
     }
-    cv::Rect crop(crop_x, crop_y, _crop_size, _crop_size);
+    cv::Rect crop(crop_x, crop_y, cp._crop_size, cp._crop_size);
     cv::Mat dst = src(crop).clone();
     src = dst;
+
+    if (store_rparams)
+      {
+        cp._crop_x = crop_x;
+        cp._crop_y = crop_y;
+      }
   }
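The coordinate maps implemented by applyMirrorBBox() and applyRotateBBox() above can be summarized in closed form. A minimal standalone sketch, assuming (xmin, ymin, xmax, ymax) boxes in pixel coordinates and the same rot codes as applyRotate(); the function names are illustrative only.

#include <array>

using Box = std::array<float, 4>; // (xmin, ymin, xmax, ymax)

Box mirror_h(const Box &b, float W)        // horizontal flip
{
  return { W - b[2], b[1], W - b[0], b[3] };
}

Box rot90(const Box &b, float H)           // as in rot == 1
{
  return { b[1], H - b[2], b[3], H - b[0] };
}

Box rot180(const Box &b, float W, float H) // as in rot == 2
{
  return { W - b[2], H - b[3], W - b[0], H - b[1] };
}

Box rot270(const Box &b, float W)          // as in rot == 3
{
  return { W - b[3], b[0], W - b[1], b[2] };
}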
 
-  void TorchImgRandAugCV::applyCutout(cv::Mat &src)
+  void TorchImgRandAugCV::applyCutout(cv::Mat &src, CutoutParams &cp,
+                                      const bool &store_rparams)
   {
+    if (cp._prob == 0.0)
+      return;
+
     // Draw random between 0 and 1
     float r1 = 0.0;
 #pragma omp critical
     {
       r1 = _uniform_real_1(_rnd_gen);
     }
-    if (r1 > _cutout)
+    if (r1 > cp._prob)
       return;
 
 #pragma omp critical
     {
       // get shape and area to erase
-      float s = _uniform_real_cutout_s(_rnd_gen) * _img_width
-                * _img_height;                    // area
-      float r = _uniform_real_cutout_r(_rnd_gen); // aspect ratio
+      int w = 0, h = 0, rect_x = 0, rect_y = 0;
+      if (cp._w == 0 && cp._h == 0)
+        {
+          float s = cp._uniform_real_cutout_s(_rnd_gen) * cp._img_width
+                    * cp._img_height;                    // area
+          float r = cp._uniform_real_cutout_r(_rnd_gen); // aspect ratio
 
-      int w = std::min(_img_width,
+          w = std::min(cp._img_width,
                        static_cast<int>(std::floor(std::sqrt(s / r))));
-      int h = std::min(_img_height,
+          h = std::min(cp._img_height,
                        static_cast<int>(std::floor(std::sqrt(s * r))));
-      std::uniform_int_distribution<int> distx(0, _img_width - w);
-      std::uniform_int_distribution<int> disty(0, _img_height - h);
-      int rect_x = distx(_rnd_gen);
-      int rect_y = disty(_rnd_gen);
+          std::uniform_int_distribution<int> distx(0, cp._img_width - w);
+          std::uniform_int_distribution<int> disty(0, cp._img_height - h);
+          rect_x = distx(_rnd_gen);
+          rect_y = disty(_rnd_gen);
+        }
 
       // erase
       cv::Rect rect(rect_x, rect_y, w, h);
       cv::Mat selected_area = src(rect);
       if (selected_area.channels() == 3)
         cv::randu(selected_area,
-                  cv::Scalar(_cutout_vl, _cutout_vl, _cutout_vl),
-                  cv::Scalar(_cutout_vh, _cutout_vh, _cutout_vh));
+                  cv::Scalar(cp._cutout_vl, cp._cutout_vl, cp._cutout_vl),
+                  cv::Scalar(cp._cutout_vh, cp._cutout_vh, cp._cutout_vh));
       else
-        cv::randu(selected_area, cv::Scalar(_cutout_vl),
-                  cv::Scalar(_cutout_vh));
+        cv::randu(selected_area, cv::Scalar(cp._cutout_vl),
+                  cv::Scalar(cp._cutout_vh));
+
+      if (store_rparams)
+        {
+          cp._w = w;
+          cp._h = h;
+          cp._rect_x = rect_x;
+          cp._rect_y = rect_y;
+        }
     }
   }
 
   void TorchImgRandAugCV::getEnlargedImage(const cv::Mat &in_img,
+                                           const GeometryParams &cp,
                                            cv::Mat &in_img_enlarged)
   {
     int pad_mode = cv::BORDER_REFLECT101;
-    switch (_geometry_pad_mode)
+    switch (cp._geometry_pad_mode)
       {
      case 1: // constant
        pad_mode = cv::BORDER_CONSTANT;
@@ -160,6 +310,7 @@
   }
 
   void TorchImgRandAugCV::getQuads(const int &rows, const int &cols,
+                                   const GeometryParams &cp,
                                    cv::Point2f (&inputQuad)[4],
                                    cv::Point2f (&outputQuad)[4])
   {
@@ -171,11 +322,11 @@
     x1 = 2 * cols - 1;
     y0 = rows;
     y1 = 2 * rows - 1;
-    if (_geometry_zoom_out || _geometry_zoom_in)
+    if (cp._geometry_zoom_out || cp._geometry_zoom_in)
       {
-        bool zoom_in = _geometry_zoom_in;
-        bool zoom_out = _geometry_zoom_out;
-        if (_geometry_zoom_out && _geometry_zoom_in)
+        bool zoom_in = cp._geometry_zoom_in;
+        bool zoom_out = cp._geometry_zoom_out;
+        if (cp._geometry_zoom_out && cp._geometry_zoom_in)
           {
             if (_bernouilli(_rnd_gen))
               zoom_in = false;
@@ -186,8 +337,8 @@
         float x0min, x0max, y0min, y0max;
         if (zoom_in)
           {
-            x0max = cols + cols * _geometry_zoom_factor;
-            y0max = rows + rows * _geometry_zoom_factor;
+            x0max = cols + cols * cp._geometry_zoom_factor;
+            y0max = rows + rows * cp._geometry_zoom_factor;
           }
         else
           {
@@ -196,8 +347,8 @@
           }
         if (zoom_out)
           {
-            x0min = cols - cols * _geometry_zoom_factor;
-            y0min = rows - rows * _geometry_zoom_factor;
+            x0min = cols - cols * cp._geometry_zoom_factor;
+            y0min = rows - rows * cp._geometry_zoom_factor;
           }
         else
           {
@@ -221,55 +372,59 @@
     outputQuad[1] = cv::Point2f(cols - 1, 0);
     outputQuad[2] = cv::Point2f(cols - 1, rows - 1);
     outputQuad[3] = cv::Point2f(0, rows - 1);
-    if (_geometry_persp_horizontal)
+    if (cp._geometry_persp_horizontal)
       {
         if (_bernouilli(_rnd_gen))
           {
             // seen from right
             outputQuad[0].y
-                = rows * _geometry_persp_factor * _uniform_real_1(_rnd_gen);
+                = rows * cp._geometry_persp_factor * _uniform_real_1(_rnd_gen);
             outputQuad[3].y = rows - outputQuad[0].y;
           }
         else
           {
             // seen from left
             outputQuad[1].y
-                = rows * _geometry_persp_factor * _uniform_real_1(_rnd_gen);
+                = rows * cp._geometry_persp_factor * _uniform_real_1(_rnd_gen);
             outputQuad[2].y = rows - outputQuad[1].y;
           }
       }
-    if (_geometry_persp_vertical)
+    if (cp._geometry_persp_vertical)
      {
         if (_bernouilli(_rnd_gen))
           {
             // seen from above
             outputQuad[3].x
-                = cols * _geometry_persp_factor * _uniform_real_1(_rnd_gen);
+                = cols * cp._geometry_persp_factor * _uniform_real_1(_rnd_gen);
             outputQuad[2].x = cols - outputQuad[3].x;
           }
         else
           {
             // seen from below
             outputQuad[0].x
-                = cols * _geometry_persp_factor * _uniform_real_1(_rnd_gen);
+                = cols * cp._geometry_persp_factor * _uniform_real_1(_rnd_gen);
             outputQuad[1].x = cols - outputQuad[0].x;
           }
       }
   }
 
-  void TorchImgRandAugCV::applyGeometry(cv::Mat &src)
+  void TorchImgRandAugCV::applyGeometry(cv::Mat &src, GeometryParams &cp,
+                                        const bool &store_rparams)
   {
+    if (!cp._prob)
+      return;
+
     // enlarge image
     float g1 = 0.0;
 #pragma omp critical
     {
       g1 = _uniform_real_1(_rnd_gen);
     }
-    if (g1 > _geometry)
+    if (g1 > cp._prob)
       return;
 
     cv::Mat src_enlarged;
-    getEnlargedImage(src, src_enlarged);
+    getEnlargedImage(src, cp, src_enlarged);
 
     // Input Quadilateral or Image plane coordinates
     cv::Point2f inputQuad[4];
@@ -279,11 +434,116 @@
     // get perpective matrix
 #pragma omp critical
     {
-      getQuads(src.rows, src.cols, inputQuad, outputQuad);
+      getQuads(src.rows, src.cols, cp, inputQuad, outputQuad);
     }
 
     // warp perspective
     cv::Mat lambda = cv::getPerspectiveTransform(inputQuad, outputQuad);
     cv::warpPerspective(src_enlarged, src, lambda, src.size());
+
+    if (store_rparams)
+      cp._lambda = lambda;
+  }
+
+  void TorchImgRandAugCV::warpBBoxes(std::vector<std::vector<float>> &bboxes,
+                                     cv::Mat lambda)
+  {
+    std::vector<std::vector<float>> nbboxes;
+    for (size_t i = 0; i < bboxes.size(); ++i)
+      {
+        std::vector<float> bbox = bboxes.at(i);
+        std::vector<cv::Point2f> origBBox;
+        std::vector<cv::Point2f> warpedBBox;
+
+        cv::Point2f p1;
+        p1.x = bbox[0]; // xmin
+        p1.y = bbox[1]; // ymin
+        origBBox.push_back(p1);
+        cv::Point2f p2;
+        p2.x = bbox[2]; // xmax
+        p2.y = bbox[3]; // ymax
+        origBBox.push_back(p2);
+        cv::Point2f p3;
+        p3.x = bbox[0]; // xmin
+        p3.y = bbox[3]; // ymax
+        origBBox.push_back(p3);
+        cv::Point2f p4;
+        p4.x = bbox[2]; // xmax
+        p4.y = bbox[1]; // ymin
+        origBBox.push_back(p4);
+
+        cv::perspectiveTransform(origBBox, warpedBBox, lambda);
+        float xmin = warpedBBox[0].x;
+        float ymin = warpedBBox[0].y;
+        float xmax = warpedBBox[0].x;
+        float ymax = warpedBBox[0].y;
+        for (int i = 1; i < 4; ++i)
+          {
+            if (warpedBBox[i].x < xmin)
+              xmin = warpedBBox[i].x;
+            if (warpedBBox[i].x > xmax)
+              xmax = warpedBBox[i].x;
+            if (warpedBBox[i].y < ymin)
+              ymin = warpedBBox[i].y;
+            if (warpedBBox[i].y > ymax)
+              ymax = warpedBBox[i].y;
+          }
+
+        std::vector<float> nbox = { xmin, ymin, xmax, ymax };
+        nbboxes.push_back(nbox);
+      }
+    bboxes = nbboxes;
+  }
+
+  void TorchImgRandAugCV::filterBBoxes(std::vector<std::vector<float>> &bboxes,
+                                       const GeometryParams &cp,
+                                       const int &img_width,
+                                       const int &img_height)
+  {
+    std::vector<std::vector<float>> nbboxes;
+    for (size_t i = 0; i < bboxes.size(); ++i)
+      {
+        std::vector<float> bbox = bboxes.at(i);
+        if (bbox[2] >= 0.0 && bbox[0] <= img_width && bbox[3] >= 0.0
+            && bbox[1] <= img_height)
+          {
+            std::vector<float> nbbox;
+            nbbox.push_back(std::max(0.0f, bbox[0])); // xmin
+            nbbox.push_back(std::max(0.0f, bbox[1])); // ymin
+            nbbox.push_back(
+                std::min(static_cast<float>(img_width), bbox[2])); // xmax
+            nbbox.push_back(
+                std::min(static_cast<float>(img_height), bbox[3])); // ymax
+            float surfbb = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]);
+            float surfnbb = (nbbox[2] - nbbox[0]) * (nbbox[3] - nbbox[1]);
+            if (surfnbb > cp._geometry_bbox_intersect
+                              * surfbb) // keep bboxes that are at least 75%
+                                        // as big as the original
+              {
+                nbboxes.push_back(nbbox);
+              }
+          }
+      }
+    bboxes = nbboxes;
+  }
+
+  void TorchImgRandAugCV::applyGeometryBBox(
+      std::vector<std::vector<float>> &bboxes, const GeometryParams &cp,
+      const int &img_width, const int &img_height)
+  {
+    // XXX: fix (enlarged bboxes for constant padding)
+    for (size_t i = 0; i < bboxes.size(); ++i)
+      {
+        bboxes[i][0] += img_width;
+        bboxes[i][2] += img_width;
+        bboxes[i][1] += img_height;
+        bboxes[i][3] += img_height;
+      }
+
+    // use cp lambda on bboxes
+    warpBBoxes(bboxes, cp._lambda);
+
+    // filter bboxes
+    filterBBoxes(bboxes, cp, img_width, img_height);
   }
 }
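warpBBoxes() and filterBBoxes() above together implement the bbox side of the perspective transform: each box is warped corner by corner, re-boxed as the axis-aligned hull of the warped corners, clipped to the image, and dropped if it shrinks below the configured intersection ratio. A condensed, self-contained sketch of the per-box step, assuming only OpenCV and a 3x3 perspective matrix; not the library's API.

#include <opencv2/opencv.hpp>
#include <algorithm>
#include <vector>

// Warp one (xmin, ymin, xmax, ymax) box through lambda and return the
// axis-aligned hull of its four warped corners.
std::vector<float> warp_box(const std::vector<float> &b, const cv::Mat &lambda)
{
  std::vector<cv::Point2f> corners
      = { { b[0], b[1] }, { b[2], b[3] }, { b[0], b[3] }, { b[2], b[1] } };
  std::vector<cv::Point2f> warped;
  cv::perspectiveTransform(corners, warped, lambda);
  float xmin = warped[0].x, xmax = warped[0].x;
  float ymin = warped[0].y, ymax = warped[0].y;
  for (const auto &p : warped)
    {
      xmin = std::min(xmin, p.x);
      xmax = std::max(xmax, p.x);
      ymin = std::min(ymin, p.y);
      ymax = std::max(ymax, p.y);
    }
  return { xmin, ymin, xmax, ymax };
}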
diff --git a/src/backends/torch/torchdataaug.h b/src/backends/torch/torchdataaug.h
index d7446289c..c71dfa425 100644
--- a/src/backends/torch/torchdataaug.h
+++ b/src/backends/torch/torchdataaug.h
@@ -23,32 +23,44 @@
 #define TORCHDATAAUG_H
 
 #include <opencv2/opencv.hpp>
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#include <torch/torch.h>
+#pragma GCC diagnostic pop
 #include <random>
 
 namespace dd
 {
-  class TorchImgRandAugCV
+  class ImgAugParams
   {
   public:
-    TorchImgRandAugCV()
+    ImgAugParams() : _img_width(224), _img_height(224)
     {
     }
 
-    TorchImgRandAugCV(
-        const int &img_width, const int &img_height, const bool &mirror,
-        const bool &rotate, const int &crop_size, const float &cutout,
-        const bool &geometry, const bool &geometry_persp_horizontal,
-        const bool &geometry_persp_vertical, const bool &geometry_zoom_out,
-        const bool &geometry_zoom_in, const int &geometry_pad_mode)
-        : _img_width(img_width), _img_height(img_height), _mirror(mirror),
-          _rotate(rotate), _crop_size(crop_size), _cutout(cutout),
-          _geometry(geometry),
-          _geometry_persp_horizontal(geometry_persp_horizontal),
-          _geometry_persp_vertical(geometry_persp_vertical),
-          _geometry_zoom_out(geometry_zoom_out),
-          _geometry_zoom_in(geometry_zoom_in),
-          _geometry_pad_mode(geometry_pad_mode), _uniform_real_1(0.0, 1.0),
-          _bernouilli(0.5), _uniform_int_rotate(0, 3)
+    ImgAugParams(const int &img_width, const int &img_height)
+        : _img_width(img_width), _img_height(img_height)
+    {
+    }
+
+    ~ImgAugParams()
+    {
+    }
+
+    int _img_width;
+    int _img_height;
+  };
+
+  class CropParams : public ImgAugParams
+  {
+  public:
+    CropParams() : ImgAugParams()
+    {
+    }
+
+    CropParams(const int &crop_size, const int &img_width,
+               const int &img_height)
+        : ImgAugParams(img_width, img_height), _crop_size(crop_size)
     {
       if (_crop_size > 0)
         {
@@ -57,49 +69,86 @@
           _uniform_int_crop_x = std::uniform_int_distribution<int>(
              0, _img_width - _crop_size);
           _uniform_int_crop_y = std::uniform_int_distribution<int>(
              0, _img_height - _crop_size);
        }
-      if (_cutout > 0.0)
-        {
-          _uniform_real_cutout_s
-              = std::uniform_real_distribution<float>(_cutout_sl, _cutout_sh);
-          _uniform_real_cutout_r
-              = std::uniform_real_distribution<float>(_cutout_rl, _cutout_rh);
-        }
     }
 
-    ~TorchImgRandAugCV()
+    ~CropParams()
     {
     }
 
-    void augment(cv::Mat &src);
+    // default params
+    int _crop_size = -1;
+    std::uniform_int_distribution<int> _uniform_int_crop_x;
+    std::uniform_int_distribution<int> _uniform_int_crop_y;
 
-  protected:
-    void applyMirror(cv::Mat &src);
-    void applyRotate(cv::Mat &src);
-    void applyCrop(cv::Mat &src);
-    void applyCutout(cv::Mat &src);
-    void applyGeometry(cv::Mat &src);
+    // randomized params
+    int _crop_x = 0;
+    int _crop_y = 0;
+  };
 
-  private:
-    void getEnlargedImage(const cv::Mat &in_img, cv::Mat &in_img_enlarged);
-    void getQuads(const int &rows, const int &cols,
-                  cv::Point2f (&inputQuad)[4], cv::Point2f (&outputQuad)[4]);
+  class CutoutParams : public ImgAugParams
+  {
+  public:
+    CutoutParams() : ImgAugParams()
+    {
+    }
 
-  private:
-    int _img_width = 224;
-    int _img_height = 224;
+    CutoutParams(const float &prob, const int &img_width,
+                 const int &img_height)
+        : ImgAugParams(img_width, img_height), _prob(prob)
+    {
+      _uniform_real_cutout_s
+          = std::uniform_real_distribution<float>(_cutout_sl, _cutout_sh);
+      _uniform_real_cutout_r
+          = std::uniform_real_distribution<float>(_cutout_rl, _cutout_rh);
+    }
 
-    // augmentation options & parameter
-    bool _mirror = false;
-    bool _rotate = false;
-    int _crop_size = -1;
-    float _cutout = 0.0;
+    ~CutoutParams()
+    {
+    }
+
+    // default params
+    float _prob = 0.0;
     float _cutout_sl = 0.02; /**< min proportion of erased area wrt image. */
     float _cutout_sh = 0.4;  /**< max proportion of erased area wrt image. */
     float _cutout_rl = 0.3;  /**< min aspect ratio of erased area. */
     float _cutout_rh = 3.0;  /**< max aspect ratio of erased area. */
     int _cutout_vl = 0;      /**< min erased area pixel value. */
     int _cutout_vh = 255;    /**< max erased area pixel value. */
-    float _geometry = 0.0;
+
+    // randomized params
+    int _rect_x = 0;
+    int _rect_y = 0;
+    int _w = 0;
+    int _h = 0;
+
+    std::uniform_real_distribution<float> _uniform_real_cutout_s;
+    std::uniform_real_distribution<float> _uniform_real_cutout_r;
+  };
+
+  class GeometryParams
+  {
+  public:
+    GeometryParams()
+    {
+    }
+
+    GeometryParams(const float &prob, const bool &geometry_persp_horizontal,
+                   const bool &geometry_persp_vertical,
+                   const bool &geometry_zoom_out, const bool &geometry_zoom_in,
+                   const int &geometry_pad_mode)
+        : _prob(prob), _geometry_persp_horizontal(geometry_persp_horizontal),
+          _geometry_persp_vertical(geometry_persp_vertical),
+          _geometry_zoom_out(geometry_zoom_out),
+          _geometry_zoom_in(geometry_zoom_in),
+          _geometry_pad_mode(geometry_pad_mode)
+    {
+    }
+
+    ~GeometryParams()
+    {
+    }
+
+    float _prob = 0.0;
     bool _geometry_persp_horizontal
         = true; /**< horizontal perspective change. */
     bool _geometry_persp_vertical = true; /**< vertical perspective change. */
@@ -113,6 +162,73 @@
                                        image corners be in 1.25 or 0.75. */
     uint8_t _geometry_pad_mode = 1; /**< filling around images, 1: constant,
                                        2: mirrored, 3: repeat nearest. */
+    float _geometry_bbox_intersect
+        = 0.75; /**< warped bboxes must keep at least a 75% intersection with
+                   the original bbox, otherwise they are filtered out. */
+    cv::Mat _lambda; /**< warp perspective matrix. */
+  };
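With the parameter classes above, a caller assembles the augmentation through the TorchImgRandAugCV class declared just below. A rough usage sketch with arbitrary placeholder values; torchlib.cc further down shows how the values are actually filled from API parameters.

// assumes this header is included
CropParams crop(224, 512, 512);     // crop_size, img_width, img_height
CutoutParams cutout(0.5, 512, 512); // prob, img_width, img_height
GeometryParams geom(0.1, true, true, true, true, 1);
TorchImgRandAugCV aug(true /*mirror*/, true /*rotate*/, crop, cutout, geom);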
+
+  class TorchImgRandAugCV
+  {
+  public:
+    TorchImgRandAugCV()
+    {
+    }
+
+    TorchImgRandAugCV(const bool &mirror, const bool &rotate,
+                      const CropParams &crop_params,
+                      const CutoutParams &cutout_params,
+                      const GeometryParams &geometry_params)
+        : _mirror(mirror), _rotate(rotate), _crop_params(crop_params),
+          _cutout_params(cutout_params), _geometry_params(geometry_params),
+          _uniform_real_1(0.0, 1.0), _bernouilli(0.5),
+          _uniform_int_rotate(0, 3)
+    {
+    }
+
+    ~TorchImgRandAugCV()
+    {
+    }
+
+    void augment(cv::Mat &src);
+    void augment_with_bbox(cv::Mat &src, std::vector<torch::Tensor> &targets);
+
+  protected:
+    bool applyMirror(cv::Mat &src);
+    void applyMirrorBBox(std::vector<std::vector<float>> &bboxes,
+                         const float &img_width);
+    int applyRotate(cv::Mat &src);
+    void applyRotateBBox(std::vector<std::vector<float>> &bboxes,
+                         const float &img_width, const float &img_height,
+                         const int &rot);
+    void applyCrop(cv::Mat &src, CropParams &cp,
+                   const bool &store_rparams = false);
+    void applyCutout(cv::Mat &src, CutoutParams &cp,
+                     const bool &store_rparams = false);
+    void applyGeometry(cv::Mat &src, GeometryParams &cp,
+                       const bool &store_rparams = false);
+    void applyGeometryBBox(std::vector<std::vector<float>> &bboxes,
+                           const GeometryParams &cp, const int &img_width,
+                           const int &img_height);
+
+  private:
+    void getEnlargedImage(const cv::Mat &in_img, const GeometryParams &cp,
+                          cv::Mat &in_img_enlarged);
+    void getQuads(const int &rows, const int &cols, const GeometryParams &cp,
+                  cv::Point2f (&inputQuad)[4], cv::Point2f (&outputQuad)[4]);
+    void warpBBoxes(std::vector<std::vector<float>> &bboxes, cv::Mat lambda);
+    void filterBBoxes(std::vector<std::vector<float>> &bboxes,
+                      const GeometryParams &cp, const int &img_width,
+                      const int &img_height);
+
+  private:
+    // augmentation options & parameter
+    bool _mirror = false;
+    bool _rotate = false;
+
+    CropParams _crop_params;
+    CutoutParams _cutout_params;
+    GeometryParams _geometry_params;
 
     // random generators
     std::default_random_engine _rnd_gen;
@@ -120,10 +236,6 @@
         _uniform_real_1; /**< random real uniform between 0 and 1. */
     std::bernoulli_distribution _bernouilli;
     std::uniform_int_distribution<int> _uniform_int_rotate;
-    std::uniform_int_distribution<int> _uniform_int_crop_x;
-    std::uniform_int_distribution<int> _uniform_int_crop_y;
-    std::uniform_real_distribution<float> _uniform_real_cutout_s;
-    std::uniform_real_distribution<float> _uniform_real_cutout_r;
   };
 }
diff --git a/src/backends/torch/torchdataset.cc b/src/backends/torch/torchdataset.cc
index 8b7002f0b..4360c3bb8 100644
--- a/src/backends/torch/torchdataset.cc
+++ b/src/backends/torch/torchdataset.cc
@@ -1,6 +1,6 @@
 /**
  * DeepDetect
- * Copyright (c) 2019-2020 Jolibrain
+ * Copyright (c) 2019-2021 Jolibrain
  * Author: Guillaume Infantes
  *         Louis Jean
  *         Emmanuel Benazera
@@ -166,7 +166,8 @@
                                        const std::string &targets,
                                        cv::Mat &bgr,
                                        std::vector<torch::Tensor> &targett,
-                                       const bool &bw)
+                                       const bool &bw, const int &width,
+                                       const int &height)
   {
     std::vector<uint8_t> img_data(datas.begin(), datas.end());
     bgr = cv::Mat(img_data, true);
@@ -174,6 +175,19 @@
                        bw ? CV_LOAD_IMAGE_GRAYSCALE : CV_LOAD_IMAGE_COLOR);
     std::stringstream targetstream(targets);
     torch::load(targett, targetstream);
+    if (bgr.cols != width || bgr.rows != height)
+      {
+        float w_ratio = static_cast<float>(width) / bgr.cols;
+        float h_ratio = static_cast<float>(height) / bgr.rows;
+        cv::resize(bgr, bgr, cv::Size(width, height), 0, 0, cv::INTER_CUBIC);
+        for (int bb = 0; bb < (int)targett[0].size(0); ++bb)
+          {
+            targett[0][bb][0] *= w_ratio;
+            targett[0][bb][1] *= h_ratio;
+            targett[0][bb][2] *= w_ratio;
+            targett[0][bb][3] *= h_ratio;
+          }
+      }
   }
 
   // add image batch
@@ -420,10 +434,17 @@
         cv::Mat bgr;
         torch::Tensor targett;
-        read_image_from_db(datas, targets, bgr, t, inputc->_bw);
+        read_image_from_db(datas, targets, bgr, t, inputc->_bw,
+                           inputc->width(), inputc->height());
 
         // data augmentation can apply here, with OpenCV
-        _img_rand_aug_cv.augment(bgr);
+        if (!_test)
+          {
+            if (_bbox)
+              _img_rand_aug_cv.augment_with_bbox(bgr, t);
+            else
+              _img_rand_aug_cv.augment(bgr);
+          }
 
         torch::Tensor imgt
             = image_to_tensor(bgr, inputc->height(), inputc->width());
diff --git a/src/backends/torch/torchdataset.h b/src/backends/torch/torchdataset.h
index 781f5d95f..6062efeca 100644
--- a/src/backends/torch/torchdataset.h
+++ b/src/backends/torch/torchdataset.h
@@ -83,6 +83,7 @@
     bool _image = false; /**< whether an image dataset. */
     bool _bbox = false;  /**< true if bbox detection dataset */
+    bool _test = false;  /**< whether a test set */
     TorchImgRandAugCV
         _img_rand_aug_cv; /**< image data augmentation policy. */
 
     /**
@@ -103,7 +104,7 @@
           _indices(d._indices), _lfiles(d._lfiles), _batches(d._batches),
           _dbFullName(d._dbFullName), _inputc(d._inputc),
           _classification(d._classification), _image(d._image), _bbox(d._bbox),
-          _img_rand_aug_cv(d._img_rand_aug_cv)
+          _test(d._test), _img_rand_aug_cv(d._img_rand_aug_cv)
     {
     }
 
@@ -330,7 +331,8 @@
     void read_image_from_db(const std::string &datas,
                             const std::string &targets, cv::Mat &bgr,
                             std::vector<torch::Tensor> &targett,
-                            const bool &bw);
+                            const bool &bw, const int &width,
+                            const int &height);
   };
 
   /**
@@ -350,8 +352,8 @@
     TorchMultipleDataset(const TorchMultipleDataset &d)
         : _inputc(d._inputc), _image(d._image), _bbox(d._bbox),
          _classification(d._classification), _dbFullNames(d._dbFullNames),
-          _datasets_names(d._datasets_names), _db(d._db), _backend(d._backend),
-          _dbPrefix(d._dbPrefix), _logger(d._logger),
+          _datasets_names(d._datasets_names), _test(d._test), _db(d._db),
+          _backend(d._backend), _dbPrefix(d._dbPrefix), _logger(d._logger),
           _batches_per_transaction(d._batches_per_transaction),
           _datasets(d._datasets)
     {
@@ -508,6 +510,7 @@
       _datasets[id]._inputc = _inputc;
       _datasets[id]._image = _image;
       _datasets[id]._bbox = _bbox;
+      _datasets[id]._test = _test;
       _datasets[id]._classification = _classification;
       _datasets[id].set_db_params(_db, _backend,
                                   _dbPrefix + "_" + std::to_string(id));
@@ -523,6 +526,7 @@
     bool _classification = true; /**< whether a classification dataset. */
     std::vector<std::string> _dbFullNames;
     std::vector<std::string> _datasets_names;
+    bool _test = false; /**< whether a test set */
 
   protected:
     bool _db = false;
diff --git a/src/backends/torch/torchinputconns.h b/src/backends/torch/torchinputconns.h
index 5ca6ef1b4..6661f21ec 100644
--- a/src/backends/torch/torchinputconns.h
+++ b/src/backends/torch/torchinputconns.h
@@ -258,6 +258,7 @@
           _bbox = ad.get("bbox").get<bool>();
           _dataset._bbox = _bbox;
           _test_datasets._bbox = _bbox;
+          _test_datasets._test = true;
         }
 
     void fillup_parameters(const APIData &ad_input)
diff --git a/src/backends/torch/torchlib.cc b/src/backends/torch/torchlib.cc
index f6374fd73..83a3f5a39 100644
--- a/src/backends/torch/torchlib.cc
+++ b/src/backends/torch/torchlib.cc
@@ -605,47 +605,47 @@
           bool has_rotate = ad_mllib.has("rotate")
                             && ad_mllib.get("rotate").get<bool>();
           this->_logger->info("rotate: {}", has_rotate);
-          int crop_size = -1;
+          CropParams crop_params;
           if (ad_mllib.has("crop_size"))
             {
-              crop_size = ad_mllib.get("crop_size").get<int>();
+              int crop_size = ad_mllib.get("crop_size").get<int>();
+              crop_params
+                  = CropParams(crop_size, inputc.width(), inputc.height());
               this->_logger->info("crop_size : {}", crop_size);
             }
-          float cutout = 0.0;
+          CutoutParams cutout_params;
           if (ad_mllib.has("cutout"))
             {
-              cutout = ad_mllib.get("cutout").get<double>();
+              float cutout = ad_mllib.get("cutout").get<double>();
+              cutout_params
+                  = CutoutParams(cutout, inputc.width(), inputc.height());
               this->_logger->info("cutout: {}", cutout);
             }
-          float geometry = 0.0;
-          bool geometry_persp_vertical = false;
-          bool geometry_persp_horizontal = false;
-          bool geometry_zoom_out = false;
-          bool geometry_zoom_in = false;
-          int geometry_pad_mode = 1;
+          GeometryParams geometry_params;
           APIData ad_geometry = ad_mllib.getobj("geometry");
           if (!ad_geometry.empty())
             {
-              geometry = ad_geometry.get("prob").get<double>();
-              this->_logger->info("geometry: {}", geometry);
+              geometry_params._prob = ad_geometry.get("prob").get<double>();
+              this->_logger->info("geometry: {}", geometry_params._prob);
               if (ad_geometry.has("persp_vertical"))
-                geometry_persp_vertical
+                geometry_params._geometry_persp_vertical
                     = ad_geometry.get("persp_vertical").get<bool>();
               if (ad_geometry.has("persp_horizontal"))
-                geometry_persp_horizontal
+                geometry_params._geometry_persp_horizontal
                     = ad_geometry.get("persp_horizontal").get<bool>();
               if (ad_geometry.has("zoom_out"))
-                geometry_zoom_out = ad_geometry.get("zoom_out").get<bool>();
+                geometry_params._geometry_zoom_out
+                    = ad_geometry.get("zoom_out").get<bool>();
               if (ad_geometry.has("zoom_in"))
-                geometry_zoom_in = ad_geometry.get("zoom_in").get<bool>();
+                geometry_params._geometry_zoom_in
+                    = ad_geometry.get("zoom_in").get<bool>();
               if (ad_geometry.has("pad_mode"))
-                geometry_pad_mode = ad_geometry.get("pad_mode").get<int>();
+                geometry_params._geometry_pad_mode
+                    = ad_geometry.get("pad_mode").get<int>();
             }
-          inputc._dataset._img_rand_aug_cv = TorchImgRandAugCV(
-              inputc.width(), inputc.height(), has_mirror, has_rotate,
-              crop_size, cutout, geometry, geometry_persp_horizontal,
-              geometry_persp_vertical, geometry_zoom_out, geometry_zoom_in,
-              geometry_pad_mode);
+          inputc._dataset._img_rand_aug_cv
+              = TorchImgRandAugCV(has_mirror, has_rotate, crop_params,
+                                  cutout_params, geometry_params);
         }
 
       // solver params
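Taken together, the pieces above wire bbox-aware augmentation into the torch data pipeline: torchlib.cc builds the per-transform parameter objects from API options, the dataset applies augment_with_bbox() on training samples only, and the target tensor is updated in place. A minimal standalone exercise of the new public API, as a sketch only; the include path and values are assumptions, not part of the patch.

#include <opencv2/opencv.hpp>
#include <torch/torch.h>
#include <vector>
#include "backends/torch/torchdataaug.h" // assumed include path

int main()
{
  cv::Mat img(256, 256, CV_8UC3, cv::Scalar(127, 127, 127));

  // one box (xmin, ymin, xmax, ymax) in a [1, 4] float tensor
  torch::Tensor boxes = torch::zeros({ 1, 4 });
  boxes[0][0] = 32;
  boxes[0][1] = 48;
  boxes[0][2] = 128;
  boxes[0][3] = 160;
  std::vector<torch::Tensor> targets = { boxes };

  dd::CropParams crop;                              // disabled (crop_size <= 0)
  dd::CutoutParams cutout(0.5, img.cols, img.rows); // 50% cutout probability
  dd::GeometryParams geom(0.1, true, true, true, true, 1);
  dd::TorchImgRandAugCV aug(true /*mirror*/, true /*rotate*/, crop, cutout,
                            geom);

  aug.augment_with_bbox(img, targets); // image and boxes updated in place
  return 0;
}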