diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2b4103b..ed1241f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -94,7 +94,7 @@ endif()
 ###################
 
 # See https://github.com/hunter-packages/check_ci_tag when changing VERSION
-project(acf VERSION 0.1.7)
+project(acf VERSION 0.1.8)
 
 set(ACF_ROOT_DIR "${CMAKE_CURRENT_LIST_DIR}")
diff --git a/cmake/Hunter/config.cmake b/cmake/Hunter/config.cmake
index 4c46ab8..8a0a5ee 100644
--- a/cmake/Hunter/config.cmake
+++ b/cmake/Hunter/config.cmake
@@ -1,17 +1,14 @@
-if(IOS OR ANDROID)
-  # local workaround for protobuf compiler crash with Xcode 8.1
-  # see https://github.com/elucideye/acf/issues/41
-  set(opencv_cmake_args
-    WITH_PROTOBUF=OFF
-    BUILD_PROTOBUF=OFF
-    BUILD_LIBPROTOBUF_FROM_SOURCES=NO
-    BUILD_opencv_dnn=OFF
-
-    WITH_JASPER=OFF
-    BUILD_JASPER=OFF
+# the OpenCV protobuf isn't friendly to a lot of compilers, skip it by default
+set(opencv_cmake_args
+  WITH_PROTOBUF=OFF
+  BUILD_PROTOBUF=OFF
+  BUILD_LIBPROTOBUF_FROM_SOURCES=NO
+  BUILD_opencv_dnn=OFF
+
+  WITH_JASPER=OFF
+  BUILD_JASPER=OFF
 )
-  hunter_config(OpenCV VERSION ${HUNTER_OpenCV_VERSION} CMAKE_ARGS ${opencv_cmake_args})
-endif()
+hunter_config(OpenCV VERSION ${HUNTER_OpenCV_VERSION} CMAKE_ARGS ${opencv_cmake_args})
 
 ### ogles_gpgpu ###
 set(ogles_gpgpu_cmake_args
diff --git a/src/app/acf/GLDetector.cpp b/src/app/acf/GLDetector.cpp
index 157c54f..49ab1f6 100644
--- a/src/app/acf/GLDetector.cpp
+++ b/src/app/acf/GLDetector.cpp
@@ -66,7 +66,7 @@ void GLDetector::init(const cv::Mat& I)
     computePyramid(I, m_impl->Pcpu);
     const int shrink = opts.pPyramid->pChns->shrink.get();
     const auto sizes = getPyramidSizes(m_impl->Pcpu, shrink);
-    static const bool doGray = false;
+    static const bool doGray = false;
     const ogles_gpgpu::Size2d inputSize(I.cols, I.rows);
     m_impl->acf = std::make_shared<ogles_gpgpu::ACF>(nullptr, inputSize, sizes, m_impl->featureKind, doGray, shrink);
     m_impl->acf->setDoLuvTransfer(false);
@@ -84,7 +84,7 @@ const acf::Detector::Pyramid& GLDetector::getPyramid(const cv::Mat& input, const
     (*m_impl->context)();
 
     // Fill in the pyramid:
-    (*m_impl->acf)({{ input.cols, input.rows }, void_ptr(input.ptr()), true, 0, DFLT_TEXTURE_FORMAT});
+    (*m_impl->acf)({ { input.cols, input.rows }, void_ptr(input.ptr()), true, 0, DFLT_TEXTURE_FORMAT });
     glFlush();
     m_impl->acf->fill(m_impl->Pgpu, m_impl->Pcpu);
 
@@ -129,7 +129,7 @@ std::vector<ogles_gpgpu::Size2d> getPyramidSizes(acf::Detector::Pyramid& Pcpu, i
 
 void GLDetector::clear()
 {
-    m_impl->size = {0,0};
+    m_impl->size = { 0, 0 };
 }
 
 cv::Mat GLDetector::draw(bool doGpu)
diff --git a/src/app/acf/GLDetector.h b/src/app/acf/GLDetector.h
index 7ee707c..2511609 100644
--- a/src/app/acf/GLDetector.h
+++ b/src/app/acf/GLDetector.h
@@ -37,7 +37,7 @@ class GLDetector : public acf::Detector
 
     cv::Mat draw(bool gpu); // debug routine
     void clear();
-    
+
 protected:
     void init(const cv::Mat& I);
     void initContext();
diff --git a/src/app/acf/acf.cpp b/src/app/acf/acf.cpp
index cbc80d1..c6ffd88 100644
--- a/src/app/acf/acf.cpp
+++ b/src/app/acf/acf.cpp
@@ -51,7 +51,7 @@ using ObjectDetectorPtr = std::shared_ptr<acf::ObjectDetector>;
 using AcfPtr = std::shared_ptr<acf::Detector>;
 using RectVec = std::vector<cv::Rect>;
 
-static void randomShapes(cv::Mat &image, int n);
+static void randomShapes(cv::Mat& image, int n);
 
 struct VideoSource
 {
@@ -65,16 +65,16 @@ struct VideoSource
     {
         filenames = util::cli::expand(filename);
     }
-    
-    VideoSource(int n) : m_n(n) // random frames
+
+    VideoSource(int n)
+        : m_n(n) // random frames
     {
-        
     }
 
     virtual Frame operator()(int i)
     {
         Frame frame;
-        if(filenames.size())
+        if (filenames.size())
         {
             frame.name = filenames[i];
             frame.image = cv::imread(filenames[i], cv::IMREAD_COLOR);
@@ -83,9 +83,9 @@ struct VideoSource
         {
             frame.name = std::to_string(i);
             frame.image = cv::Mat::zeros(640, 480, CV_8UC3);
-            randomShapes(frame.image, rand()%32);
+            randomShapes(frame.image, rand() % 32);
         }
-        
+
         return frame;
     }
@@ -244,7 +244,7 @@ int gauze_main(int argc, char** argv)
     }
 
     std::shared_ptr<VideoSource> video;
-    if(doRandom)
+    if (doRandom)
     {
         video = std::make_shared<VideoSource>(1000);
     }
@@ -308,9 +308,9 @@ int gauze_main(int argc, char** argv)
         // Get thread specific segmenter lazily:
         auto& detector = manager[std::this_thread::get_id()];
         assert(detector);
-        
+
         auto winSize = detector->getWindowSize();
-        if(!detector->getIsRowMajor())
+        if (!detector->getIsRowMajor())
         {
             std::swap(winSize.width, winSize.height);
         }
@@ -367,7 +367,7 @@ int gauze_main(int argc, char** argv)
         {
             maxScore = *iter;
         }
-        
+
         if (doPyramids)
         {
             // The "--pyramid" command line option can be used to visualize the
@@ -378,7 +378,7 @@ int gauze_main(int argc, char** argv)
             // method in order to ensure the CPU pyramid will be computed for each
             // frame.
 #if defined(ACF_DO_GPU)
-            if(acf::GLDetector *handle = dynamic_cast<acf::GLDetector*>(detector.get()))
+            if (acf::GLDetector* handle = dynamic_cast<acf::GLDetector*>(detector.get()))
             {
                 cv::Mat Pcpu = handle->draw(false);
                 cv::Mat Pgpu = handle->draw(true);
@@ -450,17 +450,20 @@ int main(int argc, char** argv)
 {
     try
     {
-        const std::string home=getenv("HOME");
-        std::vector<char*> args(argc);
+        std::string home;
+#if !(defined(_WIN32) || defined(_WIN64))
+        home = getenv("HOME");
+#endif
+        std::vector<char*> args(argc);
         args[0] = argv[0];
-        
+
         std::vector<std::string> storage(argc);
-        for(int i = 0; i < argc; i++)
+        for (int i = 0; i < argc; i++)
         {
             storage[i] = std::regex_replace(std::string(argv[i]), std::regex("HOME"), home);
             args[i] = const_cast<char*>(storage[i].c_str());
         }
-        
+
         return gauze_main(argc, &args.front());
     }
     catch (std::exception& e)
@@ -534,60 +537,63 @@ static cv::Rect2f operator*(const cv::Rect2f& roi, float scale)
     return { roi.x * scale, roi.y * scale, roi.width * scale, roi.height * scale };
 }
 
-static void randomEllipse(cv::Mat &image, int n)
+static void randomEllipse(cv::Mat& image, int n)
 {
-    for(int i = 0; i < n; i++)
+    for (int i = 0; i < n; i++)
     {
-        const cv::Point2f center(rand()%image.cols, rand()%image.rows);
-        const cv::Size2f size(rand()%image.cols, rand()%image.rows);
-        const cv::RotatedRect ellipse(center, size, static_cast<float>(rand() % 1000)/1000.f * M_PI);
-        const cv::Scalar bgr(rand()%255, rand()%255, rand()%255);
+        const cv::Point2f center(rand() % image.cols, rand() % image.rows);
+        const cv::Size2f size(rand() % image.cols, rand() % image.rows);
+        const cv::RotatedRect ellipse(center, size, static_cast<float>(rand() % 1000) / 1000.f * M_PI);
+        const cv::Scalar bgr(rand() % 255, rand() % 255, rand() % 255);
         cv::ellipse(image, ellipse, bgr, -1);
     }
 }
 
-static void randomRectangle(cv::Mat &image, int n)
+static void randomRectangle(cv::Mat& image, int n)
 {
-    for(int i = 0; i < n; i++)
+    for (int i = 0; i < n; i++)
     {
         const cv::Point p1(rand() % image.cols, rand() % image.rows);
         const cv::Point p2(rand() % image.cols, rand() % image.rows);
-        
-        if((rand() % 8) > 4)
+
+        if ((rand() % 8) > 4)
         {
             cv::randu(image(cv::Rect(p1, p2)), cv::Scalar::all(0), cv::Scalar::all(255));
         }
         else
         {
-            const cv::Scalar bgr(rand()%255, rand()%255, rand()%255);
+            const cv::Scalar bgr(rand() % 255, rand() % 255, rand() % 255);
             cv::rectangle(image, p1, p2, bgr, -1);
         }
     }
 }
 
-static void randomLines(cv::Mat &image, int n)
+static void randomLines(cv::Mat& image, int n)
 {
-    for(int i = 0; i < n; i++)
+    for (int i = 0; i < n; i++)
     {
         const cv::Point u1(rand() % image.cols, rand() % image.rows);
         const cv::Point u2(rand() % image.cols, rand() % image.rows);
-        const cv::Scalar bgr(rand()%255, rand()%255, rand()%255);
-        cv::line(image, u1, u2, bgr, (rand() % 16)+1, 8);
+        const cv::Scalar bgr(rand() % 255, rand() % 255, rand() % 255);
+        cv::line(image, u1, u2, bgr, (rand() % 16) + 1, 8);
     }
 }
 
 // Provide a simple mechanism for testing the ACF pyramids (GPU and CPU)
 // without the need for reading actual images. This was added initially
 // to aid testing on mobile devices.
-static void randomShapes(cv::Mat &image, int n)
+static void randomShapes(cv::Mat& image, int n)
 {
-    for(int i = 0; i < n; i++)
+    for (int i = 0; i < n; i++)
     {
-        switch(rand()%3)
+        switch (rand() % 3)
         {
-            case 0: randomLines(image, 1);
-            case 1: randomRectangle(image, 1);
-            case 2: randomEllipse(image, 1);
+            case 0:
+                randomLines(image, 1); break; // break: draw exactly one shape per iteration
+            case 1:
+                randomRectangle(image, 1); break;
+            case 2:
+                randomEllipse(image, 1); break;
         }
     }
 }
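Note on the `main()` change above: even off Windows, `getenv("HOME")` can return `nullptr` when the variable is unset, and assigning `nullptr` to a `std::string` is undefined behavior. A minimal null-safe sketch (not part of this patch; `getEnvOr` is a hypothetical helper):

```cpp
#include <cstdlib>
#include <string>

// Hypothetical helper: returns the variable's value, or a fallback when unset.
static std::string getEnvOr(const char* name, const std::string& fallback = {})
{
    const char* value = std::getenv(name); // may be nullptr
    return value ? std::string(value) : fallback;
}

// e.g., const std::string home = getEnvOr("HOME");
```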
diff --git a/src/app/acf/mat2cpb.cpp b/src/app/acf/mat2cpb.cpp
index 2b9a82c..44d50f6 100644
--- a/src/app/acf/mat2cpb.cpp
+++ b/src/app/acf/mat2cpb.cpp
@@ -83,7 +83,7 @@ int gauze_main(int argc, char** argv)
 
     acf::Detector acf(sInput);
     save_cpb(sOutput, acf);
-    
+
     return 0;
 }
diff --git a/src/app/pipeline/CMakeLists.txt b/src/app/pipeline/CMakeLists.txt
index 3a46116..018958a 100644
--- a/src/app/pipeline/CMakeLists.txt
+++ b/src/app/pipeline/CMakeLists.txt
@@ -9,6 +9,9 @@ set(acf_srcs
   pipeline.cpp
   GPUDetectionPipeline.h
   GPUDetectionPipeline.cpp
+
+  VideoCaptureImage.h
+  VideoCaptureImage.cpp
 
   # Simple line segment shader for the usual green box annotations:
   lines.h
@@ -38,16 +41,6 @@ target_compile_definitions(${test_app} PUBLIC ACF_DO_GPU=1)
 set_property(TARGET ${test_app} PROPERTY FOLDER "app/console")
 install(TARGETS ${test_app} DESTINATION bin)
 
-set_target_properties(
-  ${test_app}
-  PROPERTIES
-  MACOSX_BUNDLE_INFO_PLIST "${CMAKE_CURRENT_LIST_DIR}/plist.in" # file sharing
-  XCODE_ATTRIBUTE_PRODUCT_NAME "${test_app}"
-  XCODE_ATTRIBUTE_BUNDLE_IDENTIFIER "com.elucideye.acf.${test_app}"
-  XCODE_ATTRIBUTE_PRODUCT_BUNDLE_IDENTIFIER "com.elucideye.acf.${test_app}"
-  XCODE_ATTRIBUTE_TARGETED_DEVICE_FAMILY "1,2" # iPhone/iPad
-)
-
 #############
 ### TEST ####
 #############
@@ -58,7 +51,7 @@ gauze_add_test(
   NAME ${test_name}
   COMMAND ${test_app}
   --input=$
-  --repeat=64
+  --repeat=600
   --model=$
   --minimum=128
   --calibration=0.01
diff --git a/src/app/pipeline/GPUDetectionPipeline.cpp b/src/app/pipeline/GPUDetectionPipeline.cpp
index 5da6c76..c0f6bed 100644
--- a/src/app/pipeline/GPUDetectionPipeline.cpp
+++ b/src/app/pipeline/GPUDetectionPipeline.cpp
@@ -14,6 +14,14 @@
 
 static void chooseBest(std::vector<cv::Rect>& objects, std::vector<double>& scores);
 
+#define ACF_DEBUG_PYRAMIDS 0
+
+#if ACF_DEBUG_PYRAMIDS
+#include <opencv2/imgcodecs.hpp>
+static cv::Mat draw(const acf::Detector::Pyramid& pyramid);
+static void logPyramid(const std::string& filename, const acf::Detector::Pyramid& P);
+#endif
+
 template <typename Container>
 void push_fifo(Container& container, const typename Container::value_type& value, int size)
 {
@@ -74,6 +82,10 @@ struct GPUDetectionPipeline::Impl
 
     std::vector<DetectionCallback> callbacks;
 
+    bool doOptimizedPipeline = true;
+    bool doCpuACF = false;
+    bool doAnnotations = true;
+
     uint64_t frameIndex = 0;
     float ACFScale = 1.f;
     float acfCalibration = 0.f;
@@ -94,8 +106,7 @@ struct GPUDetectionPipeline::Impl
         double read = 0.0;
         double detect = 0.0;
         double complete = 0.0;
-    }
-    log;
+    } log;
 };
 
 GPUDetectionPipeline::GPUDetectionPipeline(DetectionPtr& detector, const cv::Size& inputSize, std::size_t n, int rotation, int minObjectWidth)
@@ -109,7 +120,7 @@ GPUDetectionPipeline::~GPUDetectionPipeline()
 {
     try
     {
-        if(impl && impl->scene.valid())
+        if (impl && impl->scene.valid())
         {
             // If this has already been retrieved it will throw
             impl->scene.get(); // block on any abandoned calls
@@ -120,6 +131,11 @@ GPUDetectionPipeline::~GPUDetectionPipeline()
     }
 }
 
+GLuint GPUDetectionPipeline::getInputTexture()
+{
+    return impl->acf->getInputTexId();
+}
+
 void GPUDetectionPipeline::operator+=(const DetectionCallback& callback)
 {
     impl->callbacks.push_back(callback);
@@ -230,6 +246,14 @@ int GPUDetectionPipeline::computeDetectionWidth(const cv::Size& inputSizeUp) con
 
 void GPUDetectionPipeline::fill(acf::Detector::Pyramid& P)
 {
     impl->acf->fill(P, impl->P);
+
+#if ACF_DEBUG_PYRAMIDS
+    // One can compare CPU and GPU pyramids using logging like this:
+    //std::string home = ".";
+    //cv::Mat channels = impl->acf->getChannels();
+    //cv::imwrite(home + "/acf_channels.png", channels);
+    //logPyramid(home + "/acf_pyramid.png", P);
+#endif
 }
 
 void GPUDetectionPipeline::computeAcf(const ogles_gpgpu::FrameInput& frame, bool doLuv, bool doDetection)
@@ -247,7 +271,7 @@ void GPUDetectionPipeline::computeAcf(const ogles_gpgpu::FrameInput& frame, bool
 
 GLuint GPUDetectionPipeline::paint(const Detections& scene, GLuint inputTexture)
 {
-    //if(impl->lines)
+    if (scene.roi.size())
     {
         std::vector> segments;
         for (const auto& r : scene.roi)
@@ -316,24 +340,20 @@ int GPUDetectionPipeline::detect(const ogles_gpgpu::FrameInput& frame, Detection
     return 0;
 }
 
-std::pair<GLuint, Detections> GPUDetectionPipeline::operator()(const ogles_gpgpu::FrameInput& frame2, bool doDetection)
+std::pair<GLuint, Detections> GPUDetectionPipeline::runFast(const ogles_gpgpu::FrameInput& frame2, bool doDetection)
 {
     ogles_gpgpu::FrameInput frame1;
     frame1.size = frame2.size;
 
-    util::ScopeTimeLogger logger = [&](double elapsed) { impl->log.complete += elapsed; };
-
     Detections scene2(impl->frameIndex), scene1, scene0, *outputScene = &scene2;
 
     if (impl->fifo->getBufferCount() > 0)
     {
         util::ScopeTimeLogger logger = [&](double elapsed) { impl->log.read += elapsed; };
-        
-        // read GPU results for frame n-1
-        // Here we always trigger GPU pipeline reads
-        // to ensure upright + redeuced grayscale images will
-        // be available for regression, even if we won't be using ACF detection.
+
+        // Read GPU results for frame n-1.
+        // Here we always trigger GPU pipeline reads to ensure upright + reduced grayscale images
+        // will be available for regression, even if we won't be using ACF detection.
         impl->acf->getChannels();
 
         if (impl->acf->getChannelStatus())
@@ -365,14 +385,21 @@ std::pair<GLuint, Detections> GPUDetectionPipeline::operator()(const ogles_gpgpu
             scene0 = impl->scene.get();                     // scene n-2
             texture0 = (*impl->fifo)[-2]->getOutputTexId(); // texture n-2
-            outputTexture = paint(scene0, texture0);
             outputScene = &scene0;
+            if (impl->doAnnotations)
+            {
+                outputTexture = paint(scene0, texture0);
+            }
+            else
+            {
+                outputTexture = texture0;
+            }
         }
 
         // Run CPU detection + regression for frame n-1
         impl->scene = impl->threads->process([scene1, frame1, this]() {
             util::ScopeTimeLogger logger = [&](double elapsed) { impl->log.detect += elapsed; };
-            
+
             Detections sceneOut = scene1;
             detect(frame1, sceneOut, scene1.P != nullptr);
             return sceneOut;
@@ -390,25 +417,135 @@ std::pair<GLuint, Detections> GPUDetectionPipeline::operator()(const ogles_gpgpu
     // Add the current frame to FIFO
     impl->fifo->useTexture(texture2, 1);
     impl->fifo->render();
+    push_fifo(impl->scenePrimitives, *outputScene, impl->history);
+
+    return std::make_pair(outputTexture, *outputScene);
+}
+
+auto GPUDetectionPipeline::runSimple(const ogles_gpgpu::FrameInput& frame1, bool doDetection) -> DetectionTex
+{
+    // Run GPU based processing on current thread and package results as a task for CPU
+    // processing so that it will be available on the next frame. This method will compute
+    // ACF output using shaders on the GPU, and may optionally extract other GPU related
+    // features.
+    Detections scene1(impl->frameIndex), *outputScene = nullptr; // time: n+1 and n
+    preprocess(frame1, scene1, doDetection);
+
+    // Initialize input texture with ACF upright texture:
+    GLuint texture1 = impl->acf->first()->getOutputTexId(), outputTexture = 0;
+
+    detect(frame1, scene1, doDetection);
+
+    outputScene = &scene1;
+    if (impl->doAnnotations)
+    {
+        outputTexture = paint(scene1, texture1);
+    }
+    else
+    {
+        outputTexture = texture1;
+    }
 
-    // Clear face motion estimate, update window
+    // Add the current frame to FIFO
+    impl->fifo->useTexture(texture1, 1);
+    impl->fifo->render();
     push_fifo(impl->scenePrimitives, *outputScene, impl->history);
 
+    return std::make_pair(outputTexture, *outputScene);
+}
+
+auto GPUDetectionPipeline::run(const FrameInput& frame2, bool doDetection) -> DetectionTex
+{
+    if (impl->doOptimizedPipeline)
+    {
+        return runFast(frame2, doDetection);
+    }
+    else
+    {
+        return runSimple(frame2, doDetection);
+    }
+}
+
+auto GPUDetectionPipeline::operator()(const FrameInput& frame2, bool doDetection) -> DetectionTex
+{
+    util::ScopeTimeLogger logger = [&](double elapsed) { impl->log.complete += elapsed; };
+
+    std::pair<GLuint, Detections> result = run(frame2, doDetection);
+
     for (auto& c : impl->callbacks)
     {
-        c(outputTexture, *outputScene);
+        c(result.first, result.second);
     }
 
-    return std::make_pair(outputTexture, *outputScene);
+    return result;
 }
 
-std::map<std::string, double> GPUDetectionPipeline::summary()
+void GPUDetectionPipeline::preprocess(const FrameInput& frame, Detections& scene, bool doDetection)
 {
-    return
+    if (impl->doCpuACF)
     {
-        {"read", impl->log.read},
-        {"detect", impl->log.detect},
-        {"complete", impl->log.complete}
+        scene.P = createAcfCpu(frame, doDetection);
+    }
+    else
+    {
+        scene.P = createAcfGpu(frame, doDetection);
+    }
+}
+
+std::shared_ptr<acf::Detector::Pyramid> GPUDetectionPipeline::createAcfGpu(const FrameInput& frame, bool doDetection)
+{
+    computeAcf(frame, false, doDetection);
+
+    std::shared_ptr<decltype(impl->P)> P;
+
+    // Here we always trigger channel processing
+    // to ensure grayscale images will be available
+    // for regression, even if we won't be using ACF detection.
+    cv::Mat acf = impl->acf->getChannels();
+
+    if (doDetection)
+    {
+        assert(acf.type() == CV_8UC1);
+        assert(acf.channels() == 1);
+
+        if (impl->acf->getChannelStatus())
+        {
+            P = std::make_shared<decltype(impl->P)>();
+            fill(*P);
+        }
+    }
+
+    return P;
+}
+
+std::shared_ptr<acf::Detector::Pyramid> GPUDetectionPipeline::createAcfCpu(const FrameInput& frame, bool doDetection)
+{
+    computeAcf(frame, true, doDetection);
+
+    std::shared_ptr<decltype(impl->P)> P;
+    if (doDetection)
+    {
+        cv::Mat acf = impl->acf->getChannels();
+        assert(acf.type() == CV_8UC1);
+        assert(acf.channels() == 1);
+
+        P = std::make_shared<decltype(impl->P)>();
+
+        MatP LUVp = impl->acf->getLuvPlanar();
+        impl->detector->setIsLuv(true);
+        impl->detector->setIsTranspose(true);
+        impl->detector->computePyramid(LUVp, *P);
+    }
+
+    return P;
+}
+
+std::map<std::string, double> GPUDetectionPipeline::summary()
+{
+    return {
+        { "read", impl->log.read },
+        { "detect", impl->log.detect },
+        { "complete", impl->log.complete }
     };
 }
@@ -430,3 +567,41 @@ static void chooseBest(std::vector<cv::Rect>& objects, std::vector<double>& scor
         scores = { scores[best] };
     }
 }
+
+#if ACF_DEBUG_PYRAMIDS
+
+static cv::Mat draw(const acf::Detector::Pyramid& pyramid)
+{
+    cv::Mat canvas;
+    std::vector<cv::Mat> levels;
+    for (int i = 0; i < pyramid.nScales; i++)
+    {
+        // Concatenate the transposed faces, so they are compatible with the GPU layout
+        cv::Mat Ccpu;
+        std::vector<cv::Mat> images;
+        for (const auto& image : pyramid.data[i][0].get())
+        {
+            images.push_back(image.t());
+        }
+        cv::vconcat(images, Ccpu);
+
+        // Instead of upright:
+        //cv::vconcat(pyramid.data[i][0].get(), Ccpu);
+
+        if (levels.size())
+        {
+            cv::copyMakeBorder(Ccpu, Ccpu, 0, levels.front().rows - Ccpu.rows, 0, 0, cv::BORDER_CONSTANT);
+        }
+
+        levels.push_back(Ccpu);
+    }
+    cv::hconcat(levels, canvas);
+    return canvas;
+}
+
+static void logPyramid(const std::string& filename, const acf::Detector::Pyramid& P)
+{
+    cv::Mat canvas = draw(P);
+    cv::imwrite(filename, canvas);
+}
+#endif // ACF_DEBUG_PYRAMIDS
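The `push_fifo` helper above keeps a bounded history of per-frame results (`impl->scenePrimitives`, capped at `impl->history`); its body falls outside the visible hunks. A sketch of the intended bounded push, assuming a `std::deque`-like container — the newest-first ordering and eviction policy are assumptions:

```cpp
#include <deque>

// Sketch only: push the newest value and evict the oldest once `size` is exceeded.
template <typename Container>
void push_fifo(Container& container, const typename Container::value_type& value, int size)
{
    container.push_front(value); // newest result at the front
    while (static_cast<int>(container.size()) > size)
    {
        container.pop_back(); // drop the oldest entries
    }
}
```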
diff --git a/src/app/pipeline/GPUDetectionPipeline.h b/src/app/pipeline/GPUDetectionPipeline.h
index a6a3f5b..15e8b63 100644
--- a/src/app/pipeline/GPUDetectionPipeline.h
+++ b/src/app/pipeline/GPUDetectionPipeline.h
@@ -37,25 +37,37 @@ class GPUDetectionPipeline
     using HighResolutionClock = std::chrono::high_resolution_clock;
     using TimePoint = HighResolutionClock::time_point; // ;
     using DetectionPtr = std::shared_ptr<acf::Detector>;
+    using DetectionTex = std::pair<GLuint, Detections>;
     using DetectionCallback = std::function<void(GLuint, const Detections&)>;
+    using FrameInput = ogles_gpgpu::FrameInput;
 
     GPUDetectionPipeline(DetectionPtr& detector, const cv::Size& inputSize, std::size_t n, int rotation, int minObjectWidth);
 
     virtual ~GPUDetectionPipeline();
 
-    // This method receives an input frame descriptor (pixel buffer or texture ID) on which to run
-    // ACF object detection. The doDetection parameter is provided in order to allow the user to
-    // control the duty cycle of the detector (perhaps adaptively). The detection pipeline introduces
+    GLuint getInputTexture();
+
+    // This method receives an input frame descriptor (pixel buffer or texture ID) on which to run
+    // ACF object detection. The doDetection parameter is provided in order to allow the user to
+    // control the duty cycle of the detector (perhaps adaptively). The detection pipeline introduces
     // two frames of latency so that the GPU->CPU overhead can be hidden. For input frame N, the results
     // are returned for frame N-2 (along with the corresponding texture ID).
-    std::pair<GLuint, Detections> operator()(const ogles_gpgpu::FrameInput& frame, bool doDetection=true);
-    
+    DetectionTex operator()(const ogles_gpgpu::FrameInput& frame, bool doDetection = true);
+
     void operator+=(const DetectionCallback& callback);
 
     std::map<std::string, double> summary();
-    
+
     void setDoGlobalNMS(bool flag);
 
 protected:
+    DetectionTex run(const FrameInput& frame2, bool doDetection);
+    DetectionTex runSimple(const ogles_gpgpu::FrameInput& frame, bool doDetection = true);
+    DetectionTex runFast(const ogles_gpgpu::FrameInput& frame, bool doDetection = true);
+
+    void preprocess(const ogles_gpgpu::FrameInput& frame, Detections& scene, bool doDetection);
+
+    std::shared_ptr<acf::Detector::Pyramid> createAcfGpu(const FrameInput& frame, bool doDetection);
+    std::shared_ptr<acf::Detector::Pyramid> createAcfCpu(const FrameInput& frame, bool doDetection);
 
     // Allow user defined object detection drawing via inheritance.
     virtual GLuint paint(const Detections& scene, GLuint inputTexture);
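The header comment above is the key contract: for input frame N, the optimized path returns the texture and detections for frame N-2, hiding GPU->CPU readback latency. A hedged caller-side sketch using only the API declared above (the frame source, GL context, and `void_ptr`/`TEXTURE_FORMAT` conventions are taken from pipeline.cpp later in this patch; everything else is illustrative):

```cpp
// Per-frame driver sketch; `pipeline` is a constructed GPUDetectionPipeline
// and `frame` a BGRA cv::Mat for the current camera image (assumptions).
ogles_gpgpu::FrameInput input = { { frame.cols, frame.rows },
                                  void_ptr(frame.data), true, false, TEXTURE_FORMAT };

auto result = (*pipeline)(input, /* doDetection = */ true); // DetectionTex
GLuint texture = result.first;              // annotated texture for frame N-2
const Detections& objects = result.second;  // detections for frame N-2
```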
diff --git a/src/app/pipeline/VideoCaptureImage.cpp b/src/app/pipeline/VideoCaptureImage.cpp
new file mode 100644
index 0000000..e89199c
--- /dev/null
+++ b/src/app/pipeline/VideoCaptureImage.cpp
@@ -0,0 +1,66 @@
+#include "VideoCaptureImage.h"
+
+VideoCaptureImage::VideoCaptureImage(const cv::Mat& image, int frames)
+    : image(image)
+    , frames(frames)
+{
+}
+
+VideoCaptureImage::VideoCaptureImage(const std::string& filename, int frames)
+    : frames(frames)
+{
+    image = cv::imread(filename, cv::IMREAD_COLOR);
+}
+
+VideoCaptureImage::~VideoCaptureImage() = default;
+
+void VideoCaptureImage::setRepeat(int n)
+{
+    frames = n;
+}
+
+bool VideoCaptureImage::grab()
+{
+    return false;
+}
+
+bool VideoCaptureImage::isOpened() const
+{
+    return !image.empty();
+}
+
+void VideoCaptureImage::release()
+{
+    image.release();
+}
+
+bool VideoCaptureImage::open(const cv::String& filename)
+{
+    image = cv::imread(filename);
+    return !image.empty();
+}
+
+bool VideoCaptureImage::read(cv::OutputArray image)
+{
+    if (++index <= frames)
+    {
+        image.assign(this->image);
+        return true;
+    }
+    return false;
+}
+
+double VideoCaptureImage::get(int propId) const
+{
+    switch (propId)
+    {
+        case CV_CAP_PROP_FRAME_WIDTH:
+            return static_cast<double>(image.cols);
+        case CV_CAP_PROP_FRAME_HEIGHT:
+            return static_cast<double>(image.rows);
+        case CV_CAP_PROP_FRAME_COUNT:
+            return static_cast<double>(frames);
+        default:
+            return 0.0;
+    }
+}
diff --git a/src/app/pipeline/VideoCaptureImage.h b/src/app/pipeline/VideoCaptureImage.h
new file mode 100644
index 0000000..7bbb4d7
--- /dev/null
+++ b/src/app/pipeline/VideoCaptureImage.h
@@ -0,0 +1,36 @@
+/*! -*-c++-*-
+  @file   VideoCaptureImage.h
+  @author David Hirvonen
+  @brief  Present a cv::Mat as a cv::VideoCapture
+
+  \copyright Copyright 2018 Elucideye, Inc. All rights reserved.
+  \license{This project is released under the 3 Clause BSD License.}
+
+*/
+
+#ifndef __acf_VideoCaptureImage_h__
+#define __acf_VideoCaptureImage_h__
+
+#include <opencv2/videoio.hpp>
+
+class VideoCaptureImage : public cv::VideoCapture
+{
+public:
+    VideoCaptureImage(const cv::Mat& image, int frames = 100);
+    VideoCaptureImage(const std::string& filename, int frames = 100);
+    virtual ~VideoCaptureImage();
+
+    void setRepeat(int n);
+    virtual bool grab();
+    virtual bool isOpened() const;
+    virtual void release();
+    virtual bool open(const cv::String& filename);
+    virtual bool read(cv::OutputArray image);
+    double get(int propId) const;
+
+    cv::Mat image;
+    int frames = 0;
+    int index = -1;
+};
+
+#endif // __acf_VideoCaptureImage_h__
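`VideoCaptureImage` makes a single image look like a finite video, which keeps the pipeline test (`--repeat=600` above) deterministic and independent of camera hardware. A small usage sketch (the file name is hypothetical):

```cpp
#include "VideoCaptureImage.h"

VideoCaptureImage video("input.png", /* frames = */ 600); // hypothetical input
cv::Mat frame;
while (video.read(frame)) // read() returns false once `frames` is exhausted
{
    // ... feed `frame` to the detection pipeline ...
}
```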
diff --git a/src/app/pipeline/pipeline.cpp b/src/app/pipeline/pipeline.cpp
index 2a09c33..98337e5 100644
--- a/src/app/pipeline/pipeline.cpp
+++ b/src/app/pipeline/pipeline.cpp
@@ -39,10 +39,12 @@
 
     acf-pipeline \
         --input=0 \
-        --model=${SOME_PATH_VAR}/drishti-assets/drishti_face_gray_80x80.cpb
+        --model=${SOME_PATH_VAR}/drishti-assets/drishti_face_gray_80x80.cpb \
         --minimum=200 \
         --calibration=0.01 \
-        --window
+        --global \
+        --window \
+        --size=1920x1080
 
     In the above command, "minimum=200" means we are ignoring all faces less
     than 200 pixels wide.  You should set this to the largest value
@@ -58,13 +60,14 @@
 */
 
 #if defined(ACF_ADD_TO_STRING)
-#  include // first
+#include // first
 #endif
 
 #include
 #include
 
 #include "GPUDetectionPipeline.h"
+#include "VideoCaptureImage.h"
 
 #include
 #include
@@ -75,6 +78,7 @@
 #include
 #include
+#include <regex>
 
 // clang-format off
 #ifdef ANDROID
@@ -93,81 +97,6 @@ void* void_ptr(const T* ptr)
 
 static std::shared_ptr<cv::VideoCapture> create(const std::string& filename);
 static cv::Size getSize(cv::VideoCapture& video);
 
-class VideoCaptureImage : public cv::VideoCapture
-{
-public:
-    VideoCaptureImage(const cv::Mat &image, int frames=100)
-        : image(image)
-        , frames(frames)
-    {
-    }
-
-    VideoCaptureImage(const std::string &filename, int frames=100)
-        : frames(frames)
-    {
-        image = cv::imread(filename, cv::IMREAD_COLOR);
-    }
-
-    virtual ~VideoCaptureImage()
-    {
-
-    }
-
-    void setRepeat(int n)
-    {
-        frames = n;
-    }
-
-    virtual bool grab ()
-    {
-        return false;
-    }
-
-    virtual bool isOpened () const
-    {
-        return !image.empty();
-    }
-
-    virtual void release()
-    {
-        image.release();
-    }
-
-    virtual bool open (const cv::String &filename)
-    {
-        image = cv::imread(filename);
-        return !image.empty();
-    }
-
-    virtual bool read (cv::OutputArray image)
-    {
-        if(++index <= frames)
-        {
-            image.assign(this->image);
-            return true;
-        }
-        return false;
-    }
-
-    double get(int propId) const
-    {
-        switch (propId)
-        {
-            case CV_CAP_PROP_FRAME_WIDTH:
-                return static_cast<double>(image.cols);
-            case CV_CAP_PROP_FRAME_HEIGHT:
-                return static_cast<double>(image.rows);
-            case CV_CAP_PROP_FRAME_COUNT:
-                return static_cast<double>(frames);
-            default:
-                return 0.0;
-        }
-    }
-
-    cv::Mat image;
-    int frames = 0;
-    int index = -1;
-};
-
 struct Application
 {
     // clang-format off
@@ -178,7 +107,8 @@ struct Application
         float acfCalibration,
         int minWidth,
         bool window,
-        float resolution
+        float resolution,
+        const cv::Size &sizeIn = {}
     )
         : resolution(resolution)
     // clang-format on
     {
@@ -189,6 +119,20 @@ struct Application
         // http://answers.opencv.org/answers/761/revisions/
         video = create(input);
 
+        // ::::::::::::::::::: CAVEAT ::::::::::::::::::::::::::::::
+        // Using a MAX resolution approach will not work in all cases.
+        // It may lead to strange behavior: all gray, all black + very slow.
+        // You may have to specify the desired resolution explicitly as shown below
+        //video->set(cv::CAP_PROP_FRAME_WIDTH, 16000.0);
+        //video->set(cv::CAP_PROP_FRAME_HEIGHT, 16000.0);
+
+        if (sizeIn.area())
+        {
+            // If the resolution is known in advance you can set it explicitly like this:
+            video->set(cv::CAP_PROP_FRAME_WIDTH, static_cast<double>(sizeIn.width));
+            video->set(cv::CAP_PROP_FRAME_HEIGHT, static_cast<double>(sizeIn.height));
+        }
+
         // Create an OpenGL context:
         const auto size = getSize(*video);
         context = aglet::GLContext::create(aglet::GLContext::kAuto, window ? "acf" : "", size.width, size.height);
@@ -221,10 +165,10 @@ struct Application
     {
         this->logger = logger;
     }
-    
+
     void setRepeat(int n)
     {
-        if(VideoCaptureImage *cap = dynamic_cast<VideoCaptureImage*>(video.get()))
+        if (VideoCaptureImage* cap = dynamic_cast<VideoCaptureImage*>(video.get()))
         {
             cap->setRepeat(n);
         }
@@ -235,16 +179,10 @@ struct Application
         pipeline->setDoGlobalNMS(flag);
     }
 
-    bool update()
+    virtual cv::Mat grab()
     {
         cv::Mat frame;
-        (*video) >> frame;
-
-        if (frame.empty())
-        {
-            return false; // indicate failure, exit loop
-        }
-
+        (*video) >> frame;
         std::cout << "MU: " << cv::mean(frame) << std::endl;
         if (frame.channels() == 3)
         {
             // ogles_gpgpu supports both {BGR,RGB}A and NV{21,12} inputs, and
@@ -257,8 +195,26 @@ struct Application
             cv::cvtColor(frame, frame, cv::COLOR_BGR2BGRA); // assume all others are GL_BGRA
 #endif
         }
+        return frame;
+    };
+
+    virtual cv::Mat getFrameInput(ogles_gpgpu::FrameInput& input)
+    {
+        cv::Mat frame = grab();
+        input = { { frame.cols, frame.rows }, void_ptr(frame.data), true, false, TEXTURE_FORMAT };
+        return frame;
+    }
+
+    virtual bool update()
+    {
+        ogles_gpgpu::FrameInput frame;
+        cv::Mat storage = getFrameInput(frame);
+        if (storage.empty())
+        {
+            return false;
+        }
 
-        auto result = (*pipeline)({ { frame.cols, frame.rows }, void_ptr(frame.data), true, false, TEXTURE_FORMAT }, true);
+        auto result = (*pipeline)(frame, true);
 
         if (logger)
         {
@@ -269,13 +225,13 @@ struct Application
         {
             show(result.first);
         }
-        
+
         counter++;
 
         return true; // continue sequence
     }
 
-    void show(GLuint texture)
+    virtual void show(GLuint texture)
     {
         auto& geometry = context->getGeometry();
         display->setOffset(geometry.tx, geometry.ty);
@@ -287,40 +243,87 @@ struct Application
 
     float resolution = 1.f;
 
     std::shared_ptr logger;
-
     std::shared_ptr context;
     std::shared_ptr display;
-
     std::shared_ptr video;
     std::shared_ptr detector;
     std::shared_ptr pipeline;
-    
+
     std::size_t counter = 0;
 };
 
+struct ApplicationBenchmark : public Application
+{
+    // clang-format off
+    ApplicationBenchmark
+    (
+        const std::string &input,
+        const std::string &model,
+        float acfCalibration,
+        int minWidth,
+        bool window,
+        float resolution,
+        const cv::Size &size = {}
+    )
+        : Application(input, model, acfCalibration, minWidth, window, resolution, size)
+    // clang-format on
+    {
+    }
+
+    virtual cv::Mat getFrameInput(ogles_gpgpu::FrameInput& input)
+    {
+        if (counter > 256)
+        {
+            return cv::Mat();
+        }
+
+        static cv::Mat frame = grab(); // for the benchmark we can repeat the first frame
+        input = { { frame.cols, frame.rows }, void_ptr(frame.data), true, false, TEXTURE_FORMAT };
+        if (counter++ > 0)
+        {
+            input.inputTexture = pipeline->getInputTexture();
+            input.pixelBuffer = nullptr;
+        }
+
+        return frame;
+    }
+};
+
+static std::vector<std::string> split(const std::string& input, const std::string& regex)
+{
+    // passing -1 as the submatch index parameter performs splitting
+    std::regex re(regex);
+    std::sregex_token_iterator first{ input.begin(), input.end(), re, -1 }, last;
+    return { first, last };
+}
+
 int gauze_main(int argc, char** argv)
 {
     auto logger = util::Logger::create("acf-pipeline");
 
-    for(int i = 0; i < argc; i++)
+    for (int i = 0; i < argc; i++)
     {
         logger->info("arg[{}] = {}", i, argv[i]);
     }
 
-    bool help = false, doWindow = false, doGlobal = false;
+    bool help = false, doWindow = false, doGlobal = false, doBenchmark = false;
     float resolution = 1.f, acfCalibration = 0.f;
     std::string sInput, sOutput, sModel;
     int minWidth = 0, repeat = 1;
+    std::string sDimensions;
 
     const int argumentCount = argc;
     cxxopts::Options options("acf-pipeline", "GPU accelerated ACF object detection (see Piotr's toolbox)");
 
     // clang-format off
     options.add_options()
         ("i,input", "Input file", cxxopts::value<std::string>(sInput))
+        ("size", "Input video dimensions: wxh", cxxopts::value<std::string>(sDimensions))
         ("o,output", "Output directory", cxxopts::value<std::string>(sOutput))
         ("m,model", "Model file", cxxopts::value<std::string>(sModel))
         ("c,calibration", "ACF calibration", cxxopts::value<float>(acfCalibration))
+        ("b,benchmark", "Run benchmark by repeating first input texture", cxxopts::value<bool>(doBenchmark))
         ("r,resolution", "Resolution", cxxopts::value<float>(resolution))
         ("g,global", "Global nms", cxxopts::value<bool>(doGlobal))
         ("w,window", "Window", cxxopts::value<bool>(doWindow))
@@ -337,6 +340,18 @@ int gauze_main(int argc, char** argv)
         return 0;
     }
 
+    cv::Size size; // video dimensions
+    if (!sDimensions.empty())
+    {
+        std::vector<std::string> dimensions = split(sDimensions, "x");
+        if (!dimensions.size())
+        {
+            logger->error("Must specify input dimensions in format: <width>x<height>, received {}", sDimensions);
+            return 1;
+        }
+        size = { std::stoi(dimensions[0]), std::stoi(dimensions[1]) };
+    }
+
     if (sModel.empty())
     {
         logger->error("Must specify a valid model");
@@ -349,35 +364,43 @@ int gauze_main(int argc, char** argv)
         return 1;
     }
 
-    Application app(sInput, sModel, acfCalibration, minWidth, doWindow, resolution);
-    app.setLogger(logger);
-    app.setRepeat(repeat);
-    app.setDoGlobalNMS(doGlobal);
+    std::shared_ptr<Application> app;
+    if (doBenchmark)
+    {
+        app = std::make_shared<ApplicationBenchmark>(sInput, sModel, acfCalibration, minWidth, doWindow, resolution, size);
+    }
+    else
+    {
+        app = std::make_shared<Application>(sInput, sModel, acfCalibration, minWidth, doWindow, resolution, size);
+    }
+
+    app->setLogger(logger);
+    app->setRepeat(repeat);
+    app->setDoGlobalNMS(doGlobal);
 
     std::size_t count = 0;
-    aglet::GLContext::RenderDelegate delegate = [&]() -> bool
-    {
-        bool status = app.update();
-        if(status)
+    aglet::GLContext::RenderDelegate delegate = [&]() -> bool {
+        bool status = app->update();
+        if (status)
         {
             count++;
         }
         return status;
     };
 
-    double seconds = 0.0;    
+    double seconds = 0.0;
     { // Process all frames (main loop) and record the total time:
         util::ScopeTimeLogger timer = [&](double total) { seconds = total; };
-        (*app.context)(delegate);
+        (*app->context)(delegate);
     }
 
-    const double fps = (seconds > 0.0) ? static_cast<double>(count)/seconds : 0.0;
+    const double fps = (seconds > 0.0) ? static_cast<double>(count) / seconds : 0.0;
     logger->info("ACF FULL: FPS={}", fps);
 
-    if(count > 0)
+    if (count > 0)
     {
-        auto summary = app.pipeline->summary();
-        for(auto &entry : summary)
+        auto summary = app->pipeline->summary();
+        for (auto& entry : summary)
         {
             entry.second /= static_cast<double>(count);
             logger->info("\tACF STAGE {} = {}", entry.first, entry.second);
@@ -397,7 +420,7 @@ static std::shared_ptr<cv::VideoCapture> create(const std::string& filename)
     }
     else
     {
-        if(filename.find(".png") != std::string::npos)
+        if (filename.find(".png") != std::string::npos)
        {
            return std::make_shared<VideoCaptureImage>(filename);
        }
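One caveat in the new `--size` parsing above: `split()` returns the tokens between matches, so a malformed value like "1920" yields a single token, the `!dimensions.size()` guard still passes, and `dimensions[1]` indexes past the end of the vector. A stricter check is a small change (review sketch, not part of the patch):

```cpp
std::vector<std::string> dimensions = split(sDimensions, "x");
if (dimensions.size() != 2) // require exactly <width> and <height>
{
    logger->error("Must specify input dimensions in format: <width>x<height>, received {}", sDimensions);
    return 1;
}
const cv::Size size(std::stoi(dimensions[0]), std::stoi(dimensions[1]));
```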
diff --git a/src/lib/acf/ACF.cpp b/src/lib/acf/ACF.cpp
index 3bb2654..88dbcf3 100644
--- a/src/lib/acf/ACF.cpp
+++ b/src/lib/acf/ACF.cpp
@@ -14,8 +14,8 @@
 
 #include
 #include
 
-#include
+#include <numeric> // for iota
 
 ACF_NAMESPACE_BEGIN
@@ -256,6 +256,14 @@ int Detector::operator()(const MatP& IpTranspose, std::vector<cv::Rect>& objects
     return (*this)(P, objects, scores);
 }
 
+static std::vector<int> create_random_indices(int n)
+{
+    std::vector<int> indices(n);
+    std::iota(indices.begin(), indices.end(), 0);
+    std::random_shuffle(indices.begin(), indices.end());
+    return indices;
+}
+
 // Multiscale search:
 int Detector::operator()(const Pyramid& P, std::vector<cv::Rect>& objects, std::vector<double>* scores)
 {
@@ -266,37 +274,59 @@ int Detector::operator()(const Pyramid& P, std::vector<cv::Rect>& objects, std::
     auto modelDs = *(opts.modelDs);
     auto shift = (modelDsPad - modelDs) / 2 - pad;
 
-    std::vector<Detection> bbs;
-    for (int i = 0; i < P.nScales; i++)
-    {
-        DetectionVec ds;
+    // Here we create random indices so that (on average) for each `const cv::Range &r` slice
+    // in the cv::parallel_for_(const cv::Range &r, ...) call, the total ACF Pyramid area
+    // for all levels (specified by Range::{start,end}) will be equal for every thread.
+    auto scales = create_random_indices(P.nScales);
+    std::vector<DetectionVec> bbs_(P.nScales);
 
-        // ROI fields indicates row major storage, else column major:
-        if (P.rois.size() > i)
-        {
-            acfDetect1(P.data[i][0], P.rois[i], shrink, modelDsPad, *(opts.stride), *(opts.cascThr), ds);
-        }
-        else
+    std::function<void(const cv::Range&)> worker = [&](const cv::Range& r) {
+        for (int j = r.start; j < r.end; j++)
         {
-            acfDetect1(P.data[i][0], {}, shrink, modelDsPad, *(opts.stride), *(opts.cascThr), ds);
-        }
+            int i = scales[j];
 
-        // Scale up the detections
-        for (auto& bb : ds)
-        {
-            //std::cout << bb.weight << std::endl;
-            cv::Size size(cv::Size2d(modelDs) / P.scales[i]);
-            bb.roi.x = double(bb.roi.x + shift.width) / P.scaleshw[i].width;
-            bb.roi.y = double(bb.roi.y + shift.height) / P.scaleshw[i].height;
-            bb.roi.width = size.width;
-            bb.roi.height = size.height;
+            DetectionVec ds;
 
-            std::swap(bb.roi.x, bb.roi.y); // TODO: review
+            // ROI fields indicates row major storage, else column major:
+            if (P.rois.size() > i)
+            {
+                acfDetect1(P.data[i][0], P.rois[i], shrink, modelDsPad, *(opts.stride), *(opts.cascThr), ds);
+            }
+            else
+            {
+                acfDetect1(P.data[i][0], {}, shrink, modelDsPad, *(opts.stride), *(opts.cascThr), ds);
+            }
 
-            std::swap(bb.roi.width, bb.roi.height); // TRANSPOSE
+            // Scale up the detections
+            for (auto& bb : ds)
+            {
+                cv::Size size(cv::Size2d(modelDs) / P.scales[i]);
+                bb.roi.x = double(bb.roi.x + shift.width) / P.scaleshw[i].width;
+                bb.roi.y = double(bb.roi.y + shift.height) / P.scaleshw[i].height;
+                bb.roi.width = size.width;
+                bb.roi.height = size.height;
+
+                std::swap(bb.roi.x, bb.roi.y);
+                std::swap(bb.roi.width, bb.roi.height);
+            }
+            std::copy(ds.begin(), ds.end(), std::back_inserter(bbs_[i]));
         }
-        std::copy(ds.begin(), ds.end(), std::back_inserter(bbs));
+    };
+
+    if (m_doParallel)
+    {
+        cv::parallel_for_({ 0, P.nScales }, worker);
+    }
+    else
+    {
+        worker({ 0, P.nScales });
+    }
+
+    for (int i = 1; i < bbs_.size(); i++)
+    {
+        std::copy(bbs_[i].begin(), bbs_[i].end(), std::back_inserter(bbs_[0]));
     }
+    auto& bbs = bbs_[0];
 
     if (m_doNms)
     {
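A portability note on `create_random_indices` above: `std::random_shuffle` was deprecated in C++14 and removed in C++17. A drop-in equivalent using `<random>` (the fixed seed is an arbitrary choice for reproducibility, not something the patch specifies):

```cpp
#include <algorithm>
#include <numeric>
#include <random>
#include <vector>

static std::vector<int> create_random_indices(int n)
{
    std::vector<int> indices(n);
    std::iota(indices.begin(), indices.end(), 0);
    std::mt19937 rng(42); // fixed seed: shuffles are reproducible run to run
    std::shuffle(indices.begin(), indices.end(), rng);
    return indices;
}
```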
diff --git a/src/lib/acf/ACF.h b/src/lib/acf/ACF.h
index 99e6e75..5e19c58 100644
--- a/src/lib/acf/ACF.h
+++ b/src/lib/acf/ACF.h
@@ -449,7 +449,7 @@ class ACF_EXPORT Detector : public acf::ObjectDetector
         Size2dVec& scaleshw
     );
     // clang-format on
-    
+
     static int convTri(const MatP& I, MatP& J, double r = 1.0, int s = 1);
 
     // clang-format off
@@ -466,7 +466,7 @@ class ACF_EXPORT Detector : public acf::ObjectDetector
     );
     // clang-format on
 
-    // clang-format off 
+    // clang-format off
     static int gradientHist
     (
         const cv::Mat& M,
@@ -481,6 +481,16 @@ class ACF_EXPORT Detector : public acf::ObjectDetector
     );
     // clang-format on
 
+    virtual void setDoParallel(bool flag)
+    {
+        m_doParallel = flag;
+    }
+
+    virtual bool getDoParallel() const
+    {
+        return m_doParallel;
+    }
+
     virtual void setDetectionScorePruneRatio(double ratio)
     {
         m_detectionScorePruneRatio = ratio;
@@ -515,7 +525,7 @@ class ACF_EXPORT Detector : public acf::ObjectDetector
         DetectionVec& objects
     );
     // clang-format on
-    
+
     int bbNms(const DetectionVec& bbsIn, const Options::Nms& pNms, DetectionVec& bbs);
 
     int acfModify(const Detector::Modify& params);
@@ -593,6 +603,7 @@ class ACF_EXPORT Detector : public acf::ObjectDetector
     std::shared_ptr m_streamLogger;
 
     double m_detectScorePruneRatio = 0.0;
+    bool m_doParallel = true;
     bool m_isLuv = false;
     bool m_isTranspose = false;
diff --git a/src/lib/acf/ACFIO.cpp b/src/lib/acf/ACFIO.cpp
index c013514..0d16784 100644
--- a/src/lib/acf/ACFIO.cpp
+++ b/src/lib/acf/ACFIO.cpp
@@ -165,15 +165,15 @@ int Detector::deserialize(ParserNodeDetector& detector_)
             auto&& pJitter_ = opts_.create("pJitter", opts_->pJitter);
             pJitter_.parse, decltype((*pJitter_)->flip)>("flip", (*pJitter_)->flip);
         }
-        catch(...)
+        catch (...)
         {
             opts_->pJitter->flip.set("jitter", false, true, 0);
         }
 
         opts_.parse, decltype(opts_->winsSave)>("winsSave", opts_->winsSave);
     }
-    
-    clf.thrsU8 = clf.thrs * 255.0; // add uint8_t compatible thresholds
+
+    clf.thrs.convertTo(clf.thrsU8, CV_8UC1, 255.0f); // add uint8_t compatible thresholds
 
     return 0;
 }
diff --git a/src/lib/acf/ACFIOArchive.h b/src/lib/acf/ACFIOArchive.h
index 96cbfce..9666e62 100644
--- a/src/lib/acf/ACFIOArchive.h
+++ b/src/lib/acf/ACFIOArchive.h
@@ -95,7 +95,7 @@ void Detector::Classifier::serialize(Archive& ar, const std::uint32_t version)
 
     if (Archive::is_loading::value)
     {
-        thrsU8 = thrs * 255.0; // precompute uint8_t thresholds
+        thrs.convertTo(thrsU8, CV_8UC1, 255.0f); // precompute uint8_t thresholds
     }
 }
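The `convertTo` change above is behavioral, not cosmetic: a `cv::Mat` expression such as `thrs * 255.0` stays in the source depth (CV_32F), whereas `convertTo` with `CV_8UC1` scales, rounds, and saturates into an actual 8-bit matrix — which is what the stricter type assertions in `getScaledThresholds` (later in this patch) require. A minimal illustration with made-up values:

```cpp
#include <opencv2/core.hpp>

cv::Mat thrs = (cv::Mat_<float>(1, 3) << 0.25f, 0.5f, 2.0f);

cv::Mat a = thrs * 255.0; // still CV_32F: {63.75, 127.5, 510}
cv::Mat b;
thrs.convertTo(b, CV_8UC1, 255.0); // CV_8U, rounded and saturated: {64, 128, 255}
```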
diff --git a/src/lib/acf/GPUACF.cpp b/src/lib/acf/GPUACF.cpp
index c41229b..ea924e5 100644
--- a/src/lib/acf/GPUACF.cpp
+++ b/src/lib/acf/GPUACF.cpp
@@ -82,7 +82,7 @@ struct ACF::Impl
             rgb2luvProc->add(luvTransposeOut.get());
         }
     }
-    
+
     void initACF(const SizeVec& scales, FeatureKind kind)
     {
         // Rotation + rescale and ACF pipeline:
@@ -99,7 +99,7 @@ struct ACF::Impl
         smoothNormGradProc = util::make_unique(1.0);
         smoothGradHistProcA = util::make_unique(1.0);
         smoothGradHistProcB = util::make_unique(1.0);
-        
+
         // Reduction:
         reduceRgbProc = util::make_unique();
         reduceLuvProc = util::make_unique();
@@ -147,7 +147,7 @@ struct ACF::Impl
         // ((( histA -> smooth(histA) )))
         gradHistProcA->add(smoothGradHistProcA.get());
         smoothGradHistProcA->add(reduceGradHistProcA.get());
-        
+
         // ((( histB -> smooth(histB) )))
         gradHistProcB->add(smoothGradHistProcB.get());
         smoothGradHistProcB->add(reduceGradHistProcB.get());
@@ -180,7 +180,7 @@ struct ACF::Impl
             CV_Assert(false);
         }
     }
-    
+
     // This provides a map for unpacking/swizzling OpenGL textures (i.e., RGBA or BGRA) to user
     // memory using NEON optimized instructions.
     ChannelSpecification getACFChannelSpecification(MatP& acf) const
@@ -248,7 +248,7 @@ struct ACF::Impl
         return ChannelSpecification();
         // clang-format on
     }
-    
+
     bool needsTextures() const
     {
         bool status = false;
@@ -277,16 +277,16 @@ struct ACF::Impl
     float m_grayscaleScale = 1.0f;
     bool m_hasGrayscaleOutput = false;
     cv::Mat m_grayscale;
-    
+
     int m_shrink = 4;
 
     std::unique_ptr rotationProc; // make sure we have an unmodified upright image
     std::unique_ptr rgb2luvProc;
     std::unique_ptr pyramidProc;
-    std::unique_ptr gradProc; // (1.0);
-    std::unique_ptr normProc; // (5, true, 0.005);
-    std::unique_ptr gradHistProcA; // (6, 0, 1.f);
-    std::unique_ptr gradHistProcB; // (6, 4, 1.f);
+    std::unique_ptr gradProc;      // (1.0);
+    std::unique_ptr normProc;      // (5, true, 0.005);
+    std::unique_ptr gradHistProcA; // (6, 0, 1.f);
+    std::unique_ptr gradHistProcB; // (6, 4, 1.f);
 
     // Reduction:
     std::unique_ptr reduceRgbProc; // initial reduction
@@ -294,8 +294,8 @@ struct ACF::Impl
     std::unique_ptr reduceNormGradProc;
     std::unique_ptr reduceGradHistProcA; // (1);
     std::unique_ptr reduceGradHistProcB; // (1);
-    std::unique_ptr reduceForGrayProc; // (optional) reduce for grayscale output
-    
+    std::unique_ptr reduceForGrayProc;   // (optional) reduce for grayscale output
+
     // Strategic smoothing (hand tuned to match ACF output)
     std::unique_ptr smoothProc;
     std::unique_ptr smoothNormGradProc;
@@ -303,7 +303,7 @@ struct ACF::Impl
     std::unique_ptr smoothGradHistProcB;
 
     // #### OUTPUT ###
-    std::unique_ptr luvTransposeOut; // transposed LUV output
+    std::unique_ptr luvTransposeOut;     // transposed LUV output
 
     // Multi-texture swizzle (one or the other for 7 vs 10 channels)
     std::unique_ptr mergeProcLUVG;
diff --git a/src/lib/acf/draw.cpp b/src/lib/acf/draw.cpp
index de903a2..5ed912d 100644
--- a/src/lib/acf/draw.cpp
+++ b/src/lib/acf/draw.cpp
@@ -17,31 +17,31 @@ ACF_NAMESPACE_BEGIN
 // This function demonstrates how to visualize a pyramid structure:
 cv::Mat draw(acf::Detector::Pyramid& pyramid)
 {
-	cv::Mat canvas;
-	std::vector<cv::Mat> levels;
-	for (int i = 0; i < pyramid.nScales; i++)
-	{
-		// Concatenate the transposed faces, so they are compatible with the GPU layout
-		cv::Mat Ccpu;
-		std::vector<cv::Mat> images;
-		for (const auto& image : pyramid.data[i][0].get())
-		{
-			images.push_back(image.t());
-		}
-		cv::vconcat(images, Ccpu);
-
-		// Instead of upright:
-		//cv::vconcat(pyramid.data[i][0].get(), Ccpu);
-
-		if (levels.size())
-		{
-			cv::copyMakeBorder(Ccpu, Ccpu, 0, levels.front().rows - Ccpu.rows, 0, 0, cv::BORDER_CONSTANT);
-		}
-
-		levels.push_back(Ccpu);
-	}
-	cv::hconcat(levels, canvas);
-	return canvas;
+    cv::Mat canvas;
+    std::vector<cv::Mat> levels;
+    for (int i = 0; i < pyramid.nScales; i++)
+    {
+        // Concatenate the transposed faces, so they are compatible with the GPU layout
+        cv::Mat Ccpu;
+        std::vector<cv::Mat> images;
+        for (const auto& image : pyramid.data[i][0].get())
+        {
+            images.push_back(image.t());
+        }
+        cv::vconcat(images, Ccpu);
+
+        // Instead of upright:
+        //cv::vconcat(pyramid.data[i][0].get(), Ccpu);
+
+        if (levels.size())
+        {
+            cv::copyMakeBorder(Ccpu, Ccpu, 0, levels.front().rows - Ccpu.rows, 0, 0, cv::BORDER_CONSTANT);
+        }
+
+        levels.push_back(Ccpu);
+    }
+    cv::hconcat(levels, canvas);
+    return canvas;
 }
 
 ACF_NAMESPACE_END
diff --git a/src/lib/acf/gpu/multipass/triangle_pass.cpp b/src/lib/acf/gpu/multipass/triangle_pass.cpp
index fa77ef1..596da47 100644
--- a/src/lib/acf/gpu/multipass/triangle_pass.cpp
+++ b/src/lib/acf/gpu/multipass/triangle_pass.cpp
@@ -82,7 +82,7 @@ std::string fragmentShaderForTriangle(int blurRadius, bool doNorm = false, int p
     ss << "void main()\n";
     ss << "{\n";
     ss << "   vec4 sum = vec4(0.0);\n";
-    ss << "   vec4 center = texture2D(inputImageTexture, blurCoordinates[" << numberOfOffsets/2 << "]);\n";
+    ss << "   vec4 center = texture2D(inputImageTexture, blurCoordinates[" << numberOfOffsets / 2 << "]);\n";
 
     for (int currentBlurCoordinateIndex = 0; currentBlurCoordinateIndex < numberOfOffsets; currentBlurCoordinateIndex++)
     {
diff --git a/src/lib/acf/toolbox/acfDetect1.cpp b/src/lib/acf/toolbox/acfDetect1.cpp
index 7be6d66..31a2802 100644
--- a/src/lib/acf/toolbox/acfDetect1.cpp
+++ b/src/lib/acf/toolbox/acfDetect1.cpp
@@ -163,12 +163,12 @@ const cv::Mat& Detector::Classifier::getScaledThresholds(int type) const
 {
     switch (type)
     {
-        case CV_32FC1:
-            CV_Assert(!thrs.empty());
-            return thrs;
         case CV_8UC1:
-            CV_Assert(!thrsU8.empty());
+            CV_Assert(!thrsU8.empty() && (thrsU8.type() == CV_8UC1));
             return thrsU8;
+        case CV_32FC1:
+            CV_Assert(!thrs.empty() && (thrs.type() == CV_32FC1));
+            return thrs;
         default:
             CV_Assert(type == CV_32FC1 || type == CV_8UC1);
     }
@@ -176,13 +176,15 @@ const cv::Mat& Detector::Classifier::getScaledThresholds(int type) const
 
 template
-std::shared_ptr allocDetector(const MatP& I, const cv::Mat &thrs, DetectionSink* sink)
+std::shared_ptr allocDetector(const MatP& I, const cv::Mat& thrs, DetectionSink* sink)
 {
     switch (I.depth())
     {
         case CV_8UC1:
+            CV_Assert(thrs.type() == CV_8UC1);
             return std::make_shared>(I[0].ptr(), thrs.ptr(), sink);
         case CV_32FC1:
+            CV_Assert(thrs.type() == CV_32FC1);
             return std::make_shared>(I[0].ptr(), thrs.ptr(), sink);
         default:
             CV_Assert(I.depth() == CV_8UC1 || I.depth() == CV_32FC1);
@@ -190,7 +192,7 @@ std::shared_ptr allocDetector(const MatP& I, const cv::Mat &thrs, DetectionSink*
     return nullptr; // unused: for static analyzer
 }
 
-std::shared_ptr allocDetector(const MatP& I, const cv::Mat &thrs, DetectionSink* sink, int depth)
+std::shared_ptr allocDetector(const MatP& I, const cv::Mat& thrs, DetectionSink* sink, int depth)
 {
     // Enforce compile time constants in inner tree search:
     switch (depth)
@@ -219,6 +221,7 @@ std::shared_ptr allocDetector(const MatP& I, const cv::Mat& thrs, DetectionSink*
     return nullptr;
 }
 
+// clang-format off
 auto Detector::createDetector
 (
     const MatP& I,
@@ -228,6 +231,7 @@ auto Detector::createDetector
     int stride,
     DetectionSink* sink
 )
+// clang-format on
 const -> DetectionParamPtr
 {
     int modelHt = modelDsPad.height;
@@ -295,6 +299,7 @@ auto Detector::createDetector
 //
 // 3/21/2015: Rework arithmetic for row-major storage order
 
+// clang-format off
 void Detector::acfDetect1
 (
     const MatP& I,
@@ -305,6 +310,7 @@ void Detector::acfDetect1
     double cascThr,
     std::vector<Detection>& objects
 )
+// clang-format on
 {
     DetectionSink detections;
     auto detector = createDetector(I, rois, shrink, modelDsPad, stride, &detections);
diff --git a/src/lib/acf/toolbox/gradientMex.cpp b/src/lib/acf/toolbox/gradientMex.cpp
index e6bbdfb..0bcde5d 100644
--- a/src/lib/acf/toolbox/gradientMex.cpp
+++ b/src/lib/acf/toolbox/gradientMex.cpp
@@ -121,7 +121,7 @@ class ACosTable
 #endif
         return a1[i];
     }
-    
+
     const int max()
     {
         return +(n + b - 1);
@@ -185,10 +185,10 @@ void gradMag(float* I, float* M, float* O, int h, int w, int d, bool full)
     _Gx = (__m128*)Gx;
     Gy = (float*)alMalloc(s, 16);
     _Gy = (__m128*)Gy;
-    
+
     __m128 upper = SET(static_cast<float>(ACosTable::getInstance().max()));
     __m128 lower = SET(static_cast<float>(ACosTable::getInstance().min()));
-    
+
     // compute gradient magnitude and orientation for each column
     for (x = 0; x < w; x++)
     {
diff --git a/src/lib/acf/ut/test-acf.cpp b/src/lib/acf/ut/test-acf.cpp
index a9c4b67..fe12853 100644
--- a/src/lib/acf/ut/test-acf.cpp
+++ b/src/lib/acf/ut/test-acf.cpp
@@ -244,7 +244,7 @@ class ACFTest : public ::testing::Test
     // State:
     // 1) Allocates acf::Detector
     // 2) Allocates ogles_gpgpu::ACF
-    
+
     void initGPUAndCreatePyramid(acf::Detector::Pyramid& Pgpu, ogles_gpgpu::ACF::FeatureKind kind)
     {
         m_detector = create(modelFilename);
@@ -253,7 +253,7 @@ class ACFTest : public ::testing::Test
         acf::Detector::Pyramid Pcpu;
         m_detector->setIsTranspose(true);
         m_detector->computePyramid(m_IpT, Pcpu);
-        const int shrink = m_detector->opts.pPyramid->pChns->shrink.get();        
+        const int shrink = m_detector->opts.pPyramid->pChns->shrink.get();
         auto sizes = getPyramidSizes(Pcpu);
         static const bool doGray = false;
         ogles_gpgpu::Size2d inputSize(image.cols, image.rows);
@@ -265,7 +265,7 @@ class ACFTest : public ::testing::Test
         cv::Mat input = image;
 
         // Fill in the pyramid:
-        (*m_acf)({{ input.cols, input.rows }, input.ptr(), true, 0, DFLT_TEXTURE_FORMAT});
+        (*m_acf)({ { input.cols, input.rows }, input.ptr(), true, 0, DFLT_TEXTURE_FORMAT });
         glFlush();
         m_acf->fill(Pgpu, Pcpu);
     }
@@ -585,7 +585,6 @@ TEST_F(ACFTest, ACFCaltechDetector)
 }
 #endif // defined(ACF_SERIALIZE_WITH_CVMATIO)
 
-
 // ### utility ###
 // http://stackoverflow.com/a/32647694