From 66922a3b5dc87d290e858817ec3e421466f20979 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Brunner?= Date: Mon, 20 Nov 2023 16:00:45 +0100 Subject: [PATCH] Add AWS S3 support --- .dockerignore | 6 ++ .github/workflows/main.yaml | 17 +++++ CMakeLists.txt | 20 ++++++ Dockerfile | 21 ++++++ Dockerfile-aws | 35 ++++++++++ README.md | 4 ++ modules/unsuck/unsuck.hpp | 105 ++++++++++++++++++++++++++++- src/executable_extract_area.cpp | 38 ++++++++++- src/executable_extract_profile.cpp | 49 ++++++++++++-- 9 files changed, 283 insertions(+), 12 deletions(-) create mode 100644 .dockerignore create mode 100644 .github/workflows/main.yaml create mode 100644 Dockerfile create mode 100644 Dockerfile-aws diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..8f917e6 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,6 @@ +/.git +/build +/Dockerfile* +/.dockerignore +/docs +/README.md diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml new file mode 100644 index 0000000..4fe8608 --- /dev/null +++ b/.github/workflows/main.yaml @@ -0,0 +1,17 @@ +name: Build + +on: + - push + +jobs: + main: + name: Build with AWS + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - run: docker build --file=Dockerfile --tag=test . + - run: docker run test extract_profile --help && docker run test extract_area --help + + - run: docker build --file=Dockerfile-aws --tag=aws . 
+ - run: docker run aws extract_profile --help && docker run aws extract_area --help diff --git a/CMakeLists.txt b/CMakeLists.txt index 45f8df9..57f14f1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,8 @@ cmake_minimum_required(VERSION 3.16) set(CMAKE_SUPPRESS_REGENERATION true) +option(WITH_AWS_SDK "Build with AWS SDK" OFF) + project(CPotree LANGUAGES CXX) set(CMAKE_CXX_STANDARD 20) @@ -25,6 +27,15 @@ set(BROTLI_DIR "${PROJECT_SOURCE_DIR}/libs/brotli") add_subdirectory(${LASZIP_DIR}) add_subdirectory(${BROTLI_DIR}) +####################### +# Initialize AWS SDK +####################### + +if (WITH_AWS_SDK) + add_definitions(-DWITH_AWS_SDK) + find_package(AWSSDK REQUIRED COMPONENTS s3) +endif (WITH_AWS_SDK) + ############################################### # COPY LICENSE FILES TO BINARY DIRECTORY ############################################### @@ -75,6 +86,11 @@ add_executable(extract_profile ./src/executable_extract_profile.cpp ) + +if (WITH_AWS_SDK) +target_link_libraries(extract_profile ${AWSSDK_LINK_LIBRARIES} ${AWSSDK_PLATFORM_DEPS}) +endif (WITH_AWS_SDK) + target_link_libraries(extract_profile laszip) target_link_libraries(extract_profile brotlienc-static) target_link_libraries(extract_profile brotlidec-static) @@ -107,6 +123,10 @@ add_executable(extract_area ./src/executable_extract_area.cpp ) +if (WITH_AWS_SDK) +target_link_libraries(extract_area ${AWSSDK_LINK_LIBRARIES} ${AWSSDK_PLATFORM_DEPS}) +endif (WITH_AWS_SDK) + target_link_libraries(extract_area laszip) target_link_libraries(extract_area brotlienc-static) target_link_libraries(extract_area brotlidec-static) diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..2048a9c --- /dev/null +++ b/Dockerfile @@ -0,0 +1,21 @@ +FROM ubuntu:23.10 + +RUN --mount=type=cache,target=/var/lib/apt/lists \ + --mount=type=cache,target=/var/cache,sharing=locked \ + apt-get update \ + && apt-get install --yes build-essential git cmake python3 \ + zlib1g-dev libssl-dev libcurlpp-dev + 
+WORKDIR /app + +COPY . . + +WORKDIR /app/build + +RUN --mount=type=cache,target=/app/build \ + cmake .. \ + && make \ + && cp extract_area extract_profile /usr/bin \ + && cp liblaszip.so /usr/lib + +RUN extract_profile --help diff --git a/Dockerfile-aws b/Dockerfile-aws new file mode 100644 index 0000000..8696970 --- /dev/null +++ b/Dockerfile-aws @@ -0,0 +1,35 @@ +FROM ubuntu:22.04 + +RUN --mount=type=cache,target=/var/lib/apt/lists \ + --mount=type=cache,target=/var/cache,sharing=locked \ + apt-get update \ + && apt-get install --yes build-essential git cmake python3 \ + zlib1g-dev libssl-dev libcurlpp-dev + +WORKDIR /opt/ + +RUN git clone --recurse-submodules https://github.com/aws/aws-sdk-cpp.git \ + && cd aws-sdk-cpp \ + && git checkout 1.11.205 + +WORKDIR /opt/aws-sdk-cpp/build + + +RUN --mount=type=cache,target=/opt/aws-sdk-cpp/build \ + cmake .. -DCMAKE_TOOLCHAIN_FILE=../toolchains/gcc-c++20.cmake -DBUILD_ONLY="s3" \ + && cmake --build . \ + && cmake --install . + +WORKDIR /app + +COPY . . + +WORKDIR /app/build + +RUN --mount=type=cache,target=/app/build \ + cmake .. -DWITH_AWS_SDK=ON \ + && make \ + && cp extract_area extract_profile /usr/bin \ + && cp liblaszip.so /usr/lib + +RUN extract_profile --help diff --git a/README.md b/README.md index 3e23e9e..0f65efa 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,10 @@ cmake ../ * Make sure "Release" build is selected. * Build "extract_profile". +## Build options + +* `WITH_AWS_SDK`: Build with s3 support. Requires AWS SDK. 
+ # Usage diff --git a/modules/unsuck/unsuck.hpp b/modules/unsuck/unsuck.hpp index 8674bea..8d6f093 100644 --- a/modules/unsuck/unsuck.hpp +++ b/modules/unsuck/unsuck.hpp @@ -33,6 +33,12 @@ using std::shared_ptr; using std::make_shared; using std::chrono::high_resolution_clock; +#ifdef WITH_AWS_SDK +#include +#include +#include +#endif + namespace fs = std::filesystem; static long long unsuck_start_time = high_resolution_clock::now().time_since_epoch().count(); @@ -364,9 +370,66 @@ inline bool iEndsWith(const std::string& str, const std::string& suffix) { return icompare(tstr, suffix); } +#ifdef WITH_AWS_SDK +inline string readAWSS3(string path, string range) { /* Fetches the object named by an "s3://BUCKET/KEY" path with GetObject and returns its body as a string. A non-empty range is passed to SetRange. On failure the error is printed to stderr and the process exits with status 1. Setting DEBUG=TRUE in the environment prints bucket, key and range; the value is compared as text because getenv returns a pointer that may be null. */ + auto no_proto = path.substr(5); + auto slash = no_proto.find('/'); + auto bucket = no_proto.substr(0, slash); + auto key = slash == string::npos ? string() : no_proto.substr(slash + 1); + if (const char* debug = std::getenv("DEBUG"); debug != nullptr && string(debug) == "TRUE") { + cout << "bucket: " << bucket << endl; + cout << "key: " << key << endl; + } + /* Optional endpoint override: AWS_ENDPOINT_URL takes precedence over AWS_ENDPOINT_URL_S3. */ + auto clientConfig = Aws::Client::ClientConfiguration(); + if (const char* env_p = std::getenv("AWS_ENDPOINT_URL")) { + clientConfig.endpointOverride = env_p; + } + else if (const char* env_p = std::getenv("AWS_ENDPOINT_URL_S3")) { + clientConfig.endpointOverride = env_p; + } + auto client = Aws::S3::S3Client(clientConfig); + auto request = Aws::S3::Model::GetObjectRequest(); + request.SetBucket(bucket.c_str()); + request.SetKey(key.c_str()); + if (!range.empty()) { + if (const char* debug = std::getenv("DEBUG"); debug != nullptr && string(debug) == "TRUE") { + cout << "range: " << range << endl; + } + request.SetRange(range.c_str()); + } + + auto outcome = client.GetObject(request); + + if (outcome.IsSuccess()) { + auto& stream = outcome.GetResult().GetBody(); + std::stringstream ss; + ss << stream.rdbuf(); + + return ss.str(); + } else { + auto error = outcome.GetError(); + std::cerr << "ERROR: " << error.GetExceptionName() << ": " << error.GetMessage() << std::endl; + exit(1); + } + /* Not reached: the success branch returns and the error branch exits. Kept so every path returns a value. */ + std::stringstream ss; + + return ss.str(); + +} +#endif + // taken from: 
https://stackoverflow.com/questions/2602013/read-whole-ascii-file-into-c-stdstring/2602060 inline string readTextFile(string path) { +#ifdef WITH_AWS_SDK + // if path starts with s3://, download the file from s3 + if (path.starts_with("s3://")) { + return readAWSS3(path, string()); + } +#endif + std::ifstream t(path); std::string str; @@ -395,6 +458,18 @@ inline string readTextFile(string path) { inline shared_ptr readBinaryFile(string path) { +#ifdef WITH_AWS_SDK + // if path starts with s3://, download the file from s3 + if (path.find("s3://") == 0) { + auto str = readAWSS3(path, string()); + + auto buffer = make_shared(str.size()); + memcpy(buffer->data, str.data(), str.size()); + + return buffer; + } +#endif + auto file = fopen(path.c_str(), "rb"); auto size = fs::file_size(path); @@ -448,8 +523,20 @@ inline shared_ptr readBinaryFile(string path) { inline vector readBinaryFile(string path, uint64_t start, uint64_t size) { - //ifstream file(path, ios::binary); - +#ifdef WITH_AWS_SDK + // if path starts with s3://, download the file from s3 + if (path.find("s3://") == 0) { + auto str = readAWSS3(path, "bytes=" + to_string(start) + "-" + to_string(start + size - 1)); + + vector buffer(str.size()); + memcpy(buffer.data(), str.data(), str.size()); + + return buffer; + } +#endif + + //ifstream file(path, ios::binary); + // the fopen version seems to be quite a bit faster than ifstream auto file = fopen(path.c_str(), "rb"); @@ -481,6 +568,18 @@ inline vector readBinaryFile(string path, uint64_t start, uint64_t size } inline void readBinaryFile(string path, uint64_t start, uint64_t size, void* target) { + +#ifdef WITH_AWS_SDK + // if path starts with s3://, download the file from s3 + if (path.find("s3://") == 0) { + auto str = readAWSS3(path, "bytes=" + to_string(start) + "-" + to_string(start + size - 1)); + + memcpy(target, str.data(), str.size()); + + return; + } +#endif + auto file = fopen(path.c_str(), "rb"); auto totalSize = fs::file_size(path); @@ -518,7 +617,7 
@@ inline void writeBinaryFile(string path, vector& data) { offset += batchSize; remaining -= batchSize; } - + of.close(); } diff --git a/src/executable_extract_area.cpp b/src/executable_extract_area.cpp index 8003dad..5fe6887 100644 --- a/src/executable_extract_area.cpp +++ b/src/executable_extract_area.cpp @@ -22,6 +22,10 @@ #include "PotreeWriter_v2.h" #include "Attributes.h" +#if WITH_AWS_SDK +#include +#endif + using std::string; using std::function; using std::shared_ptr; @@ -112,7 +116,12 @@ int main(int argc, char** argv) { Arguments args(argc, argv); + args.addArgument("help,h", "show this help message and exit"); + #ifdef WITH_AWS_SDK + args.addArgument("source,i,", "input files (Uses S3 if path starts with 's3:///')"); + #else args.addArgument("source,i,", "input files"); + #endif args.addArgument("output,o", "output file or directory, depending on target format"); args.addArgument("area", "clip area"); args.addArgument("output-format", "LAS, LAZ, POTREE"); @@ -121,6 +130,11 @@ int main(int argc, char** argv) { args.addArgument("output-attributes", ""); args.addArgument("get-candidates", "return number of candidate points"); + if (args.has("help")) { + cout << args.usage() << endl; + exit(0); + } + string strArea = args.get("area").as(); vector sources = args.get("source").as>(); string targetpath = args.get("output").as(); @@ -129,7 +143,22 @@ int main(int argc, char** argv) { Area area = parseArea(strArea); - sources = curateSources(sources); + bool use_aws_sdk = false; +#ifdef WITH_AWS_SDK + for (string path : sources) { + if (path.find("s3://") == 0) { + use_aws_sdk = true; + break; + } + } + Aws::SDKOptions options; + if (use_aws_sdk) { + Aws::InitAPI(options); + } +#endif + if (!use_aws_sdk) { + sources = curateSources(sources); + } auto stats = computeStats(sources); Attributes outputAttributes = computeAttributes(args); @@ -186,9 +215,14 @@ int main(int argc, char** argv) { writer->close(); } +#ifdef WITH_AWS_SDK + if (use_aws_sdk) { + 
Aws::ShutdownAPI(options); + } +#endif printElapsedTime("duration", tStart); return 0; -} \ No newline at end of file +} diff --git a/src/executable_extract_profile.cpp b/src/executable_extract_profile.cpp index 62459e0..0656b68 100644 --- a/src/executable_extract_profile.cpp +++ b/src/executable_extract_profile.cpp @@ -25,6 +25,10 @@ #include "PotreeWriter_v2.h" #include "Attributes.h" +#if WITH_AWS_SDK +#include +#endif + using std::set; using std::string; using std::function; @@ -57,10 +61,10 @@ Attributes computeAttributes(Arguments& args, vector sources) { attribute.numElements = jsAttribute["numElements"]; attribute.elementSize = jsAttribute["elementSize"]; attribute.type = typenameToType(jsAttribute["type"]); - + attribute.min.x = jsAttribute["min"][0]; attribute.max.x = jsAttribute["max"][0]; - + if (jsAttribute["min"].size() > 1) { attribute.min.y = jsAttribute["min"][1]; attribute.max.y = jsAttribute["max"][1]; @@ -206,7 +210,12 @@ int main(int argc, char** argv) { Arguments args(argc, argv); + args.addArgument("help,h", "show this help message and exit"); +#ifdef WITH_AWS_SDK + args.addArgument("source,i,", "input files (Uses S3 if path starts with 's3:///')"); +#else args.addArgument("source,i,", "input files"); +#endif args.addArgument("output,o", "output file or directory, depending on target format"); args.addArgument("coordinates", "coordinates of the profile segments. 
in the form \"{x0,y0},{x1,y1},...\""); args.addArgument("width", "width of the profile"); @@ -216,6 +225,11 @@ int main(int argc, char** argv) { args.addArgument("output-attributes", ""); args.addArgument("get-candidates", "return number of candidate points"); + if (args.has("help")) { + cout << args.usage() << endl; + exit(0); + } + if (!args.has("coordinates")) { GENERATE_ERROR_MESSAGE << "missing argument: --coordinates \"{x0,y0},{x1,y1},...\"" << endl; exit(123); @@ -237,7 +251,23 @@ int main(int argc, char** argv) { Area area; area.profiles = { profile }; - sources = curateSources(sources); + bool use_aws_sdk = false; +#ifdef WITH_AWS_SDK + for (string path : sources) { + if (path.starts_with("s3://")) { + use_aws_sdk = true; + break; + } + } + Aws::SDKOptions options; + if (use_aws_sdk) { + // options.loggingOptions.logLevel = Aws::Utils::Logging::LogLevel::Trace; + Aws::InitAPI(options); + } +#endif + if (!use_aws_sdk) { + sources = curateSources(sources); + } auto stats = computeStats(sources); Attributes outputAttributes = computeAttributes(args, sources); @@ -296,7 +326,7 @@ int main(int argc, char** argv) { //auto rgb = points->attributeBuffersMap["rgb"]; - + Attribute attribute_position_projected("position_projected_profile", 8, 2, 4, AttributeType::INT32); shared_ptr buffer_position_projected = make_shared(8 * points->numPoints); @@ -375,7 +405,7 @@ int main(int argc, char** argv) { // ss << std::this_thread::get_id() << ": loadPoints() end" << endl; // cout << ss.str(); //} - + }); }; @@ -383,11 +413,16 @@ int main(int argc, char** argv) { //cout << "#accepted: " << totalAccepted << ", #rejected: " << totalRejected << endl; writer->close(); + } +#ifdef WITH_AWS_SDK + if (use_aws_sdk) { + Aws::ShutdownAPI(options); + } +#endif //printElapsedTime("duration", tStart); - return 0; -} \ No newline at end of file +}