Skip to content

Commit

Permalink
Add AWS S3 support
Browse files Browse the repository at this point in the history
  • Loading branch information
sbrunner committed Dec 7, 2023
1 parent 67da806 commit 66922a3
Show file tree
Hide file tree
Showing 9 changed files with 283 additions and 12 deletions.
6 changes: 6 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
/.git
/build
/Dockerfile*
/.dockerignore
/docs
/README.md
17 changes: 17 additions & 0 deletions .github/workflows/main.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# CI smoke test: builds both Docker images (plain and AWS-enabled) on every
# push and checks that the two executables start and print their help text.
name: Build

on:
  - push

jobs:
  main:
    name: Build with AWS
    runs-on: ubuntu-latest
    steps:
      # v4 runs on a supported Node runtime; v2 relies on a retired one.
      - uses: actions/checkout@v4

      # Default build, without the AWS SDK.
      - run: docker build --file=Dockerfile --tag=test .
      - run: docker run test extract_profile --help && docker run test extract_area --help

      # Build with -DWITH_AWS_SDK=ON (S3 support).
      - run: docker build --file=Dockerfile-aws --tag=aws .
      - run: docker run aws extract_profile --help && docker run aws extract_area --help
20 changes: 20 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
cmake_minimum_required(VERSION 3.16)
set(CMAKE_SUPPRESS_REGENERATION true)

option(WITH_AWS_SDK "Build with AWS SDK" OFF)

project(CPotree LANGUAGES CXX)

set(CMAKE_CXX_STANDARD 20)
Expand All @@ -25,6 +27,15 @@ set(BROTLI_DIR "${PROJECT_SOURCE_DIR}/libs/brotli")
add_subdirectory(${LASZIP_DIR})
add_subdirectory(${BROTLI_DIR})

#######################
# Initialize AWS SDK
#######################

if (WITH_AWS_SDK)
add_definitions(-DWITH_AWS_SDK)
find_package(AWSSDK REQUIRED COMPONENTS s3)
endif (WITH_AWS_SDK)

###############################################
# COPY LICENSE FILES TO BINARY DIRECTORY
###############################################
Expand Down Expand Up @@ -75,6 +86,11 @@ add_executable(extract_profile
./src/executable_extract_profile.cpp
)


if (WITH_AWS_SDK)
target_link_libraries(extract_profile ${AWSSDK_LINK_LIBRARIES} ${AWSSDK_PLATFORM_DEPS})
endif (WITH_AWS_SDK)

target_link_libraries(extract_profile laszip)
target_link_libraries(extract_profile brotlienc-static)
target_link_libraries(extract_profile brotlidec-static)
Expand Down Expand Up @@ -107,6 +123,10 @@ add_executable(extract_area
./src/executable_extract_area.cpp
)

if (WITH_AWS_SDK)
target_link_libraries(extract_area ${AWSSDK_LINK_LIBRARIES} ${AWSSDK_PLATFORM_DEPS})
endif (WITH_AWS_SDK)

target_link_libraries(extract_area laszip)
target_link_libraries(extract_area brotlienc-static)
target_link_libraries(extract_area brotlidec-static)
Expand Down
21 changes: 21 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Image that builds extract_area / extract_profile without AWS S3 support.
# NOTE(review): this uses ubuntu:23.10 while Dockerfile-aws uses ubuntu:22.04;
# confirm the difference is intentional (23.10 is an interim, non-LTS release).
FROM ubuntu:23.10

# Install the compiler toolchain and the development libraries used by the build.
# The cache mounts keep apt's package lists and caches out of the image layer
# and let them be reused between builds.
RUN --mount=type=cache,target=/var/lib/apt/lists \
--mount=type=cache,target=/var/cache,sharing=locked \
apt-get update \
&& apt-get install --yes build-essential git cmake python3 \
zlib1g-dev libssl-dev libcurlpp-dev

WORKDIR /app

# Copy the sources (filtered by .dockerignore) into the image.
COPY . .

WORKDIR /app/build

# Configure and build inside a cache mount. The contents of a cache mount are
# not part of the resulting layer, so the binaries and liblaszip.so are copied
# to system directories within the same RUN instruction.
RUN --mount=type=cache,target=/app/build \
cmake .. \
&& make \
&& cp extract_area extract_profile /usr/bin \
&& cp liblaszip.so /usr/lib

# Smoke test: the image build fails if the installed binary cannot start.
RUN extract_profile --help
35 changes: 35 additions & 0 deletions Dockerfile-aws
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Image that builds extract_area / extract_profile with S3 support
# (-DWITH_AWS_SDK=ON) on top of a from-source build of the AWS SDK for C++
# restricted to the S3 component.
FROM ubuntu:22.04

# Install the compiler toolchain and the development libraries used by the
# SDK and project builds. The cache mounts keep apt's package lists and caches
# out of the image layer and let them be reused between builds.
RUN --mount=type=cache,target=/var/lib/apt/lists \
    --mount=type=cache,target=/var/cache,sharing=locked \
    apt-get update \
    && apt-get install --yes build-essential git cmake python3 \
    zlib1g-dev libssl-dev libcurlpp-dev

WORKDIR /opt/

# Clone the pinned release tag directly. Cloning the default branch with
# --recurse-submodules and running `git checkout <tag>` afterwards would leave
# the submodules at the default-branch revisions instead of the ones recorded
# by the tag. --depth 1 only limits the history of the SDK repository itself.
RUN git clone --depth 1 --branch 1.11.205 --recurse-submodules \
    https://github.com/aws/aws-sdk-cpp.git

WORKDIR /opt/aws-sdk-cpp/build

# Build and install only the S3 client. The build tree lives in a cache mount,
# which is not part of the image layer, so the install step must run in the
# same RUN instruction.
RUN --mount=type=cache,target=/opt/aws-sdk-cpp/build \
    cmake .. -DCMAKE_TOOLCHAIN_FILE=../toolchains/gcc-c++20.cmake -DBUILD_ONLY="s3" \
    && cmake --build . \
    && cmake --install .

WORKDIR /app

# Copy the sources (filtered by .dockerignore) into the image.
COPY . .

WORKDIR /app/build

# Configure with S3 support and build inside a cache mount; copy the results
# to system directories within the same RUN instruction for the reason above.
RUN --mount=type=cache,target=/app/build \
    cmake .. -DWITH_AWS_SDK=ON \
    && make \
    && cp extract_area extract_profile /usr/bin \
    && cp liblaszip.so /usr/lib

# Smoke test: the image build fails if the installed binary cannot start.
RUN extract_profile --help
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ cmake ../
* Make sure "Release" build is selected.
* Build "extract_profile".

## Build options

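* `WITH_AWS_SDK`: Build with AWS S3 support, so that input paths of the form `s3://<bucket>/<path>` can be read. Requires the AWS SDK for C++ (S3 component). Off by default; enable with `cmake .. -DWITH_AWS_SDK=ON`.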


# Usage

Expand Down
105 changes: 102 additions & 3 deletions modules/unsuck/unsuck.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,12 @@ using std::shared_ptr;
using std::make_shared;
using std::chrono::high_resolution_clock;

#ifdef WITH_AWS_SDK
#include <aws/core/Aws.h>
#include <aws/s3/S3Client.h>
#include <aws/s3/model/GetObjectRequest.h>
#endif

namespace fs = std::filesystem;

static long long unsuck_start_time = high_resolution_clock::now().time_since_epoch().count();
Expand Down Expand Up @@ -364,9 +370,66 @@ inline bool iEndsWith(const std::string& str, const std::string& suffix) {
return icompare(tstr, suffix);
}

#ifdef WITH_AWS_SDK
/// Downloads an S3 object, or a byte range of it, and returns the raw bytes.
///
/// @param path   Object location in the form "s3://<bucket>/<key>". Everything
///               after the first '/' that follows the bucket name is the key.
/// @param range  Optional HTTP Range value (e.g. "bytes=0-1023"). When empty,
///               the whole object is requested.
/// @return       The response body as a string of raw bytes.
///
/// Environment variables:
///  - AWS_ENDPOINT_URL_S3 / AWS_ENDPOINT_URL: override the S3 endpoint. The
///    service-specific variable wins when both are set, following the AWS
///    convention for service-specific endpoints.
///  - DEBUG=TRUE: print bucket, key and range to stderr.
///
/// On a malformed path or a failed request an error is written to stderr and
/// the process terminates with exit(1).
inline string readAWSS3(string path, string range) {
	// getenv returns a C string (or nullptr); it has to be compared by value.
	// Comparing the pointer against a string literal never matches.
	const char* debugEnv = std::getenv("DEBUG");
	const bool debug = debugEnv != nullptr && std::string(debugEnv) == "TRUE";

	// Split "s3://<bucket>/<key>" at the first '/' after the scheme. Validate
	// up front so that a path without a bucket or key produces a clear error
	// instead of an out-of-range exception.
	const std::string scheme = "s3://";
	const auto slash = path.find('/', scheme.size());
	const bool wellFormed = path.compare(0, scheme.size(), scheme) == 0
		&& slash != std::string::npos
		&& slash > scheme.size()
		&& slash + 1 < path.size();
	if (!wellFormed) {
		std::cerr << "ERROR: invalid S3 path '" << path << "', expected s3://<bucket>/<key>" << std::endl;
		exit(1);
	}

	const std::string bucket = path.substr(scheme.size(), slash - scheme.size());
	const std::string key = path.substr(slash + 1);

	// Diagnostics go to stderr so they cannot mix with anything written to stdout.
	if (debug) {
		std::cerr << "bucket: " << bucket << std::endl;
		std::cerr << "key: " << key << std::endl;
		if (!range.empty()) {
			std::cerr << "range: " << range << std::endl;
		}
	}

	Aws::Client::ClientConfiguration clientConfig;
	const char* endpoint = std::getenv("AWS_ENDPOINT_URL_S3");
	if (endpoint == nullptr) {
		endpoint = std::getenv("AWS_ENDPOINT_URL");
	}
	if (endpoint != nullptr) {
		clientConfig.endpointOverride = endpoint;
	}

	Aws::S3::S3Client client(clientConfig);

	Aws::S3::Model::GetObjectRequest request;
	request.SetBucket(bucket.c_str());
	request.SetKey(key.c_str());
	if (!range.empty()) {
		request.SetRange(range.c_str());
	}

	auto outcome = client.GetObject(request);

	if (!outcome.IsSuccess()) {
		const auto& error = outcome.GetError();
		std::cerr << "ERROR: " << error.GetExceptionName() << ": " << error.GetMessage() << std::endl;
		exit(1);
	}

	// Drain the response stream into a string.
	auto& stream = outcome.GetResult().GetBody();
	std::stringstream body;
	body << stream.rdbuf();

	return body.str();
}
#endif

// taken from: https://stackoverflow.com/questions/2602013/read-whole-ascii-file-into-c-stdstring/2602060
inline string readTextFile(string path) {

#ifdef WITH_AWS_SDK
// if path starts with s3://, download the file from s3
if (path.starts_with("s3://")) {
return readAWSS3(path, string());
}
#endif

std::ifstream t(path);
std::string str;

Expand Down Expand Up @@ -395,6 +458,18 @@ inline string readTextFile(string path) {

inline shared_ptr<Buffer> readBinaryFile(string path) {

#ifdef WITH_AWS_SDK
// if path starts with s3://, download the file from s3
if (path.find("s3://") == 0) {
auto str = readAWSS3(path, string());

auto buffer = make_shared<Buffer>(str.size());
memcpy(buffer->data, str.data(), str.size());

return buffer;
}
#endif

auto file = fopen(path.c_str(), "rb");
auto size = fs::file_size(path);

Expand Down Expand Up @@ -448,8 +523,20 @@ inline shared_ptr<Buffer> readBinaryFile(string path) {

inline vector<uint8_t> readBinaryFile(string path, uint64_t start, uint64_t size) {

//ifstream file(path, ios::binary);

#ifdef WITH_AWS_SDK
// if path starts with s3://, download the file from s3
if (path.find("s3://") == 0) {
auto str = readAWSS3(path, "bytes=" + to_string(start) + "-" + to_string(start + size - 1));

vector<uint8_t> buffer(str.size());
memcpy(buffer.data(), str.data(), str.size());

return buffer;
}
#endif

//ifstream file(path, ios::binary);

// the fopen version seems to be quite a bit faster than ifstream
auto file = fopen(path.c_str(), "rb");

Expand Down Expand Up @@ -481,6 +568,18 @@ inline vector<uint8_t> readBinaryFile(string path, uint64_t start, uint64_t size
}

inline void readBinaryFile(string path, uint64_t start, uint64_t size, void* target) {

#ifdef WITH_AWS_SDK
// if path starts with s3://, download the file from s3
if (path.find("s3://") == 0) {
auto str = readAWSS3(path, "bytes=" + to_string(start) + "-" + to_string(start + size - 1));

memcpy(target, str.data(), str.size());

return;
}
#endif

auto file = fopen(path.c_str(), "rb");

auto totalSize = fs::file_size(path);
Expand Down Expand Up @@ -518,7 +617,7 @@ inline void writeBinaryFile(string path, vector<T>& data) {
offset += batchSize;
remaining -= batchSize;
}


of.close();
}
Expand Down
38 changes: 36 additions & 2 deletions src/executable_extract_area.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@
#include "PotreeWriter_v2.h"
#include "Attributes.h"

#if WITH_AWS_SDK
#include <aws/core/Aws.h>
#endif

using std::string;
using std::function;
using std::shared_ptr;
Expand Down Expand Up @@ -112,7 +116,12 @@ int main(int argc, char** argv) {

Arguments args(argc, argv);

args.addArgument("help,h", "show this help message and exit");
#ifdef WITH_AWS_SDK
args.addArgument("source,i,", "input files (Uses S3 if path starts with 's3://<bucket>/<path>')");
#else
args.addArgument("source,i,", "input files");
#endif
args.addArgument("output,o", "output file or directory, depending on target format");
args.addArgument("area", "clip area");
args.addArgument("output-format", "LAS, LAZ, POTREE");
Expand All @@ -121,6 +130,11 @@ int main(int argc, char** argv) {
args.addArgument("output-attributes", "");
args.addArgument("get-candidates", "return number of candidate points");

if (args.has("help")) {
cout << args.usage() << endl;
exit(0);
}

string strArea = args.get("area").as<string>();
vector<string> sources = args.get("source").as<vector<string>>();
string targetpath = args.get("output").as<string>();
Expand All @@ -129,7 +143,22 @@ int main(int argc, char** argv) {

Area area = parseArea(strArea);

sources = curateSources(sources);
bool use_aws_sdk = false;
#ifdef WITH_AWS_SDK
for (string path : sources) {
if (path.find("s3://") == 0) {
use_aws_sdk = true;
break;
}
}
Aws::SDKOptions options;
if (use_aws_sdk) {
Aws::InitAPI(options);
}
#endif
if (!use_aws_sdk) {
sources = curateSources(sources);
}
auto stats = computeStats(sources);

Attributes outputAttributes = computeAttributes(args);
Expand Down Expand Up @@ -186,9 +215,14 @@ int main(int argc, char** argv) {
writer->close();
}

#ifdef WITH_AWS_SDK
if (use_aws_sdk) {
Aws::ShutdownAPI(options);
}
#endif

printElapsedTime("duration", tStart);


return 0;
}
}
Loading

0 comments on commit 66922a3

Please sign in to comment.