From f860cdcaf5eedf42d21f85087823a181bff5129e Mon Sep 17 00:00:00 2001 From: Peter Winckles Date: Sat, 7 Jan 2023 08:32:03 -0600 Subject: [PATCH 01/21] initial integration of the s3 transfer manager This replaces the naive multipart upload implementation with the new S3 transfer manager. Unfortunately, in order for the transfer manager to do multipart uploads, it must use the CRT client, but the CRT client does not currently allow you to specify path-style access, which is a requirement for some non-AWS S3 implementations. Hopefully they add support soon. --- ocfl-java-aws/pom.xml | 8 + .../main/java/io/ocfl/aws/OcflS3Client.java | 487 +++++++----------- .../java/io/ocfl/aws/OcflS3Exception.java | 47 ++ .../java/io/ocfl/aws/OcflS3ClientTest.java | 138 ++--- .../src/test/java/io/ocfl/aws/OcflS3Test.java | 45 +- .../ocfl/core/storage/DefaultOcflStorage.java | 1 + .../ocfl/core/storage/cloud/CloudClient.java | 5 + .../ocfl/core/storage/cloud/CloudStorage.java | 8 + .../io/ocfl/core/storage/common/Storage.java | 5 + .../storage/filesystem/FileSystemStorage.java | 8 + .../test/java/io/ocfl/itest/LoadITest.java | 21 +- .../io/ocfl/itest/s3/S3BadReposITest.java | 23 +- .../java/io/ocfl/itest/s3/S3ITestHelper.java | 55 +- .../io/ocfl/itest/s3/S3MutableHeadITest.java | 23 +- .../java/io/ocfl/itest/s3/S3OcflITest.java | 23 +- .../java/io/ocfl/itest/s3/S3StorageTest.java | 43 +- pom.xml | 5 + 17 files changed, 489 insertions(+), 456 deletions(-) create mode 100644 ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Exception.java diff --git a/ocfl-java-aws/pom.xml b/ocfl-java-aws/pom.xml index c35b03a3..b6de0cc8 100644 --- a/ocfl-java-aws/pom.xml +++ b/ocfl-java-aws/pom.xml @@ -66,6 +66,14 @@ software.amazon.awssdk s3 + + software.amazon.awssdk + s3-transfer-manager + + + software.amazon.awssdk.crt + aws-crt + org.codehaus.woodstox stax2-api diff --git a/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Client.java b/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Client.java index 
3eb868eb..1f949c59 100644 --- a/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Client.java +++ b/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Client.java @@ -24,9 +24,7 @@ package io.ocfl.aws; -import com.google.common.annotations.VisibleForTesting; import io.ocfl.api.exception.OcflIOException; -import io.ocfl.api.exception.OcflInputException; import io.ocfl.api.util.Enforce; import io.ocfl.core.storage.cloud.CloudClient; import io.ocfl.core.storage.cloud.CloudObjectKey; @@ -36,43 +34,32 @@ import io.ocfl.core.util.UncheckedFiles; import java.io.IOException; import java.io.InputStream; -import java.nio.ByteBuffer; -import java.nio.channels.FileChannel; import java.nio.charset.StandardCharsets; import java.nio.file.Path; -import java.nio.file.StandardOpenOption; -import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.List; import java.util.Objects; +import java.util.concurrent.CompletionException; import java.util.function.BiConsumer; import java.util.stream.Collectors; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import software.amazon.awssdk.core.exception.SdkException; -import software.amazon.awssdk.core.sync.RequestBody; -import software.amazon.awssdk.services.s3.S3Client; -import software.amazon.awssdk.services.s3.model.AbortMultipartUploadRequest; -import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest; -import software.amazon.awssdk.services.s3.model.CompletedMultipartUpload; -import software.amazon.awssdk.services.s3.model.CompletedPart; -import software.amazon.awssdk.services.s3.model.CopyObjectRequest; -import software.amazon.awssdk.services.s3.model.CreateMultipartUploadRequest; +import software.amazon.awssdk.core.async.AsyncRequestBody; +import software.amazon.awssdk.core.async.AsyncResponseTransformer; +import software.amazon.awssdk.services.s3.S3AsyncClient; import software.amazon.awssdk.services.s3.model.Delete; import 
software.amazon.awssdk.services.s3.model.DeleteObjectsRequest; import software.amazon.awssdk.services.s3.model.GetObjectRequest; import software.amazon.awssdk.services.s3.model.HeadBucketRequest; import software.amazon.awssdk.services.s3.model.HeadObjectRequest; -import software.amazon.awssdk.services.s3.model.HeadObjectResponse; import software.amazon.awssdk.services.s3.model.ListObjectsV2Request; import software.amazon.awssdk.services.s3.model.ListObjectsV2Response; import software.amazon.awssdk.services.s3.model.NoSuchBucketException; import software.amazon.awssdk.services.s3.model.NoSuchKeyException; import software.amazon.awssdk.services.s3.model.ObjectIdentifier; import software.amazon.awssdk.services.s3.model.PutObjectRequest; -import software.amazon.awssdk.services.s3.model.UploadPartCopyRequest; -import software.amazon.awssdk.services.s3.model.UploadPartRequest; +import software.amazon.awssdk.transfer.s3.S3TransferManager; /** * CloudClient implementation that uses Amazon's S3 synchronous v2 client @@ -81,30 +68,17 @@ public class OcflS3Client implements CloudClient { private static final Logger LOG = LoggerFactory.getLogger(OcflS3Client.class); - private static final int KB = 1024; - private static final int MB = 1024 * KB; - private static final long GB = 1024 * MB; - private static final long TB = 1024 * GB; - - private static final long MAX_FILE_BYTES = 5 * TB; - - private static final int MAX_PART_BYTES = 100 * MB; - private static final int PART_SIZE_BYTES = 10 * MB; - - private static final int MAX_PARTS = 100; - private static final int PART_SIZE_INCREMENT = 10; - private static final int PARTS_INCREMENT = 100; - - private final S3Client s3Client; + // TODO TM add more notes about CRT client + // TODO TM notes about closing + private final S3AsyncClient s3Client; + private final S3TransferManager transferManager; private final String bucket; private final String repoPrefix; private final CloudObjectKey.Builder keyBuilder; private final BiConsumer 
putObjectModifier; - private final BiConsumer createMultipartModifier; - private int maxPartBytes = MAX_PART_BYTES; - private int partSizeBytes = PART_SIZE_BYTES; + private final boolean shouldCloseManager; /** * Used to create a new OcflS3Client instance. @@ -121,31 +95,34 @@ public static Builder builder() { * @param s3Client aws sdk s3 client * @param bucket s3 bucket */ - public OcflS3Client(S3Client s3Client, String bucket) { + public OcflS3Client(S3AsyncClient s3Client, String bucket) { this(s3Client, bucket, null, null, null); } /** * @see OcflS3Client#builder() * - * @param s3Client aws sdk s3 client - * @param bucket s3 bucket - * @param prefix key prefix - * @param putObjectModifier hook for modifying putObject requests - * @param createMultipartModifier hook for modifying createMultipartUpload requests + * @param s3Client aws sdk s3 client, not null + * @param bucket s3 bucket, not null + * @param prefix key prefix, may be null + * @param transferManager aws sdk s3 transfer manager, may be null + * @param putObjectModifier hook for modifying putObject requests, may be null */ public OcflS3Client( - S3Client s3Client, + S3AsyncClient s3Client, String bucket, String prefix, - BiConsumer putObjectModifier, - BiConsumer createMultipartModifier) { + S3TransferManager transferManager, + BiConsumer putObjectModifier) { this.s3Client = Enforce.notNull(s3Client, "s3Client cannot be null"); this.bucket = Enforce.notBlank(bucket, "bucket cannot be blank"); this.repoPrefix = sanitizeRepoPrefix(prefix == null ? "" : prefix); + this.shouldCloseManager = transferManager == null; + this.transferManager = transferManager == null + ? S3TransferManager.builder().s3Client(s3Client).build() + : transferManager; this.keyBuilder = CloudObjectKey.builder().prefix(repoPrefix); this.putObjectModifier = putObjectModifier != null ? putObjectModifier : (k, b) -> {}; - this.createMultipartModifier = createMultipartModifier != null ? 
createMultipartModifier : (k, b) -> {}; } private static String sanitizeRepoPrefix(String repoPrefix) { @@ -161,6 +138,16 @@ private static int indexLastNonSlash(String string) { return 0; } + /** + * {@inheritDoc} + */ + @Override + public void close() { + if (shouldCloseManager) { + transferManager.close(); + } + } + /** * {@inheritDoc} */ @@ -193,82 +180,26 @@ public CloudObjectKey uploadFile(Path srcPath, String dstPath, String contentTyp var fileSize = UncheckedFiles.size(srcPath); var dstKey = keyBuilder.buildFromPath(dstPath); - if (fileSize >= MAX_FILE_BYTES) { - throw new OcflInputException( - String.format("Cannot store file %s because it exceeds the maximum file size.", srcPath)); - } - - if (fileSize > maxPartBytes) { - multipartUpload(srcPath, dstKey, fileSize, contentType); - } else { - LOG.debug("Uploading {} to bucket {} key {} size {}", srcPath, bucket, dstKey, fileSize); + LOG.debug("Uploading {} to bucket {} key {} size {}", srcPath, bucket, dstKey, fileSize); - var builder = PutObjectRequest.builder().contentType(contentType); - - putObjectModifier.accept(dstKey.getKey(), builder); - - s3Client.putObject( - builder.bucket(bucket) - .key(dstKey.getKey()) - .contentLength(fileSize) - .build(), - srcPath); - } - - return dstKey; - } - - // TODO reduce memory consumption? - private void multipartUpload(Path srcPath, CloudObjectKey dstKey, long fileSize, String contentType) { - var partSize = determinePartSize(fileSize); - - LOG.debug( - "Multipart upload of {} to bucket {} key {}. 
File size: {}; part size: {}", - srcPath, - bucket, - dstKey, - fileSize, - partSize); + var builder = PutObjectRequest.builder().contentType(contentType); - var uploadId = beginMultipartUpload(dstKey, contentType); + putObjectModifier.accept(dstKey.getKey(), builder); - var completedParts = new ArrayList(); + var upload = transferManager.uploadFile(req -> req.source(srcPath) + .putObjectRequest(builder.bucket(bucket) + .key(dstKey.getKey()) + .contentLength(fileSize) + .build()) + .build()); try { - try (var channel = FileChannel.open(srcPath, StandardOpenOption.READ)) { - var buffer = ByteBuffer.allocate(partSize); - var i = 1; - - while (channel.read(buffer) > 0) { - buffer.flip(); - - var partResponse = s3Client.uploadPart( - UploadPartRequest.builder() - .bucket(bucket) - .key(dstKey.getKey()) - .uploadId(uploadId) - .partNumber(i) - // TODO entire part is in memory. stream part to file first? - .build(), - RequestBody.fromByteBuffer(buffer)); - - completedParts.add(CompletedPart.builder() - .partNumber(i) - .eTag(partResponse.eTag()) - .build()); - - buffer.clear(); - i++; - } - } catch (IOException e) { - throw new OcflIOException(e); - } - - completeMultipartUpload(uploadId, dstKey, completedParts); + upload.completionFuture().join(); } catch (RuntimeException e) { - abortMultipartUpload(uploadId, dstKey); - throw e; + throw new OcflS3Exception("Failed to upload " + srcPath + " to " + dstKey, unwrapCompletionEx(e)); } + + return dstKey; } /** @@ -277,13 +208,18 @@ private void multipartUpload(Path srcPath, CloudObjectKey dstKey, long fileSize, @Override public CloudObjectKey uploadBytes(String dstPath, byte[] bytes, String contentType) { var dstKey = keyBuilder.buildFromPath(dstPath); - LOG.debug("Writing string to bucket {} key {}", bucket, dstKey); + LOG.debug("Writing bytes to bucket {} key {}", bucket, dstKey); var builder = PutObjectRequest.builder().contentType(contentType); putObjectModifier.accept(dstKey.getKey(), builder); - 
s3Client.putObject(builder.bucket(bucket).key(dstKey.getKey()).build(), RequestBody.fromBytes(bytes)); + try { + s3Client.putObject(builder.bucket(bucket).key(dstKey.getKey()).build(), AsyncRequestBody.fromBytes(bytes)) + .join(); + } catch (RuntimeException e) { + throw new OcflS3Exception("Failed to upload bytes to " + dstKey, unwrapCompletionEx(e)); + } return dstKey; } @@ -299,77 +235,23 @@ public CloudObjectKey copyObject(String srcPath, String dstPath) { LOG.debug("Copying {} to {} in bucket {}", srcKey, dstKey, bucket); try { - s3Client.copyObject(CopyObjectRequest.builder() - .destinationBucket(bucket) - .destinationKey(dstKey.getKey()) - .sourceBucket(bucket) - .sourceKey(srcKey.getKey()) + var copy = transferManager.copy(req -> req.copyObjectRequest(copyReq -> copyReq.destinationBucket(bucket) + .destinationKey(dstKey.getKey()) + .sourceBucket(bucket) + .sourceKey(srcKey.getKey()) + .build()) .build()); - } catch (NoSuchKeyException e) { - throw new KeyNotFoundException(e); - } catch (SdkException e) { - // TODO verify class and message - if (e.getMessage().contains("copy source is larger than the maximum allowable size")) { - multipartCopy(srcKey, dstKey); - } else { - throw e; - } - } - - return dstKey; - } - - private void multipartCopy(CloudObjectKey srcKey, CloudObjectKey dstKey) { - var head = headObject(srcKey); - var fileSize = head.contentLength(); - var partSize = determinePartSize(fileSize); - - LOG.debug( - "Multipart copy of {} to {} in bucket {}: File size {}; part size: {}", - srcKey, - dstKey, - bucket, - fileSize, - partSize); - - var uploadId = beginMultipartUpload(dstKey, null); - try { - var completedParts = new ArrayList(); - var part = 1; - var position = 0L; - - while (position < fileSize) { - var end = Math.min(fileSize - 1, part * partSize - 1); - var partResponse = s3Client.uploadPartCopy(UploadPartCopyRequest.builder() - .destinationBucket(bucket) - .destinationKey(dstKey.getKey()) - .sourceBucket(bucket) - 
.sourceKey(srcKey.getKey()) - .partNumber(part) - .uploadId(uploadId) - .copySourceRange(String.format("bytes=%s-%s", position, end)) - .build()); - - completedParts.add(CompletedPart.builder() - .partNumber(part) - .eTag(partResponse.copyPartResult().eTag()) - .build()); - - part++; - position = end + 1; - } - - completeMultipartUpload(uploadId, dstKey, completedParts); + copy.completionFuture().join(); } catch (RuntimeException e) { - abortMultipartUpload(uploadId, dstKey); - throw e; + var cause = unwrapCompletionEx(e); + if (cause instanceof NoSuchKeyException) { + throw new KeyNotFoundException("Key " + srcKey + " not found in bucket " + bucket, cause); + } + throw new OcflS3Exception("Failed to copy object from " + srcKey + " to " + dstKey, cause); } - } - private HeadObjectResponse headObject(CloudObjectKey key) { - return s3Client.headObject( - HeadObjectRequest.builder().bucket(bucket).key(key.getKey()).build()); + return dstKey; } /** @@ -378,17 +260,21 @@ private HeadObjectResponse headObject(CloudObjectKey key) { @Override public Path downloadFile(String srcPath, Path dstPath) { var srcKey = keyBuilder.buildFromPath(srcPath); - LOG.debug("Downloading bucket {} key {} to {}", bucket, srcKey, dstPath); + LOG.debug("Downloading from bucket {} key {} to {}", bucket, srcKey, dstPath); try { - s3Client.getObject( - GetObjectRequest.builder() - .bucket(bucket) - .key(srcKey.getKey()) - .build(), - dstPath); - } catch (NoSuchKeyException e) { - throw new KeyNotFoundException(e); + var download = transferManager.downloadFile(req -> req.getObjectRequest( + getReq -> getReq.bucket(bucket).key(srcKey.getKey()).build()) + .destination(dstPath) + .build()); + + download.completionFuture().join(); + } catch (RuntimeException e) { + var cause = unwrapCompletionEx(e); + if (cause instanceof NoSuchKeyException) { + throw new KeyNotFoundException("Key " + srcKey + " not found in bucket " + bucket, cause); + } + throw new OcflS3Exception("Failed to download " + srcKey + " 
to " + dstPath, cause); } return dstPath; @@ -400,15 +286,22 @@ public Path downloadFile(String srcPath, Path dstPath) { @Override public InputStream downloadStream(String srcPath) { var srcKey = keyBuilder.buildFromPath(srcPath); - LOG.debug("Streaming bucket {} key {}", bucket, srcKey); + LOG.debug("Streaming from bucket {} key {}", bucket, srcKey); try { - return s3Client.getObject(GetObjectRequest.builder() - .bucket(bucket) - .key(srcKey.getKey()) - .build()); - } catch (NoSuchKeyException e) { - throw new KeyNotFoundException(String.format("Key %s not found in bucket %s.", srcKey, bucket), e); + return s3Client.getObject( + GetObjectRequest.builder() + .bucket(bucket) + .key(srcKey.getKey()) + .build(), + AsyncResponseTransformer.toBlockingInputStream()) + .join(); + } catch (RuntimeException e) { + var cause = unwrapCompletionEx(e); + if (cause instanceof NoSuchKeyException) { + throw new KeyNotFoundException("Key " + srcKey + " not found in bucket " + bucket, cause); + } + throw new OcflS3Exception("Failed to download " + srcKey, cause); } } @@ -432,16 +325,23 @@ public HeadResult head(String path) { var key = keyBuilder.buildFromPath(path); try { - var s3Result = s3Client.headObject( - HeadObjectRequest.builder().bucket(bucket).key(key.getKey()).build()); + var s3Result = s3Client.headObject(HeadObjectRequest.builder() + .bucket(bucket) + .key(key.getKey()) + .build()) + .join(); return new HeadResult() .setContentEncoding(s3Result.contentEncoding()) .setContentLength(s3Result.contentLength()) .setETag(s3Result.eTag()) .setLastModified(s3Result.lastModified()); - } catch (NoSuchKeyException e) { - throw new KeyNotFoundException(String.format("Key %s not found in bucket %s.", key, bucket), e); + } catch (RuntimeException e) { + var cause = unwrapCompletionEx(e); + if (cause instanceof NoSuchKeyException) { + throw new KeyNotFoundException("Key " + key + " not found in bucket " + bucket, cause); + } + throw new OcflS3Exception("Failed to HEAD " + key, 
cause); } } @@ -484,15 +384,20 @@ public boolean directoryExists(String path) { LOG.debug("Checking existence of {} in bucket {}", prefix, bucket); - var response = s3Client.listObjectsV2(ListObjectsV2Request.builder() - .bucket(bucket) - .delimiter("/") - .prefix(prefix) - .maxKeys(1) - .build()); - - return response.contents().stream().findAny().isPresent() - || response.commonPrefixes().stream().findAny().isPresent(); + try { + var response = s3Client.listObjectsV2(ListObjectsV2Request.builder() + .bucket(bucket) + .delimiter("/") + .prefix(prefix) + .maxKeys(1) + .build()) + .join(); + + return response.contents().stream().findAny().isPresent() + || response.commonPrefixes().stream().findAny().isPresent(); + } catch (RuntimeException e) { + throw new OcflS3Exception("Failed to list objects under " + prefix, unwrapCompletionEx(e)); + } } /** @@ -532,10 +437,15 @@ private void deleteObjectsInternal(Collection objectKeys) { .map(key -> ObjectIdentifier.builder().key(key.getKey()).build()) .collect(Collectors.toList()); - s3Client.deleteObjects(DeleteObjectsRequest.builder() - .bucket(bucket) - .delete(Delete.builder().objects(objectIds).build()) - .build()); + try { + s3Client.deleteObjects(DeleteObjectsRequest.builder() + .bucket(bucket) + .delete(Delete.builder().objects(objectIds).build()) + .build()) + .join(); + } catch (RuntimeException e) { + throw new OcflS3Exception("Failed to delete objects " + objectIds, unwrapCompletionEx(e)); + } } } @@ -565,79 +475,42 @@ public void safeDeleteObjects(Collection objectPaths) { @Override public boolean bucketExists() { try { - s3Client.headBucket(HeadBucketRequest.builder().bucket(bucket).build()); + s3Client.headBucket(HeadBucketRequest.builder().bucket(bucket).build()) + .join(); return true; - } catch (NoSuchBucketException e) { - return false; - } - } - - private String beginMultipartUpload(CloudObjectKey key, String contentType) { - var builder = CreateMultipartUploadRequest.builder().contentType(contentType); - - 
createMultipartModifier.accept(key.getKey(), builder); - - return s3Client.createMultipartUpload( - builder.bucket(bucket).key(key.getKey()).build()) - .uploadId(); - } - - private void completeMultipartUpload(String uploadId, CloudObjectKey key, List parts) { - s3Client.completeMultipartUpload(CompleteMultipartUploadRequest.builder() - .bucket(bucket) - .key(key.getKey()) - .uploadId(uploadId) - .multipartUpload(CompletedMultipartUpload.builder().parts(parts).build()) - .build()); - } - - private void abortMultipartUpload(String uploadId, CloudObjectKey key) { - try { - s3Client.abortMultipartUpload(AbortMultipartUploadRequest.builder() - .bucket(bucket) - .key(key.getKey()) - .uploadId(uploadId) - .build()); } catch (RuntimeException e) { - LOG.error("Failed to abort multipart upload. Bucket: {}; Key: {}; Upload Id: {}", bucket, key, uploadId, e); - } - } - - private int determinePartSize(long fileSize) { - var partSize = partSizeBytes; - var maxParts = MAX_PARTS; - - while (fileSize / partSize > maxParts) { - partSize += PART_SIZE_INCREMENT; - - if (partSize > maxPartBytes) { - maxParts += PARTS_INCREMENT; - partSize /= 2; + var cause = unwrapCompletionEx(e); + if (cause instanceof NoSuchBucketException) { + return false; } + throw new OcflS3Exception("Failed to HEAD bucket " + bucket, cause); } - - return partSize; } private ListResult toListResult(ListObjectsV2Request.Builder requestBuilder) { - var result = s3Client.listObjectsV2(requestBuilder.build()); + try { + var result = s3Client.listObjectsV2(requestBuilder.build()).join(); - var prefixLength = prefixLength(result.prefix()); - var repoPrefixLength = repoPrefix.isBlank() ? 
0 : repoPrefix.length() + 1; - var objects = toObjectListings(result, prefixLength); - var dirs = toDirectoryListings(result, repoPrefixLength); + var objects = toObjectListings(result, prefixLength); + var dirs = toDirectoryListings(result, repoPrefixLength); - while (Boolean.TRUE.equals(result.isTruncated())) { - result = s3Client.listObjectsV2(requestBuilder - .continuationToken(result.nextContinuationToken()) - .build()); + while (Boolean.TRUE.equals(result.isTruncated())) { + result = s3Client.listObjectsV2(requestBuilder + .continuationToken(result.nextContinuationToken()) + .build()) + .join(); - objects.addAll(toObjectListings(result, prefixLength)); - dirs.addAll(toDirectoryListings(result, repoPrefixLength)); - } + objects.addAll(toObjectListings(result, prefixLength)); + dirs.addAll(toDirectoryListings(result, repoPrefixLength)); + } - return new ListResult().setObjects(objects).setDirectories(dirs); + return new ListResult().setObjects(objects).setDirectories(dirs); + } catch (RuntimeException e) { + throw new OcflS3Exception("Failed to list objects", unwrapCompletionEx(e)); + } } private List toObjectListings(ListObjectsV2Response result, int prefixLength) { @@ -672,35 +545,53 @@ private int prefixLength(String prefix) { return prefixLength; } - @VisibleForTesting - void setMaxPartBytes(int maxPartBytes) { - this.maxPartBytes = maxPartBytes; - } - - @VisibleForTesting - void setPartSizeBytes(int partSizeBytes) { - this.partSizeBytes = partSizeBytes; + /** + * If the exception is a CompletionException, then the exception's cause is returned. Otherwise, the exception + * itself is returned. 
+ * + * @param e the exception + * @return the exception or its cause + */ + private Throwable unwrapCompletionEx(RuntimeException e) { + Throwable cause = e; + if (e instanceof CompletionException) { + cause = e.getCause(); + } + return cause; } public static class Builder { - private S3Client s3Client; + private S3AsyncClient s3Client; + private S3TransferManager transferManager; private String bucket; private String repoPrefix; private BiConsumer putObjectModifier; - private BiConsumer createMultipartModifier; /** - * The AWS SDK s3 client. Required. + * The AWS SDK S3 client. This SHOULD be a CRT client. Required. * * @param s3Client s3 client * @return builder */ - public Builder s3Client(S3Client s3Client) { + public Builder s3Client(S3AsyncClient s3Client) { this.s3Client = Enforce.notNull(s3Client, "s3Client cannot be null"); return this; } + /** + * The AWS SDK S3 transfer manager. This only needs to be specified when you need to set specific settings, and, + * if it is specified, it can use the same S3 client as was supplied in {@link #s3Client(S3AsyncClient)}. + * Otherwise, when not specified, the default transfer manager is created using the provided S3 Client. + * + * @param transferManager S3 transfer manager + * @return builder + */ + public Builder transferManager(S3TransferManager transferManager) { + this.transferManager = Enforce.notNull(transferManager, "transferManager cannot be null"); + return this; + } + /** * The S3 bucket to use. Required. * @@ -738,29 +629,13 @@ public Builder putObjectModifier(BiConsumer pu return this; } - /** - * Provides a hook to modify createMultipartUpload requests before they are executed. It is intended to be used - * to set object attributes such as tags. - * - *

The first argument is the object key the request is for, and the second is the request builder to apply - * changes to. - * - * @param createMultipartModifier hook for modifying createMultipartUpload requests - * @return builder - */ - public Builder createMultipartModifier( - BiConsumer createMultipartModifier) { - this.createMultipartModifier = createMultipartModifier; - return this; - } - /** * Constructs a new OcflS3Client. s3Client and bucket must be set. * * @return OcflS3Client */ public OcflS3Client build() { - return new OcflS3Client(s3Client, bucket, repoPrefix, putObjectModifier, createMultipartModifier); + return new OcflS3Client(s3Client, bucket, repoPrefix, transferManager, putObjectModifier); } } } diff --git a/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Exception.java b/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Exception.java new file mode 100644 index 00000000..9e8bccf6 --- /dev/null +++ b/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Exception.java @@ -0,0 +1,47 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2019 University of Wisconsin Board of Regents + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +package io.ocfl.aws; + +import io.ocfl.api.exception.OcflJavaException; + +/** + * Captures an error that occurred while interacting with S3. + */ +public class OcflS3Exception extends OcflJavaException { + + public OcflS3Exception() {} + + public OcflS3Exception(String message) { + super(message); + } + + public OcflS3Exception(String message, Throwable cause) { + super(message, cause); + } + + public OcflS3Exception(Throwable cause) { + super(cause.getMessage(), cause); + } +} diff --git a/ocfl-java-aws/src/test/java/io/ocfl/aws/OcflS3ClientTest.java b/ocfl-java-aws/src/test/java/io/ocfl/aws/OcflS3ClientTest.java index dcb3e885..04758f42 100644 --- a/ocfl-java-aws/src/test/java/io/ocfl/aws/OcflS3ClientTest.java +++ b/ocfl-java-aws/src/test/java/io/ocfl/aws/OcflS3ClientTest.java @@ -7,13 +7,14 @@ import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; +import static software.amazon.awssdk.http.SdkHttpConfigurationOption.TRUST_ALL_CERTIFICATES; -import at.favre.lib.bytes.Bytes; import com.adobe.testing.s3mock.junit5.S3MockExtension; import io.ocfl.core.storage.cloud.KeyNotFoundException; import io.ocfl.core.storage.cloud.ListResult; import io.ocfl.core.util.FileUtil; import java.io.IOException; +import java.net.URI; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; @@ -23,6 +24,7 @@ import java.util.concurrent.ThreadLocalRandom; import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; +import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.AfterEach; import 
org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; @@ -32,11 +34,14 @@ import org.slf4j.LoggerFactory; import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; -import software.amazon.awssdk.http.apache.ApacheHttpClient; +import software.amazon.awssdk.core.async.AsyncResponseTransformer; +import software.amazon.awssdk.http.nio.netty.NettyNioAsyncHttpClient; import software.amazon.awssdk.regions.Region; -import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.S3AsyncClient; +import software.amazon.awssdk.services.s3.S3Configuration; import software.amazon.awssdk.services.s3.model.ListObjectsV2Request; import software.amazon.awssdk.services.s3.model.S3Object; +import software.amazon.awssdk.utils.AttributeMap; public class OcflS3ClientTest { @@ -48,7 +53,7 @@ public class OcflS3ClientTest { @RegisterExtension public static S3MockExtension S3_MOCK = S3MockExtension.builder().silent().build(); - private static S3Client awsS3Client; + private static S3AsyncClient awsS3Client; private static OcflS3Client client; private static String bucket; @@ -63,20 +68,33 @@ public static void beforeAll() { if (StringUtils.isNotBlank(accessKey) && StringUtils.isNotBlank(secretKey) && StringUtils.isNotBlank(bucket)) { LOG.info("Running tests against AWS"); - awsS3Client = S3Client.builder() + awsS3Client = S3AsyncClient.crtBuilder() .region(Region.US_EAST_2) .credentialsProvider( StaticCredentialsProvider.create(AwsBasicCredentials.create(accessKey, secretKey))) - .httpClientBuilder(ApacheHttpClient.builder()) .build(); OcflS3ClientTest.bucket = bucket; } else { LOG.info("Running tests against S3 Mock"); - awsS3Client = S3_MOCK.createS3ClientV2(); + awsS3Client = S3AsyncClient.builder() + .endpointOverride(URI.create(S3_MOCK.getServiceEndpoint())) + .region(Region.US_EAST_2) + 
.credentialsProvider(StaticCredentialsProvider.create(AwsBasicCredentials.create("foo", "bar"))) + .serviceConfiguration(S3Configuration.builder() + .pathStyleAccessEnabled(true) + .build()) + .httpClient(NettyNioAsyncHttpClient.builder() + .buildWithDefaults(AttributeMap.builder() + .put(TRUST_ALL_CERTIFICATES, Boolean.TRUE) + .build())) + .build(); + ; OcflS3ClientTest.bucket = UUID.randomUUID().toString(); - awsS3Client.createBucket(request -> { - request.bucket(OcflS3ClientTest.bucket); - }); + awsS3Client + .createBucket(request -> { + request.bucket(OcflS3ClientTest.bucket); + }) + .join(); } client = OcflS3Client.builder() @@ -86,6 +104,12 @@ public static void beforeAll() { .build(); } + @AfterAll + public static void afterAll() { + awsS3Client.close(); + client.close(); + } + @AfterEach public void after() { client.deletePath(""); @@ -123,82 +147,26 @@ public void putObjectWithModification() throws IOException { assertObjectsExist(bucket, List.of(key1, key2)); - try (var response = awsS3Client.getObject(builder -> { - builder.bucket(bucket) - .key(FileUtil.pathJoinIgnoreEmpty(REPO_PREFIX, key1)) - .build(); - })) { + try (var response = awsS3Client + .getObject( + builder -> builder.bucket(bucket) + .key(FileUtil.pathJoinIgnoreEmpty(REPO_PREFIX, key1)) + .build(), + AsyncResponseTransformer.toBlockingInputStream()) + .join()) { assertEquals("text/plain", response.response().contentType()); } - try (var response = awsS3Client.getObject(builder -> { - builder.bucket(bucket) - .key(FileUtil.pathJoinIgnoreEmpty(REPO_PREFIX, key2)) - .build(); - })) { + try (var response = awsS3Client + .getObject( + builder -> builder.bucket(bucket) + .key(FileUtil.pathJoinIgnoreEmpty(REPO_PREFIX, key2)) + .build(), + AsyncResponseTransformer.toBlockingInputStream()) + .join()) { assertEquals("application/octet-stream", response.response().contentType()); } } - @Test - public void multipartUpload() { - var size = 1024 * 1024 * 5; - client.setMaxPartBytes(size); - 
client.setPartSizeBytes(size); - - var key = "dir/sub/test.txt"; - - var byteString = Bytes.random(size + 100).encodeHex(); - - client.uploadFile(createFile(byteString), key); - - assertObjectsExist(bucket, List.of(key)); - - assertEquals(byteString, client.downloadString(key)); - } - - @Test - public void multipartUploadWithModification() throws IOException { - var client = OcflS3Client.builder() - .s3Client(awsS3Client) - .bucket(bucket) - .repoPrefix(REPO_PREFIX) - .createMultipartModifier((key, builder) -> { - if (key.endsWith("/test.txt")) { - builder.contentType("text/plain"); - } - }) - .build(); - var size = 1024 * 1024 * 5; - client.setMaxPartBytes(size); - client.setPartSizeBytes(size); - - var key1 = "dir/sub/test.txt"; - var key2 = "dir/sub/test.json"; - - var byteString = Bytes.random(size + 100).encodeHex(); - client.uploadFile(createFile(byteString), key1); - - byteString = Bytes.random(size + 100).encodeHex(); - client.uploadFile(createFile(byteString), key2); - - assertObjectsExist(bucket, List.of(key1, key2)); - - try (var response = awsS3Client.getObject(builder -> { - builder.bucket(bucket) - .key(FileUtil.pathJoinIgnoreEmpty(REPO_PREFIX, key1)) - .build(); - })) { - assertEquals("text/plain", response.response().contentType()); - } - try (var response = awsS3Client.getObject(builder -> { - builder.bucket(bucket) - .key(FileUtil.pathJoinIgnoreEmpty(REPO_PREFIX, key2)) - .build(); - })) { - assertEquals("binary/octet-stream", response.response().contentType()); - } - } - @Test public void basicDownloadFileWhenExists() throws IOException { var key = "dir/sub/test.txt"; @@ -424,10 +392,12 @@ private void assertObjectListingAll(String searchPrefix, String key, ListResult. 
} private void assertObjectsExist(String bucket, Collection expectedKeys) { - var result = awsS3Client.listObjectsV2(ListObjectsV2Request.builder() - .bucket(bucket) - .prefix(REPO_PREFIX) - .build()); + var result = awsS3Client + .listObjectsV2(ListObjectsV2Request.builder() + .bucket(bucket) + .prefix(REPO_PREFIX) + .build()) + .join(); var actualKeys = result.contents().stream().map(S3Object::key).collect(Collectors.toList()); var prefixedExpected = expectedKeys.stream() diff --git a/ocfl-java-aws/src/test/java/io/ocfl/aws/OcflS3Test.java b/ocfl-java-aws/src/test/java/io/ocfl/aws/OcflS3Test.java index 9fa7071c..ae27cb71 100644 --- a/ocfl-java-aws/src/test/java/io/ocfl/aws/OcflS3Test.java +++ b/ocfl-java-aws/src/test/java/io/ocfl/aws/OcflS3Test.java @@ -4,6 +4,7 @@ import static org.hamcrest.Matchers.containsInAnyOrder; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; +import static software.amazon.awssdk.http.SdkHttpConfigurationOption.TRUST_ALL_CERTIFICATES; import com.adobe.testing.s3mock.junit5.S3MockExtension; import io.ocfl.api.MutableOcflRepository; @@ -18,6 +19,7 @@ import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; +import java.net.URI; import java.nio.charset.StandardCharsets; import java.nio.file.Path; import java.util.Collection; @@ -26,6 +28,7 @@ import java.util.concurrent.ThreadLocalRandom; import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; +import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; @@ -35,11 +38,13 @@ import org.slf4j.LoggerFactory; import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; -import software.amazon.awssdk.http.apache.ApacheHttpClient; +import 
software.amazon.awssdk.http.nio.netty.NettyNioAsyncHttpClient; import software.amazon.awssdk.regions.Region; -import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.S3AsyncClient; +import software.amazon.awssdk.services.s3.S3Configuration; import software.amazon.awssdk.services.s3.model.ListObjectsV2Request; import software.amazon.awssdk.services.s3.model.S3Object; +import software.amazon.awssdk.utils.AttributeMap; public class OcflS3Test { @@ -54,7 +59,7 @@ public class OcflS3Test { @RegisterExtension public static S3MockExtension S3_MOCK = S3MockExtension.builder().silent().build(); - private static S3Client s3Client; + private static S3AsyncClient s3Client; private static CloudClient cloudClient; private static String bucket; @@ -69,20 +74,31 @@ public static void beforeAll() { if (StringUtils.isNotBlank(accessKey) && StringUtils.isNotBlank(secretKey) && StringUtils.isNotBlank(bucket)) { LOG.info("Running tests against AWS"); - s3Client = S3Client.builder() + s3Client = S3AsyncClient.crtBuilder() .region(Region.US_EAST_2) .credentialsProvider( StaticCredentialsProvider.create(AwsBasicCredentials.create(accessKey, secretKey))) - .httpClientBuilder(ApacheHttpClient.builder()) .build(); OcflS3Test.bucket = bucket; } else { LOG.info("Running tests against S3 Mock"); - s3Client = S3_MOCK.createS3ClientV2(); + s3Client = S3AsyncClient.builder() + .endpointOverride(URI.create(S3_MOCK.getServiceEndpoint())) + .region(Region.US_EAST_2) + .credentialsProvider(StaticCredentialsProvider.create(AwsBasicCredentials.create("foo", "bar"))) + .serviceConfiguration(S3Configuration.builder() + .pathStyleAccessEnabled(true) + .build()) + .httpClient(NettyNioAsyncHttpClient.builder() + .buildWithDefaults(AttributeMap.builder() + .put(TRUST_ALL_CERTIFICATES, Boolean.TRUE) + .build())) + .build(); OcflS3Test.bucket = UUID.randomUUID().toString(); s3Client.createBucket(request -> { - request.bucket(OcflS3Test.bucket); - }); + 
request.bucket(OcflS3Test.bucket); + }) + .join(); } cloudClient = OcflS3Client.builder() @@ -92,6 +108,12 @@ public static void beforeAll() { .build(); } + @AfterAll + public static void afterAll() { + s3Client.close(); + cloudClient.close(); + } + @AfterEach public void after() { cloudClient.deletePath(""); @@ -227,9 +249,10 @@ public void basicPurgeTest() { private void assertObjectsExist(String bucket, String prefix, Collection expectedKeys) { var result = s3Client.listObjectsV2(ListObjectsV2Request.builder() - .bucket(bucket) - .prefix(FileUtil.pathJoinIgnoreEmpty(REPO_PREFIX, prefix)) - .build()); + .bucket(bucket) + .prefix(FileUtil.pathJoinIgnoreEmpty(REPO_PREFIX, prefix)) + .build()) + .join(); var actualKeys = result.contents().stream().map(S3Object::key).collect(Collectors.toList()); var prefixedExpected = expectedKeys.stream() diff --git a/ocfl-java-core/src/main/java/io/ocfl/core/storage/DefaultOcflStorage.java b/ocfl-java-core/src/main/java/io/ocfl/core/storage/DefaultOcflStorage.java index 39fd19db..1090a6d4 100644 --- a/ocfl-java-core/src/main/java/io/ocfl/core/storage/DefaultOcflStorage.java +++ b/ocfl-java-core/src/main/java/io/ocfl/core/storage/DefaultOcflStorage.java @@ -619,6 +619,7 @@ public ValidationResults validateObject(String objectId, boolean contentFixityCh public void close() { LOG.debug("Closing {}", this.getClass().getName()); super.close(); + storage.close(); } @Override diff --git a/ocfl-java-core/src/main/java/io/ocfl/core/storage/cloud/CloudClient.java b/ocfl-java-core/src/main/java/io/ocfl/core/storage/cloud/CloudClient.java index 188bde5e..4057e48f 100644 --- a/ocfl-java-core/src/main/java/io/ocfl/core/storage/cloud/CloudClient.java +++ b/ocfl-java-core/src/main/java/io/ocfl/core/storage/cloud/CloudClient.java @@ -33,6 +33,11 @@ */ public interface CloudClient { + /** + * Close any resources the client may have created. This will NOT close resources that were passed into the client. 
+ */ + void close(); + /** * The name of the bucket the OCFL repository is in. * diff --git a/ocfl-java-core/src/main/java/io/ocfl/core/storage/cloud/CloudStorage.java b/ocfl-java-core/src/main/java/io/ocfl/core/storage/cloud/CloudStorage.java index 4ad731e7..10e02613 100644 --- a/ocfl-java-core/src/main/java/io/ocfl/core/storage/cloud/CloudStorage.java +++ b/ocfl-java-core/src/main/java/io/ocfl/core/storage/cloud/CloudStorage.java @@ -328,6 +328,14 @@ public void deleteEmptyDirsUp(String path) { // no-op } + /** + * {@inheritDoc} + */ + @Override + public void close() { + client.close(); + } + private void failOnExistingFile(String path) { if (fileExists(path)) { throw new OcflFileAlreadyExistsException(String.format("File %s already exists", path)); diff --git a/ocfl-java-core/src/main/java/io/ocfl/core/storage/common/Storage.java b/ocfl-java-core/src/main/java/io/ocfl/core/storage/common/Storage.java index 14f3cbe9..c5fc535a 100644 --- a/ocfl-java-core/src/main/java/io/ocfl/core/storage/common/Storage.java +++ b/ocfl-java-core/src/main/java/io/ocfl/core/storage/common/Storage.java @@ -188,4 +188,9 @@ public interface Storage { * @param path starting path */ void deleteEmptyDirsUp(String path); + + /** + * Closes any resources the storage implementation may have open. 
+ */ + void close(); } diff --git a/ocfl-java-core/src/main/java/io/ocfl/core/storage/filesystem/FileSystemStorage.java b/ocfl-java-core/src/main/java/io/ocfl/core/storage/filesystem/FileSystemStorage.java index 2e94dd92..58717660 100644 --- a/ocfl-java-core/src/main/java/io/ocfl/core/storage/filesystem/FileSystemStorage.java +++ b/ocfl-java-core/src/main/java/io/ocfl/core/storage/filesystem/FileSystemStorage.java @@ -347,4 +347,12 @@ public void deleteEmptyDirsUp(String path) { var fullPath = storageRoot.resolve(path); FileUtil.deleteDirAndParentsIfEmpty(fullPath); } + + /** + * {@inheritDoc} + */ + @Override + public void close() { + // no-op + } } diff --git a/ocfl-java-itest/src/test/java/io/ocfl/itest/LoadITest.java b/ocfl-java-itest/src/test/java/io/ocfl/itest/LoadITest.java index 22d8f1e4..f271d4bd 100644 --- a/ocfl-java-itest/src/test/java/io/ocfl/itest/LoadITest.java +++ b/ocfl-java-itest/src/test/java/io/ocfl/itest/LoadITest.java @@ -36,9 +36,9 @@ import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; -import software.amazon.awssdk.http.apache.ApacheHttpClient; import software.amazon.awssdk.regions.Region; -import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.S3AsyncClient; +import software.amazon.awssdk.transfer.s3.S3TransferManager; @Disabled public class LoadITest { @@ -264,12 +264,11 @@ public void s3WriteTest() throws InterruptedException { var objectPath = createTestObject(1, 3 * MB); var prefix = UUID.randomUUID().toString(); - var s3Client = S3Client.builder() - .region(Region.US_EAST_2) - .httpClientBuilder(ApacheHttpClient.builder()) - .build(); + var s3Client = S3AsyncClient.crtBuilder().region(Region.US_EAST_2).build(); + var transferManager = S3TransferManager.builder().s3Client(s3Client).build(); var cloutClient = OcflS3Client.builder() .s3Client(s3Client) + .transferManager(transferManager) .bucket("pwinckles-ocfl") .repoPrefix(prefix) 
.build(); @@ -293,6 +292,9 @@ public void s3WriteTest() throws InterruptedException { System.out.println("Finished. Waiting for metrics collection..."); TimeUnit.SECONDS.sleep(30); System.out.println("Done"); + + s3Client.close(); + transferManager.close(); } private void runPutTest( @@ -445,10 +447,8 @@ private OcflRepository createFsRepo() { } private MutableOcflRepository createS3Repo() { - var s3Client = S3Client.builder() - .region(Region.US_EAST_2) - .httpClientBuilder(ApacheHttpClient.builder()) - .build(); + var s3Client = S3AsyncClient.crtBuilder().region(Region.US_EAST_2).build(); + var transferManager = S3TransferManager.builder().s3Client(s3Client).build(); var prefix = UUID.randomUUID().toString(); // Note this is NOT using a db, which an S3 setup would normally use return new OcflRepositoryBuilder() @@ -459,6 +459,7 @@ private MutableOcflRepository createS3Repo() { .bucket("pwinckles-ocfl") .repoPrefix(prefix) .s3Client(s3Client) + .transferManager(transferManager) .build()); }) .workDir(UncheckedFiles.createDirectories(tempRoot.resolve("temp"))) diff --git a/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3BadReposITest.java b/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3BadReposITest.java index 700a7f7b..06152d87 100644 --- a/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3BadReposITest.java +++ b/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3BadReposITest.java @@ -17,11 +17,13 @@ import java.util.UUID; import java.util.concurrent.ThreadLocalRandom; import org.apache.commons.lang3.StringUtils; +import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.extension.RegisterExtension; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.S3AsyncClient; +import software.amazon.awssdk.transfer.s3.S3TransferManager; public class S3BadReposITest extends BadReposITest { @@ -33,7 +35,8 @@ public class 
S3BadReposITest extends BadReposITest { @RegisterExtension public static S3MockExtension S3_MOCK = S3MockExtension.builder().silent().build(); - private static S3Client s3Client; + private static S3AsyncClient s3Client; + private static S3TransferManager transferManager; private static String bucket; private static ComboPooledDataSource dataSource; @@ -53,19 +56,28 @@ public static void beforeAll() { S3BadReposITest.bucket = bucket; } else { LOG.info("Running tests against S3 Mock"); - s3Client = S3_MOCK.createS3ClientV2(); + s3Client = S3ITestHelper.createMockS3Client(S3_MOCK.getServiceEndpoint()); S3BadReposITest.bucket = UUID.randomUUID().toString(); s3Client.createBucket(request -> { - request.bucket(S3BadReposITest.bucket); - }); + request.bucket(S3BadReposITest.bucket); + }) + .join(); } + transferManager = S3TransferManager.builder().s3Client(s3Client).build(); + dataSource = new ComboPooledDataSource(); dataSource.setJdbcUrl(System.getProperty("db.url", "jdbc:h2:mem:test")); dataSource.setUser(System.getProperty("db.user", "")); dataSource.setPassword(System.getProperty("db.password", "")); } + @AfterAll + public static void afterAll() { + s3Client.close(); + transferManager.close(); + } + @Override protected void onBefore() { s3Helper = new S3ITestHelper(s3Client); @@ -119,6 +131,7 @@ private CloudClient createCloudClient(String name) { return OcflS3Client.builder() .s3Client(s3Client) + .transferManager(transferManager) .bucket(bucket) .repoPrefix(prefix(name)) .build(); diff --git a/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3ITestHelper.java b/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3ITestHelper.java index 193c4ead..d0ba9db2 100644 --- a/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3ITestHelper.java +++ b/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3ITestHelper.java @@ -2,12 +2,14 @@ import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.containsInAnyOrder; +import static 
software.amazon.awssdk.http.SdkHttpConfigurationOption.TRUST_ALL_CERTIFICATES; import io.ocfl.api.model.DigestAlgorithm; import io.ocfl.core.util.DigestUtil; import io.ocfl.core.util.FileUtil; import io.ocfl.itest.ITestHelper; import java.io.IOException; +import java.net.URI; import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; @@ -16,28 +18,44 @@ import org.junit.jupiter.api.Assertions; import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; -import software.amazon.awssdk.http.apache.ApacheHttpClient; +import software.amazon.awssdk.core.async.AsyncResponseTransformer; +import software.amazon.awssdk.http.nio.netty.NettyNioAsyncHttpClient; import software.amazon.awssdk.regions.Region; -import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.S3AsyncClient; +import software.amazon.awssdk.services.s3.S3Configuration; import software.amazon.awssdk.services.s3.model.GetObjectRequest; import software.amazon.awssdk.services.s3.model.ListObjectsV2Request; import software.amazon.awssdk.services.s3.model.S3Object; +import software.amazon.awssdk.utils.AttributeMap; public class S3ITestHelper { private static final String OCFL_SPEC_FILE = "ocfl_1.1.md"; - private S3Client s3Client; + private S3AsyncClient s3Client; - public S3ITestHelper(S3Client s3Client) { + public S3ITestHelper(S3AsyncClient s3Client) { this.s3Client = s3Client; } - public static S3Client createS3Client(String accessKey, String secretKey) { - return S3Client.builder() + public static S3AsyncClient createS3Client(String accessKey, String secretKey) { + return S3AsyncClient.crtBuilder() .region(Region.US_EAST_2) .credentialsProvider(StaticCredentialsProvider.create(AwsBasicCredentials.create(accessKey, secretKey))) - .httpClientBuilder(ApacheHttpClient.builder()) + .build(); + } + + public static S3AsyncClient createMockS3Client(String endpoint) { + return 
S3AsyncClient.builder() + .endpointOverride(URI.create(endpoint)) + .region(Region.US_EAST_2) + .credentialsProvider(StaticCredentialsProvider.create(AwsBasicCredentials.create("foo", "bar"))) + .serviceConfiguration( + S3Configuration.builder().pathStyleAccessEnabled(true).build()) + .httpClient(NettyNioAsyncHttpClient.builder() + .buildWithDefaults(AttributeMap.builder() + .put(TRUST_ALL_CERTIFICATES, Boolean.TRUE) + .build())) .build(); } @@ -80,14 +98,14 @@ private List listAllFiles(Path root) { } private byte[] getObjectContent(String bucket, String prefix, String key) { - try (var result = s3Client.getObject(GetObjectRequest.builder() - .bucket(bucket) - .key(prefix + "/" + key) - .build())) { - return result.readAllBytes(); - } catch (IOException e) { - throw new RuntimeException(e); - } + return s3Client.getObject( + GetObjectRequest.builder() + .bucket(bucket) + .key(prefix + "/" + key) + .build(), + AsyncResponseTransformer.toBytes()) + .join() + .asByteArray(); } private String computeS3Digest(String bucket, String prefix, String key) { @@ -95,8 +113,11 @@ private String computeS3Digest(String bucket, String prefix, String key) { } public List listAllObjects(String bucket, String prefix) { - var result = s3Client.listObjectsV2( - ListObjectsV2Request.builder().bucket(bucket).prefix(prefix).build()); + var result = s3Client.listObjectsV2(ListObjectsV2Request.builder() + .bucket(bucket) + .prefix(prefix) + .build()) + .join(); return result.contents().stream() .map(S3Object::key) diff --git a/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3MutableHeadITest.java b/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3MutableHeadITest.java index 3bd533d4..0e7b503a 100644 --- a/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3MutableHeadITest.java +++ b/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3MutableHeadITest.java @@ -21,11 +21,13 @@ import java.util.concurrent.ThreadLocalRandom; import java.util.function.Consumer; import 
org.apache.commons.lang3.StringUtils; +import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.extension.RegisterExtension; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.S3AsyncClient; +import software.amazon.awssdk.transfer.s3.S3TransferManager; public class S3MutableHeadITest extends MutableHeadITest { @@ -37,7 +39,8 @@ public class S3MutableHeadITest extends MutableHeadITest { @RegisterExtension public static S3MockExtension S3_MOCK = S3MockExtension.builder().silent().build(); - private static S3Client s3Client; + private static S3AsyncClient s3Client; + private static S3TransferManager transferManager; private static String bucket; private static ComboPooledDataSource dataSource; @@ -57,19 +60,28 @@ public static void beforeAll() { S3MutableHeadITest.bucket = bucket; } else { LOG.info("Running tests against S3 Mock"); - s3Client = S3_MOCK.createS3ClientV2(); + s3Client = S3ITestHelper.createMockS3Client(S3_MOCK.getServiceEndpoint()); S3MutableHeadITest.bucket = UUID.randomUUID().toString(); s3Client.createBucket(request -> { - request.bucket(S3MutableHeadITest.bucket); - }); + request.bucket(S3MutableHeadITest.bucket); + }) + .join(); } + transferManager = S3TransferManager.builder().s3Client(s3Client).build(); + dataSource = new ComboPooledDataSource(); dataSource.setJdbcUrl(System.getProperty("db.url", "jdbc:h2:mem:test")); dataSource.setUser(System.getProperty("db.user", "")); dataSource.setPassword(System.getProperty("db.password", "")); } + @AfterAll + public static void afterAll() { + s3Client.close(); + transferManager.close(); + } + @Override protected void onBefore() { s3Helper = new S3ITestHelper(s3Client); @@ -143,6 +155,7 @@ private CloudClient createCloudClient(String name) { return OcflS3Client.builder() .s3Client(s3Client) + .transferManager(transferManager) .bucket(bucket) 
.repoPrefix(prefix(name)) .build(); diff --git a/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3OcflITest.java b/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3OcflITest.java index c1cb73f3..0a1e3c81 100644 --- a/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3OcflITest.java +++ b/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3OcflITest.java @@ -35,13 +35,15 @@ import java.util.function.Consumer; import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; +import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable; import org.junit.jupiter.api.extension.RegisterExtension; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.S3AsyncClient; +import software.amazon.awssdk.transfer.s3.S3TransferManager; public class S3OcflITest extends OcflITest { @@ -57,7 +59,8 @@ public class S3OcflITest extends OcflITest { @RegisterExtension public static S3MockExtension S3_MOCK = S3MockExtension.builder().silent().build(); - private static S3Client s3Client; + private static S3AsyncClient s3Client; + private static S3TransferManager transferManager; private static String bucket; private static ComboPooledDataSource dataSource; @@ -77,19 +80,28 @@ public static void beforeAll() { S3OcflITest.bucket = bucket; } else { LOG.info("Running tests against S3 Mock"); - s3Client = S3_MOCK.createS3ClientV2(); + s3Client = S3ITestHelper.createMockS3Client(S3_MOCK.getServiceEndpoint()); S3OcflITest.bucket = UUID.randomUUID().toString(); s3Client.createBucket(request -> { - request.bucket(S3OcflITest.bucket); - }); + request.bucket(S3OcflITest.bucket); + }) + .join(); } + transferManager = S3TransferManager.builder().s3Client(s3Client).build(); + dataSource = new ComboPooledDataSource(); dataSource.setJdbcUrl(System.getProperty("db.url", 
"jdbc:h2:mem:test")); dataSource.setUser(System.getProperty("db.user", "")); dataSource.setPassword(System.getProperty("db.password", "")); } + @AfterAll + public static void afterAll() { + s3Client.close(); + transferManager.close(); + } + @Override protected void onBefore() { s3Helper = new S3ITestHelper(s3Client); @@ -276,6 +288,7 @@ private CloudClient createCloudClient(String name) { return OcflS3Client.builder() .s3Client(s3Client) + .transferManager(transferManager) .bucket(bucket) .repoPrefix(prefix(name)) .build(); diff --git a/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3StorageTest.java b/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3StorageTest.java index 36722388..21aef035 100644 --- a/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3StorageTest.java +++ b/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3StorageTest.java @@ -13,11 +13,14 @@ import java.util.Set; import java.util.UUID; import java.util.concurrent.ThreadLocalRandom; +import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.extension.RegisterExtension; -import software.amazon.awssdk.core.sync.RequestBody; -import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.core.async.AsyncRequestBody; +import software.amazon.awssdk.core.async.AsyncResponseTransformer; +import software.amazon.awssdk.services.s3.S3AsyncClient; +import software.amazon.awssdk.transfer.s3.S3TransferManager; public class S3StorageTest extends StorageTest { @@ -27,7 +30,8 @@ public class S3StorageTest extends StorageTest { @RegisterExtension public static S3MockExtension S3_MOCK = S3MockExtension.builder().silent().build(); - private static S3Client s3Client; + private static S3AsyncClient s3Client; + private static S3TransferManager transferManager; private static String bucket; private Set repoPrefixes = new HashSet<>(); @@ -36,11 +40,19 @@ public class S3StorageTest extends StorageTest { @BeforeAll 
public static void beforeAll() { - s3Client = S3_MOCK.createS3ClientV2(); + s3Client = S3ITestHelper.createMockS3Client(S3_MOCK.getServiceEndpoint()); S3StorageTest.bucket = UUID.randomUUID().toString(); s3Client.createBucket(request -> { - request.bucket(S3StorageTest.bucket); - }); + request.bucket(S3StorageTest.bucket); + }) + .join(); + transferManager = S3TransferManager.builder().s3Client(s3Client).build(); + } + + @AfterAll + public static void afterAll() { + s3Client.close(); + transferManager.close(); } @AfterEach @@ -65,16 +77,20 @@ protected void file(String path) { protected void file(String path, String content) { s3Client.putObject( - request -> { - request.bucket(bucket).key(FileUtil.pathJoinFailEmpty(prefix(name), path)); - }, - RequestBody.fromString(content)); + request -> { + request.bucket(bucket).key(FileUtil.pathJoinFailEmpty(prefix(name), path)); + }, + AsyncRequestBody.fromString(content)) + .join(); } protected String readFile(String path) { - try (var content = s3Client.getObject(request -> { - request.bucket(bucket).key(FileUtil.pathJoinFailEmpty(prefix(name), path)); - })) { + try (var content = s3Client.getObject( + request -> { + request.bucket(bucket).key(FileUtil.pathJoinFailEmpty(prefix(name), path)); + }, + AsyncResponseTransformer.toBlockingInputStream()) + .join()) { return new String(content.readAllBytes()); } catch (IOException e) { throw new UncheckedIOException(e); @@ -86,6 +102,7 @@ private CloudClient createCloudClient(String name) { return OcflS3Client.builder() .s3Client(s3Client) + .transferManager(transferManager) .bucket(bucket) .repoPrefix(prefix(name)) .build(); diff --git a/pom.xml b/pom.xml index 2542e307..1adbb68a 100644 --- a/pom.xml +++ b/pom.xml @@ -344,6 +344,11 @@ pom import + + software.amazon.awssdk.crt + aws-crt + 0.20.5 + From 849eb96e592d66110d5098b41fc6d4cd95c522bb Mon Sep 17 00:00:00 2001 From: Peter Winckles Date: Thu, 12 Jan 2023 17:35:18 -0600 Subject: [PATCH 02/21] update docs --- docs/USAGE.md | 
27 +++++++++++++++++++ .../main/java/io/ocfl/aws/OcflS3Client.java | 17 +++++++++--- .../io/ocfl/core/OcflRepositoryBuilder.java | 8 ++++-- 3 files changed, 46 insertions(+), 6 deletions(-) diff --git a/docs/USAGE.md b/docs/USAGE.md index 80547088..cacee0c3 100644 --- a/docs/USAGE.md +++ b/docs/USAGE.md @@ -165,6 +165,33 @@ on large files or objects with lots of files. Additionally, it does not cache any object files locally, requiring them to be retrieved from S3 on every access. +### S3 Transfer Manager + +`ocfl-java` uses the new [S3 Transfer +Manager](https://docs.aws.amazon.com/sdk-for-java/latest/developer-guide/transfer-manager.html) +to upload and download files from S3. You can configure the transfer +manager to target a specific throughput, based on the needs of your +application. Consult the official documentation for details. + +However, note that it is **crucial** that you configure the transfer +manager to use the new [CRT S3 +client](https://docs.aws.amazon.com/sdk-for-java/latest/developer-guide/crt-based-s3-client.html). +The CRT client is **required** by the transfer manager in order to +make multipart uploads. + +If you do not specify a transfer manager when constructing the +`OcflS3Client`, then it will create the default transfer manager using +the S3 client it was provided, which, again, should be a CRT client. +When you use the default transfer manager, you need to be sure to +close the `OcflRepository` when you are done with it, otherwise the +transfer manager will not be closed. 
+ +For example, you might construct the S3 client like: + +``` java +S3AsyncClient.crtBuilder().build() +``` + ### Configuration Use `OcflStorageBuilder.builder()` to create and configure an diff --git a/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Client.java b/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Client.java index 1f949c59..3c72a556 100644 --- a/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Client.java +++ b/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Client.java @@ -68,8 +68,6 @@ public class OcflS3Client implements CloudClient { private static final Logger LOG = LoggerFactory.getLogger(OcflS3Client.class); - // TODO TM add more notes about CRT client - // TODO TM notes about closing private final S3AsyncClient s3Client; private final S3TransferManager transferManager; private final String bucket; @@ -569,7 +567,12 @@ public static class Builder { private BiConsumer putObjectModifier; /** - * The AWS SDK S3 client. This SHOULD be a CRT client. Required. + * The AWS SDK S3 client. Required. + *

+ * This SHOULD be a {@link CRT client}. + * The reason for this is that the {@link S3TransferManager} requires the CRT client for doing multipart uploads. + *

+ * This client is NOT closed when the repository is closed, and the user is responsible for closing it when appropriate. * * @param s3Client s3 client * @return builder @@ -583,6 +586,9 @@ public Builder s3Client(S3AsyncClient s3Client) { * The AWS SDK S3 transfer manager. This only needs to be specified when you need to set specific settings, and, * if it is specified, it can use the same S3 client as was supplied in {@link #s3Client(S3AsyncClient)}. * Otherwise, when not specified, the default transfer manager is created using the provided S3 Client. + *

+ * When a transfer manager is provided, it will NOT be closed when the repository is closed, and the user is + * responsible for closing it when appropriate. * * @param transferManager S3 transfer manager * @return builder @@ -630,7 +636,10 @@ public Builder putObjectModifier(BiConsumer pu } /** - * Constructs a new OcflS3Client. s3Client and bucket must be set. + * Constructs a new {@link OcflS3Client}. {@link #s3Client(S3AsyncClient)} and {@link #bucket(String)} must be set. + *

+ * Remember to call {@link OcflRepository#close()} when you are done with the repository so that the default + * S3 transfer manager is closed. * * @return OcflS3Client */ diff --git a/ocfl-java-core/src/main/java/io/ocfl/core/OcflRepositoryBuilder.java b/ocfl-java-core/src/main/java/io/ocfl/core/OcflRepositoryBuilder.java index d8b9f085..18729161 100644 --- a/ocfl-java-core/src/main/java/io/ocfl/core/OcflRepositoryBuilder.java +++ b/ocfl-java-core/src/main/java/io/ocfl/core/OcflRepositoryBuilder.java @@ -63,7 +63,7 @@ /** * Constructs a local file system based OCFL repository sensible defaults that can be overridden prior to calling - * build(). + * {@link #build()}. * *

Important: The same OcflRepositoryBuilder instance MUST NOT be used to initialize multiple repositories. */ @@ -86,7 +86,7 @@ public class OcflRepositoryBuilder { /** * Constructs a local file system based OCFL repository sensible defaults that can be overridden prior to calling - * build(). + * {@link #build()}. * *

Important: The same OcflRepositoryBuilder instance MUST NOT be used to initialize multiple repositories. */ @@ -375,6 +375,8 @@ public OcflRepositoryBuilder verifyStaging(boolean verifyStaging) { /** * Constructs an OCFL repository. Brand new repositories are initialized. + *

+ * Remember to call {@link OcflRepository#close()} when you are done with the repository. * * @return OcflRepository */ @@ -384,6 +386,8 @@ public OcflRepository build() { /** * Constructs an OCFL repository that allows the use of the Mutable HEAD Extension. Brand new repositories are initialized. + *

+ * Remember to call {@link OcflRepository#close()} when you are done with the repository. * * @return MutableOcflRepository */ From d3098f72323139f1109f7af09bc04d0fa65d6cc8 Mon Sep 17 00:00:00 2001 From: Peter Winckles Date: Thu, 12 Jan 2023 17:54:11 -0600 Subject: [PATCH 03/21] fix javadoc --- ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Client.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Client.java b/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Client.java index 3c72a556..c1235f77 100644 --- a/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Client.java +++ b/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Client.java @@ -569,7 +569,7 @@ public static class Builder { /** * The AWS SDK S3 client. Required. *

- * This SHOULD be a {@link CRT client}. + * This SHOULD be a CRT client. * The reason for this is that the {@link S3TransferManager} requires the CRT client for doing multipart uploads. *

* This client is NOT closed when the repository is closed, and the user is responsible for closing it when appropriate. From a6e96afe99dcf664c4ea0e28005be102c074449e Mon Sep 17 00:00:00 2001 From: Peter Winckles Date: Mon, 23 Jan 2023 18:02:16 -0600 Subject: [PATCH 04/21] enhance s3 exception code handling --- .../main/java/io/ocfl/aws/OcflS3Client.java | 28 +++++++++++++++---- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Client.java b/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Client.java index c1235f77..bf0d514b 100644 --- a/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Client.java +++ b/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Client.java @@ -59,6 +59,7 @@ import software.amazon.awssdk.services.s3.model.NoSuchKeyException; import software.amazon.awssdk.services.s3.model.ObjectIdentifier; import software.amazon.awssdk.services.s3.model.PutObjectRequest; +import software.amazon.awssdk.services.s3.model.S3Exception; import software.amazon.awssdk.transfer.s3.S3TransferManager; /** @@ -243,7 +244,7 @@ public CloudObjectKey copyObject(String srcPath, String dstPath) { copy.completionFuture().join(); } catch (RuntimeException e) { var cause = unwrapCompletionEx(e); - if (cause instanceof NoSuchKeyException) { + if (wasNotFound(cause)) { throw new KeyNotFoundException("Key " + srcKey + " not found in bucket " + bucket, cause); } throw new OcflS3Exception("Failed to copy object from " + srcKey + " to " + dstKey, cause); @@ -269,7 +270,7 @@ public Path downloadFile(String srcPath, Path dstPath) { download.completionFuture().join(); } catch (RuntimeException e) { var cause = unwrapCompletionEx(e); - if (cause instanceof NoSuchKeyException) { + if (wasNotFound(cause)) { throw new KeyNotFoundException("Key " + srcKey + " not found in bucket " + bucket, cause); } throw new OcflS3Exception("Failed to download " + srcKey + " to " + dstPath, cause); @@ -296,7 +297,7 @@ public InputStream downloadStream(String 
srcPath) { .join(); } catch (RuntimeException e) { var cause = unwrapCompletionEx(e); - if (cause instanceof NoSuchKeyException) { + if (wasNotFound(cause)) { throw new KeyNotFoundException("Key " + srcKey + " not found in bucket " + bucket, cause); } throw new OcflS3Exception("Failed to download " + srcKey, cause); @@ -336,7 +337,7 @@ public HeadResult head(String path) { .setLastModified(s3Result.lastModified()); } catch (RuntimeException e) { var cause = unwrapCompletionEx(e); - if (cause instanceof NoSuchKeyException) { + if (wasNotFound(cause)) { throw new KeyNotFoundException("Key " + key + " not found in bucket " + bucket, cause); } throw new OcflS3Exception("Failed to HEAD " + key, cause); @@ -478,7 +479,7 @@ public boolean bucketExists() { return true; } catch (RuntimeException e) { var cause = unwrapCompletionEx(e); - if (cause instanceof NoSuchBucketException) { + if (wasNotFound(cause)) { return false; } throw new OcflS3Exception("Failed ot HEAD bucket " + bucket, cause); @@ -558,6 +559,23 @@ private Throwable unwrapCompletionEx(RuntimeException e) { return cause; } + /** + * Returns true if the exception indicates the object/bucket was NOT found in S3. + * + * @param e the exception + * @return true if the object/bucket was NOT found in S3. + */ + private boolean wasNotFound(Throwable e) { + if (e instanceof NoSuchKeyException || e instanceof NoSuchBucketException) { + return true; + } else if (e instanceof S3Exception) { + // It seems like the CRT client does not return NoSuchKeyExceptions... 
+ var s3e = (S3Exception) e; + return 404 == s3e.statusCode(); + } + return false; + } + public static class Builder { private S3AsyncClient s3Client; private S3TransferManager transferManager; From e6852df61af8aeb49ed5ba0efe3a43619f740af0 Mon Sep 17 00:00:00 2001 From: Peter Winckles Date: Tue, 4 Apr 2023 06:39:04 -0500 Subject: [PATCH 05/21] cap batch deletes at 999 objects --- .../main/java/io/ocfl/aws/OcflS3Client.java | 19 ++++++++++++++----- pom.xml | 2 +- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Client.java b/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Client.java index bf0d514b..09337dd3 100644 --- a/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Client.java +++ b/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Client.java @@ -36,10 +36,12 @@ import java.io.InputStream; import java.nio.charset.StandardCharsets; import java.nio.file.Path; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.List; import java.util.Objects; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.CompletionException; import java.util.function.BiConsumer; import java.util.stream.Collectors; @@ -48,7 +50,6 @@ import software.amazon.awssdk.core.async.AsyncRequestBody; import software.amazon.awssdk.core.async.AsyncResponseTransformer; import software.amazon.awssdk.services.s3.S3AsyncClient; -import software.amazon.awssdk.services.s3.model.Delete; import software.amazon.awssdk.services.s3.model.DeleteObjectsRequest; import software.amazon.awssdk.services.s3.model.GetObjectRequest; import software.amazon.awssdk.services.s3.model.HeadBucketRequest; @@ -437,10 +438,18 @@ private void deleteObjectsInternal(Collection objectKeys) { .collect(Collectors.toList()); try { - s3Client.deleteObjects(DeleteObjectsRequest.builder() - .bucket(bucket) - .delete(Delete.builder().objects(objectIds).build()) - .build()) + var futures = new ArrayList>(); + + // Can only 
delete at most 1,000 objects per request + for (int i = 0; i < objectIds.size(); i += 999) { + var toDelete = objectIds.subList(i, Math.min(objectIds.size(), i + 999)); + futures.add(s3Client.deleteObjects(DeleteObjectsRequest.builder() + .bucket(bucket) + .delete(builder -> builder.objects(toDelete)) + .build())); + } + + CompletableFuture.allOf(futures.toArray(new CompletableFuture[] {})) .join(); } catch (RuntimeException e) { throw new OcflS3Exception("Failed to delete objects " + objectIds, unwrapCompletionEx(e)); diff --git a/pom.xml b/pom.xml index 1adbb68a..1158d1e4 100644 --- a/pom.xml +++ b/pom.xml @@ -347,7 +347,7 @@ software.amazon.awssdk.crt aws-crt - 0.20.5 + 0.21.9 From 16fd32daa5122af21c800de32688c87a98ef0152 Mon Sep 17 00:00:00 2001 From: Peter Winckles Date: Sun, 30 Apr 2023 20:18:29 -0500 Subject: [PATCH 06/21] make DefaultOcflObjectUpdater thread safe --- .../core/DefaultMutableOcflRepository.java | 26 ++- .../ocfl/core/DefaultOcflObjectUpdater.java | 194 ++++++++++-------- .../io/ocfl/core/DefaultOcflRepository.java | 31 ++- .../main/java/io/ocfl/core/FileLocker.java | 115 +++++++++++ .../io/ocfl/core/OcflRepositoryBuilder.java | 22 +- .../ocfl/core/inventory/AddFileProcessor.java | 63 ++++-- .../ocfl/core/inventory/InventoryUpdater.java | 21 +- .../io/ocfl/core/lock/InMemoryObjectLock.java | 1 + .../io/ocfl/core/util/UncheckedCallable.java | 31 +++ 9 files changed, 367 insertions(+), 137 deletions(-) create mode 100644 ocfl-java-core/src/main/java/io/ocfl/core/FileLocker.java create mode 100644 ocfl-java-core/src/main/java/io/ocfl/core/util/UncheckedCallable.java diff --git a/ocfl-java-core/src/main/java/io/ocfl/core/DefaultMutableOcflRepository.java b/ocfl-java-core/src/main/java/io/ocfl/core/DefaultMutableOcflRepository.java index c3763bd2..64d33f76 100644 --- a/ocfl-java-core/src/main/java/io/ocfl/core/DefaultMutableOcflRepository.java +++ b/ocfl-java-core/src/main/java/io/ocfl/core/DefaultMutableOcflRepository.java @@ -41,6 +41,7 @@ import 
io.ocfl.core.util.FileUtil; import io.ocfl.core.util.UncheckedFiles; import java.nio.file.Path; +import java.time.Duration; import java.util.function.Consumer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -58,6 +59,12 @@ public class DefaultMutableOcflRepository extends DefaultOcflRepository implemen private static final Logger LOG = LoggerFactory.getLogger(DefaultMutableOcflRepository.class); + private final OcflConfig config; + private final Duration fileLockTimeoutDuration; + private final OcflStorage storage; + private final Path workDir; + private final ObjectLock objectLock; + /** * @see OcflRepositoryBuilder * @@ -69,6 +76,7 @@ public class DefaultMutableOcflRepository extends DefaultOcflRepository implemen * @param contentPathConstraintProcessor content path constraint processor * @param config ocfl defaults configuration * @param verifyStaging true if the contents of a stage version should be double-checked + * @param fileLockTimeoutDuration the max amount of time to wait for a file lock */ public DefaultMutableOcflRepository( OcflStorage storage, @@ -78,7 +86,8 @@ public DefaultMutableOcflRepository( LogicalPathMapper logicalPathMapper, ContentPathConstraintProcessor contentPathConstraintProcessor, OcflConfig config, - boolean verifyStaging) { + boolean verifyStaging, + Duration fileLockTimeoutDuration) { super( storage, workDir, @@ -87,7 +96,14 @@ public DefaultMutableOcflRepository( logicalPathMapper, contentPathConstraintProcessor, config, - verifyStaging); + verifyStaging, + fileLockTimeoutDuration); + this.storage = Enforce.notNull(storage, "storage cannot be null"); + this.workDir = Enforce.notNull(workDir, "workDir cannot be null"); + this.objectLock = Enforce.notNull(objectLock, "objectLock cannot be null"); + this.config = Enforce.notNull(config, "config cannot be null"); + this.fileLockTimeoutDuration = + Enforce.notNull(fileLockTimeoutDuration, "fileLockTimeoutDuration cannot be null"); } /** @@ -118,9 +134,11 @@ public 
ObjectVersionId stageChanges( .getParent(); var inventoryUpdater = inventoryUpdaterBuilder.buildCopyStateMutable(inventory); + var fileLocker = new FileLocker(fileLockTimeoutDuration); var addFileProcessor = - addFileProcessorBuilder.build(inventoryUpdater, contentDir, inventory.getDigestAlgorithm()); - var updater = new DefaultOcflObjectUpdater(inventory, inventoryUpdater, contentDir, addFileProcessor); + addFileProcessorBuilder.build(inventoryUpdater, fileLocker, contentDir, inventory.getDigestAlgorithm()); + var updater = + new DefaultOcflObjectUpdater(inventory, inventoryUpdater, contentDir, addFileProcessor, fileLocker); try { objectUpdater.accept(updater); diff --git a/ocfl-java-core/src/main/java/io/ocfl/core/DefaultOcflObjectUpdater.java b/ocfl-java-core/src/main/java/io/ocfl/core/DefaultOcflObjectUpdater.java index be7f4fc9..6797e7c6 100644 --- a/ocfl-java-core/src/main/java/io/ocfl/core/DefaultOcflObjectUpdater.java +++ b/ocfl-java-core/src/main/java/io/ocfl/core/DefaultOcflObjectUpdater.java @@ -45,16 +45,14 @@ import java.nio.file.Paths; import java.nio.file.StandardCopyOption; import java.security.DigestInputStream; -import java.util.HashMap; import java.util.Map; import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Default implementation of OcflObjectUpdater that is used by DefaultOcflRepository to provide write access to an object. - * - *

This class is NOT thread safe. */ public class DefaultOcflObjectUpdater implements OcflObjectUpdater { @@ -64,20 +62,21 @@ public class DefaultOcflObjectUpdater implements OcflObjectUpdater { private final InventoryUpdater inventoryUpdater; private final Path stagingDir; private final AddFileProcessor addFileProcessor; - + private final FileLocker fileLocker; private final Map stagedFileMap; public DefaultOcflObjectUpdater( Inventory inventory, InventoryUpdater inventoryUpdater, Path stagingDir, - AddFileProcessor addFileProcessor) { + AddFileProcessor addFileProcessor, + FileLocker fileLocker) { this.inventory = Enforce.notNull(inventory, "inventory cannot be null"); this.inventoryUpdater = Enforce.notNull(inventoryUpdater, "inventoryUpdater cannot be null"); this.stagingDir = Enforce.notNull(stagingDir, "stagingDir cannot be null"); this.addFileProcessor = Enforce.notNull(addFileProcessor, "addFileProcessor cannot be null"); - - this.stagedFileMap = new HashMap<>(); + this.fileLocker = Enforce.notNull(fileLocker, "fileLocker cannot be null"); + this.stagedFileMap = new ConcurrentHashMap<>(); } @Override @@ -129,51 +128,53 @@ public OcflObjectUpdater writeFile(InputStream input, String destinationPath, Oc Enforce.notNull(input, "input cannot be null"); Enforce.notBlank(destinationPath, "destinationPath cannot be blank"); - LOG.debug("Write stream to object <{}> at logical path <{}>", inventory.getId(), destinationPath); + return fileLocker.withLock(destinationPath, () -> { + LOG.debug("Write stream to object <{}> at logical path <{}>", inventory.getId(), destinationPath); - var stagingFullPath = stagingFullPath(inventoryUpdater.innerContentPath(destinationPath)); + var stagingFullPath = stagingFullPath(inventoryUpdater.innerContentPath(destinationPath)); - var digestInput = wrapInDigestInputStream(input); - LOG.debug("Writing input stream to: {}", stagingFullPath); - if (Files.notExists(stagingFullPath.getParent())) { - 
UncheckedFiles.createDirectories(stagingFullPath.getParent()); - } - UncheckedFiles.copy(digestInput, stagingFullPath, StandardCopyOption.REPLACE_EXISTING); - - if (input instanceof FixityCheckInputStream) { - try { - ((FixityCheckInputStream) input).checkFixity(); - } catch (FixityCheckException e) { - FileUtil.safeDelete(stagingFullPath); - FileUtil.deleteDirAndParentsIfEmpty(stagingFullPath.getParent(), stagingDir); - throw e; + var digestInput = wrapInDigestInputStream(input); + LOG.debug("Writing input stream to: {}", stagingFullPath); + if (Files.notExists(stagingFullPath.getParent())) { + UncheckedFiles.createDirectories(stagingFullPath.getParent()); + } + UncheckedFiles.copy(digestInput, stagingFullPath, StandardCopyOption.REPLACE_EXISTING); + + if (input instanceof FixityCheckInputStream) { + try { + ((FixityCheckInputStream) input).checkFixity(); + } catch (FixityCheckException e) { + FileUtil.safeDelete(stagingFullPath); + FileUtil.deleteDirAndParentsIfEmpty(stagingFullPath.getParent(), stagingDir); + throw e; + } } - } - String digest; + String digest; - if (digestInput instanceof FixityCheckInputStream) { - digest = ((FixityCheckInputStream) digestInput) - .getActualDigestValue() - .get(); - } else { - digest = Bytes.wrap(digestInput.getMessageDigest().digest()).encodeHex(); - } + if (digestInput instanceof FixityCheckInputStream) { + digest = ((FixityCheckInputStream) digestInput) + .getActualDigestValue() + .get(); + } else { + digest = Bytes.wrap(digestInput.getMessageDigest().digest()).encodeHex(); + } - var result = inventoryUpdater.addFile(digest, destinationPath, options); + var result = inventoryUpdater.addFile(digest, destinationPath, options); - if (!result.isNew()) { - LOG.debug( - "Deleting file <{}> because a file with same digest <{}> is already present in the object", - stagingFullPath, - digest); - UncheckedFiles.delete(stagingFullPath); - FileUtil.deleteDirAndParentsIfEmpty(stagingFullPath.getParent(), stagingDir); - } else { - 
stagedFileMap.put(destinationPath, stagingFullPath); - } + if (!result.isNew()) { + LOG.debug( + "Deleting file <{}> because a file with same digest <{}> is already present in the object", + stagingFullPath, + digest); + UncheckedFiles.delete(stagingFullPath); + FileUtil.deleteDirAndParentsIfEmpty(stagingFullPath.getParent(), stagingDir); + } else { + stagedFileMap.put(destinationPath, stagingFullPath); + } - return this; + return this; + }); } /** @@ -183,12 +184,14 @@ public OcflObjectUpdater writeFile(InputStream input, String destinationPath, Oc public OcflObjectUpdater removeFile(String path) { Enforce.notBlank(path, "path cannot be blank"); - LOG.debug("Remove <{}> from object <{}>", path, inventory.getId()); + return fileLocker.withLock(path, () -> { + LOG.debug("Remove <{}> from object <{}>", path, inventory.getId()); - var results = inventoryUpdater.removeFile(path); - removeUnneededStagedFiles(results); + var results = inventoryUpdater.removeFile(path); + removeUnneededStagedFiles(results); - return this; + return this; + }); } /** @@ -199,12 +202,23 @@ public OcflObjectUpdater renameFile(String sourcePath, String destinationPath, O Enforce.notBlank(sourcePath, "sourcePath cannot be blank"); Enforce.notBlank(destinationPath, "destinationPath cannot be blank"); - LOG.debug("Rename file in object <{}> from <{}> to <{}>", inventory.getId(), sourcePath, destinationPath); + var lock1 = fileLocker.lock(sourcePath); + try { + var lock2 = fileLocker.lock(destinationPath); + try { + LOG.debug( + "Rename file in object <{}> from <{}> to <{}>", inventory.getId(), sourcePath, destinationPath); - var results = inventoryUpdater.renameFile(sourcePath, destinationPath, options); - removeUnneededStagedFiles(results); + var results = inventoryUpdater.renameFile(sourcePath, destinationPath, options); + removeUnneededStagedFiles(results); - return this; + return this; + } finally { + lock2.unlock(); + } + } finally { + lock1.unlock(); + } } /** @@ -217,12 +231,14 @@ public 
OcflObjectUpdater reinstateFile( Enforce.notBlank(sourcePath, "sourcePath cannot be blank"); Enforce.notBlank(destinationPath, "destinationPath cannot be blank"); - LOG.debug("Reinstate file at <{}> in object <{}> to <{}>", sourcePath, sourceVersionNum, destinationPath); + return fileLocker.withLock(destinationPath, () -> { + LOG.debug("Reinstate file at <{}> in object <{}> to <{}>", sourcePath, sourceVersionNum, destinationPath); - var results = inventoryUpdater.reinstateFile(sourceVersionNum, sourcePath, destinationPath, options); - removeUnneededStagedFiles(results); + var results = inventoryUpdater.reinstateFile(sourceVersionNum, sourcePath, destinationPath, options); + removeUnneededStagedFiles(results); - return this; + return this; + }); } /** @@ -244,47 +260,49 @@ public OcflObjectUpdater addFileFixity(String logicalPath, DigestAlgorithm algor Enforce.notNull(algorithm, "algorithm cannot be null"); Enforce.notBlank(value, "value cannot be null"); - LOG.debug( - "Add file fixity for file <{}> in object <{}>: Algorithm: {}; Value: {}", - logicalPath, - inventory.getId(), - algorithm.getOcflName(), - value); + return fileLocker.withLock(logicalPath, () -> { + LOG.debug( + "Add file fixity for file <{}> in object <{}>: Algorithm: {}; Value: {}", + logicalPath, + inventory.getId(), + algorithm.getOcflName(), + value); - var digest = inventoryUpdater.getFixityDigest(logicalPath, algorithm); - var alreadyExists = true; + var digest = inventoryUpdater.getFixityDigest(logicalPath, algorithm); + var alreadyExists = true; - if (digest == null) { - alreadyExists = false; + if (digest == null) { + alreadyExists = false; - if (!stagedFileMap.containsKey(logicalPath)) { - throw new OcflInputException(String.format( - "%s was not newly added in this update. 
Fixity information can only be added on new files.", - logicalPath)); - } + if (!algorithm.hasJavaStandardName()) { + throw new OcflInputException( + "The specified digest algorithm is not mapped to a Java name: " + algorithm); + } - if (!algorithm.hasJavaStandardName()) { - throw new OcflInputException( - "The specified digest algorithm is not mapped to a Java name: " + algorithm); - } + var file = stagedFileMap.get(logicalPath); - var file = stagedFileMap.get(logicalPath); + if (file == null) { + throw new OcflInputException(String.format( + "%s was not newly added in this update. Fixity information can only be added on new files.", + logicalPath)); + } - LOG.debug("Computing {} hash of {}", algorithm.getJavaStandardName(), file); - digest = DigestUtil.computeDigestHex(algorithm, file); - } + LOG.debug("Computing {} hash of {}", algorithm.getJavaStandardName(), file); + digest = DigestUtil.computeDigestHex(algorithm, file); + } - if (!value.equalsIgnoreCase(digest)) { - throw new FixityCheckException(String.format( - "Expected %s digest of %s to be %s, but was %s.", - algorithm.getJavaStandardName(), logicalPath, value, digest)); - } + if (!value.equalsIgnoreCase(digest)) { + throw new FixityCheckException(String.format( + "Expected %s digest of %s to be %s, but was %s.", + algorithm.getJavaStandardName(), logicalPath, value, digest)); + } - if (!alreadyExists) { - inventoryUpdater.addFixity(logicalPath, algorithm, digest); - } + if (!alreadyExists) { + inventoryUpdater.addFixity(logicalPath, algorithm, digest); + } - return this; + return this; + }); } /** diff --git a/ocfl-java-core/src/main/java/io/ocfl/core/DefaultOcflRepository.java b/ocfl-java-core/src/main/java/io/ocfl/core/DefaultOcflRepository.java index 62c0d97c..4ff35bd9 100644 --- a/ocfl-java-core/src/main/java/io/ocfl/core/DefaultOcflRepository.java +++ b/ocfl-java-core/src/main/java/io/ocfl/core/DefaultOcflRepository.java @@ -72,6 +72,7 @@ import java.nio.file.Path; import 
java.security.DigestOutputStream; import java.time.Clock; +import java.time.Duration; import java.time.OffsetDateTime; import java.util.HashMap; import java.util.HashSet; @@ -93,17 +94,17 @@ public class DefaultOcflRepository implements OcflRepository { private static final Logger LOG = LoggerFactory.getLogger(DefaultOcflRepository.class); + private final OcflConfig config; private final boolean verifyStaging; - protected final OcflStorage storage; - protected final InventoryMapper inventoryMapper; - protected final Path workDir; - protected final ObjectLock objectLock; - protected final ResponseMapper responseMapper; + private final Duration fileLockTimeoutDuration; + private final OcflStorage storage; + private final InventoryMapper inventoryMapper; + private final Path workDir; + private final ObjectLock objectLock; + private final ResponseMapper responseMapper; protected final InventoryUpdater.Builder inventoryUpdaterBuilder; protected final AddFileProcessor.Builder addFileProcessorBuilder; - protected final OcflConfig config; - private Clock clock; private final AtomicBoolean closed = new AtomicBoolean(false); @@ -119,6 +120,7 @@ public class DefaultOcflRepository implements OcflRepository { * @param contentPathConstraintProcessor content path constraint processor * @param config ocfl defaults configuration * @param verifyStaging true if the contents of a stage version should be double-checked + * @param fileLockTimeoutDuration the max amount of time to wait for a file lock */ public DefaultOcflRepository( OcflStorage storage, @@ -128,13 +130,16 @@ public DefaultOcflRepository( LogicalPathMapper logicalPathMapper, ContentPathConstraintProcessor contentPathConstraintProcessor, OcflConfig config, - boolean verifyStaging) { + boolean verifyStaging, + Duration fileLockTimeoutDuration) { this.storage = Enforce.notNull(storage, "storage cannot be null"); this.workDir = Enforce.notNull(workDir, "workDir cannot be null"); this.objectLock = Enforce.notNull(objectLock, 
"objectLock cannot be null"); this.inventoryMapper = Enforce.notNull(inventoryMapper, "inventoryMapper cannot be null"); this.config = Enforce.notNull(config, "config cannot be null"); this.verifyStaging = verifyStaging; + this.fileLockTimeoutDuration = + Enforce.notNull(fileLockTimeoutDuration, "fileLockTimeoutDuration cannot be null"); inventoryUpdaterBuilder = InventoryUpdater.builder() .contentPathMapperBuilder(ContentPathMapper.builder() @@ -170,7 +175,9 @@ public ObjectVersionId putObject( var stagingDir = createStagingDir(objectVersionId.getObjectId()); var contentDir = createStagingContentDir(inventory, stagingDir); - var fileProcessor = addFileProcessorBuilder.build(inventoryUpdater, contentDir, inventory.getDigestAlgorithm()); + var fileLocker = new FileLocker(fileLockTimeoutDuration); + var fileProcessor = + addFileProcessorBuilder.build(inventoryUpdater, fileLocker, contentDir, inventory.getDigestAlgorithm()); fileProcessor.processPath(path, options); var upgrade = inventoryUpdater.upgradeInventory(config); @@ -206,9 +213,11 @@ public ObjectVersionId updateObject( var contentDir = createStagingContentDir(inventory, stagingDir); var inventoryUpdater = inventoryUpdaterBuilder.buildCopyState(inventory); + var fileLocker = new FileLocker(fileLockTimeoutDuration); var addFileProcessor = - addFileProcessorBuilder.build(inventoryUpdater, contentDir, inventory.getDigestAlgorithm()); - var updater = new DefaultOcflObjectUpdater(inventory, inventoryUpdater, contentDir, addFileProcessor); + addFileProcessorBuilder.build(inventoryUpdater, fileLocker, contentDir, inventory.getDigestAlgorithm()); + var updater = + new DefaultOcflObjectUpdater(inventory, inventoryUpdater, contentDir, addFileProcessor, fileLocker); try { objectUpdater.accept(updater); diff --git a/ocfl-java-core/src/main/java/io/ocfl/core/FileLocker.java b/ocfl-java-core/src/main/java/io/ocfl/core/FileLocker.java new file mode 100644 index 00000000..d5f2d311 --- /dev/null +++ 
b/ocfl-java-core/src/main/java/io/ocfl/core/FileLocker.java @@ -0,0 +1,115 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2019 University of Wisconsin Board of Regents + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +package io.ocfl.core; + +import io.ocfl.api.exception.LockException; +import io.ocfl.api.util.Enforce; +import io.ocfl.core.util.UncheckedCallable; +import java.time.Duration; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.locks.ReentrantLock; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Provides locks for logical paths, so that an object may be safely modified by multiple threads. 
+ */ +public class FileLocker { + + private static final Logger log = LoggerFactory.getLogger(FileLocker.class); + + private final Map locks; + private final long timeoutMillis; + + /** + * @param timeoutDuration the max amount of time to wait for a file lock + */ + public FileLocker(Duration timeoutDuration) { + this.timeoutMillis = Enforce.notNull(timeoutDuration, "timeoutDuration cannot be null") + .toMillis(); + locks = new ConcurrentHashMap<>(); + } + + /** + * Returns a lock on the specified logical path or throws a {@link LockException} if a lock was unable to be + * acquired. This lock MUST be released in a finally block. + * + * @param logicalPath the path to lock + * @return the lock + * @throws LockException when unable to acquire a lock + */ + public ReentrantLock lock(String logicalPath) { + var lock = locks.computeIfAbsent(logicalPath, k -> new ReentrantLock()); + log.debug("Acquiring lock on {}", logicalPath); + boolean acquired; + try { + acquired = lock.tryLock(timeoutMillis, TimeUnit.MILLISECONDS); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new LockException("Failed to acquire lock on file " + logicalPath, e); + } + if (acquired) { + log.debug("Acquired lock on {}", logicalPath); + return lock; + } else { + throw new LockException("Failed to acquire lock on file " + logicalPath); + } + } + + /** + * Executes the runnable after acquire a lock on the specified logical path. If the lock cannot be acquired, + * a {@link LockException} is thrown. + * + * @param logicalPath the path to lock + * @throws LockException when unable to acquire a lock + */ + public void withLock(String logicalPath, Runnable runnable) { + var lock = lock(logicalPath); + try { + runnable.run(); + } finally { + lock.unlock(); + } + } + + /** + * Executes the callable after acquire a lock on the specified logical path. If the lock cannot be acquired, + * a {@link LockException} is thrown. 
+ * + * @param logicalPath the path to lock + * @return the output of the callable + * @throws LockException when unable to acquire a lock + */ + public T withLock(String logicalPath, UncheckedCallable callable) { + var lock = lock(logicalPath); + try { + return callable.call(); + } finally { + lock.unlock(); + } + } +} diff --git a/ocfl-java-core/src/main/java/io/ocfl/core/OcflRepositoryBuilder.java b/ocfl-java-core/src/main/java/io/ocfl/core/OcflRepositoryBuilder.java index 18729161..5d84ed39 100644 --- a/ocfl-java-core/src/main/java/io/ocfl/core/OcflRepositoryBuilder.java +++ b/ocfl-java-core/src/main/java/io/ocfl/core/OcflRepositoryBuilder.java @@ -74,6 +74,7 @@ public class OcflRepositoryBuilder { private OcflExtensionConfig defaultLayoutConfig; private Path workDir; private boolean verifyStaging; + private Duration fileLockTimeoutDuration; private ObjectLock objectLock; private Cache inventoryCache; @@ -103,6 +104,7 @@ public OcflRepositoryBuilder() { unsupportedBehavior = UnsupportedExtensionBehavior.FAIL; ignoreUnsupportedExtensions = Collections.emptySet(); verifyStaging = true; + fileLockTimeoutDuration = Duration.ofMinutes(1); } /** @@ -373,6 +375,20 @@ public OcflRepositoryBuilder verifyStaging(boolean verifyStaging) { return this; } + /** + * Configures the max amount of time to wait for a file lock when updating an object from multiple threads. This + * only matters if you concurrently write files to the same object, and can otherwise be ignored. The default + * timeout is 1 minute. + * + * @param fileLockTimeoutDuration the max amount of time to wait for a file lock + * @return builder + */ + public OcflRepositoryBuilder fileLockTimeoutDuration(Duration fileLockTimeoutDuration) { + this.fileLockTimeoutDuration = + Enforce.notNull(fileLockTimeoutDuration, "fileLockTimeoutDuration cannot be null"); + return this; + } + /** * Constructs an OCFL repository. Brand new repositories are initialized. *

@@ -422,7 +438,8 @@ private T buildInternal(Class clazz) { logicalPathMapper, contentPathConstraintProcessor, config, - verifyStaging)); + verifyStaging, + fileLockTimeoutDuration)); } return clazz.cast(new DefaultOcflRepository( @@ -433,7 +450,8 @@ private T buildInternal(Class clazz) { logicalPathMapper, contentPathConstraintProcessor, config, - verifyStaging)); + verifyStaging, + fileLockTimeoutDuration)); } private OcflStorage cache(OcflStorage storage) { diff --git a/ocfl-java-core/src/main/java/io/ocfl/core/inventory/AddFileProcessor.java b/ocfl-java-core/src/main/java/io/ocfl/core/inventory/AddFileProcessor.java index ee31544b..b8e4349b 100644 --- a/ocfl-java-core/src/main/java/io/ocfl/core/inventory/AddFileProcessor.java +++ b/ocfl-java-core/src/main/java/io/ocfl/core/inventory/AddFileProcessor.java @@ -29,6 +29,7 @@ import io.ocfl.api.exception.OcflIOException; import io.ocfl.api.model.DigestAlgorithm; import io.ocfl.api.util.Enforce; +import io.ocfl.core.FileLocker; import io.ocfl.core.util.DigestUtil; import io.ocfl.core.util.FileUtil; import io.ocfl.core.util.UncheckedFiles; @@ -41,9 +42,10 @@ import java.nio.file.StandardCopyOption; import java.nio.file.StandardOpenOption; import java.security.DigestOutputStream; -import java.security.MessageDigest; +import java.util.ArrayList; import java.util.HashMap; import java.util.Map; +import java.util.concurrent.locks.ReentrantLock; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -55,9 +57,9 @@ public class AddFileProcessor { private static final Logger LOG = LoggerFactory.getLogger(AddFileProcessor.class); private final InventoryUpdater inventoryUpdater; + private final FileLocker fileLocker; private final Path stagingDir; private final DigestAlgorithm digestAlgorithm; - private final MessageDigest messageDigest; public static Builder builder() { return new Builder(); @@ -66,8 +68,11 @@ public static Builder builder() { public static class Builder { public AddFileProcessor build( - InventoryUpdater 
inventoryUpdater, Path stagingDir, DigestAlgorithm digestAlgorithm) { - return new AddFileProcessor(inventoryUpdater, stagingDir, digestAlgorithm); + InventoryUpdater inventoryUpdater, + FileLocker fileLocker, + Path stagingDir, + DigestAlgorithm digestAlgorithm) { + return new AddFileProcessor(inventoryUpdater, fileLocker, stagingDir, digestAlgorithm); } } @@ -78,11 +83,15 @@ public AddFileProcessor build( * @param stagingDir the staging directory to move files into * @param digestAlgorithm the digest algorithm */ - public AddFileProcessor(InventoryUpdater inventoryUpdater, Path stagingDir, DigestAlgorithm digestAlgorithm) { + public AddFileProcessor( + InventoryUpdater inventoryUpdater, + FileLocker fileLocker, + Path stagingDir, + DigestAlgorithm digestAlgorithm) { this.inventoryUpdater = Enforce.notNull(inventoryUpdater, "inventoryUpdater cannot be null"); + this.fileLocker = Enforce.notNull(fileLocker, "fileLocker cannot be null"); this.stagingDir = Enforce.notNull(stagingDir, "stagingDir cannot be null"); this.digestAlgorithm = Enforce.notNull(digestAlgorithm, "digestAlgorithm cannot be null"); - this.messageDigest = digestAlgorithm.getMessageDigest(); } /** @@ -110,15 +119,20 @@ public Map processPath(Path sourcePath, String destinationPath, Oc var results = new HashMap(); var optionsSet = OcflOption.toSet(options); + var isMove = optionsSet.contains(OcflOption.MOVE_SOURCE); var destination = destinationPath(destinationPath, sourcePath); + var messageDigest = digestAlgorithm.getMessageDigest(); + var locks = new ArrayList(); try (var paths = Files.find( sourcePath, Integer.MAX_VALUE, (file, attrs) -> attrs.isRegularFile(), FileVisitOption.FOLLOW_LINKS)) { - paths.forEach(file -> { + for (var it = paths.iterator(); it.hasNext(); ) { + var file = it.next(); messageDigest.reset(); var logicalPath = logicalPath(sourcePath, file, destination); + locks.add(fileLocker.lock(logicalPath)); - if (optionsSet.contains(OcflOption.MOVE_SOURCE)) { + if (isMove) { var 
digest = DigestUtil.computeDigestHex(messageDigest, file); var result = inventoryUpdater.addFile(digest, logicalPath, options); @@ -167,12 +181,14 @@ public Map processPath(Path sourcePath, String destinationPath, Oc FileUtil.deleteDirAndParentsIfEmpty(stagingFullPath.getParent(), stagingDir); } } - }); + } } catch (IOException e) { throw new OcflIOException(e); + } finally { + locks.forEach(ReentrantLock::unlock); } - if (optionsSet.contains(OcflOption.MOVE_SOURCE)) { + if (isMove) { // Cleanup empty dirs FileUtil.safeDeleteDirectory(sourcePath); } @@ -205,23 +221,26 @@ public Map processFileWithDigest( var destination = destinationPath(destinationPath, sourcePath); var logicalPath = logicalPath(sourcePath, sourcePath, destination); - var result = inventoryUpdater.addFile(digest, logicalPath, options); - if (result.isNew()) { - var stagingFullPath = stagingFullPath(result.getPathUnderContentDir()); + return fileLocker.withLock(logicalPath, () -> { + var result = inventoryUpdater.addFile(digest, logicalPath, options); - results.put(logicalPath, stagingFullPath); + if (result.isNew()) { + var stagingFullPath = stagingFullPath(result.getPathUnderContentDir()); - if (optionsSet.contains(OcflOption.MOVE_SOURCE)) { - LOG.debug("Moving file <{}> to <{}>", sourcePath, stagingFullPath); - FileUtil.moveFileMakeParents(sourcePath, stagingFullPath, StandardCopyOption.REPLACE_EXISTING); - } else { - LOG.debug("Copying file <{}> to <{}>", sourcePath, stagingFullPath); - FileUtil.copyFileMakeParents(sourcePath, stagingFullPath, StandardCopyOption.REPLACE_EXISTING); + results.put(logicalPath, stagingFullPath); + + if (optionsSet.contains(OcflOption.MOVE_SOURCE)) { + LOG.debug("Moving file <{}> to <{}>", sourcePath, stagingFullPath); + FileUtil.moveFileMakeParents(sourcePath, stagingFullPath, StandardCopyOption.REPLACE_EXISTING); + } else { + LOG.debug("Copying file <{}> to <{}>", sourcePath, stagingFullPath); + FileUtil.copyFileMakeParents(sourcePath, stagingFullPath, 
StandardCopyOption.REPLACE_EXISTING); + } } - } - return results; + return results; + }); } private String destinationPath(String path, Path sourcePath) { diff --git a/ocfl-java-core/src/main/java/io/ocfl/core/inventory/InventoryUpdater.java b/ocfl-java-core/src/main/java/io/ocfl/core/inventory/InventoryUpdater.java index 8cb93976..4f6127a3 100644 --- a/ocfl-java-core/src/main/java/io/ocfl/core/inventory/InventoryUpdater.java +++ b/ocfl-java-core/src/main/java/io/ocfl/core/inventory/InventoryUpdater.java @@ -186,7 +186,7 @@ private InventoryUpdater( * @param versionInfo information about the version * @return new inventory */ - public Inventory buildNewInventory(OffsetDateTime createdTimestamp, VersionInfo versionInfo) { + public synchronized Inventory buildNewInventory(OffsetDateTime createdTimestamp, VersionInfo versionInfo) { return inventoryBuilder .addHeadVersion(versionBuilder .versionInfo(versionInfo) @@ -202,7 +202,7 @@ public Inventory buildNewInventory(OffsetDateTime createdTimestamp, VersionInfo * @param config the OCFL configuration * @return true if the inventory is upgraded; false otherwise */ - public boolean upgradeInventory(OcflConfig config) { + public synchronized boolean upgradeInventory(OcflConfig config) { if (config.isUpgradeObjectsOnWrite() && inventoryBuilder.getType().compareTo(config.getOcflVersion().getInventoryType()) < 0) { inventoryBuilder.type(config.getOcflVersion().getInventoryType()); @@ -219,7 +219,7 @@ public boolean upgradeInventory(OcflConfig config) { * @param options options * @return details about the file if it was added to the manifest */ - public AddFileResult addFile(String fileId, String logicalPath, OcflOption... options) { + public synchronized AddFileResult addFile(String fileId, String logicalPath, OcflOption... 
options) { logicalPathConstraints.apply(logicalPath); overwriteProtection(logicalPath, options); @@ -260,7 +260,7 @@ public String innerContentPath(String logicalPath) { * @param algorithm algorithm used to calculate the digest * @param digest the digest value */ - public void addFixity(String logicalPath, DigestAlgorithm algorithm, String digest) { + public synchronized void addFixity(String logicalPath, DigestAlgorithm algorithm, String digest) { if (algorithm.equals(inventory.getDigestAlgorithm())) { return; } @@ -281,7 +281,7 @@ public void addFixity(String logicalPath, DigestAlgorithm algorithm, String dige * @param algorithm the digest algorithm * @return the digest or null */ - public String getFixityDigest(String logicalPath, DigestAlgorithm algorithm) { + public synchronized String getFixityDigest(String logicalPath, DigestAlgorithm algorithm) { if (inventory.getDigestAlgorithm().equals(algorithm)) { return versionBuilder.getFileId(logicalPath); } @@ -299,7 +299,7 @@ public String getFixityDigest(String logicalPath, DigestAlgorithm algorithm) { /** * Removes all entries from the fixity block. */ - public void clearFixity() { + public synchronized void clearFixity() { inventoryBuilder.clearFixity(); } @@ -310,7 +310,7 @@ public void clearFixity() { * @param logicalPath logical path to the file * @return files that were removed from the manifest */ - public Set removeFile(String logicalPath) { + public synchronized Set removeFile(String logicalPath) { var fileId = versionBuilder.removeLogicalPath(logicalPath); return removeFileFromManifestWithResults(fileId); } @@ -325,7 +325,8 @@ public Set removeFile(String logicalPath) { * @param options options * @return files that were removed from the manifest */ - public Set renameFile(String srcLogicalPath, String dstLogicalPath, OcflOption... options) { + public synchronized Set renameFile( + String srcLogicalPath, String dstLogicalPath, OcflOption... 
options) { logicalPathConstraints.apply(dstLogicalPath); var srcDigest = versionBuilder.getFileId(srcLogicalPath); @@ -358,7 +359,7 @@ public Set renameFile(String srcLogicalPath, String dstLogical * @param options options * @return files that were removed from the manifest */ - public Set reinstateFile( + public synchronized Set reinstateFile( VersionNum sourceVersion, String srcLogicalPath, String dstLogicalPath, OcflOption... options) { logicalPathConstraints.apply(dstLogicalPath); @@ -383,7 +384,7 @@ public Set reinstateFile( /** * Removes all of the files from the version's state. */ - public void clearState() { + public synchronized void clearState() { var state = new HashSet<>(versionBuilder.getInvertedState().keySet()); state.forEach(this::removeFile); } diff --git a/ocfl-java-core/src/main/java/io/ocfl/core/lock/InMemoryObjectLock.java b/ocfl-java-core/src/main/java/io/ocfl/core/lock/InMemoryObjectLock.java index 6decedcb..84e99155 100644 --- a/ocfl-java-core/src/main/java/io/ocfl/core/lock/InMemoryObjectLock.java +++ b/ocfl-java-core/src/main/java/io/ocfl/core/lock/InMemoryObjectLock.java @@ -95,6 +95,7 @@ private T doInLock(String objectId, Lock lock, Callable doInLock) { throw new LockException("Failed to acquire lock for object " + objectId); } } catch (InterruptedException e) { + Thread.currentThread().interrupt(); throw new LockException(e); } } diff --git a/ocfl-java-core/src/main/java/io/ocfl/core/util/UncheckedCallable.java b/ocfl-java-core/src/main/java/io/ocfl/core/util/UncheckedCallable.java new file mode 100644 index 00000000..e1de42ac --- /dev/null +++ b/ocfl-java-core/src/main/java/io/ocfl/core/util/UncheckedCallable.java @@ -0,0 +1,31 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2019 University of Wisconsin Board of Regents + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, 
including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +package io.ocfl.core.util; + +@FunctionalInterface +public interface UncheckedCallable { + + V call(); +} From 75d4104aaefddc8307000452b96c42772ee1307d Mon Sep 17 00:00:00 2001 From: Peter Winckles Date: Sun, 30 Apr 2023 20:20:46 -0500 Subject: [PATCH 07/21] parallelize s3 writes --- .../main/java/io/ocfl/aws/OcflS3Client.java | 70 ++++++++------- .../src/main/java/io/ocfl/aws/OcflS3Util.java | 47 ++++++++++ .../main/java/io/ocfl/aws/UploadFuture.java | 87 +++++++++++++++++++ .../ocfl/core/storage/cloud/CloudClient.java | 19 ++++ .../ocfl/core/storage/cloud/CloudStorage.java | 49 +++++++++-- 5 files changed, 231 insertions(+), 41 deletions(-) create mode 100644 ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Util.java create mode 100644 ocfl-java-aws/src/main/java/io/ocfl/aws/UploadFuture.java diff --git a/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Client.java b/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Client.java index 09337dd3..abab1199 100644 --- a/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Client.java +++ b/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Client.java @@ -42,7 
+42,8 @@ import java.util.List; import java.util.Objects; import java.util.concurrent.CompletableFuture; -import java.util.concurrent.CompletionException; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Future; import java.util.function.BiConsumer; import java.util.stream.Collectors; import org.slf4j.Logger; @@ -168,15 +169,15 @@ public String prefix() { * {@inheritDoc} */ @Override - public CloudObjectKey uploadFile(Path srcPath, String dstPath) { - return uploadFile(srcPath, dstPath, null); + public Future uploadFileAsync(Path srcPath, String dstPath) { + return uploadFileAsync(srcPath, dstPath, null); } /** * {@inheritDoc} */ @Override - public CloudObjectKey uploadFile(Path srcPath, String dstPath, String contentType) { + public Future uploadFileAsync(Path srcPath, String dstPath, String contentType) { var fileSize = UncheckedFiles.size(srcPath); var dstKey = keyBuilder.buildFromPath(dstPath); @@ -193,13 +194,31 @@ public CloudObjectKey uploadFile(Path srcPath, String dstPath, String contentTyp .build()) .build()); + return new UploadFuture(upload, srcPath, dstKey); + } + + /** + * {@inheritDoc} + */ + @Override + public CloudObjectKey uploadFile(Path srcPath, String dstPath) { + return uploadFile(srcPath, dstPath, null); + } + + /** + * {@inheritDoc} + */ + @Override + public CloudObjectKey uploadFile(Path srcPath, String dstPath, String contentType) { + var future = uploadFileAsync(srcPath, dstPath, contentType); try { - upload.completionFuture().join(); - } catch (RuntimeException e) { - throw new OcflS3Exception("Failed to upload " + srcPath + " to " + dstKey, unwrapCompletionEx(e)); + return future.get(); + } catch (ExecutionException e) { + throw (RuntimeException) e.getCause(); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new OcflS3Exception("Failed ot upload " + srcPath, e); } - - return dstKey; } /** @@ -218,7 +237,7 @@ public CloudObjectKey uploadBytes(String dstPath, byte[] bytes, 
String contentTy s3Client.putObject(builder.bucket(bucket).key(dstKey.getKey()).build(), AsyncRequestBody.fromBytes(bytes)) .join(); } catch (RuntimeException e) { - throw new OcflS3Exception("Failed to upload bytes to " + dstKey, unwrapCompletionEx(e)); + throw new OcflS3Exception("Failed to upload bytes to " + dstKey, OcflS3Util.unwrapCompletionEx(e)); } return dstKey; @@ -244,7 +263,7 @@ public CloudObjectKey copyObject(String srcPath, String dstPath) { copy.completionFuture().join(); } catch (RuntimeException e) { - var cause = unwrapCompletionEx(e); + var cause = OcflS3Util.unwrapCompletionEx(e); if (wasNotFound(cause)) { throw new KeyNotFoundException("Key " + srcKey + " not found in bucket " + bucket, cause); } @@ -270,7 +289,7 @@ public Path downloadFile(String srcPath, Path dstPath) { download.completionFuture().join(); } catch (RuntimeException e) { - var cause = unwrapCompletionEx(e); + var cause = OcflS3Util.unwrapCompletionEx(e); if (wasNotFound(cause)) { throw new KeyNotFoundException("Key " + srcKey + " not found in bucket " + bucket, cause); } @@ -297,7 +316,7 @@ public InputStream downloadStream(String srcPath) { AsyncResponseTransformer.toBlockingInputStream()) .join(); } catch (RuntimeException e) { - var cause = unwrapCompletionEx(e); + var cause = OcflS3Util.unwrapCompletionEx(e); if (wasNotFound(cause)) { throw new KeyNotFoundException("Key " + srcKey + " not found in bucket " + bucket, cause); } @@ -337,7 +356,7 @@ public HeadResult head(String path) { .setETag(s3Result.eTag()) .setLastModified(s3Result.lastModified()); } catch (RuntimeException e) { - var cause = unwrapCompletionEx(e); + var cause = OcflS3Util.unwrapCompletionEx(e); if (wasNotFound(cause)) { throw new KeyNotFoundException("Key " + key + " not found in bucket " + bucket, cause); } @@ -396,7 +415,7 @@ public boolean directoryExists(String path) { return response.contents().stream().findAny().isPresent() || response.commonPrefixes().stream().findAny().isPresent(); } catch 
(RuntimeException e) { - throw new OcflS3Exception("Failed to list objects under " + prefix, unwrapCompletionEx(e)); + throw new OcflS3Exception("Failed to list objects under " + prefix, OcflS3Util.unwrapCompletionEx(e)); } } @@ -452,7 +471,7 @@ private void deleteObjectsInternal(Collection objectKeys) { CompletableFuture.allOf(futures.toArray(new CompletableFuture[] {})) .join(); } catch (RuntimeException e) { - throw new OcflS3Exception("Failed to delete objects " + objectIds, unwrapCompletionEx(e)); + throw new OcflS3Exception("Failed to delete objects " + objectIds, OcflS3Util.unwrapCompletionEx(e)); } } } @@ -487,7 +506,7 @@ public boolean bucketExists() { .join(); return true; } catch (RuntimeException e) { - var cause = unwrapCompletionEx(e); + var cause = OcflS3Util.unwrapCompletionEx(e); if (wasNotFound(cause)) { return false; } @@ -517,7 +536,7 @@ private ListResult toListResult(ListObjectsV2Request.Builder requestBuilder) { return new ListResult().setObjects(objects).setDirectories(dirs); } catch (RuntimeException e) { - throw new OcflS3Exception("Failed to list objects", unwrapCompletionEx(e)); + throw new OcflS3Exception("Failed to list objects", OcflS3Util.unwrapCompletionEx(e)); } } @@ -553,21 +572,6 @@ private int prefixLength(String prefix) { return prefixLength; } - /** - * If the exception is a CompletionException, then the exception's cause is returned. Otherwise, the exception - * itself is returned. - * - * @param e the exception - * @return the exception or its cause - */ - private Throwable unwrapCompletionEx(RuntimeException e) { - Throwable cause = e; - if (e instanceof CompletionException) { - cause = e.getCause(); - } - return cause; - } - /** * Returns true if the exception indicates the object/bucket was NOT found in S3. 
* diff --git a/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Util.java b/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Util.java new file mode 100644 index 00000000..0b136cbb --- /dev/null +++ b/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Util.java @@ -0,0 +1,47 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2019 University of Wisconsin Board of Regents + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +package io.ocfl.aws; + +import java.util.concurrent.CompletionException; + +final class OcflS3Util { + + private OcflS3Util() {} + + /** + * If the exception is a CompletionException, then the exception's cause is returned. Otherwise, the exception + * itself is returned. 
+ * + * @param e the exception + * @return the exception or its cause + */ + static Throwable unwrapCompletionEx(RuntimeException e) { + Throwable cause = e; + if (e instanceof CompletionException) { + cause = e.getCause(); + } + return cause; + } +} diff --git a/ocfl-java-aws/src/main/java/io/ocfl/aws/UploadFuture.java b/ocfl-java-aws/src/main/java/io/ocfl/aws/UploadFuture.java new file mode 100644 index 00000000..ec10a110 --- /dev/null +++ b/ocfl-java-aws/src/main/java/io/ocfl/aws/UploadFuture.java @@ -0,0 +1,87 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2019 University of Wisconsin Board of Regents + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +package io.ocfl.aws; + +import io.ocfl.core.storage.cloud.CloudObjectKey; +import java.nio.file.Path; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import software.amazon.awssdk.transfer.s3.model.FileUpload; + +/** + * Converts a FileUpload CompletionFuture into a regular Future. + */ +public class UploadFuture implements Future { + + private final FileUpload upload; + private final Path srcPath; + private final CloudObjectKey dstKey; + + public UploadFuture(FileUpload upload, Path srcPath, CloudObjectKey dstKey) { + this.upload = upload; + this.srcPath = srcPath; + this.dstKey = dstKey; + } + + @Override + public boolean cancel(boolean mayInterruptIfRunning) { + return upload.completionFuture().cancel(mayInterruptIfRunning); + } + + @Override + public boolean isCancelled() { + return upload.completionFuture().isCancelled(); + } + + @Override + public boolean isDone() { + return upload.completionFuture().isDone(); + } + + @Override + public CloudObjectKey get() throws InterruptedException, ExecutionException { + try { + upload.completionFuture().get(); + } catch (RuntimeException e) { + throw new ExecutionException(new OcflS3Exception( + "Failed to upload " + srcPath + " to " + dstKey, OcflS3Util.unwrapCompletionEx(e))); + } + return dstKey; + } + + @Override + public CloudObjectKey get(long timeout, TimeUnit unit) + throws InterruptedException, ExecutionException, TimeoutException { + try { + upload.completionFuture().get(timeout, unit); + } catch (RuntimeException e) { + throw new ExecutionException(new OcflS3Exception( + "Failed to upload " + srcPath + " to " + dstKey, OcflS3Util.unwrapCompletionEx(e))); + } + return dstKey; + } +} diff --git a/ocfl-java-core/src/main/java/io/ocfl/core/storage/cloud/CloudClient.java b/ocfl-java-core/src/main/java/io/ocfl/core/storage/cloud/CloudClient.java index 4057e48f..a34d48a3 100644 --- 
a/ocfl-java-core/src/main/java/io/ocfl/core/storage/cloud/CloudClient.java +++ b/ocfl-java-core/src/main/java/io/ocfl/core/storage/cloud/CloudClient.java @@ -27,6 +27,7 @@ import java.io.InputStream; import java.nio.file.Path; import java.util.Collection; +import java.util.concurrent.Future; /** * Wrapper interface abstracting cloud provider clients @@ -53,6 +54,24 @@ public interface CloudClient { */ String prefix(); + /** + * Asynchronously uploads a file to the destination, and returns the object key. + * + * @param srcPath src file + * @param dstPath object path + * @return object key + */ + Future uploadFileAsync(Path srcPath, String dstPath); + + /** + * Asynchronously uploads a file to the destination, and returns the object key. + * + * @param srcPath src file + * @param dstPath object path + * @param contentType the content type of the data + * @return object key + */ + Future uploadFileAsync(Path srcPath, String dstPath, String contentType); /** * Uploads a file to the destination, and returns the object key. 
* diff --git a/ocfl-java-core/src/main/java/io/ocfl/core/storage/cloud/CloudStorage.java b/ocfl-java-core/src/main/java/io/ocfl/core/storage/cloud/CloudStorage.java index 10e02613..8df31f07 100644 --- a/ocfl-java-core/src/main/java/io/ocfl/core/storage/cloud/CloudStorage.java +++ b/ocfl-java-core/src/main/java/io/ocfl/core/storage/cloud/CloudStorage.java @@ -27,6 +27,7 @@ import io.ocfl.api.OcflFileRetriever; import io.ocfl.api.exception.OcflFileAlreadyExistsException; import io.ocfl.api.exception.OcflIOException; +import io.ocfl.api.exception.OcflJavaException; import io.ocfl.api.exception.OcflNoSuchFileException; import io.ocfl.api.model.DigestAlgorithm; import io.ocfl.api.util.Enforce; @@ -42,8 +43,8 @@ import java.nio.file.Path; import java.util.ArrayList; import java.util.Collection; -import java.util.Collections; import java.util.List; +import java.util.concurrent.Future; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -239,16 +240,48 @@ public void copyFileInternal(String sourceFile, String destinationFile) { public void moveDirectoryInto(Path source, String destination) { failOnExistingDir(destination); - var objectKeys = Collections.synchronizedList(new ArrayList()); + var objectKeys = new ArrayList(); try (var paths = Files.find(source, Integer.MAX_VALUE, (file, attrs) -> attrs.isRegularFile())) { - paths.forEach(file -> { - var relative = FileUtil.pathToStringStandardSeparator(source.relativize(file)); - var key = FileUtil.pathJoinFailEmpty(destination, relative); - client.uploadFile(file, key); - objectKeys.add(key); - }); + var hasErrors = false; + var interrupted = false; + var futures = new ArrayList>(); + + try { + for (var it = paths.iterator(); it.hasNext(); ) { + var file = it.next(); + var relative = FileUtil.pathToStringStandardSeparator(source.relativize(file)); + var key = FileUtil.pathJoinFailEmpty(destination, relative); + futures.add(client.uploadFileAsync(file, key)); + } + } catch (RuntimeException e) { + // If any of the 
uploads fail before the future is created, we want to short-circuit but still need + // to wait for the successfully started uploads to complete. + hasErrors = true; + LOG.error(e.getMessage(), e); + } + + for (var future : futures) { + try { + objectKeys.add(future.get().getKey()); + } catch (InterruptedException e) { + hasErrors = true; + interrupted = true; + } catch (Exception e) { + hasErrors = true; + LOG.error(e.getMessage(), e); + } + } + + if (interrupted) { + Thread.currentThread().interrupt(); + } + + if (hasErrors) { + throw new OcflJavaException("Failed to move files in " + source + " into " + destination); + } } catch (IOException | RuntimeException e) { + // If any of the files failed to upload, then we must delete everything. client.safeDeleteObjects(objectKeys); if (e instanceof IOException) { From 136b5e701459e74da1ef1d686966498ea883d947 Mon Sep 17 00:00:00 2001 From: Peter Winckles Date: Sun, 30 Apr 2023 20:36:46 -0500 Subject: [PATCH 08/21] update docs --- docs/USAGE.md | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/docs/USAGE.md b/docs/USAGE.md index cacee0c3..9d4365a0 100644 --- a/docs/USAGE.md +++ b/docs/USAGE.md @@ -89,6 +89,10 @@ OCFL repository that supports the [mutable HEAD extension](https://ocfl.github.i most cloud storage, including S3, is now strongly consistent. Use `ObjectDetailsDatabaseBuilder` to construct an `ObjectDetailsDatabase`. +* **fileLockTimeoutDuration**: Configures the max amount of time to wait + for a file lock when updating an object from multiple threads. This + only matters if you concurrently write files to the same object, and + can otherwise be ignored. The default timeout is 1 minute. ## Storage Implementations @@ -238,6 +242,45 @@ default in-memory lock. Additionally, you may want to either adjust or disable inventory caching, or hook up a distributed cache implementation. 
+### Improving write performance + +If your objects have a lot of files, then you _might_ get better +performance by parallelizing file reads and writes. Parallel writes +are only supported as of `ocfl-java` 2.0.0 or later. `ocfl-java` does +not do this for you automatically, but the following is some example +code of one possible way that you could implement parallel writes +to an object: + +```java +repo.updateObject(ObjectVersionId.head(objectId), versionInfo, updater -> { + List> futures; + try (var files = Files.find( + objectPath, Integer.MAX_VALUE, (file, attrs) -> attrs.isRegularFile())) { + futures = files.map(file -> executor.submit(() -> { + var logical = objectPath + .relativize(file) + .toString(); + updater.addPath(file, logical); + })) + .toList(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + futures.forEach(future -> { + try { + future.get(); + } catch (Exception e) { + throw new RuntimeException("Error adding file to object " + objectId, e); + } + }); +}); +``` + +The key bit here is that you use an `ExecutorService` to add multiple +files to the object at the same time. You would likely want to use one thread +pool per object. Additionally, note that this technique will likely +make writes _slower_ if you are not writing a lot of files. 
+ ### Inventory size OCFL inventory files can grow quite large when an object has lots of From 234859dc6d73138cce722b3fa82a36babec3366a Mon Sep 17 00:00:00 2001 From: Peter Winckles Date: Thu, 29 Feb 2024 22:16:48 -0600 Subject: [PATCH 09/21] update docs to include notes about MultipartS3AsyncClient --- docs/USAGE.md | 40 ++++++++++++++----- ocfl-java-aws/pom.xml | 9 +++-- .../main/java/io/ocfl/aws/OcflS3Client.java | 22 +++++++++- .../java/io/ocfl/aws/OcflS3ClientTest.java | 29 ++++++++------ .../src/test/java/io/ocfl/aws/OcflS3Test.java | 29 ++++++++------ .../java/io/ocfl/itest/s3/S3ITestHelper.java | 27 ++++++++----- 6 files changed, 106 insertions(+), 50 deletions(-) diff --git a/docs/USAGE.md b/docs/USAGE.md index 9d4365a0..5854cf7d 100644 --- a/docs/USAGE.md +++ b/docs/USAGE.md @@ -179,23 +179,45 @@ application. Consult the official documentation for details. However, note that it is **crucial** that you configure the transfer manager to use the new [CRT S3 -client](https://docs.aws.amazon.com/sdk-for-java/latest/developer-guide/crt-based-s3-client.html). -The CRT client is **required** by the transfer manager in order to -make multipart uploads. +client](https://docs.aws.amazon.com/sdk-for-java/latest/developer-guide/crt-based-s3-client.html) +or wrap the old Netty async client in a `MultipartS3AsyncClient`. +The reason for this is that the transfer manager only supports +multipart uploads and downloads with the CRT client. However, you can +make multipart uploads work with the old client if it's wrapped in a +`MultipartS3AsyncClient`, but multipart downloads will still not work. + +Unfortunately, from our testing, it appears that the CRT client only +works with the official AWS S3, and it does not work with third party +implementations. So, if you are using a third party implementation, +please make sure you wrap your client in a `MultipartS3AsyncClient`. +Otherwise, you will experience performance degradation. 
If you do not specify a transfer manager when constructing the `OcflS3Client`, then it will create the default transfer manager using -the S3 client it was provided, which, again, should be a CRT client. -When you use the default transfer manager, you need to be sure to -close the `OcflRepository` when you are done with it, otherwise the -transfer manager will not be closed. +the S3 client it was provided. When you use the default transfer +manager, you need to be sure to close the `OcflRepository` when you +are done with it, otherwise the transfer manager will not be closed. -For example, you might construct the S3 client like: +If you are using the CRT client, then you need to add +`software.amazon.awssdk.crt:aws-crt` to your project, and create the +client similar to this, for the default settings: ``` java -S3AsyncClient.crtBuilder().build() +S3AsyncClient.crtBuilder().build(); ``` +If you are using the Netty async client, then you don't need to add +any additional dependencies, and you'd create the client similar to +this, for the default settings: + +``` java +MultipartS3AsyncClient.create( + S3AsyncClient.builder().build(), + MultipartConfiguration.builder().build()); +``` + +Note the use of `MultipartS3AsyncClient`. Very important! 
+ ### Configuration Use `OcflStorageBuilder.builder()` to create and configure an diff --git a/ocfl-java-aws/pom.xml b/ocfl-java-aws/pom.xml index b6de0cc8..474c0e8c 100644 --- a/ocfl-java-aws/pom.xml +++ b/ocfl-java-aws/pom.xml @@ -70,10 +70,6 @@ software.amazon.awssdk s3-transfer-manager - - software.amazon.awssdk.crt - aws-crt - org.codehaus.woodstox stax2-api @@ -94,6 +90,11 @@ + + software.amazon.awssdk.crt + aws-crt + test + org.junit.jupiter junit-jupiter diff --git a/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Client.java b/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Client.java index abab1199..5fe88768 100644 --- a/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Client.java +++ b/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Client.java @@ -600,11 +600,26 @@ public static class Builder { /** * The AWS SDK S3 client. Required. *

- * This SHOULD be a CRT client. - * The reason for this is that the {@link S3TransferManager} requires the CRT client for doing multipart uploads. + * Important: You MUST either use the CRT client + * or wrap the regular S3AsyncClient in {@link software.amazon.awssdk.services.s3.internal.multipart.MultipartS3AsyncClient} + * in order for multipart uploads to work. Otherwise, files will be uploaded in single PUT requests. + *

+ * Additionally, only the CRT client supports multipart downloads. However, from what I've seen, the CRT client + * only works with AWS, and it does not work with third party S3 implementations. In which case, + * you should use the regular S3AsyncClient with the MultipartS3AsyncClient wrapper. *

* This client is NOT closed when the repository is closed, and the user is responsible for closing it when appropriate. + *

+ *

{@code
+         * // When using the CRT client, create it something like this:
+         * S3AsyncClient.crtBuilder().build();
          *
+         * // When using the regular async client, create it something like this:
+         * MultipartS3AsyncClient.create(
+         *         S3AsyncClient.builder().build(),
+         *         MultipartConfiguration.builder().build());
+         * // The important part here is that you use the MultipartS3AsyncClient wrapper!
+         * }
* @param s3Client s3 client * @return builder */ @@ -618,6 +633,9 @@ public Builder s3Client(S3AsyncClient s3Client) { * if it is specified, it can use the same S3 client as was supplied in {@link #s3Client(S3AsyncClient)}. * Otherwise, when not specified, the default transfer manager is created using the provided S3 Client. *

+ * Please refer to the docs on {@link #s3Client(S3AsyncClient)} for additional details on how the S3 client + * used by the transfer manager should be configured. + *

* When a transfer manager is provided, it will NOT be closed when the repository is closed, and the user is * responsible for closing it when appropriate. * diff --git a/ocfl-java-aws/src/test/java/io/ocfl/aws/OcflS3ClientTest.java b/ocfl-java-aws/src/test/java/io/ocfl/aws/OcflS3ClientTest.java index 04758f42..3faaa0ca 100644 --- a/ocfl-java-aws/src/test/java/io/ocfl/aws/OcflS3ClientTest.java +++ b/ocfl-java-aws/src/test/java/io/ocfl/aws/OcflS3ClientTest.java @@ -39,8 +39,10 @@ import software.amazon.awssdk.regions.Region; import software.amazon.awssdk.services.s3.S3AsyncClient; import software.amazon.awssdk.services.s3.S3Configuration; +import software.amazon.awssdk.services.s3.internal.multipart.MultipartS3AsyncClient; import software.amazon.awssdk.services.s3.model.ListObjectsV2Request; import software.amazon.awssdk.services.s3.model.S3Object; +import software.amazon.awssdk.services.s3.multipart.MultipartConfiguration; import software.amazon.awssdk.utils.AttributeMap; public class OcflS3ClientTest { @@ -76,18 +78,21 @@ public static void beforeAll() { OcflS3ClientTest.bucket = bucket; } else { LOG.info("Running tests against S3 Mock"); - awsS3Client = S3AsyncClient.builder() - .endpointOverride(URI.create(S3_MOCK.getServiceEndpoint())) - .region(Region.US_EAST_2) - .credentialsProvider(StaticCredentialsProvider.create(AwsBasicCredentials.create("foo", "bar"))) - .serviceConfiguration(S3Configuration.builder() - .pathStyleAccessEnabled(true) - .build()) - .httpClient(NettyNioAsyncHttpClient.builder() - .buildWithDefaults(AttributeMap.builder() - .put(TRUST_ALL_CERTIFICATES, Boolean.TRUE) - .build())) - .build(); + awsS3Client = MultipartS3AsyncClient.create( + S3AsyncClient.builder() + .endpointOverride(URI.create(S3_MOCK.getServiceEndpoint())) + .region(Region.US_EAST_2) + .credentialsProvider( + StaticCredentialsProvider.create(AwsBasicCredentials.create("foo", "bar"))) + .serviceConfiguration(S3Configuration.builder() + .pathStyleAccessEnabled(true) + 
.build()) + .httpClient(NettyNioAsyncHttpClient.builder() + .buildWithDefaults(AttributeMap.builder() + .put(TRUST_ALL_CERTIFICATES, Boolean.TRUE) + .build())) + .build(), + MultipartConfiguration.builder().build()); ; OcflS3ClientTest.bucket = UUID.randomUUID().toString(); awsS3Client diff --git a/ocfl-java-aws/src/test/java/io/ocfl/aws/OcflS3Test.java b/ocfl-java-aws/src/test/java/io/ocfl/aws/OcflS3Test.java index ae27cb71..027ee36e 100644 --- a/ocfl-java-aws/src/test/java/io/ocfl/aws/OcflS3Test.java +++ b/ocfl-java-aws/src/test/java/io/ocfl/aws/OcflS3Test.java @@ -42,8 +42,10 @@ import software.amazon.awssdk.regions.Region; import software.amazon.awssdk.services.s3.S3AsyncClient; import software.amazon.awssdk.services.s3.S3Configuration; +import software.amazon.awssdk.services.s3.internal.multipart.MultipartS3AsyncClient; import software.amazon.awssdk.services.s3.model.ListObjectsV2Request; import software.amazon.awssdk.services.s3.model.S3Object; +import software.amazon.awssdk.services.s3.multipart.MultipartConfiguration; import software.amazon.awssdk.utils.AttributeMap; public class OcflS3Test { @@ -82,18 +84,21 @@ public static void beforeAll() { OcflS3Test.bucket = bucket; } else { LOG.info("Running tests against S3 Mock"); - s3Client = S3AsyncClient.builder() - .endpointOverride(URI.create(S3_MOCK.getServiceEndpoint())) - .region(Region.US_EAST_2) - .credentialsProvider(StaticCredentialsProvider.create(AwsBasicCredentials.create("foo", "bar"))) - .serviceConfiguration(S3Configuration.builder() - .pathStyleAccessEnabled(true) - .build()) - .httpClient(NettyNioAsyncHttpClient.builder() - .buildWithDefaults(AttributeMap.builder() - .put(TRUST_ALL_CERTIFICATES, Boolean.TRUE) - .build())) - .build(); + s3Client = MultipartS3AsyncClient.create( + S3AsyncClient.builder() + .endpointOverride(URI.create(S3_MOCK.getServiceEndpoint())) + .region(Region.US_EAST_2) + .credentialsProvider( + StaticCredentialsProvider.create(AwsBasicCredentials.create("foo", "bar"))) + 
.serviceConfiguration(S3Configuration.builder() + .pathStyleAccessEnabled(true) + .build()) + .httpClient(NettyNioAsyncHttpClient.builder() + .buildWithDefaults(AttributeMap.builder() + .put(TRUST_ALL_CERTIFICATES, Boolean.TRUE) + .build())) + .build(), + MultipartConfiguration.builder().build()); OcflS3Test.bucket = UUID.randomUUID().toString(); s3Client.createBucket(request -> { request.bucket(OcflS3Test.bucket); diff --git a/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3ITestHelper.java b/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3ITestHelper.java index d0ba9db2..80806d20 100644 --- a/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3ITestHelper.java +++ b/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3ITestHelper.java @@ -23,9 +23,11 @@ import software.amazon.awssdk.regions.Region; import software.amazon.awssdk.services.s3.S3AsyncClient; import software.amazon.awssdk.services.s3.S3Configuration; +import software.amazon.awssdk.services.s3.internal.multipart.MultipartS3AsyncClient; import software.amazon.awssdk.services.s3.model.GetObjectRequest; import software.amazon.awssdk.services.s3.model.ListObjectsV2Request; import software.amazon.awssdk.services.s3.model.S3Object; +import software.amazon.awssdk.services.s3.multipart.MultipartConfiguration; import software.amazon.awssdk.utils.AttributeMap; public class S3ITestHelper { @@ -46,17 +48,20 @@ public static S3AsyncClient createS3Client(String accessKey, String secretKey) { } public static S3AsyncClient createMockS3Client(String endpoint) { - return S3AsyncClient.builder() - .endpointOverride(URI.create(endpoint)) - .region(Region.US_EAST_2) - .credentialsProvider(StaticCredentialsProvider.create(AwsBasicCredentials.create("foo", "bar"))) - .serviceConfiguration( - S3Configuration.builder().pathStyleAccessEnabled(true).build()) - .httpClient(NettyNioAsyncHttpClient.builder() - .buildWithDefaults(AttributeMap.builder() - .put(TRUST_ALL_CERTIFICATES, Boolean.TRUE) - .build())) - .build(); + return 
MultipartS3AsyncClient.create( + S3AsyncClient.builder() + .endpointOverride(URI.create(endpoint)) + .region(Region.US_EAST_2) + .credentialsProvider(StaticCredentialsProvider.create(AwsBasicCredentials.create("foo", "bar"))) + .serviceConfiguration(S3Configuration.builder() + .pathStyleAccessEnabled(true) + .build()) + .httpClient(NettyNioAsyncHttpClient.builder() + .buildWithDefaults(AttributeMap.builder() + .put(TRUST_ALL_CERTIFICATES, Boolean.TRUE) + .build())) + .build(), + MultipartConfiguration.builder().build()); } public void verifyRepo(Path expected, String bucket, String prefix) { From 78179d7e896c683c9b224a851687a6b63854131f Mon Sep 17 00:00:00 2001 From: Peter Winckles Date: Thu, 29 Feb 2024 22:51:37 -0600 Subject: [PATCH 10/21] fix download when using MultipartS3AsyncClient MultipartS3AsyncClient currently throws an unsupported operation exception if you attempt to download a file with it. However, it's needed if you want to use the transfer manager with a non-CRT client. So, we annoyingly have to fish the delegate out instead. 
--- .../main/java/io/ocfl/aws/OcflS3Client.java | 36 +++++++++++++++---- .../java/io/ocfl/aws/OcflS3ClientTest.java | 12 +++++-- .../java/io/ocfl/itest/s3/S3ITestHelper.java | 17 ++++++++- .../java/io/ocfl/itest/s3/S3StorageTest.java | 3 +- 4 files changed, 57 insertions(+), 11 deletions(-) diff --git a/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Client.java b/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Client.java index 5fe88768..ff7d2759 100644 --- a/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Client.java +++ b/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Client.java @@ -24,6 +24,7 @@ package io.ocfl.aws; +import io.ocfl.api.OcflRepository; import io.ocfl.api.exception.OcflIOException; import io.ocfl.api.util.Enforce; import io.ocfl.core.storage.cloud.CloudClient; @@ -51,6 +52,7 @@ import software.amazon.awssdk.core.async.AsyncRequestBody; import software.amazon.awssdk.core.async.AsyncResponseTransformer; import software.amazon.awssdk.services.s3.S3AsyncClient; +import software.amazon.awssdk.services.s3.internal.multipart.MultipartS3AsyncClient; import software.amazon.awssdk.services.s3.model.DeleteObjectsRequest; import software.amazon.awssdk.services.s3.model.GetObjectRequest; import software.amazon.awssdk.services.s3.model.HeadBucketRequest; @@ -80,6 +82,7 @@ public class OcflS3Client implements CloudClient { private final BiConsumer putObjectModifier; private final boolean shouldCloseManager; + private final boolean useMultipartDownload; /** * Used to create a new OcflS3Client instance. @@ -115,7 +118,7 @@ public OcflS3Client( String prefix, S3TransferManager transferManager, BiConsumer putObjectModifier) { - this.s3Client = Enforce.notNull(s3Client, "s3Client cannot be null"); + Enforce.notNull(s3Client, "s3Client cannot be null"); this.bucket = Enforce.notBlank(bucket, "bucket cannot be blank"); this.repoPrefix = sanitizeRepoPrefix(prefix == null ? 
"" : prefix); this.shouldCloseManager = transferManager == null; @@ -124,6 +127,13 @@ public OcflS3Client( : transferManager; this.keyBuilder = CloudObjectKey.builder().prefix(repoPrefix); this.putObjectModifier = putObjectModifier != null ? putObjectModifier : (k, b) -> {}; + // This hacky nonsense is needed until MultipartS3AsyncClient supports downloads + this.useMultipartDownload = !(s3Client instanceof MultipartS3AsyncClient); + if (s3Client instanceof MultipartS3AsyncClient) { + this.s3Client = (S3AsyncClient) ((MultipartS3AsyncClient) s3Client).delegate(); + } else { + this.s3Client = s3Client; + } } private static String sanitizeRepoPrefix(String repoPrefix) { @@ -282,12 +292,24 @@ public Path downloadFile(String srcPath, Path dstPath) { LOG.debug("Downloading from bucket {} key {} to {}", bucket, srcKey, dstPath); try { - var download = transferManager.downloadFile(req -> req.getObjectRequest( - getReq -> getReq.bucket(bucket).key(srcKey.getKey()).build()) - .destination(dstPath) - .build()); - - download.completionFuture().join(); + if (useMultipartDownload) { + transferManager + .downloadFile(req -> req.getObjectRequest(getReq -> getReq.bucket(bucket) + .key(srcKey.getKey()) + .build()) + .destination(dstPath) + .build()) + .completionFuture() + .join(); + } else { + s3Client.getObject( + GetObjectRequest.builder() + .bucket(bucket) + .key(srcKey.getKey()) + .build(), + dstPath) + .join(); + } } catch (RuntimeException e) { var cause = OcflS3Util.unwrapCompletionEx(e); if (wasNotFound(cause)) { diff --git a/ocfl-java-aws/src/test/java/io/ocfl/aws/OcflS3ClientTest.java b/ocfl-java-aws/src/test/java/io/ocfl/aws/OcflS3ClientTest.java index 3faaa0ca..95a617c1 100644 --- a/ocfl-java-aws/src/test/java/io/ocfl/aws/OcflS3ClientTest.java +++ b/ocfl-java-aws/src/test/java/io/ocfl/aws/OcflS3ClientTest.java @@ -152,7 +152,7 @@ public void putObjectWithModification() throws IOException { assertObjectsExist(bucket, List.of(key1, key2)); - try (var response = 
awsS3Client + try (var response = resolveClient() .getObject( builder -> builder.bucket(bucket) .key(FileUtil.pathJoinIgnoreEmpty(REPO_PREFIX, key1)) @@ -161,7 +161,7 @@ public void putObjectWithModification() throws IOException { .join()) { assertEquals("text/plain", response.response().contentType()); } - try (var response = awsS3Client + try (var response = resolveClient() .getObject( builder -> builder.bucket(bucket) .key(FileUtil.pathJoinIgnoreEmpty(REPO_PREFIX, key2)) @@ -411,4 +411,12 @@ private void assertObjectsExist(String bucket, Collection expectedKeys) assertThat(actualKeys, containsInAnyOrder(prefixedExpected.toArray(String[]::new))); } + + private S3AsyncClient resolveClient() { + if (awsS3Client instanceof MultipartS3AsyncClient) { + return (S3AsyncClient) ((MultipartS3AsyncClient) awsS3Client).delegate(); + } else { + return awsS3Client; + } + } } diff --git a/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3ITestHelper.java b/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3ITestHelper.java index 80806d20..91a0d536 100644 --- a/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3ITestHelper.java +++ b/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3ITestHelper.java @@ -64,6 +64,20 @@ public static S3AsyncClient createMockS3Client(String endpoint) { MultipartConfiguration.builder().build()); } + /** + * This nonsense is needed if you're using the MultipartS3AsyncClient client and want to download a file + * + * @param client + * @return + */ + public static S3AsyncClient resolveClient(S3AsyncClient client) { + if (client instanceof MultipartS3AsyncClient) { + return (S3AsyncClient) ((MultipartS3AsyncClient) client).delegate(); + } else { + return client; + } + } + public void verifyRepo(Path expected, String bucket, String prefix) { var expectedPaths = listAllFiles(expected); var actualObjects = listAllObjects(bucket, prefix); @@ -103,7 +117,8 @@ private List listAllFiles(Path root) { } private byte[] getObjectContent(String bucket, String prefix, 
String key) { - return s3Client.getObject( + return resolveClient(s3Client) + .getObject( GetObjectRequest.builder() .bucket(bucket) .key(prefix + "/" + key) diff --git a/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3StorageTest.java b/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3StorageTest.java index 21aef035..45e607af 100644 --- a/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3StorageTest.java +++ b/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3StorageTest.java @@ -85,7 +85,8 @@ protected void file(String path, String content) { } protected String readFile(String path) { - try (var content = s3Client.getObject( + try (var content = S3ITestHelper.resolveClient(s3Client) + .getObject( request -> { request.bucket(bucket).key(FileUtil.pathJoinFailEmpty(prefix(name), path)); }, From 08d116ce102c52c34e9d81c95eda9bc08660f1f0 Mon Sep 17 00:00:00 2001 From: Peter Winckles Date: Fri, 1 Mar 2024 20:59:08 -0600 Subject: [PATCH 11/21] remove use of synchronized --- .../ocfl/core/inventory/InventoryUpdater.java | 222 +++++++++++------- 1 file changed, 139 insertions(+), 83 deletions(-) diff --git a/ocfl-java-core/src/main/java/io/ocfl/core/inventory/InventoryUpdater.java b/ocfl-java-core/src/main/java/io/ocfl/core/inventory/InventoryUpdater.java index 4f6127a3..1488f055 100644 --- a/ocfl-java-core/src/main/java/io/ocfl/core/inventory/InventoryUpdater.java +++ b/ocfl-java-core/src/main/java/io/ocfl/core/inventory/InventoryUpdater.java @@ -43,6 +43,8 @@ import java.time.OffsetDateTime; import java.util.HashSet; import java.util.Set; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; /** * This class is used to record changes to OCFL objects and construct an updated inventory. 
@@ -60,6 +62,8 @@ public class InventoryUpdater { private final ContentPathMapper contentPathMapper; private final PathConstraintProcessor logicalPathConstraints; + private final Lock lock = new ReentrantLock(); + public static Builder builder() { return new Builder(); } @@ -186,13 +190,18 @@ private InventoryUpdater( * @param versionInfo information about the version * @return new inventory */ - public synchronized Inventory buildNewInventory(OffsetDateTime createdTimestamp, VersionInfo versionInfo) { - return inventoryBuilder - .addHeadVersion(versionBuilder - .versionInfo(versionInfo) - .created(createdTimestamp) - .build()) - .build(); + public Inventory buildNewInventory(OffsetDateTime createdTimestamp, VersionInfo versionInfo) { + lock.lock(); + try { + return inventoryBuilder + .addHeadVersion(versionBuilder + .versionInfo(versionInfo) + .created(createdTimestamp) + .build()) + .build(); + } finally { + lock.unlock(); + } } /** @@ -202,13 +211,21 @@ public synchronized Inventory buildNewInventory(OffsetDateTime createdTimestamp, * @param config the OCFL configuration * @return true if the inventory is upgraded; false otherwise */ - public synchronized boolean upgradeInventory(OcflConfig config) { - if (config.isUpgradeObjectsOnWrite() - && inventoryBuilder.getType().compareTo(config.getOcflVersion().getInventoryType()) < 0) { - inventoryBuilder.type(config.getOcflVersion().getInventoryType()); - return true; + public boolean upgradeInventory(OcflConfig config) { + lock.lock(); + try { + if (config.isUpgradeObjectsOnWrite() + && inventoryBuilder + .getType() + .compareTo(config.getOcflVersion().getInventoryType()) + < 0) { + inventoryBuilder.type(config.getOcflVersion().getInventoryType()); + return true; + } + return false; + } finally { + lock.unlock(); } - return false; } /** @@ -219,27 +236,32 @@ public synchronized boolean upgradeInventory(OcflConfig config) { * @param options options * @return details about the file if it was added to the manifest */ - 
public synchronized AddFileResult addFile(String fileId, String logicalPath, OcflOption... options) { - logicalPathConstraints.apply(logicalPath); + public AddFileResult addFile(String fileId, String logicalPath, OcflOption... options) { + lock.lock(); + try { + logicalPathConstraints.apply(logicalPath); - overwriteProtection(logicalPath, options); - versionBuilder.validateNonConflictingPath(logicalPath); + overwriteProtection(logicalPath, options); + versionBuilder.validateNonConflictingPath(logicalPath); - if (versionBuilder.containsLogicalPath(logicalPath)) { - var oldFileId = versionBuilder.removeLogicalPath(logicalPath); - removeFileFromManifest(oldFileId); - } + if (versionBuilder.containsLogicalPath(logicalPath)) { + var oldFileId = versionBuilder.removeLogicalPath(logicalPath); + removeFileFromManifest(oldFileId); + } - String contentPath = null; + String contentPath = null; - if (!inventoryBuilder.containsFileId(fileId)) { - contentPath = contentPathMapper.fromLogicalPath(logicalPath); - inventoryBuilder.addFileToManifest(fileId, contentPath); - } + if (!inventoryBuilder.containsFileId(fileId)) { + contentPath = contentPathMapper.fromLogicalPath(logicalPath); + inventoryBuilder.addFileToManifest(fileId, contentPath); + } - versionBuilder.addFile(fileId, logicalPath); + versionBuilder.addFile(fileId, logicalPath); - return new AddFileResult(contentPath, pathUnderContentDir(contentPath)); + return new AddFileResult(contentPath, pathUnderContentDir(contentPath)); + } finally { + lock.unlock(); + } } /** @@ -260,17 +282,22 @@ public String innerContentPath(String logicalPath) { * @param algorithm algorithm used to calculate the digest * @param digest the digest value */ - public synchronized void addFixity(String logicalPath, DigestAlgorithm algorithm, String digest) { - if (algorithm.equals(inventory.getDigestAlgorithm())) { - return; - } + public void addFixity(String logicalPath, DigestAlgorithm algorithm, String digest) { + lock.lock(); + try { + if 
(algorithm.equals(inventory.getDigestAlgorithm())) { + return; + } - var fileId = versionBuilder.getFileId(logicalPath); + var fileId = versionBuilder.getFileId(logicalPath); - if (fileId != null) { - inventoryBuilder.getContentPaths(fileId).forEach(contentPath -> { - inventoryBuilder.addFixityForFile(contentPath, algorithm, digest); - }); + if (fileId != null) { + inventoryBuilder.getContentPaths(fileId).forEach(contentPath -> { + inventoryBuilder.addFixityForFile(contentPath, algorithm, digest); + }); + } + } finally { + lock.unlock(); } } @@ -281,26 +308,36 @@ public synchronized void addFixity(String logicalPath, DigestAlgorithm algorithm * @param algorithm the digest algorithm * @return the digest or null */ - public synchronized String getFixityDigest(String logicalPath, DigestAlgorithm algorithm) { - if (inventory.getDigestAlgorithm().equals(algorithm)) { - return versionBuilder.getFileId(logicalPath); - } + public String getFixityDigest(String logicalPath, DigestAlgorithm algorithm) { + lock.lock(); + try { + if (inventory.getDigestAlgorithm().equals(algorithm)) { + return versionBuilder.getFileId(logicalPath); + } - String digest = null; - var fileId = versionBuilder.getFileId(logicalPath); + String digest = null; + var fileId = versionBuilder.getFileId(logicalPath); - if (fileId != null) { - digest = inventoryBuilder.getFileFixity(fileId, algorithm); - } + if (fileId != null) { + digest = inventoryBuilder.getFileFixity(fileId, algorithm); + } - return digest; + return digest; + } finally { + lock.unlock(); + } } /** * Removes all entries from the fixity block. 
*/ - public synchronized void clearFixity() { - inventoryBuilder.clearFixity(); + public void clearFixity() { + lock.lock(); + try { + inventoryBuilder.clearFixity(); + } finally { + lock.unlock(); + } } /** @@ -310,9 +347,14 @@ public synchronized void clearFixity() { * @param logicalPath logical path to the file * @return files that were removed from the manifest */ - public synchronized Set removeFile(String logicalPath) { - var fileId = versionBuilder.removeLogicalPath(logicalPath); - return removeFileFromManifestWithResults(fileId); + public Set removeFile(String logicalPath) { + lock.lock(); + try { + var fileId = versionBuilder.removeLogicalPath(logicalPath); + return removeFileFromManifestWithResults(fileId); + } finally { + lock.unlock(); + } } /** @@ -325,27 +367,31 @@ public synchronized Set removeFile(String logicalPath) { * @param options options * @return files that were removed from the manifest */ - public synchronized Set renameFile( - String srcLogicalPath, String dstLogicalPath, OcflOption... options) { - logicalPathConstraints.apply(dstLogicalPath); + public Set renameFile(String srcLogicalPath, String dstLogicalPath, OcflOption... 
options) { + lock.lock(); + try { + logicalPathConstraints.apply(dstLogicalPath); - var srcDigest = versionBuilder.getFileId(srcLogicalPath); + var srcDigest = versionBuilder.getFileId(srcLogicalPath); - if (srcDigest == null) { - throw new OcflInputException( - String.format("The following path was not found in object %s: %s", objectId, srcLogicalPath)); - } + if (srcDigest == null) { + throw new OcflInputException( + String.format("The following path was not found in object %s: %s", objectId, srcLogicalPath)); + } - overwriteProtection(dstLogicalPath, options); - versionBuilder.validateNonConflictingPath(dstLogicalPath); + overwriteProtection(dstLogicalPath, options); + versionBuilder.validateNonConflictingPath(dstLogicalPath); - var dstFileId = versionBuilder.getFileId(dstLogicalPath); + var dstFileId = versionBuilder.getFileId(dstLogicalPath); - versionBuilder.removeLogicalPath(srcLogicalPath); - versionBuilder.removeLogicalPath(dstLogicalPath); - versionBuilder.addFile(srcDigest, dstLogicalPath); + versionBuilder.removeLogicalPath(srcLogicalPath); + versionBuilder.removeLogicalPath(dstLogicalPath); + versionBuilder.addFile(srcDigest, dstLogicalPath); - return removeFileFromManifestWithResults(dstFileId); + return removeFileFromManifestWithResults(dstFileId); + } finally { + lock.unlock(); + } } /** @@ -359,34 +405,44 @@ public synchronized Set renameFile( * @param options options * @return files that were removed from the manifest */ - public synchronized Set reinstateFile( + public Set reinstateFile( VersionNum sourceVersion, String srcLogicalPath, String dstLogicalPath, OcflOption... 
options) { - logicalPathConstraints.apply(dstLogicalPath); + lock.lock(); + try { + logicalPathConstraints.apply(dstLogicalPath); - var srcDigest = getDigestFromVersion(sourceVersion, srcLogicalPath); + var srcDigest = getDigestFromVersion(sourceVersion, srcLogicalPath); - if (srcDigest == null) { - throw new OcflInputException(String.format( - "Object %s version %s does not contain a file at %s", objectId, sourceVersion, srcLogicalPath)); - } + if (srcDigest == null) { + throw new OcflInputException(String.format( + "Object %s version %s does not contain a file at %s", objectId, sourceVersion, srcLogicalPath)); + } - overwriteProtection(dstLogicalPath, options); - versionBuilder.validateNonConflictingPath(dstLogicalPath); + overwriteProtection(dstLogicalPath, options); + versionBuilder.validateNonConflictingPath(dstLogicalPath); - var dstFileId = versionBuilder.getFileId(dstLogicalPath); + var dstFileId = versionBuilder.getFileId(dstLogicalPath); - versionBuilder.removeLogicalPath(dstLogicalPath); - versionBuilder.addFile(srcDigest, dstLogicalPath); + versionBuilder.removeLogicalPath(dstLogicalPath); + versionBuilder.addFile(srcDigest, dstLogicalPath); - return removeFileFromManifestWithResults(dstFileId); + return removeFileFromManifestWithResults(dstFileId); + } finally { + lock.unlock(); + } } /** * Removes all of the files from the version's state. 
*/ - public synchronized void clearState() { - var state = new HashSet<>(versionBuilder.getInvertedState().keySet()); - state.forEach(this::removeFile); + public void clearState() { + lock.lock(); + try { + var state = new HashSet<>(versionBuilder.getInvertedState().keySet()); + state.forEach(this::removeFile); + } finally { + lock.unlock(); + } } private String getDigestFromVersion(VersionNum versionNum, String logicalPath) { From 68841cac82febeb1214ee22c8c5698ba586064ac Mon Sep 17 00:00:00 2001 From: Peter Winckles Date: Sat, 2 Mar 2024 13:59:20 -0600 Subject: [PATCH 12/21] fix duplicate file bug and add concurrency test 1. Fixes a bug that can result in a file being incorrectly deleted from the staging directory. The bug is triggered by adding two files with identical content, and then adding the first file a second time, all with the same commit. 2. Adds a test for concurrently writing to a version. --- .../ocfl/core/inventory/InventoryUpdater.java | 16 +- ocfl-java-itest/pom.xml | 5 + .../test/java/io/ocfl/itest/OcflITest.java | 2 +- .../itest/filesystem/FileSystemOcflITest.java | 148 +++++++++ .../java/io/ocfl/itest/s3/S3OcflITest.java | 153 +++++++++ .../expected/output/repo18/o1v1/a/b/c/d/0.txt | 1 + .../expected/output/repo18/o1v1/a/b/c/d/1.txt | 1 + .../expected/output/repo18/o1v1/a/b/c/d/2.txt | 1 + .../expected/output/repo18/o1v1/a/b/c/d/3.txt | 1 + .../expected/output/repo18/o1v1/a/b/c/d/4.txt | 1 + .../output/repo18/o1v1/a/b/c/file1.txt | 1 + .../expected/output/repo18/o1v2/a/b/c/d/1.txt | 1 + .../expected/output/repo18/o1v2/a/b/c/d/3.txt | 1 + .../output/repo18/o1v2/a/b/c/file2.txt | 1 + .../expected/output/repo18/o1v2/a/new.txt | 1 + .../expected/output/repo18/o1v2/test.txt | 1 + .../expected/output/repo18/o1v3/a/b/c/d/1.txt | 1 + .../expected/output/repo18/o1v3/a/b/c/d/3.txt | 1 + .../output/repo18/o1v3/a/b/c/file2.txt | 1 + .../expected/output/repo18/o1v3/a/new.txt | 1 + .../0004-hashed-n-tuple-storage-layout.md | 303 ++++++++++++++++++ 
.../output/repo18/o1v3/repo15/0=ocfl_1.1 | 1 + .../0=ocfl_object_1.1 | 1 + .../inventory.json | 24 ++ .../inventory.json.sha512 | 1 + .../v1/content/file1 | 1 + .../v1/inventory.json | 24 ++ .../v1/inventory.json.sha512 | 1 + .../config.json | 7 + .../repo18/o1v3/repo15/ocfl_extensions_1.0.md | 118 +++++++ .../repo18/o1v3/repo15/ocfl_layout.json | 4 + .../expected/output/repo18/o1v3/test.txt | 1 + .../expected/output/repo18/o1v4/a/b/c/d/1.txt | 1 + .../expected/output/repo18/o1v4/a/b/c/d/3.txt | 1 + .../output/repo18/o1v4/a/b/c/file2.txt | 1 + .../expected/output/repo18/o1v4/a/new.txt | 1 + .../0004-hashed-n-tuple-storage-layout.md | 303 ++++++++++++++++++ .../output/repo18/o1v4/repo15/0=ocfl_1.1 | 1 + .../0=ocfl_object_1.1 | 1 + .../inventory.json | 24 ++ .../inventory.json.sha512 | 1 + .../v1/content/file1 | 1 + .../v1/inventory.json | 24 ++ .../v1/inventory.json.sha512 | 1 + .../config.json | 7 + .../repo18/o1v4/repo15/ocfl_extensions_1.0.md | 118 +++++++ .../repo18/o1v4/repo15/ocfl_layout.json | 4 + .../0004-hashed-n-tuple-storage-layout.md | 303 ++++++++++++++++++ .../output/repo18/o1v4/repo17/0=ocfl_1.1 | 1 + .../0=ocfl_object_1.1 | 1 + .../inventory.json | 67 ++++ .../inventory.json.sha512 | 1 + .../v1/content/file1 | 1 + .../v1/inventory.json | 24 ++ .../v1/inventory.json.sha512 | 1 + .../v2/content/dir1/file2 | 1 + .../v2/content/file3 | 1 + .../v2/inventory.json | 39 +++ .../v2/inventory.json.sha512 | 1 + .../v3/content/.gitkeep | 0 .../v3/inventory.json | 52 +++ .../v3/inventory.json.sha512 | 1 + .../v4/content/file5 | 1 + .../v4/inventory.json | 67 ++++ .../v4/inventory.json.sha512 | 1 + .../config.json | 7 + .../repo18/o1v4/repo17/ocfl_extensions_1.0.md | 118 +++++++ .../repo18/o1v4/repo17/ocfl_layout.json | 4 + .../expected/output/repo18/o1v4/test.txt | 1 + 69 files changed, 2004 insertions(+), 3 deletions(-) create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v1/a/b/c/d/0.txt create mode 100644 
ocfl-java-itest/src/test/resources/expected/output/repo18/o1v1/a/b/c/d/1.txt create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v1/a/b/c/d/2.txt create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v1/a/b/c/d/3.txt create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v1/a/b/c/d/4.txt create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v1/a/b/c/file1.txt create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v2/a/b/c/d/1.txt create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v2/a/b/c/d/3.txt create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v2/a/b/c/file2.txt create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v2/a/new.txt create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v2/test.txt create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/a/b/c/d/1.txt create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/a/b/c/d/3.txt create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/a/b/c/file2.txt create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/a/new.txt create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/0004-hashed-n-tuple-storage-layout.md create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/0=ocfl_1.1 create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/0=ocfl_object_1.1 create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/inventory.json create mode 100644 
ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/inventory.json.sha512 create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/v1/content/file1 create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/v1/inventory.json create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/v1/inventory.json.sha512 create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/extensions/0004-hashed-n-tuple-storage-layout/config.json create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/ocfl_extensions_1.0.md create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/ocfl_layout.json create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/test.txt create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/a/b/c/d/1.txt create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/a/b/c/d/3.txt create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/a/b/c/file2.txt create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/a/new.txt create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/0004-hashed-n-tuple-storage-layout.md create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/0=ocfl_1.1 create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/0=ocfl_object_1.1 create mode 100644 
ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/inventory.json create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/inventory.json.sha512 create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/v1/content/file1 create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/v1/inventory.json create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/v1/inventory.json.sha512 create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/extensions/0004-hashed-n-tuple-storage-layout/config.json create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/ocfl_extensions_1.0.md create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/ocfl_layout.json create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/0004-hashed-n-tuple-storage-layout.md create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/0=ocfl_1.1 create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/0=ocfl_object_1.1 create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/inventory.json create mode 100644 
ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/inventory.json.sha512 create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v1/content/file1 create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v1/inventory.json create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v1/inventory.json.sha512 create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v2/content/dir1/file2 create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v2/content/file3 create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v2/inventory.json create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v2/inventory.json.sha512 create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v3/content/.gitkeep create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v3/inventory.json create mode 100644 
ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v3/inventory.json.sha512 create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v4/content/file5 create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v4/inventory.json create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v4/inventory.json.sha512 create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/extensions/0004-hashed-n-tuple-storage-layout/config.json create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/ocfl_extensions_1.0.md create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/ocfl_layout.json create mode 100644 ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/test.txt diff --git a/ocfl-java-core/src/main/java/io/ocfl/core/inventory/InventoryUpdater.java b/ocfl-java-core/src/main/java/io/ocfl/core/inventory/InventoryUpdater.java index 1488f055..bf62409c 100644 --- a/ocfl-java-core/src/main/java/io/ocfl/core/inventory/InventoryUpdater.java +++ b/ocfl-java-core/src/main/java/io/ocfl/core/inventory/InventoryUpdater.java @@ -244,8 +244,20 @@ public AddFileResult addFile(String fileId, String logicalPath, OcflOption... 
op overwriteProtection(logicalPath, options); versionBuilder.validateNonConflictingPath(logicalPath); - if (versionBuilder.containsLogicalPath(logicalPath)) { - var oldFileId = versionBuilder.removeLogicalPath(logicalPath); + var oldFileId = versionBuilder.getFileId(logicalPath); + + if (fileId.equalsIgnoreCase(oldFileId)) { + var contentPath = contentPathMapper.fromLogicalPath(logicalPath); + if (inventoryBuilder.containsContentPath(contentPath)) { + // This means that the exact same file was added multiple times and it is being used as the source + // of the file content + return new AddFileResult(contentPath, pathUnderContentDir(contentPath)); + } + } + + // This is the case when the same logical path was added multiple times, but the content changed + if (oldFileId != null) { + versionBuilder.removeLogicalPath(logicalPath); removeFileFromManifest(oldFileId); } diff --git a/ocfl-java-itest/pom.xml b/ocfl-java-itest/pom.xml index e4201f95..c227458c 100644 --- a/ocfl-java-itest/pom.xml +++ b/ocfl-java-itest/pom.xml @@ -71,6 +71,11 @@ junit-jupiter test + + org.assertj + assertj-core + test + org.hamcrest hamcrest diff --git a/ocfl-java-itest/src/test/java/io/ocfl/itest/OcflITest.java b/ocfl-java-itest/src/test/java/io/ocfl/itest/OcflITest.java index d2cda81f..b41ae09f 100644 --- a/ocfl-java-itest/src/test/java/io/ocfl/itest/OcflITest.java +++ b/ocfl-java-itest/src/test/java/io/ocfl/itest/OcflITest.java @@ -2790,7 +2790,7 @@ private void verifyStream(Path expectedFile, OcflObjectVersionFile actual) throw } } - private Path outputPath(String repoName, String path) { + protected Path outputPath(String repoName, String path) { try { var output = outputDir.resolve(Paths.get(repoName, path)); Files.createDirectories(output.getParent()); diff --git a/ocfl-java-itest/src/test/java/io/ocfl/itest/filesystem/FileSystemOcflITest.java b/ocfl-java-itest/src/test/java/io/ocfl/itest/filesystem/FileSystemOcflITest.java index 2ada8119..84c87538 100644 --- 
a/ocfl-java-itest/src/test/java/io/ocfl/itest/filesystem/FileSystemOcflITest.java +++ b/ocfl-java-itest/src/test/java/io/ocfl/itest/filesystem/FileSystemOcflITest.java @@ -7,7 +7,9 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import io.ocfl.api.OcflConstants; +import io.ocfl.api.OcflOption; import io.ocfl.api.OcflRepository; +import io.ocfl.api.exception.OcflInputException; import io.ocfl.api.model.ObjectVersionId; import io.ocfl.core.OcflRepositoryBuilder; import io.ocfl.core.cache.NoOpCache; @@ -29,8 +31,13 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Consumer; import java.util.stream.Collectors; +import org.assertj.core.api.Assertions; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.condition.EnabledOnOs; import org.junit.jupiter.api.condition.OS; @@ -171,6 +178,147 @@ public void shouldNotCreateEmptyContentDirWhenVersionHasNoContent() { assertFalse(Files.exists(v2ContentPath), "empty content directories should not exist"); } + // There appears to be a bug with s3mock's copy object that makes this test fail for some reason + @Test + public void writeToObjectConcurrently() { + var repoName = "repo18"; + var repo = defaultRepo(repoName); + + var objectId = "o1"; + + var executor = Executors.newFixedThreadPool(10); + + try { + repo.updateObject(ObjectVersionId.head(objectId), defaultVersionInfo.setMessage("1"), updater -> { + var latch = new CountDownLatch(10); + var futures = new ArrayList>(); + + for (int i = 0; i < 5; i++) { + futures.add(executor.submit(() -> { + latch.countDown(); + updater.writeFile( + ITestHelper.streamString("file1".repeat(100)), "a/b/c/file1.txt", OcflOption.OVERWRITE); + })); + } + + for (int i = 0; i < 5; i++) { + var n = i; + futures.add(executor.submit(() -> { + 
latch.countDown(); + updater.writeFile( + ITestHelper.streamString(String.valueOf(n).repeat(100)), + String.format("a/b/c/d/%s.txt", n)); + })); + } + + joinFutures(futures); + }); + + repo.updateObject(ObjectVersionId.head(objectId), defaultVersionInfo.setMessage("2"), updater -> { + var latch = new CountDownLatch(10); + var futures = new ArrayList>(); + + var errors = new AtomicInteger(); + + for (int i = 0; i < 5; i++) { + futures.add(executor.submit(() -> { + latch.countDown(); + try { + updater.renameFile("a/b/c/file1.txt", "a/b/c/file2.txt"); + } catch (OcflInputException e) { + errors.getAndIncrement(); + } + })); + } + + futures.add(executor.submit(() -> { + latch.countDown(); + updater.removeFile("a/b/c/d/0.txt"); + })); + futures.add(executor.submit(() -> { + latch.countDown(); + updater.removeFile("a/b/c/d/2.txt"); + })); + futures.add(executor.submit(() -> { + latch.countDown(); + updater.writeFile(ITestHelper.streamString("test".repeat(100)), "test.txt"); + })); + futures.add(executor.submit(() -> { + latch.countDown(); + updater.renameFile("a/b/c/d/4.txt", "a/b/c/d/1.txt", OcflOption.OVERWRITE); + })); + futures.add(executor.submit(() -> { + latch.countDown(); + updater.writeFile(ITestHelper.streamString("new".repeat(100)), "a/new.txt"); + })); + + joinFutures(futures); + + assertEquals(4, errors.get(), "4 out of 5 renames should have failed"); + }); + + repo.updateObject(ObjectVersionId.head(objectId), defaultVersionInfo.setMessage("3"), updater -> { + var latch = new CountDownLatch(5); + var futures = new ArrayList>(); + + for (int i = 0; i < 5; i++) { + futures.add(executor.submit(() -> { + latch.countDown(); + updater.addPath(ITestHelper.expectedRepoPath("repo15"), "repo15", OcflOption.OVERWRITE); + })); + } + + joinFutures(futures); + }); + + repo.updateObject(ObjectVersionId.head(objectId), defaultVersionInfo.setMessage("4"), updater -> { + var root = ITestHelper.expectedRepoPath("repo17"); + var futures = new ArrayList>(); + + try (var files = 
Files.find(root, Integer.MAX_VALUE, (file, attrs) -> attrs.isRegularFile())) { + files.map(file -> executor.submit(() -> updater.addPath( + file, "repo17/" + FileUtil.pathToStringStandardSeparator(root.relativize(file))))) + .forEach(futures::add); + } catch (IOException e) { + throw new RuntimeException(e); + } + + joinFutures(futures); + }); + + Assertions.assertThat(repo.validateObject(objectId, true).getErrors()) + .isEmpty(); + + var outputPath1 = outputPath(repoName, objectId + "v1"); + repo.getObject(ObjectVersionId.version(objectId, 1), outputPath1); + ITestHelper.verifyDirectoryContentsSame(ITestHelper.expectedOutputPath(repoName, "o1v1"), outputPath1); + + var outputPath2 = outputPath(repoName, objectId + "v2"); + repo.getObject(ObjectVersionId.version(objectId, 2), outputPath2); + ITestHelper.verifyDirectoryContentsSame(ITestHelper.expectedOutputPath(repoName, "o1v2"), outputPath2); + + var outputPath3 = outputPath(repoName, objectId + "v3"); + repo.getObject(ObjectVersionId.version(objectId, 3), outputPath3); + ITestHelper.verifyDirectoryContentsSame(ITestHelper.expectedOutputPath(repoName, "o1v3"), outputPath3); + + var outputPath4 = outputPath(repoName, objectId + "v4"); + repo.getObject(ObjectVersionId.version(objectId, 4), outputPath4); + ITestHelper.verifyDirectoryContentsSame(ITestHelper.expectedOutputPath(repoName, "o1v4"), outputPath4); + } finally { + executor.shutdownNow(); + } + } + + private void joinFutures(List> futures) { + for (var future : futures) { + try { + future.get(); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + } + @Override protected void onBefore() { reposDir = UncheckedFiles.createDirectories(tempRoot.resolve("repos")); diff --git a/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3OcflITest.java b/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3OcflITest.java index 0a1e3c81..9b0e57cd 100644 --- a/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3OcflITest.java +++ 
b/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3OcflITest.java @@ -8,7 +8,9 @@ import com.adobe.testing.s3mock.junit5.S3MockExtension; import com.mchange.v2.c3p0.ComboPooledDataSource; +import io.ocfl.api.OcflOption; import io.ocfl.api.OcflRepository; +import io.ocfl.api.exception.OcflInputException; import io.ocfl.api.model.ObjectVersionId; import io.ocfl.api.model.VersionInfo; import io.ocfl.aws.OcflS3Client; @@ -26,15 +28,22 @@ import io.ocfl.itest.OcflITest; import java.io.ByteArrayInputStream; import java.io.IOException; +import java.nio.file.Files; import java.nio.file.Path; +import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.UUID; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; import java.util.concurrent.ThreadLocalRandom; +import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Consumer; import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; +import org.assertj.core.api.Assertions; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; @@ -234,6 +243,150 @@ public void hashedIdLayoutLongEncoded() { verifyRepo(repoName); } + // There appears to be a bug with s3mock's copy object that makes this test fail for some reason + @Test + @EnabledIfEnvironmentVariable(named = ENV_ACCESS_KEY, matches = ".+") + @EnabledIfEnvironmentVariable(named = ENV_SECRET_KEY, matches = ".+") + @EnabledIfEnvironmentVariable(named = ENV_BUCKET, matches = ".+") + public void writeToObjectConcurrently() { + var repoName = "repo18"; + var repo = defaultRepo(repoName); + + var objectId = "o1"; + + var executor = Executors.newFixedThreadPool(10); + + try { + repo.updateObject(ObjectVersionId.head(objectId), defaultVersionInfo.setMessage("1"), updater -> { + var latch = new CountDownLatch(10); + var futures = new ArrayList>(); + + for (int i = 
0; i < 5; i++) { + futures.add(executor.submit(() -> { + latch.countDown(); + updater.writeFile( + ITestHelper.streamString("file1".repeat(100)), "a/b/c/file1.txt", OcflOption.OVERWRITE); + })); + } + + for (int i = 0; i < 5; i++) { + var n = i; + futures.add(executor.submit(() -> { + latch.countDown(); + updater.writeFile( + ITestHelper.streamString(String.valueOf(n).repeat(100)), + String.format("a/b/c/d/%s.txt", n)); + })); + } + + joinFutures(futures); + }); + + repo.updateObject(ObjectVersionId.head(objectId), defaultVersionInfo.setMessage("2"), updater -> { + var latch = new CountDownLatch(10); + var futures = new ArrayList>(); + + var errors = new AtomicInteger(); + + for (int i = 0; i < 5; i++) { + futures.add(executor.submit(() -> { + latch.countDown(); + try { + updater.renameFile("a/b/c/file1.txt", "a/b/c/file2.txt"); + } catch (OcflInputException e) { + errors.getAndIncrement(); + } + })); + } + + futures.add(executor.submit(() -> { + latch.countDown(); + updater.removeFile("a/b/c/d/0.txt"); + })); + futures.add(executor.submit(() -> { + latch.countDown(); + updater.removeFile("a/b/c/d/2.txt"); + })); + futures.add(executor.submit(() -> { + latch.countDown(); + updater.writeFile(ITestHelper.streamString("test".repeat(100)), "test.txt"); + })); + futures.add(executor.submit(() -> { + latch.countDown(); + updater.renameFile("a/b/c/d/4.txt", "a/b/c/d/1.txt", OcflOption.OVERWRITE); + })); + futures.add(executor.submit(() -> { + latch.countDown(); + updater.writeFile(ITestHelper.streamString("new".repeat(100)), "a/new.txt"); + })); + + joinFutures(futures); + + assertEquals(4, errors.get(), "4 out of 5 renames should have failed"); + }); + + repo.updateObject(ObjectVersionId.head(objectId), defaultVersionInfo.setMessage("3"), updater -> { + var latch = new CountDownLatch(5); + var futures = new ArrayList>(); + + for (int i = 0; i < 5; i++) { + futures.add(executor.submit(() -> { + latch.countDown(); + updater.addPath(ITestHelper.expectedRepoPath("repo15"), 
"repo15", OcflOption.OVERWRITE); + })); + } + + joinFutures(futures); + }); + + repo.updateObject(ObjectVersionId.head(objectId), defaultVersionInfo.setMessage("4"), updater -> { + var root = ITestHelper.expectedRepoPath("repo17"); + var futures = new ArrayList>(); + + try (var files = Files.find(root, Integer.MAX_VALUE, (file, attrs) -> attrs.isRegularFile())) { + files.map(file -> executor.submit(() -> updater.addPath( + file, "repo17/" + FileUtil.pathToStringStandardSeparator(root.relativize(file))))) + .forEach(futures::add); + } catch (IOException e) { + throw new RuntimeException(e); + } + + joinFutures(futures); + }); + + Assertions.assertThat(repo.validateObject(objectId, true).getErrors()) + .isEmpty(); + + var outputPath1 = outputPath(repoName, objectId + "v1"); + repo.getObject(ObjectVersionId.version(objectId, 1), outputPath1); + ITestHelper.verifyDirectoryContentsSame(ITestHelper.expectedOutputPath(repoName, "o1v1"), outputPath1); + + var outputPath2 = outputPath(repoName, objectId + "v2"); + repo.getObject(ObjectVersionId.version(objectId, 2), outputPath2); + ITestHelper.verifyDirectoryContentsSame(ITestHelper.expectedOutputPath(repoName, "o1v2"), outputPath2); + + var outputPath3 = outputPath(repoName, objectId + "v3"); + repo.getObject(ObjectVersionId.version(objectId, 3), outputPath3); + ITestHelper.verifyDirectoryContentsSame(ITestHelper.expectedOutputPath(repoName, "o1v3"), outputPath3); + + var outputPath4 = outputPath(repoName, objectId + "v4"); + repo.getObject(ObjectVersionId.version(objectId, 4), outputPath4); + ITestHelper.verifyDirectoryContentsSame(ITestHelper.expectedOutputPath(repoName, "o1v4"), outputPath4); + } finally { + executor.shutdownNow(); + } + } + + private void joinFutures(List> futures) { + for (var future : futures) { + try { + future.get(); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + } + @Override protected OcflRepository defaultRepo(String name, Consumer consumer) { var builder = new 
OcflRepositoryBuilder() diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v1/a/b/c/d/0.txt b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v1/a/b/c/d/0.txt new file mode 100644 index 00000000..e70fee9a --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v1/a/b/c/d/0.txt @@ -0,0 +1 @@ +0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v1/a/b/c/d/1.txt b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v1/a/b/c/d/1.txt new file mode 100644 index 00000000..8bb6cc73 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v1/a/b/c/d/1.txt @@ -0,0 +1 @@ +1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111 \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v1/a/b/c/d/2.txt b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v1/a/b/c/d/2.txt new file mode 100644 index 00000000..e5a9d5e2 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v1/a/b/c/d/2.txt @@ -0,0 +1 @@ +2222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222 \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v1/a/b/c/d/3.txt b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v1/a/b/c/d/3.txt new file mode 100644 index 00000000..451d6d0d --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v1/a/b/c/d/3.txt @@ -0,0 +1 @@ +3333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333 \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v1/a/b/c/d/4.txt 
b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v1/a/b/c/d/4.txt new file mode 100644 index 00000000..11db4a97 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v1/a/b/c/d/4.txt @@ -0,0 +1 @@ +4444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444 \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v1/a/b/c/file1.txt b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v1/a/b/c/file1.txt new file mode 100644 index 00000000..5544d839 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v1/a/b/c/file1.txt @@ -0,0 +1 @@ +file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1 \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v2/a/b/c/d/1.txt b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v2/a/b/c/d/1.txt new file mode 100644 index 00000000..11db4a97 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v2/a/b/c/d/1.txt @@ -0,0 +1 @@ +4444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444 \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v2/a/b/c/d/3.txt b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v2/a/b/c/d/3.txt new file mode 100644 index 00000000..451d6d0d --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v2/a/b/c/d/3.txt @@ 
-0,0 +1 @@ +3333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333 \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v2/a/b/c/file2.txt b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v2/a/b/c/file2.txt new file mode 100644 index 00000000..5544d839 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v2/a/b/c/file2.txt @@ -0,0 +1 @@ +file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1 \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v2/a/new.txt b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v2/a/new.txt new file mode 100644 index 00000000..1048ef56 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v2/a/new.txt @@ -0,0 +1 @@ +newnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnew \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v2/test.txt b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v2/test.txt new file mode 100644 index 00000000..588846e7 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v2/test.txt @@ -0,0 +1 @@ 
+testtesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttest \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/a/b/c/d/1.txt b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/a/b/c/d/1.txt new file mode 100644 index 00000000..11db4a97 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/a/b/c/d/1.txt @@ -0,0 +1 @@ +4444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444 \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/a/b/c/d/3.txt b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/a/b/c/d/3.txt new file mode 100644 index 00000000..451d6d0d --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/a/b/c/d/3.txt @@ -0,0 +1 @@ +3333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333 \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/a/b/c/file2.txt b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/a/b/c/file2.txt new file mode 100644 index 00000000..5544d839 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/a/b/c/file2.txt @@ -0,0 +1 @@ 
+file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1 \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/a/new.txt b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/a/new.txt new file mode 100644 index 00000000..1048ef56 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/a/new.txt @@ -0,0 +1 @@ +newnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnew \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/0004-hashed-n-tuple-storage-layout.md b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/0004-hashed-n-tuple-storage-layout.md new file mode 100644 index 00000000..81a4dc1b --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/0004-hashed-n-tuple-storage-layout.md @@ -0,0 +1,303 @@ +# OCFL Community Extension 0004: Hashed N-tuple Storage Layout + +* **Extension Name:** 0004-hashed-n-tuple-storage-layout +* **Authors:** Peter Winckles +* **Minimum OCFL Version:** 1.0 +* **OCFL Community Extensions Version:** 1.0 +* **Obsoletes:** n/a +* **Obsoleted by:** n/a + +## Overview + +This storage root extension describes how to safely map OCFL object identifiers +of any length, 
containing any characters to OCFL object root directories with +the primary goals of ensuring portability and filesystem performance at the cost +of directory name transparency. + +Using this extension, OCFL object identifiers are hashed and encoded +as lowercase hex strings. These digests are then divided into _N_ +n-tuple segments, which are used to create nested paths under the OCFL +storage root. + +This approach allows OCFL object identifiers of any composition to be evenly +distributed across the storage hierarchy. The maximum number of files under any +given directory is controlled by the number of characters in each n-tuple, and +the tree depth is controlled by the number of n-tuple segments each digest is +divided into. Additionally, it obviates the need to handle special characters in +OCFL object identifiers because the mapped directory names will only ever +contain the characters `0-9a-f`. + +However, this comes at the cost of not being able to identify the OCFL object +identifier of an object simply by browsing the OCFL storage hierarchy. The ID of +an object may only be found within its `inventory.json`. 
+ +## Parameters + +### Summary + +* **Name:** `digestAlgorithm` + * **Description:** The digest algorithm to apply on the OCFL object + identifier; MUST be an algorithm that is allowed in the OCFL fixity block + * **Type:** string + * **Constraints:** Must not be empty + * **Default:** sha256 +* **Name**: `tupleSize` + * **Description:** Indicates the segment size (in characters) to split the + digest is split into + * **Type:** number + * **Constraints:** An integer between 0 and 32 inclusive + * **Default:** 3 +* **Name:** `numberOfTuples` + * **Description:** Indicates the number of segments to use for path generation + * **Type:** number + * **Constraints:** An integer between 0 and 32 inclusive + * **Default:** 3 +* **Name:** `shortObjectRoot` + * **Description:** When true, indicates that the OCFL object root directory + name should contain the remainder of the digest not used in the n-tuple + segments + * **Type:** boolean + * **Default:** false + +### Details + +#### digestAlgorithm + +`digestAlgorithm` is defaulted to `sha256`, and it MUST either contain a digest +algorithm that's [officially supported by the OCFL +specification](https://ocfl.io/1.0/spec/#digest-algorithms) or defined in a community +extension. The specified algorithm is applied to OCFL object identifiers to +produce hex encoded digest values that are then mapped to OCFL object root +paths. + +#### tupleSize + +`tupleSize` determines the number of digest characters to include in +each tuple. The tuples are used as directory names. The default value +is `3`, which means that each intermediate directory in the OCFL +storage hierarchy could contain up to 4096 sub-directories. Increasing +this value increases the maximum number of sub-directories per +directory. + +If `tupleSize` is set to `0`, then no tuples are created and `numberOfTuples` +MUST also equal `0`. 
+ +The product of `tupleSize` and `numberOfTuples` MUST be less than or equal to +the number of characters in the hex encoded digest. + +#### numberOfTuples + +`numberOfTuples` determines how many tuples to create from the digest. The +tuples are used as directory names, and each successive directory is nested +within the previous. The default value is `3`, which means that every OCFL +object root will be 4 directories removed from the OCFL storage root, 3 tuple +directories plus 1 encapsulation directory. Increasing this value increases the +depth of the OCFL storage hierarchy. + +If `numberOfTuples` is set to `0`, then no tuples are created and `tupleSize` +MUST also equal `0`. + +The product of `numberOfTuples` and `tupleSize` MUST be less than or equal to +the number of characters in the hex encoded digest. + +#### shortObjectRoot + +The directory that immediately encapsulates an OCFL object MUST either be named +using the entire digest or the remainder of the digest that was not used in a +tuple. When `shortObjectRoot` is set to `false`, the default, the entire digest +is used, and, when it's `true` only the previously unused remainder is used. + +If the product of `tupleSize` and `numberOfTuples` is equal to the number of +characters in the hex encoded digest, then `shortObjectRoot` MUST be `false`. + +## Procedure + +The following is an outline of the steps to map an OCFL object identifier to an +OCFL object root path: + +1. The OCFL object identifier, UTF-8 encoded, is hashed using the specified + `digestAlgorithm`. +2. The digest is encoded as a lowercase hex string. +3. Starting at the beginning of the digest and working forwards, the digest is + divided into `numberOfTuples` tuples each containing `tupleSize` characters. +4. The tuples are joined, in order, using the filesystem path separator. +5. If `shortObjectRoot` is `true`, the remaining, unused portion of the digest + is joined on the end of this path. 
Otherwise, the entire digest is joined on + the end. + +## Examples + +### Example 1 + +This example demonstrates what the OCFL storage hierarchy looks like when using +the default configuration. + +#### Parameters + +It is not necessary to specify any parameters to use the default configuration. +However, if you were to do so, it would look like the following: + +```json +{ + "extensionName": "0004-hashed-n-tuple-storage-layout", + "digestAlgorithm": "sha256", + "tupleSize": 3, + "numberOfTuples": 3, + "shortObjectRoot": false +} +``` + +#### Mappings + +| Object ID | Digest | Object Root Path | +| --------- | ------ | ---------------- | +| object-01 | 3c0ff4240c1e116dba14c7627f2319b58aa3d77606d0d90dfc6161608ac987d4 | `3c0/ff4/240/3c0ff4240c1e116dba14c7627f2319b58aa3d77606d0d90dfc6161608ac987d4` | +| ..hor/rib:le-$id | 487326d8c2a3c0b885e23da1469b4d6671fd4e76978924b4443e9e3c316cda6d | `487/326/d8c/487326d8c2a3c0b885e23da1469b4d6671fd4e76978924b4443e9e3c316cda6d` | + +#### Storage Hierarchy + +``` +[storage_root]/ +├── 0=ocfl_1.0 +├── ocfl_layout.json +├── extensions/ +│ └── 0004-hashed-n-tuple-storage-layout/ +│ └── config.json +├── 3c0/ +│ └── ff4/ +│ └── 240/ +│ └── 3c0ff4240c1e116dba14c7627f2319b58aa3d77606d0d90dfc6161608ac987d4/ +│ ├── 0=ocfl_object_1.0 +│ ├── inventory.json +│ ├── inventory.json.sha512 +│ └── v1 [...] +└── 487/ + └── 326/ + └── d8c/ + └── 487326d8c2a3c0b885e23da1469b4d6671fd4e76978924b4443e9e3c316cda6d/ + ├── 0=ocfl_object_1.0 + ├── inventory.json + ├── inventory.json.sha512 + └── v1 [...] +``` + +### Example 2 + +This example demonstrates the effects of modifying the default parameters to use +a different `digestAlgoirthm`, smaller `tupleSize`, and a larger +`numberOfTuples`. 
+ +#### Parameters + +```json +{ + "extensionName": "0004-hashed-n-tuple-storage-layout", + "digestAlgorithm": "md5", + "tupleSize": 2, + "numberOfTuples": 15, + "shortObjectRoot": true +} +``` + +#### Mappings + +| Object ID | Digest | Object Root Path | +| --------- | ------ | ---------------- | +| object-01 | ff75534492485eabb39f86356728884e | `ff/75/53/44/92/48/5e/ab/b3/9f/86/35/67/28/88/4e` | +| ..hor/rib:le-$id | 08319766fb6c2935dd175b94267717e0 | `08/31/97/66/fb/6c/29/35/dd/17/5b/94/26/77/17/e0` | + +#### Storage Hierarchy + +``` +[storage_root]/ +├── 0=ocfl_1.0 +├── ocfl_layout.json +├── extensions/ +│ └── 0004-hashed-n-tuple-storage-layout/ +│ └── config.json +├── 08/ +│ └── 31/ +│ └── 97/ +│ └── 66/ +│ └── fb/ +│ └── 6c/ +│ └── 29/ +│ └── 35/ +│ └── dd/ +│ └── 17/ +│ └── 5b/ +│ └── 94/ +│ └── 26/ +│ └── 77/ +│ └── 17/ +│ └── e0/ +│ ├── 0=ocfl_object_1.0 +│ ├── inventory.json +│ ├── inventory.json.sha512 +│ └── v1 [...] +└── ff/ + └── 75/ + └── 53/ + └── 44/ + └── 92/ + └── 48/ + └── 5e/ + └── ab/ + └── b3/ + └── 9f/ + └── 86/ + └── 35/ + └── 67/ + └── 28/ + └── 88/ + └── 4e/ + ├── 0=ocfl_object_1.0 + ├── inventory.json + ├── inventory.json.sha512 + └── v1 [...] +``` + +### Example 3 + +This example demonstrates what happens when `tupleSize` and `numberOfTuples` are +set to `0`. This is an edge case and not a recommended configuration. 
+ +#### Parameters + +```json +{ + "extensionName": "0004-hashed-n-tuple-storage-layout", + "digestAlgorithm": "sha256", + "tupleSize": 0, + "numberOfTuples": 0, + "shortObjectRoot": false +} +``` + +#### Mappings + +| Object ID | Digest | Object Root Path | +| --------- | ------ | ---------------- | +| object-01 | 3c0ff4240c1e116dba14c7627f2319b58aa3d77606d0d90dfc6161608ac987d4 | `3c0ff4240c1e116dba14c7627f2319b58aa3d77606d0d90dfc6161608ac987d4` | +| ..hor/rib:le-$id | 487326d8c2a3c0b885e23da1469b4d6671fd4e76978924b4443e9e3c316cda6d | `487326d8c2a3c0b885e23da1469b4d6671fd4e76978924b4443e9e3c316cda6d` | + +#### Storage Hierarchy + +``` +[storage_root]/ +├── 0=ocfl_1.0 +├── ocfl_layout.json +├── extensions/ +│ └── 0004-hashed-n-tuple-storage-layout/ +│ └── config.json +├── 3c0ff4240c1e116dba14c7627f2319b58aa3d77606d0d90dfc6161608ac987d4/ +│ ├── 0=ocfl_object_1.0 +│ ├── inventory.json +│ ├── inventory.json.sha512 +│ └── v1 [...] +└── 487326d8c2a3c0b885e23da1469b4d6671fd4e76978924b4443e9e3c316cda6d/ + ├── 0=ocfl_object_1.0 + ├── inventory.json + ├── inventory.json.sha512 + └── v1 [...] 
+``` diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/0=ocfl_1.1 b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/0=ocfl_1.1 new file mode 100644 index 00000000..0deb99e4 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/0=ocfl_1.1 @@ -0,0 +1 @@ +ocfl_1.1 diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/0=ocfl_object_1.1 b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/0=ocfl_object_1.1 new file mode 100644 index 00000000..14705cb1 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/0=ocfl_object_1.1 @@ -0,0 +1 @@ +ocfl_object_1.1 diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/inventory.json b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/inventory.json new file mode 100644 index 00000000..e90da6af --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/inventory.json @@ -0,0 +1,24 @@ +{ + "id" : "o1", + "type" : "https://ocfl.io/1.1/spec/#inventory", + "digestAlgorithm" : "sha512", + "head" : "v1", + "contentDirectory" : "content", + "fixity" : { }, + "manifest" : { + "96a26e7629b55187f9ba3edc4acc940495d582093b8a88cb1f0303cf3399fe6b1f5283d76dfd561fc401a0cdf878c5aad9f2d6e7e2d9ceee678757bb5d95c39e" : [ "v1/content/file1" ] + }, + "versions" : { + "v1" : { + "created" : "2019-08-05T15:57:53Z", + "message" : "commit message", + "user" 
: { + "name" : "Peter", + "address" : "peter@example.com" + }, + "state" : { + "96a26e7629b55187f9ba3edc4acc940495d582093b8a88cb1f0303cf3399fe6b1f5283d76dfd561fc401a0cdf878c5aad9f2d6e7e2d9ceee678757bb5d95c39e" : [ "file1" ] + } + } + } +} \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/inventory.json.sha512 b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/inventory.json.sha512 new file mode 100644 index 00000000..2658ea72 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/inventory.json.sha512 @@ -0,0 +1 @@ +40a7deef92d370a4d8cd797bc4d2d021be4aae4deeaaa272215a3a02e186ec4bb2de1e6250a2df613040bfcdb1e1e1b064b77190c1932bf3d8bca772c9cbdefa inventory.json diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/v1/content/file1 b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/v1/content/file1 new file mode 100644 index 00000000..663554bf --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/v1/content/file1 @@ -0,0 +1 @@ +Test file 1 \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/v1/inventory.json b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/v1/inventory.json new file 
mode 100644 index 00000000..e90da6af --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/v1/inventory.json @@ -0,0 +1,24 @@ +{ + "id" : "o1", + "type" : "https://ocfl.io/1.1/spec/#inventory", + "digestAlgorithm" : "sha512", + "head" : "v1", + "contentDirectory" : "content", + "fixity" : { }, + "manifest" : { + "96a26e7629b55187f9ba3edc4acc940495d582093b8a88cb1f0303cf3399fe6b1f5283d76dfd561fc401a0cdf878c5aad9f2d6e7e2d9ceee678757bb5d95c39e" : [ "v1/content/file1" ] + }, + "versions" : { + "v1" : { + "created" : "2019-08-05T15:57:53Z", + "message" : "commit message", + "user" : { + "name" : "Peter", + "address" : "peter@example.com" + }, + "state" : { + "96a26e7629b55187f9ba3edc4acc940495d582093b8a88cb1f0303cf3399fe6b1f5283d76dfd561fc401a0cdf878c5aad9f2d6e7e2d9ceee678757bb5d95c39e" : [ "file1" ] + } + } + } +} \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/v1/inventory.json.sha512 b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/v1/inventory.json.sha512 new file mode 100644 index 00000000..2658ea72 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/v1/inventory.json.sha512 @@ -0,0 +1 @@ +40a7deef92d370a4d8cd797bc4d2d021be4aae4deeaaa272215a3a02e186ec4bb2de1e6250a2df613040bfcdb1e1e1b064b77190c1932bf3d8bca772c9cbdefa inventory.json diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/extensions/0004-hashed-n-tuple-storage-layout/config.json b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/extensions/0004-hashed-n-tuple-storage-layout/config.json new file 
mode 100644 index 00000000..4644b116 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/extensions/0004-hashed-n-tuple-storage-layout/config.json @@ -0,0 +1,7 @@ +{ + "digestAlgorithm" : "sha256", + "tupleSize" : 3, + "numberOfTuples" : 3, + "shortObjectRoot" : false, + "extensionName" : "0004-hashed-n-tuple-storage-layout" +} \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/ocfl_extensions_1.0.md b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/ocfl_extensions_1.0.md new file mode 100644 index 00000000..23582668 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/ocfl_extensions_1.0.md @@ -0,0 +1,118 @@ +# OCFL Community Extensions + +**Version**: 1.0 + +This repository contains community extensions to the [OCFL Specification and Implementation Notes](https://ocfl.io/). Extensions are a means of adding new functionality and documenting standards outside of the main OCFL specification process. For example, storage layout extensions define how OCFL object IDs are mapped to OCFL object root directories within an OCFL storage root. This mapping is outside of the scope of the OCFL specification, but is valuable information to capture so that repositories are self-describing and easily accessible using generic OCFL tooling. + +This is a community driven repository. Community members are encouraged to contribute by submitting new extensions and reviewing others' submissions. For more details, see the [review/merge policy](#review--merge-policy) below. + +See the current set of [adopted extensions](https://ocfl.github.io/extensions/) and [extensions open for review and discussion](https://github.com/OCFL/extensions/pulls). + +## Using Community Extensions + +To use OCFL extensions you first need an OCFL client that supports the desired extensions. 
OCFL clients are not required to support extensions to be compliant with the OCFL specification, and the extensions that any given client supports will vary. The idea behind this repository is to encourage the development and implementation of common extensions so that there can be interoperability between OCFL clients. + +## Implementing Community Extensions + +Reference the OCFL specification's description of [object extensions](https://ocfl.io/1.0/spec/#object-extensions) and [storage root extensions](https://ocfl.io/1.0/spec/#storage-root-extensions). + +The OCFL storage root MAY contain a copy of an extension's specification. + +Each extension specification details how it should be implemented, but there are a few rules that apply to every extension. + +A *root extension directory* refers to the directory named `extensions` that is located in either the storage root or an object root. An *extension directory* is an extension specific directory that is the child of a root extension directory and MUST be named using the extension's *Registered Name*, or `initial` (see [Optional Initial Extension](#optional-initial-extension)). For example, `extensions/0000-example-extension` is the extension directory for the extension [0000-example-extension](docs/0000-example-extension.md). + +### Configuration Files + +An extension's parameters are serialized as a JSON object and written to a configuration file named `config.json` within the extension's extension directory. + +If an extension includes a configuration file, one of the properties in that file MUST be `extensionName`, where the value is the *Registered Name* of the extension. 
+ +For example, the extension [0000-example-extension](docs/0000-example-extension.md) could be parameterized as follows: + +```json +{ + "extensionName": "0000-example-extension", + "firstExampleParameter": 12, + "secondExampleParameter": "Hello", + "thirdExampleParameter": "Green" +} +``` + +Based on how the extension is used, its configuration file is written to one of the following locations, relative the storage root: + +* `extensions/0000-example-extension/config.json`, if it is a [storage root extension](https://ocfl.io/1.0/spec/#storage-root-extensions) +* `OBJECT_ROOT/extensions/0000-example-extension/config.json`, if it is an [object extension](https://ocfl.io/1.0/spec/#object-extensions) + +### Undefined Behavior + +It is conceivable that some extensions may not be compatible with other extensions, or may be rendered incompatible based on how they're implemented in a client. For example, suppose that there are multiple extensions that define how logs should be written to an object's log directory. You could declare that your objects are using multiple log extensions, but the result is undefined and up to the implementing client. It may only write one log format or the other, it may write all of them, or it may reject the configuration entirely. + +Because OCFL clients are not required to implement any or all extensions, it is also possible that a client may encounter an extension that it does not implement. In these cases, it is up to the client to decide how to proceed. A client may fail on unsupported extensions, or it may choose to ignore the extensions and carry on. + +### Optional Initial Extension + +A _root extension directory_ MAY optionally contain an _initial_ extension that, if it exists, SHOULD be applied before all other extensions in the directory. +An _initial extension_ is identified by the extension directory name "initial". 
+ +An _initial extension_ could be used to address some of the [undefined behaviors](#undefined-behavior), define how extensions are applied, and answer questions such as: + +- Is an extension deactivated, only applying to earlier versions of the object? +- Should extensions be applied in a specific order? +- Does one extension depend on another? + +## Specifying Community Extensions + +### Layout + +Community extensions MUST be written as GitHub flavored markdown files in the `docs` directory of this repository. The +filename of an extension is based on its *Registered Name* with a `.md` extension. + +Extensions are numbered sequentially, and the *Registered Name* of an extension is prefixed with this 4-digit, zero-padded +decimal number. The *Registered Name* should be descriptive, use hyphens to separate words, and have a maximum of 250 +characters in total. + +New extensions should use `NNNN` as a place-holder for the next available prefix number at the time of merging. New extension pull-requests should not update the index document (`docs/index.md`), this will be done post-approval. + +Extensions are intended to be mostly static once published. Substantial revisions of content beyond simple fixes warrants publishing a new extension, and marking the old extension obsolete by updating the *Obsoletes/Obsoleted by* sections in each extension respectively. + +An example/template is available in this repository as "[OCFL Community Extension 0000: Example Extension](docs/0000-example-extension.md)" and is rendered +via GitHub pages as https://ocfl.github.io/extensions/0000-example-extension + +### Headers + +Extension definitions MUST contain a header section that defines the following fields: + +* **Extension Name**: The extension's unique *Registered Name* +* **Authors**: The names of the individuals who authored the extension +* **Minimum OCFL Version**: The minimum OCFL version that the extension requires, eg. 
*1.0* +* **OCFL Community Extensions Version**: The version of the OCFL Extensions Specification that the extension conforms to, eg. *1.0* +* **Obsoletes**: The *Registered Name* of the extension that this extension obsoletes, or *n/a* +* **Obsoleted by**: The *Registered Name* of the extension that obsoletes this extension, or *n/a* + +### Parameters + +Extension definitions MAY define parameters to enable configuration as needed. Extension parameters are serialized as JSON values, and therefore must conform to the [JSON specification](https://tools.ietf.org/html/rfc8259). Parameters MUST be defined in the following structure: + +* **Name**: A short, descriptive name for the parameter. The name is used as the parameter's key within its JSON representation. + * **Description**: A brief description of the function of the parameter. This should be expanded on in the main description of the extension which MUST reference all the parameters. + * **Type**: The JSON data type of the parameter value. One of `string`, `number`, `boolean`, `array`, or `object`. The structure of complex types MUST be further described. + * **Constraints**: A description of any constraints to apply to parameter values. Constraints may be plain text, regular expressions, [JSON Schema](https://www.ietf.org/archive/id/draft-handrews-json-schema-02.txt), or whatever makes the most sense for the extension. + * **Default**: The default value of the parameter. If no default is specified, then the parameter is mandatory. + +### Body + +Each specification MUST thoroughly document how it is intended to be implemented and used, including detailed examples where helpful. If the extension uses parameters, the parameters MUST be described in detail in the body of the specification. + +## Review / Merge Policy + +1. A pull-request is submitted per the guidelines described in the "[Organization of this repository](https://github.com/OCFL/extensions#organization-of-this-repository)" section of this document +1. 
Authors of (legitimate) pull-requests will be added by an owner of the OCFL GitHub organization to the [extension-authors](https://github.com/orgs/OCFL/teams/extension-authors) team + - The purpose of being added to this team is to enable adding `labels` to their pull-request(s) +1. If a pull-request is submitted in order to facilitate discussion, the `draft` label should be applied by the author +1. If a pull-request is ready for review, it should have a title that is suitable for merge (i.e. not have a title indicating "draft"), and optionally have the `in-review` label applied by the author +1. A pull-request must be merged by an OCFL Editor if the following criteria are met: + 1. At least two OCFL Editors have "[Approved](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/approving-a-pull-request-with-required-reviews)" the pull-request + 1. At least one other community member has "[Approved](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/approving-a-pull-request-with-required-reviews)" the pull-request + 1. The approvers represent three distinct organizations +1. After the pull-request has been merged with `NNNN` as a placeholder for the extension number in the _Registered Name_, an OCFL Editor will determine the extension number based on the next sequentially available number. They will create an additional administrative pull-request to change `NNNN` to the appropriate number in the extension file name and the extension document itself, as well as adding an entry to the index page entry (`docs/index.md`). 
\ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/ocfl_layout.json b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/ocfl_layout.json new file mode 100644 index 00000000..e2e09e8f --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/repo15/ocfl_layout.json @@ -0,0 +1,4 @@ +{ + "extension" : "0004-hashed-n-tuple-storage-layout", + "description" : "OCFL object identifiers are hashed and encoded as lowercase hex strings. These digests are then divided into N n-tuple segments, which are used to create nested paths under the OCFL storage root." +} \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/test.txt b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/test.txt new file mode 100644 index 00000000..588846e7 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v3/test.txt @@ -0,0 +1 @@ +testtesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttest \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/a/b/c/d/1.txt b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/a/b/c/d/1.txt new file mode 100644 index 00000000..11db4a97 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/a/b/c/d/1.txt @@ -0,0 +1 @@ +4444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444 \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/a/b/c/d/3.txt 
b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/a/b/c/d/3.txt new file mode 100644 index 00000000..451d6d0d --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/a/b/c/d/3.txt @@ -0,0 +1 @@ +3333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333 \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/a/b/c/file2.txt b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/a/b/c/file2.txt new file mode 100644 index 00000000..5544d839 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/a/b/c/file2.txt @@ -0,0 +1 @@ +file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1file1 \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/a/new.txt b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/a/new.txt new file mode 100644 index 00000000..1048ef56 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/a/new.txt @@ -0,0 +1 @@ +newnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnewnew \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/0004-hashed-n-tuple-storage-layout.md 
b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/0004-hashed-n-tuple-storage-layout.md new file mode 100644 index 00000000..81a4dc1b --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/0004-hashed-n-tuple-storage-layout.md @@ -0,0 +1,303 @@ +# OCFL Community Extension 0004: Hashed N-tuple Storage Layout + +* **Extension Name:** 0004-hashed-n-tuple-storage-layout +* **Authors:** Peter Winckles +* **Minimum OCFL Version:** 1.0 +* **OCFL Community Extensions Version:** 1.0 +* **Obsoletes:** n/a +* **Obsoleted by:** n/a + +## Overview + +This storage root extension describes how to safely map OCFL object identifiers +of any length, containing any characters to OCFL object root directories with +the primary goals of ensuring portability and filesystem performance at the cost +of directory name transparency. + +Using this extension, OCFL object identifiers are hashed and encoded +as lowercase hex strings. These digests are then divided into _N_ +n-tuple segments, which are used to create nested paths under the OCFL +storage root. + +This approach allows OCFL object identifiers of any composition to be evenly +distributed across the storage hierarchy. The maximum number of files under any +given directory is controlled by the number of characters in each n-tuple, and +the tree depth is controlled by the number of n-tuple segments each digest is +divided into. Additionally, it obviates the need to handle special characters in +OCFL object identifiers because the mapped directory names will only ever +contain the characters `0-9a-f`. + +However, this comes at the cost of not being able to identify the OCFL object +identifier of an object simply by browsing the OCFL storage hierarchy. The ID of +an object may only be found within its `inventory.json`. 
+ +## Parameters + +### Summary + +* **Name:** `digestAlgorithm` + * **Description:** The digest algorithm to apply on the OCFL object + identifier; MUST be an algorithm that is allowed in the OCFL fixity block + * **Type:** string + * **Constraints:** Must not be empty + * **Default:** sha256 +* **Name**: `tupleSize` + * **Description:** Indicates the segment size (in characters) that the + digest is split into + * **Type:** number + * **Constraints:** An integer between 0 and 32 inclusive + * **Default:** 3 +* **Name:** `numberOfTuples` + * **Description:** Indicates the number of segments to use for path generation + * **Type:** number + * **Constraints:** An integer between 0 and 32 inclusive + * **Default:** 3 +* **Name:** `shortObjectRoot` + * **Description:** When true, indicates that the OCFL object root directory + name should contain the remainder of the digest not used in the n-tuple + segments + * **Type:** boolean + * **Default:** false + +### Details + +#### digestAlgorithm + +`digestAlgorithm` is defaulted to `sha256`, and it MUST either contain a digest +algorithm that's [officially supported by the OCFL +specification](https://ocfl.io/1.0/spec/#digest-algorithms) or defined in a community +extension. The specified algorithm is applied to OCFL object identifiers to +produce hex encoded digest values that are then mapped to OCFL object root +paths. + +#### tupleSize + +`tupleSize` determines the number of digest characters to include in +each tuple. The tuples are used as directory names. The default value +is `3`, which means that each intermediate directory in the OCFL +storage hierarchy could contain up to 4096 sub-directories. Increasing +this value increases the maximum number of sub-directories per +directory. + +If `tupleSize` is set to `0`, then no tuples are created and `numberOfTuples` +MUST also equal `0`. 
+ +The product of `tupleSize` and `numberOfTuples` MUST be less than or equal to +the number of characters in the hex encoded digest. + +#### numberOfTuples + +`numberOfTuples` determines how many tuples to create from the digest. The +tuples are used as directory names, and each successive directory is nested +within the previous. The default value is `3`, which means that every OCFL +object root will be 4 directories removed from the OCFL storage root, 3 tuple +directories plus 1 encapsulation directory. Increasing this value increases the +depth of the OCFL storage hierarchy. + +If `numberOfTuples` is set to `0`, then no tuples are created and `tupleSize` +MUST also equal `0`. + +The product of `numberOfTuples` and `tupleSize` MUST be less than or equal to +the number of characters in the hex encoded digest. + +#### shortObjectRoot + +The directory that immediately encapsulates an OCFL object MUST either be named +using the entire digest or the remainder of the digest that was not used in a +tuple. When `shortObjectRoot` is set to `false`, the default, the entire digest +is used, and, when it's `true` only the previously unused remainder is used. + +If the product of `tupleSize` and `numberOfTuples` is equal to the number of +characters in the hex encoded digest, then `shortObjectRoot` MUST be `false`. + +## Procedure + +The following is an outline of the steps to map an OCFL object identifier to an +OCFL object root path: + +1. The OCFL object identifier, UTF-8 encoded, is hashed using the specified + `digestAlgorithm`. +2. The digest is encoded as a lowercase hex string. +3. Starting at the beginning of the digest and working forwards, the digest is + divided into `numberOfTuples` tuples each containing `tupleSize` characters. +4. The tuples are joined, in order, using the filesystem path separator. +5. If `shortObjectRoot` is `true`, the remaining, unused portion of the digest + is joined on the end of this path. 
Otherwise, the entire digest is joined on + the end. + +## Examples + +### Example 1 + +This example demonstrates what the OCFL storage hierarchy looks like when using +the default configuration. + +#### Parameters + +It is not necessary to specify any parameters to use the default configuration. +However, if you were to do so, it would look like the following: + +```json +{ + "extensionName": "0004-hashed-n-tuple-storage-layout", + "digestAlgorithm": "sha256", + "tupleSize": 3, + "numberOfTuples": 3, + "shortObjectRoot": false +} +``` + +#### Mappings + +| Object ID | Digest | Object Root Path | +| --------- | ------ | ---------------- | +| object-01 | 3c0ff4240c1e116dba14c7627f2319b58aa3d77606d0d90dfc6161608ac987d4 | `3c0/ff4/240/3c0ff4240c1e116dba14c7627f2319b58aa3d77606d0d90dfc6161608ac987d4` | +| ..hor/rib:le-$id | 487326d8c2a3c0b885e23da1469b4d6671fd4e76978924b4443e9e3c316cda6d | `487/326/d8c/487326d8c2a3c0b885e23da1469b4d6671fd4e76978924b4443e9e3c316cda6d` | + +#### Storage Hierarchy + +``` +[storage_root]/ +├── 0=ocfl_1.0 +├── ocfl_layout.json +├── extensions/ +│ └── 0004-hashed-n-tuple-storage-layout/ +│ └── config.json +├── 3c0/ +│ └── ff4/ +│ └── 240/ +│ └── 3c0ff4240c1e116dba14c7627f2319b58aa3d77606d0d90dfc6161608ac987d4/ +│ ├── 0=ocfl_object_1.0 +│ ├── inventory.json +│ ├── inventory.json.sha512 +│ └── v1 [...] +└── 487/ + └── 326/ + └── d8c/ + └── 487326d8c2a3c0b885e23da1469b4d6671fd4e76978924b4443e9e3c316cda6d/ + ├── 0=ocfl_object_1.0 + ├── inventory.json + ├── inventory.json.sha512 + └── v1 [...] +``` + +### Example 2 + +This example demonstrates the effects of modifying the default parameters to use +a different `digestAlgorithm`, smaller `tupleSize`, and a larger +`numberOfTuples`. 
+ +#### Parameters + +```json +{ + "extensionName": "0004-hashed-n-tuple-storage-layout", + "digestAlgorithm": "md5", + "tupleSize": 2, + "numberOfTuples": 15, + "shortObjectRoot": true +} +``` + +#### Mappings + +| Object ID | Digest | Object Root Path | +| --------- | ------ | ---------------- | +| object-01 | ff75534492485eabb39f86356728884e | `ff/75/53/44/92/48/5e/ab/b3/9f/86/35/67/28/88/4e` | +| ..hor/rib:le-$id | 08319766fb6c2935dd175b94267717e0 | `08/31/97/66/fb/6c/29/35/dd/17/5b/94/26/77/17/e0` | + +#### Storage Hierarchy + +``` +[storage_root]/ +├── 0=ocfl_1.0 +├── ocfl_layout.json +├── extensions/ +│ └── 0004-hashed-n-tuple-storage-layout/ +│ └── config.json +├── 08/ +│ └── 31/ +│ └── 97/ +│ └── 66/ +│ └── fb/ +│ └── 6c/ +│ └── 29/ +│ └── 35/ +│ └── dd/ +│ └── 17/ +│ └── 5b/ +│ └── 94/ +│ └── 26/ +│ └── 77/ +│ └── 17/ +│ └── e0/ +│ ├── 0=ocfl_object_1.0 +│ ├── inventory.json +│ ├── inventory.json.sha512 +│ └── v1 [...] +└── ff/ + └── 75/ + └── 53/ + └── 44/ + └── 92/ + └── 48/ + └── 5e/ + └── ab/ + └── b3/ + └── 9f/ + └── 86/ + └── 35/ + └── 67/ + └── 28/ + └── 88/ + └── 4e/ + ├── 0=ocfl_object_1.0 + ├── inventory.json + ├── inventory.json.sha512 + └── v1 [...] +``` + +### Example 3 + +This example demonstrates what happens when `tupleSize` and `numberOfTuples` are +set to `0`. This is an edge case and not a recommended configuration. 
+ +#### Parameters + +```json +{ + "extensionName": "0004-hashed-n-tuple-storage-layout", + "digestAlgorithm": "sha256", + "tupleSize": 0, + "numberOfTuples": 0, + "shortObjectRoot": false +} +``` + +#### Mappings + +| Object ID | Digest | Object Root Path | +| --------- | ------ | ---------------- | +| object-01 | 3c0ff4240c1e116dba14c7627f2319b58aa3d77606d0d90dfc6161608ac987d4 | `3c0ff4240c1e116dba14c7627f2319b58aa3d77606d0d90dfc6161608ac987d4` | +| ..hor/rib:le-$id | 487326d8c2a3c0b885e23da1469b4d6671fd4e76978924b4443e9e3c316cda6d | `487326d8c2a3c0b885e23da1469b4d6671fd4e76978924b4443e9e3c316cda6d` | + +#### Storage Hierarchy + +``` +[storage_root]/ +├── 0=ocfl_1.0 +├── ocfl_layout.json +├── extensions/ +│ └── 0004-hashed-n-tuple-storage-layout/ +│ └── config.json +├── 3c0ff4240c1e116dba14c7627f2319b58aa3d77606d0d90dfc6161608ac987d4/ +│ ├── 0=ocfl_object_1.0 +│ ├── inventory.json +│ ├── inventory.json.sha512 +│ └── v1 [...] +└── 487326d8c2a3c0b885e23da1469b4d6671fd4e76978924b4443e9e3c316cda6d/ + ├── 0=ocfl_object_1.0 + ├── inventory.json + ├── inventory.json.sha512 + └── v1 [...] 
+``` diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/0=ocfl_1.1 b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/0=ocfl_1.1 new file mode 100644 index 00000000..0deb99e4 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/0=ocfl_1.1 @@ -0,0 +1 @@ +ocfl_1.1 diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/0=ocfl_object_1.1 b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/0=ocfl_object_1.1 new file mode 100644 index 00000000..14705cb1 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/0=ocfl_object_1.1 @@ -0,0 +1 @@ +ocfl_object_1.1 diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/inventory.json b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/inventory.json new file mode 100644 index 00000000..e90da6af --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/inventory.json @@ -0,0 +1,24 @@ +{ + "id" : "o1", + "type" : "https://ocfl.io/1.1/spec/#inventory", + "digestAlgorithm" : "sha512", + "head" : "v1", + "contentDirectory" : "content", + "fixity" : { }, + "manifest" : { + "96a26e7629b55187f9ba3edc4acc940495d582093b8a88cb1f0303cf3399fe6b1f5283d76dfd561fc401a0cdf878c5aad9f2d6e7e2d9ceee678757bb5d95c39e" : [ "v1/content/file1" ] + }, + "versions" : { + "v1" : { + "created" : "2019-08-05T15:57:53Z", + "message" : "commit message", + "user" 
: { + "name" : "Peter", + "address" : "peter@example.com" + }, + "state" : { + "96a26e7629b55187f9ba3edc4acc940495d582093b8a88cb1f0303cf3399fe6b1f5283d76dfd561fc401a0cdf878c5aad9f2d6e7e2d9ceee678757bb5d95c39e" : [ "file1" ] + } + } + } +} \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/inventory.json.sha512 b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/inventory.json.sha512 new file mode 100644 index 00000000..2658ea72 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/inventory.json.sha512 @@ -0,0 +1 @@ +40a7deef92d370a4d8cd797bc4d2d021be4aae4deeaaa272215a3a02e186ec4bb2de1e6250a2df613040bfcdb1e1e1b064b77190c1932bf3d8bca772c9cbdefa inventory.json diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/v1/content/file1 b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/v1/content/file1 new file mode 100644 index 00000000..663554bf --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/v1/content/file1 @@ -0,0 +1 @@ +Test file 1 \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/v1/inventory.json b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/v1/inventory.json new file 
mode 100644 index 00000000..e90da6af --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/v1/inventory.json @@ -0,0 +1,24 @@ +{ + "id" : "o1", + "type" : "https://ocfl.io/1.1/spec/#inventory", + "digestAlgorithm" : "sha512", + "head" : "v1", + "contentDirectory" : "content", + "fixity" : { }, + "manifest" : { + "96a26e7629b55187f9ba3edc4acc940495d582093b8a88cb1f0303cf3399fe6b1f5283d76dfd561fc401a0cdf878c5aad9f2d6e7e2d9ceee678757bb5d95c39e" : [ "v1/content/file1" ] + }, + "versions" : { + "v1" : { + "created" : "2019-08-05T15:57:53Z", + "message" : "commit message", + "user" : { + "name" : "Peter", + "address" : "peter@example.com" + }, + "state" : { + "96a26e7629b55187f9ba3edc4acc940495d582093b8a88cb1f0303cf3399fe6b1f5283d76dfd561fc401a0cdf878c5aad9f2d6e7e2d9ceee678757bb5d95c39e" : [ "file1" ] + } + } + } +} \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/v1/inventory.json.sha512 b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/v1/inventory.json.sha512 new file mode 100644 index 00000000..2658ea72 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/235/2da/728/2352da7280f1decc3acf1ba84eb945c9fc2b7b541094e1d0992dbffd1b6664cc/v1/inventory.json.sha512 @@ -0,0 +1 @@ +40a7deef92d370a4d8cd797bc4d2d021be4aae4deeaaa272215a3a02e186ec4bb2de1e6250a2df613040bfcdb1e1e1b064b77190c1932bf3d8bca772c9cbdefa inventory.json diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/extensions/0004-hashed-n-tuple-storage-layout/config.json b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/extensions/0004-hashed-n-tuple-storage-layout/config.json new file 
mode 100644 index 00000000..4644b116 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/extensions/0004-hashed-n-tuple-storage-layout/config.json @@ -0,0 +1,7 @@ +{ + "digestAlgorithm" : "sha256", + "tupleSize" : 3, + "numberOfTuples" : 3, + "shortObjectRoot" : false, + "extensionName" : "0004-hashed-n-tuple-storage-layout" +} \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/ocfl_extensions_1.0.md b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/ocfl_extensions_1.0.md new file mode 100644 index 00000000..23582668 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/ocfl_extensions_1.0.md @@ -0,0 +1,118 @@ +# OCFL Community Extensions + +**Version**: 1.0 + +This repository contains community extensions to the [OCFL Specification and Implementation Notes](https://ocfl.io/). Extensions are a means of adding new functionality and documenting standards outside of the main OCFL specification process. For example, storage layout extensions define how OCFL object IDs are mapped to OCFL object root directories within an OCFL storage root. This mapping is outside of the scope of the OCFL specification, but is valuable information to capture so that repositories are self-describing and easily accessible using generic OCFL tooling. + +This is a community driven repository. Community members are encouraged to contribute by submitting new extensions and reviewing others' submissions. For more details, see the [review/merge policy](#review--merge-policy) below. + +See the current set of [adopted extensions](https://ocfl.github.io/extensions/) and [extensions open for review and discussion](https://github.com/OCFL/extensions/pulls). + +## Using Community Extensions + +To use OCFL extensions you first need an OCFL client that supports the desired extensions. 
OCFL clients are not required to support extensions to be compliant with the OCFL specification, and the extensions that any given client supports will vary. The idea behind this repository is to encourage the development and implementation of common extensions so that there can be interoperability between OCFL clients. + +## Implementing Community Extensions + +Reference the OCFL specification's description of [object extensions](https://ocfl.io/1.0/spec/#object-extensions) and [storage root extensions](https://ocfl.io/1.0/spec/#storage-root-extensions). + +The OCFL storage root MAY contain a copy of an extension's specification. + +Each extension specification details how it should be implemented, but there are a few rules that apply to every extension. + +A *root extension directory* refers to the directory named `extensions` that is located in either the storage root or an object root. An *extension directory* is an extension specific directory that is the child of a root extension directory and MUST be named using the extension's *Registered Name*, or `initial` (see [Optional Initial Extension](#optional-initial-extension)). For example, `extensions/0000-example-extension` is the extension directory for the extension [0000-example-extension](docs/0000-example-extension.md). + +### Configuration Files + +An extension's parameters are serialized as a JSON object and written to a configuration file named `config.json` within the extension's extension directory. + +If an extension includes a configuration file, one of the properties in that file MUST be `extensionName`, where the value is the *Registered Name* of the extension. 
+ +For example, the extension [0000-example-extension](docs/0000-example-extension.md) could be parameterized as follows: + +```json +{ + "extensionName": "0000-example-extension", + "firstExampleParameter": 12, + "secondExampleParameter": "Hello", + "thirdExampleParameter": "Green" +} +``` + +Based on how the extension is used, its configuration file is written to one of the following locations, relative to the storage root: + +* `extensions/0000-example-extension/config.json`, if it is a [storage root extension](https://ocfl.io/1.0/spec/#storage-root-extensions) +* `OBJECT_ROOT/extensions/0000-example-extension/config.json`, if it is an [object extension](https://ocfl.io/1.0/spec/#object-extensions) + +### Undefined Behavior + +It is conceivable that some extensions may not be compatible with other extensions, or may be rendered incompatible based on how they're implemented in a client. For example, suppose that there are multiple extensions that define how logs should be written to an object's log directory. You could declare that your objects are using multiple log extensions, but the result is undefined and up to the implementing client. It may only write one log format or the other, it may write all of them, or it may reject the configuration entirely. + +Because OCFL clients are not required to implement any or all extensions, it is also possible that a client may encounter an extension that it does not implement. In these cases, it is up to the client to decide how to proceed. A client may fail on unsupported extensions, or it may choose to ignore the extensions and carry on. + +### Optional Initial Extension + +A _root extension directory_ MAY optionally contain an _initial_ extension that, if it exists, SHOULD be applied before all other extensions in the directory. +An _initial extension_ is identified by the extension directory name "initial". 
+ +An _initial extension_ could be used to address some of the [undefined behaviors](#undefined-behavior), define how extensions are applied, and answer questions such as: + +- Is an extension deactivated, only applying to earlier versions of the object? +- Should extensions be applied in a specific order? +- Does one extension depend on another? + +## Specifying Community Extensions + +### Layout + +Community extensions MUST be written as GitHub flavored markdown files in the `docs` directory of this repository. The +filename of an extension is based on its *Registered Name* with a `.md` extension. + +Extensions are numbered sequentially, and the *Registered Name* of an extension is prefixed with this 4-digit, zero-padded +decimal number. The *Registered Name* should be descriptive, use hyphens to separate words, and have a maximum of 250 +characters in total. + +New extensions should use `NNNN` as a place-holder for the next available prefix number at the time of merging. New extension pull-requests should not update the index document (`docs/index.md`), this will be done post-approval. + +Extensions are intended to be mostly static once published. Substantial revisions of content beyond simple fixes warrants publishing a new extension, and marking the old extension obsolete by updating the *Obsoletes/Obsoleted by* sections in each extension respectively. + +An example/template is available in this repository as "[OCFL Community Extension 0000: Example Extension](docs/0000-example-extension.md)" and is rendered +via GitHub pages as https://ocfl.github.io/extensions/0000-example-extension + +### Headers + +Extension definitions MUST contain a header section that defines the following fields: + +* **Extension Name**: The extension's unique *Registered Name* +* **Authors**: The names of the individuals who authored the extension +* **Minimum OCFL Version**: The minimum OCFL version that the extension requires, eg. 
*1.0* +* **OCFL Community Extensions Version**: The version of the OCFL Extensions Specification that the extension conforms to, eg. *1.0* +* **Obsoletes**: The *Registered Name* of the extension that this extension obsoletes, or *n/a* +* **Obsoleted by**: The *Registered Name* of the extension that obsoletes this extension, or *n/a* + +### Parameters + +Extension definitions MAY define parameters to enable configuration as needed. Extension parameters are serialized as JSON values, and therefore must conform to the [JSON specification](https://tools.ietf.org/html/rfc8259). Parameters MUST be defined in the following structure: + +* **Name**: A short, descriptive name for the parameter. The name is used as the parameter's key within its JSON representation. + * **Description**: A brief description of the function of the parameter. This should be expanded on in the main description of the extension which MUST reference all the parameters. + * **Type**: The JSON data type of the parameter value. One of `string`, `number`, `boolean`, `array`, or `object`. The structure of complex types MUST be further described. + * **Constraints**: A description of any constraints to apply to parameter values. Constraints may be plain text, regular expressions, [JSON Schema](https://www.ietf.org/archive/id/draft-handrews-json-schema-02.txt), or whatever makes the most sense for the extension. + * **Default**: The default value of parameter. If no default is specified, then the parameter is mandatory. + +### Body + +Each specification MUST thoroughly document how it is intended to be implemented and used, including detailed examples is helpful. If the extension uses parameters, the parameters MUST be described in detail in the body of the specification. + +## Review / Merge Policy + +1. A pull-request is submitted per the guidelines described in the "[Organization of this repository](https://github.com/OCFL/extensions#organization-of-this-repository)" section of this document +1. 
Authors of (legitimate) pull-requests will be added by an owner of the OCFL GitHub organization to the [extension-authors](https://github.com/orgs/OCFL/teams/extension-authors) team + - The purpose of being added to this team is to enable adding `labels` to their pull-request(s) +1. If a pull-request is submitted in order to facilitate discussion, the `draft` label should be applied by the author +1. If a pull-request is ready for review, it should have a title that is suitable for merge (i.e. not have a title indicating "draft"), and optionally have the `in-review` label applied by the author +1. A pull-request must be merged by an OCFL Editor if the following criteria are met: + 1. At least two OCFL Editors have "[Approved](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/approving-a-pull-request-with-required-reviews)" the pull-request + 1. At least one other community member has "[Approved](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/approving-a-pull-request-with-required-reviews)" the pull-request + 1. The approvers represent three distinct organizations +1. After the pull-request has been merged with `NNNN` as a placeholder for the extension number in the _Registered Name_, an OCFL Editor will determine the extension number based on the next sequentially available number. They will create an additional administrative pull-request to change `NNNN` to the appropriate number in the extension file name and the extension document itself, as well as adding an entry to the index page entry (`docs/index.md`). 
\ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/ocfl_layout.json b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/ocfl_layout.json new file mode 100644 index 00000000..e2e09e8f --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo15/ocfl_layout.json @@ -0,0 +1,4 @@ +{ + "extension" : "0004-hashed-n-tuple-storage-layout", + "description" : "OCFL object identifiers are hashed and encoded as lowercase hex strings. These digests are then divided into N n-tuple segments, which are used to create nested paths under the OCFL storage root." +} \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/0004-hashed-n-tuple-storage-layout.md b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/0004-hashed-n-tuple-storage-layout.md new file mode 100644 index 00000000..81a4dc1b --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/0004-hashed-n-tuple-storage-layout.md @@ -0,0 +1,303 @@ +# OCFL Community Extension 0004: Hashed N-tuple Storage Layout + +* **Extension Name:** 0004-hashed-n-tuple-storage-layout +* **Authors:** Peter Winckles +* **Minimum OCFL Version:** 1.0 +* **OCFL Community Extensions Version:** 1.0 +* **Obsoletes:** n/a +* **Obsoleted by:** n/a + +## Overview + +This storage root extension describes how to safely map OCFL object identifiers +of any length, containing any characters to OCFL object root directories with +the primary goals of ensuring portability and filesystem performance at the cost +of directory name transparency. + +Using this extension, OCFL object identifiers are hashed and encoded +as lowercase hex strings. These digests are then divided into _N_ +n-tuple segments, which are used to create nested paths under the OCFL +storage root. 
+ +This approach allows OCFL object identifiers of any composition to be evenly +distributed across the storage hierarchy. The maximum number of files under any +given directory is controlled by the number of characters in each n-tuple, and +the tree depth is controlled by the number of n-tuple segments each digest is +divided into. Additionally, it obviates the need to handle special characters in +OCFL object identifiers because the mapped directory names will only ever +contain the characters `0-9a-f`. + +However, this comes at the cost of not being able to identify the OCFL object +identifier of an object simply by browsing the OCFL storage hierarchy. The ID of +an object may only be found within its `inventory.json`. + +## Parameters + +### Summary + +* **Name:** `digestAlgorithm` + * **Description:** The digest algorithm to apply on the OCFL object + identifier; MUST be an algorithm that is allowed in the OCFL fixity block + * **Type:** string + * **Constraints:** Must not be empty + * **Default:** sha256 +* **Name**: `tupleSize` + * **Description:** Indicates the segment size (in characters) to split the + digest is split into + * **Type:** number + * **Constraints:** An integer between 0 and 32 inclusive + * **Default:** 3 +* **Name:** `numberOfTuples` + * **Description:** Indicates the number of segments to use for path generation + * **Type:** number + * **Constraints:** An integer between 0 and 32 inclusive + * **Default:** 3 +* **Name:** `shortObjectRoot` + * **Description:** When true, indicates that the OCFL object root directory + name should contain the remainder of the digest not used in the n-tuple + segments + * **Type:** boolean + * **Default:** false + +### Details + +#### digestAlgorithm + +`digestAlgorithm` is defaulted to `sha256`, and it MUST either contain a digest +algorithm that's [officially supported by the OCFL +specification](https://ocfl.io/1.0/spec/#digest-algorithms) or defined in a community +extension. 
The specified algorithm is applied to OCFL object identifiers to +produce hex encoded digest values that are then mapped to OCFL object root +paths. + +#### tupleSize + +`tupleSize` determines the number of digest characters to include in +each tuple. The tuples are used as directory names. The default value +is `3`, which means that each intermediate directory in the OCFL +storage hierarchy could contain up to 4096 sub-directories. Increasing +this value increases the maximum number of sub-directories per +directory. + +If `tupleSize` is set to `0`, then no tuples are created and `numberOfTuples` +MUST also equal `0`. + +The product of `tupleSize` and `numberOfTuples` MUST be less than or equal to +the number of characters in the hex encoded digest. + +#### numberOfTuples + +`numberOfTuples` determines how many tuples to create from the digest. The +tuples are used as directory names, and each successive directory is nested +within the previous. The default value is `3`, which means that every OCFL +object root will be 4 directories removed from the OCFL storage root, 3 tuple +directories plus 1 encapsulation directory. Increasing this value increases the +depth of the OCFL storage hierarchy. + +If `numberOfTuples` is set to `0`, then no tuples are created and `tupleSize` +MUST also equal `0`. + +The product of `numberOfTuples` and `tupleSize` MUST be less than or equal to +the number of characters in the hex encoded digest. + +#### shortObjectRoot + +The directory that immediately encapsulates an OCFL object MUST either be named +using the entire digest or the remainder of the digest that was not used in a +tuple. When `shortObjectRoot` is set to `false`, the default, the entire digest +is used, and, when it's `true` only the previously unused remainder is used. + +If the product of `tupleSize` and `numberOfTuples` is equal to the number of +characters in the hex encoded digest, then `shortObjectRoot` MUST be `false`. 
+ +## Procedure + +The following is an outline of the steps to map an OCFL object identifier to an +OCFL object root path: + +1. The OCFL object identifier, UTF-8 encoded, is hashed using the specified + `digestAlgorithm`. +2. The digest is encoded as a lowercase hex string. +3. Starting at the beginning of the digest and working forwards, the digest is + divided into `numberOfTuples` tuples each containing `tupleSize` characters. +4. The tuples are joined, in order, using the filesystem path separator. +5. If `shortObjectRoot` is `true`, the remaining, unused portion of the digest + is joined on the end of this path. Otherwise, the entire digest is joined on + the end. + +## Examples + +### Example 1 + +This example demonstrates what the OCFL storage hierarchy looks like when using +the default configuration. + +#### Parameters + +It is not necessary to specify any parameters to use the default configuration. +However, if you were to do so, it would look like the following: + +```json +{ + "extensionName": "0004-hashed-n-tuple-storage-layout", + "digestAlgorithm": "sha256", + "tupleSize": 3, + "numberOfTuples": 3, + "shortObjectRoot": false +} +``` + +#### Mappings + +| Object ID | Digest | Object Root Path | +| --------- | ------ | ---------------- | +| object-01 | 3c0ff4240c1e116dba14c7627f2319b58aa3d77606d0d90dfc6161608ac987d4 | `3c0/ff4/240/3c0ff4240c1e116dba14c7627f2319b58aa3d77606d0d90dfc6161608ac987d4` | +| ..hor/rib:le-$id | 487326d8c2a3c0b885e23da1469b4d6671fd4e76978924b4443e9e3c316cda6d | `487/326/d8c/487326d8c2a3c0b885e23da1469b4d6671fd4e76978924b4443e9e3c316cda6d` | + +#### Storage Hierarchy + +``` +[storage_root]/ +├── 0=ocfl_1.0 +├── ocfl_layout.json +├── extensions/ +│ └── 0004-hashed-n-tuple-storage-layout/ +│ └── config.json +├── 3c0/ +│ └── ff4/ +│ └── 240/ +│ └── 3c0ff4240c1e116dba14c7627f2319b58aa3d77606d0d90dfc6161608ac987d4/ +│ ├── 0=ocfl_object_1.0 +│ ├── inventory.json +│ ├── inventory.json.sha512 +│ └── v1 [...] 
+└── 487/ + └── 326/ + └── d8c/ + └── 487326d8c2a3c0b885e23da1469b4d6671fd4e76978924b4443e9e3c316cda6d/ + ├── 0=ocfl_object_1.0 + ├── inventory.json + ├── inventory.json.sha512 + └── v1 [...] +``` + +### Example 2 + +This example demonstrates the effects of modifying the default parameters to use +a different `digestAlgoirthm`, smaller `tupleSize`, and a larger +`numberOfTuples`. + +#### Parameters + +```json +{ + "extensionName": "0004-hashed-n-tuple-storage-layout", + "digestAlgorithm": "md5", + "tupleSize": 2, + "numberOfTuples": 15, + "shortObjectRoot": true +} +``` + +#### Mappings + +| Object ID | Digest | Object Root Path | +| --------- | ------ | ---------------- | +| object-01 | ff75534492485eabb39f86356728884e | `ff/75/53/44/92/48/5e/ab/b3/9f/86/35/67/28/88/4e` | +| ..hor/rib:le-$id | 08319766fb6c2935dd175b94267717e0 | `08/31/97/66/fb/6c/29/35/dd/17/5b/94/26/77/17/e0` | + +#### Storage Hierarchy + +``` +[storage_root]/ +├── 0=ocfl_1.0 +├── ocfl_layout.json +├── extensions/ +│ └── 0004-hashed-n-tuple-storage-layout/ +│ └── config.json +├── 08/ +│ └── 31/ +│ └── 97/ +│ └── 66/ +│ └── fb/ +│ └── 6c/ +│ └── 29/ +│ └── 35/ +│ └── dd/ +│ └── 17/ +│ └── 5b/ +│ └── 94/ +│ └── 26/ +│ └── 77/ +│ └── 17/ +│ └── e0/ +│ ├── 0=ocfl_object_1.0 +│ ├── inventory.json +│ ├── inventory.json.sha512 +│ └── v1 [...] +└── ff/ + └── 75/ + └── 53/ + └── 44/ + └── 92/ + └── 48/ + └── 5e/ + └── ab/ + └── b3/ + └── 9f/ + └── 86/ + └── 35/ + └── 67/ + └── 28/ + └── 88/ + └── 4e/ + ├── 0=ocfl_object_1.0 + ├── inventory.json + ├── inventory.json.sha512 + └── v1 [...] +``` + +### Example 3 + +This example demonstrates what happens when `tupleSize` and `numberOfTuples` are +set to `0`. This is an edge case and not a recommended configuration. 
+ +#### Parameters + +```json +{ + "extensionName": "0004-hashed-n-tuple-storage-layout", + "digestAlgorithm": "sha256", + "tupleSize": 0, + "numberOfTuples": 0, + "shortObjectRoot": false +} +``` + +#### Mappings + +| Object ID | Digest | Object Root Path | +| --------- | ------ | ---------------- | +| object-01 | 3c0ff4240c1e116dba14c7627f2319b58aa3d77606d0d90dfc6161608ac987d4 | `3c0ff4240c1e116dba14c7627f2319b58aa3d77606d0d90dfc6161608ac987d4` | +| ..hor/rib:le-$id | 487326d8c2a3c0b885e23da1469b4d6671fd4e76978924b4443e9e3c316cda6d | `487326d8c2a3c0b885e23da1469b4d6671fd4e76978924b4443e9e3c316cda6d` | + +#### Storage Hierarchy + +``` +[storage_root]/ +├── 0=ocfl_1.0 +├── ocfl_layout.json +├── extensions/ +│ └── 0004-hashed-n-tuple-storage-layout/ +│ └── config.json +├── 3c0ff4240c1e116dba14c7627f2319b58aa3d77606d0d90dfc6161608ac987d4/ +│ ├── 0=ocfl_object_1.0 +│ ├── inventory.json +│ ├── inventory.json.sha512 +│ └── v1 [...] +└── 487326d8c2a3c0b885e23da1469b4d6671fd4e76978924b4443e9e3c316cda6d/ + ├── 0=ocfl_object_1.0 + ├── inventory.json + ├── inventory.json.sha512 + └── v1 [...] 
+``` diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/0=ocfl_1.1 b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/0=ocfl_1.1 new file mode 100644 index 00000000..0deb99e4 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/0=ocfl_1.1 @@ -0,0 +1 @@ +ocfl_1.1 diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/0=ocfl_object_1.1 b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/0=ocfl_object_1.1 new file mode 100644 index 00000000..14705cb1 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/0=ocfl_object_1.1 @@ -0,0 +1 @@ +ocfl_object_1.1 diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/inventory.json b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/inventory.json new file mode 100644 index 00000000..b7673122 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/inventory.json @@ -0,0 +1,67 @@ +{ + "id" : "o3", + "type" : "https://ocfl.io/1.1/spec/#inventory", + "digestAlgorithm" : "sha512", + "head" : "v4", + "contentDirectory" : "content", + "fixity" : { }, + "manifest" : { + "2870fe7622f9b84d39acfc3e85b6337f78118ebb207df60d944a11d0b127b20886d2278bd7e7ae728ad1c45136b29fc1efe25222cc3f5e3cb91fbec19edaf199" : [ "v4/content/file5" ], + 
"70ffe50550ae07cd0fc154cc1cd3a47b71499b5f67921b52219750441791981fb36476cd478440601bc26da16b28c8a2be4478b36091f2615ac94a575581902c" : [ "v2/content/dir1/file2" ], + "79c994f97612eb4ee6a3cb1fbbb45278da184ea73bfb483274bb783f0bce6a7bf8dd8cb0d4fc0eb2b065ebd28b2959b59d9a489929edf9ea7db4dcda8a09a76f" : [ "v2/content/file3" ], + "9c614ba0d58c976d0b39f8f5536eb8af89fae745cbe3783ac2ca3e3055bb0b1e3687417a1d1104288d2883a4368d3dacb9931460c6e523117ff3eaa28810481a" : [ "v1/content/file1" ] + }, + "versions" : { + "v1" : { + "created" : "2019-08-05T15:57:53Z", + "message" : "commit message", + "user" : { + "name" : "Peter", + "address" : "peter@example.com" + }, + "state" : { + "9c614ba0d58c976d0b39f8f5536eb8af89fae745cbe3783ac2ca3e3055bb0b1e3687417a1d1104288d2883a4368d3dacb9931460c6e523117ff3eaa28810481a" : [ "file1" ] + } + }, + "v2" : { + "created" : "2019-08-05T15:57:53Z", + "message" : "2", + "user" : { + "name" : "Peter", + "address" : "peter@example.com" + }, + "state" : { + "70ffe50550ae07cd0fc154cc1cd3a47b71499b5f67921b52219750441791981fb36476cd478440601bc26da16b28c8a2be4478b36091f2615ac94a575581902c" : [ "dir2/file3" ], + "79c994f97612eb4ee6a3cb1fbbb45278da184ea73bfb483274bb783f0bce6a7bf8dd8cb0d4fc0eb2b065ebd28b2959b59d9a489929edf9ea7db4dcda8a09a76f" : [ "file3" ], + "9c614ba0d58c976d0b39f8f5536eb8af89fae745cbe3783ac2ca3e3055bb0b1e3687417a1d1104288d2883a4368d3dacb9931460c6e523117ff3eaa28810481a" : [ "file1" ] + } + }, + "v3" : { + "created" : "2019-08-05T15:57:53Z", + "message" : "3", + "user" : { + "name" : "Peter", + "address" : "peter@example.com" + }, + "state" : { + "70ffe50550ae07cd0fc154cc1cd3a47b71499b5f67921b52219750441791981fb36476cd478440601bc26da16b28c8a2be4478b36091f2615ac94a575581902c" : [ "dir2/file3" ], + "79c994f97612eb4ee6a3cb1fbbb45278da184ea73bfb483274bb783f0bce6a7bf8dd8cb0d4fc0eb2b065ebd28b2959b59d9a489929edf9ea7db4dcda8a09a76f" : [ "file3" ], + 
"9c614ba0d58c976d0b39f8f5536eb8af89fae745cbe3783ac2ca3e3055bb0b1e3687417a1d1104288d2883a4368d3dacb9931460c6e523117ff3eaa28810481a" : [ "file1" ] + } + }, + "v4" : { + "created" : "2019-08-05T15:57:53Z", + "message" : "3", + "user" : { + "name" : "Peter", + "address" : "peter@example.com" + }, + "state" : { + "2870fe7622f9b84d39acfc3e85b6337f78118ebb207df60d944a11d0b127b20886d2278bd7e7ae728ad1c45136b29fc1efe25222cc3f5e3cb91fbec19edaf199" : [ "file5" ], + "70ffe50550ae07cd0fc154cc1cd3a47b71499b5f67921b52219750441791981fb36476cd478440601bc26da16b28c8a2be4478b36091f2615ac94a575581902c" : [ "dir2/file3" ], + "79c994f97612eb4ee6a3cb1fbbb45278da184ea73bfb483274bb783f0bce6a7bf8dd8cb0d4fc0eb2b065ebd28b2959b59d9a489929edf9ea7db4dcda8a09a76f" : [ "file3" ], + "9c614ba0d58c976d0b39f8f5536eb8af89fae745cbe3783ac2ca3e3055bb0b1e3687417a1d1104288d2883a4368d3dacb9931460c6e523117ff3eaa28810481a" : [ "file1" ] + } + } + } +} \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/inventory.json.sha512 b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/inventory.json.sha512 new file mode 100644 index 00000000..94f4c1ab --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/inventory.json.sha512 @@ -0,0 +1 @@ +30a3924608f567a5d0b1f65f54946bef2a89c94f3a7affaaced6019fe348dc8363938f432a0ebd3e6227489ec16aff2255446f52de3756c18b89d5a9c15bf18c inventory.json diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v1/content/file1 
b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v1/content/file1 new file mode 100644 index 00000000..49351eb5 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v1/content/file1 @@ -0,0 +1 @@ +File 1 \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v1/inventory.json b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v1/inventory.json new file mode 100644 index 00000000..e0f738ff --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v1/inventory.json @@ -0,0 +1,24 @@ +{ + "id" : "o3", + "type" : "https://ocfl.io/1.1/spec/#inventory", + "digestAlgorithm" : "sha512", + "head" : "v1", + "contentDirectory" : "content", + "fixity" : { }, + "manifest" : { + "9c614ba0d58c976d0b39f8f5536eb8af89fae745cbe3783ac2ca3e3055bb0b1e3687417a1d1104288d2883a4368d3dacb9931460c6e523117ff3eaa28810481a" : [ "v1/content/file1" ] + }, + "versions" : { + "v1" : { + "created" : "2019-08-05T15:57:53Z", + "message" : "commit message", + "user" : { + "name" : "Peter", + "address" : "peter@example.com" + }, + "state" : { + "9c614ba0d58c976d0b39f8f5536eb8af89fae745cbe3783ac2ca3e3055bb0b1e3687417a1d1104288d2883a4368d3dacb9931460c6e523117ff3eaa28810481a" : [ "file1" ] + } + } + } +} \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v1/inventory.json.sha512 
b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v1/inventory.json.sha512 new file mode 100644 index 00000000..437fe9a9 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v1/inventory.json.sha512 @@ -0,0 +1 @@ +53876869be2d544f58e25262264b1e7246121db66458e1698ccbb46610392650434457e46ef6db0e19b8dc764c420ec3bbb8f50a10482c7696347c2ca5c20e32 inventory.json diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v2/content/dir1/file2 b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v2/content/dir1/file2 new file mode 100644 index 00000000..9fbb45ed --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v2/content/dir1/file2 @@ -0,0 +1 @@ +File 2 \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v2/content/file3 b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v2/content/file3 new file mode 100644 index 00000000..7b648e9c --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v2/content/file3 @@ -0,0 +1 @@ +File 3 \ No newline at end of file diff --git 
a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v2/inventory.json b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v2/inventory.json new file mode 100644 index 00000000..3e9b5a8e --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v2/inventory.json @@ -0,0 +1,39 @@ +{ + "id" : "o3", + "type" : "https://ocfl.io/1.1/spec/#inventory", + "digestAlgorithm" : "sha512", + "head" : "v2", + "contentDirectory" : "content", + "fixity" : { }, + "manifest" : { + "70ffe50550ae07cd0fc154cc1cd3a47b71499b5f67921b52219750441791981fb36476cd478440601bc26da16b28c8a2be4478b36091f2615ac94a575581902c" : [ "v2/content/dir1/file2" ], + "79c994f97612eb4ee6a3cb1fbbb45278da184ea73bfb483274bb783f0bce6a7bf8dd8cb0d4fc0eb2b065ebd28b2959b59d9a489929edf9ea7db4dcda8a09a76f" : [ "v2/content/file3" ], + "9c614ba0d58c976d0b39f8f5536eb8af89fae745cbe3783ac2ca3e3055bb0b1e3687417a1d1104288d2883a4368d3dacb9931460c6e523117ff3eaa28810481a" : [ "v1/content/file1" ] + }, + "versions" : { + "v1" : { + "created" : "2019-08-05T15:57:53Z", + "message" : "commit message", + "user" : { + "name" : "Peter", + "address" : "peter@example.com" + }, + "state" : { + "9c614ba0d58c976d0b39f8f5536eb8af89fae745cbe3783ac2ca3e3055bb0b1e3687417a1d1104288d2883a4368d3dacb9931460c6e523117ff3eaa28810481a" : [ "file1" ] + } + }, + "v2" : { + "created" : "2019-08-05T15:57:53Z", + "message" : "2", + "user" : { + "name" : "Peter", + "address" : "peter@example.com" + }, + "state" : { + "70ffe50550ae07cd0fc154cc1cd3a47b71499b5f67921b52219750441791981fb36476cd478440601bc26da16b28c8a2be4478b36091f2615ac94a575581902c" : [ "dir2/file3" ], + 
"79c994f97612eb4ee6a3cb1fbbb45278da184ea73bfb483274bb783f0bce6a7bf8dd8cb0d4fc0eb2b065ebd28b2959b59d9a489929edf9ea7db4dcda8a09a76f" : [ "file3" ], + "9c614ba0d58c976d0b39f8f5536eb8af89fae745cbe3783ac2ca3e3055bb0b1e3687417a1d1104288d2883a4368d3dacb9931460c6e523117ff3eaa28810481a" : [ "file1" ] + } + } + } +} \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v2/inventory.json.sha512 b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v2/inventory.json.sha512 new file mode 100644 index 00000000..c7d12058 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v2/inventory.json.sha512 @@ -0,0 +1 @@ +4cc9f9c9e393ee6ddee579970ba6db0a5bf69f65f88a875e4f73189072d90a6e2d3d3d3672fc7bca4d81369c5a1d96837713c1d89f398b78c8a117412925720c inventory.json diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v3/content/.gitkeep b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v3/content/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v3/inventory.json b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v3/inventory.json new file mode 100644 index 00000000..0e78e561 --- /dev/null +++ 
b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v3/inventory.json @@ -0,0 +1,52 @@ +{ + "id" : "o3", + "type" : "https://ocfl.io/1.1/spec/#inventory", + "digestAlgorithm" : "sha512", + "head" : "v3", + "contentDirectory" : "content", + "fixity" : { }, + "manifest" : { + "70ffe50550ae07cd0fc154cc1cd3a47b71499b5f67921b52219750441791981fb36476cd478440601bc26da16b28c8a2be4478b36091f2615ac94a575581902c" : [ "v2/content/dir1/file2" ], + "79c994f97612eb4ee6a3cb1fbbb45278da184ea73bfb483274bb783f0bce6a7bf8dd8cb0d4fc0eb2b065ebd28b2959b59d9a489929edf9ea7db4dcda8a09a76f" : [ "v2/content/file3" ], + "9c614ba0d58c976d0b39f8f5536eb8af89fae745cbe3783ac2ca3e3055bb0b1e3687417a1d1104288d2883a4368d3dacb9931460c6e523117ff3eaa28810481a" : [ "v1/content/file1" ] + }, + "versions" : { + "v1" : { + "created" : "2019-08-05T15:57:53Z", + "message" : "commit message", + "user" : { + "name" : "Peter", + "address" : "peter@example.com" + }, + "state" : { + "9c614ba0d58c976d0b39f8f5536eb8af89fae745cbe3783ac2ca3e3055bb0b1e3687417a1d1104288d2883a4368d3dacb9931460c6e523117ff3eaa28810481a" : [ "file1" ] + } + }, + "v2" : { + "created" : "2019-08-05T15:57:53Z", + "message" : "2", + "user" : { + "name" : "Peter", + "address" : "peter@example.com" + }, + "state" : { + "70ffe50550ae07cd0fc154cc1cd3a47b71499b5f67921b52219750441791981fb36476cd478440601bc26da16b28c8a2be4478b36091f2615ac94a575581902c" : [ "dir2/file3" ], + "79c994f97612eb4ee6a3cb1fbbb45278da184ea73bfb483274bb783f0bce6a7bf8dd8cb0d4fc0eb2b065ebd28b2959b59d9a489929edf9ea7db4dcda8a09a76f" : [ "file3" ], + "9c614ba0d58c976d0b39f8f5536eb8af89fae745cbe3783ac2ca3e3055bb0b1e3687417a1d1104288d2883a4368d3dacb9931460c6e523117ff3eaa28810481a" : [ "file1" ] + } + }, + "v3" : { + "created" : "2019-08-05T15:57:53Z", + "message" : "3", + "user" : { + "name" : "Peter", + "address" : "peter@example.com" + }, + "state" : { + 
"70ffe50550ae07cd0fc154cc1cd3a47b71499b5f67921b52219750441791981fb36476cd478440601bc26da16b28c8a2be4478b36091f2615ac94a575581902c" : [ "dir2/file3" ], + "79c994f97612eb4ee6a3cb1fbbb45278da184ea73bfb483274bb783f0bce6a7bf8dd8cb0d4fc0eb2b065ebd28b2959b59d9a489929edf9ea7db4dcda8a09a76f" : [ "file3" ], + "9c614ba0d58c976d0b39f8f5536eb8af89fae745cbe3783ac2ca3e3055bb0b1e3687417a1d1104288d2883a4368d3dacb9931460c6e523117ff3eaa28810481a" : [ "file1" ] + } + } + } +} \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v3/inventory.json.sha512 b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v3/inventory.json.sha512 new file mode 100644 index 00000000..a3fafdc7 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v3/inventory.json.sha512 @@ -0,0 +1 @@ +6b85187577372f8eb1377b6db79df82eecf965bf7175c26355278341c5766556a2af7d9b1f35db4ffdd33826352e4f76cfbd8c77342af1ab1d68d804c9821857 inventory.json diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v4/content/file5 b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v4/content/file5 new file mode 100644 index 00000000..a205b376 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v4/content/file5 @@ -0,0 +1 @@ +6543210 \ No newline at end of file diff --git 
a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v4/inventory.json b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v4/inventory.json new file mode 100644 index 00000000..b7673122 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v4/inventory.json @@ -0,0 +1,67 @@ +{ + "id" : "o3", + "type" : "https://ocfl.io/1.1/spec/#inventory", + "digestAlgorithm" : "sha512", + "head" : "v4", + "contentDirectory" : "content", + "fixity" : { }, + "manifest" : { + "2870fe7622f9b84d39acfc3e85b6337f78118ebb207df60d944a11d0b127b20886d2278bd7e7ae728ad1c45136b29fc1efe25222cc3f5e3cb91fbec19edaf199" : [ "v4/content/file5" ], + "70ffe50550ae07cd0fc154cc1cd3a47b71499b5f67921b52219750441791981fb36476cd478440601bc26da16b28c8a2be4478b36091f2615ac94a575581902c" : [ "v2/content/dir1/file2" ], + "79c994f97612eb4ee6a3cb1fbbb45278da184ea73bfb483274bb783f0bce6a7bf8dd8cb0d4fc0eb2b065ebd28b2959b59d9a489929edf9ea7db4dcda8a09a76f" : [ "v2/content/file3" ], + "9c614ba0d58c976d0b39f8f5536eb8af89fae745cbe3783ac2ca3e3055bb0b1e3687417a1d1104288d2883a4368d3dacb9931460c6e523117ff3eaa28810481a" : [ "v1/content/file1" ] + }, + "versions" : { + "v1" : { + "created" : "2019-08-05T15:57:53Z", + "message" : "commit message", + "user" : { + "name" : "Peter", + "address" : "peter@example.com" + }, + "state" : { + "9c614ba0d58c976d0b39f8f5536eb8af89fae745cbe3783ac2ca3e3055bb0b1e3687417a1d1104288d2883a4368d3dacb9931460c6e523117ff3eaa28810481a" : [ "file1" ] + } + }, + "v2" : { + "created" : "2019-08-05T15:57:53Z", + "message" : "2", + "user" : { + "name" : "Peter", + "address" : "peter@example.com" + }, + "state" : { + 
"70ffe50550ae07cd0fc154cc1cd3a47b71499b5f67921b52219750441791981fb36476cd478440601bc26da16b28c8a2be4478b36091f2615ac94a575581902c" : [ "dir2/file3" ], + "79c994f97612eb4ee6a3cb1fbbb45278da184ea73bfb483274bb783f0bce6a7bf8dd8cb0d4fc0eb2b065ebd28b2959b59d9a489929edf9ea7db4dcda8a09a76f" : [ "file3" ], + "9c614ba0d58c976d0b39f8f5536eb8af89fae745cbe3783ac2ca3e3055bb0b1e3687417a1d1104288d2883a4368d3dacb9931460c6e523117ff3eaa28810481a" : [ "file1" ] + } + }, + "v3" : { + "created" : "2019-08-05T15:57:53Z", + "message" : "3", + "user" : { + "name" : "Peter", + "address" : "peter@example.com" + }, + "state" : { + "70ffe50550ae07cd0fc154cc1cd3a47b71499b5f67921b52219750441791981fb36476cd478440601bc26da16b28c8a2be4478b36091f2615ac94a575581902c" : [ "dir2/file3" ], + "79c994f97612eb4ee6a3cb1fbbb45278da184ea73bfb483274bb783f0bce6a7bf8dd8cb0d4fc0eb2b065ebd28b2959b59d9a489929edf9ea7db4dcda8a09a76f" : [ "file3" ], + "9c614ba0d58c976d0b39f8f5536eb8af89fae745cbe3783ac2ca3e3055bb0b1e3687417a1d1104288d2883a4368d3dacb9931460c6e523117ff3eaa28810481a" : [ "file1" ] + } + }, + "v4" : { + "created" : "2019-08-05T15:57:53Z", + "message" : "3", + "user" : { + "name" : "Peter", + "address" : "peter@example.com" + }, + "state" : { + "2870fe7622f9b84d39acfc3e85b6337f78118ebb207df60d944a11d0b127b20886d2278bd7e7ae728ad1c45136b29fc1efe25222cc3f5e3cb91fbec19edaf199" : [ "file5" ], + "70ffe50550ae07cd0fc154cc1cd3a47b71499b5f67921b52219750441791981fb36476cd478440601bc26da16b28c8a2be4478b36091f2615ac94a575581902c" : [ "dir2/file3" ], + "79c994f97612eb4ee6a3cb1fbbb45278da184ea73bfb483274bb783f0bce6a7bf8dd8cb0d4fc0eb2b065ebd28b2959b59d9a489929edf9ea7db4dcda8a09a76f" : [ "file3" ], + "9c614ba0d58c976d0b39f8f5536eb8af89fae745cbe3783ac2ca3e3055bb0b1e3687417a1d1104288d2883a4368d3dacb9931460c6e523117ff3eaa28810481a" : [ "file1" ] + } + } + } +} \ No newline at end of file diff --git 
a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v4/inventory.json.sha512 b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v4/inventory.json.sha512 new file mode 100644 index 00000000..94f4c1ab --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/de2/d91/dc0/de2d91dc0a2580414e9a70f7dfc76af727b69cac0838f2cbe0a88d12642efcbf/v4/inventory.json.sha512 @@ -0,0 +1 @@ +30a3924608f567a5d0b1f65f54946bef2a89c94f3a7affaaced6019fe348dc8363938f432a0ebd3e6227489ec16aff2255446f52de3756c18b89d5a9c15bf18c inventory.json diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/extensions/0004-hashed-n-tuple-storage-layout/config.json b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/extensions/0004-hashed-n-tuple-storage-layout/config.json new file mode 100644 index 00000000..4644b116 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/extensions/0004-hashed-n-tuple-storage-layout/config.json @@ -0,0 +1,7 @@ +{ + "digestAlgorithm" : "sha256", + "tupleSize" : 3, + "numberOfTuples" : 3, + "shortObjectRoot" : false, + "extensionName" : "0004-hashed-n-tuple-storage-layout" +} \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/ocfl_extensions_1.0.md b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/ocfl_extensions_1.0.md new file mode 100644 index 00000000..23582668 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/ocfl_extensions_1.0.md @@ -0,0 +1,118 @@ +# OCFL Community Extensions + +**Version**: 1.0 + +This repository contains community extensions to the [OCFL Specification and Implementation Notes](https://ocfl.io/). 
Extensions are a means of adding new functionality and documenting standards outside of the main OCFL specification process. For example, storage layout extensions define how OCFL object IDs are mapped to OCFL object root directories within an OCFL storage root. This mapping is outside of the scope of the OCFL specification, but is valuable information to capture so that repositories are self-describing and easily accessible using generic OCFL tooling. + +This is a community driven repository. Community members are encouraged to contribute by submitting new extensions and reviewing others' submissions. For more details, see the [review/merge policy](#review--merge-policy) below. + +See the current set of [adopted extensions](https://ocfl.github.io/extensions/) and [extensions open for review and discussion](https://github.com/OCFL/extensions/pulls). + +## Using Community Extensions + +To use OCFL extensions you first need an OCFL client that supports the desired extensions. OCFL clients are not required to support extensions to be compliant with the OCFL specification, and the extensions that any given client supports will vary. The idea behind this repository is to encourage the development and implementation of common extensions so that there can be interoperability between OCFL clients. + +## Implementing Community Extensions + +Reference the OCFL specification's description of [object extensions](https://ocfl.io/1.0/spec/#object-extensions) and [storage root extensions](https://ocfl.io/1.0/spec/#storage-root-extensions). + +The OCFL storage root MAY contain a copy of an extension's specification. + +Each extension specification details how it should be implemented, but there are a few rules that apply to every extension. + +A *root extension directory* refers to the directory named `extensions` that is located in either the storage root or an object root. 
An *extension directory* is an extension specific directory that is the child of a root extension directory and MUST be named using the extension's *Registered Name*, or `initial` (see [Optional Initial Extension](#optional-initial-extension)). For example, `extensions/0000-example-extension` is the extension directory for the extension [0000-example-extension](docs/0000-example-extension.md). + +### Configuration Files + +An extension's parameters are serialized as a JSON object and written to a configuration file named `config.json` within the extension's extension directory. + +If an extension includes a configuration file, one of the properties in that file MUST be `extensionName`, where the value is the *Registered Name* of the extension. + +For example, the extension [0000-example-extension](docs/0000-example-extension.md) could be parameterized as follows: + +```json +{ + "extensionName": "0000-example-extension", + "firstExampleParameter": 12, + "secondExampleParameter": "Hello", + "thirdExampleParameter": "Green" +} +``` + +Based on how the extension is used, its configuration file is written to one of the following locations, relative the storage root: + +* `extensions/0000-example-extension/config.json`, if it is a [storage root extension](https://ocfl.io/1.0/spec/#storage-root-extensions) +* `OBJECT_ROOT/extensions/0000-example-extension/config.json`, if it is an [object extension](https://ocfl.io/1.0/spec/#object-extensions) + +### Undefined Behavior + +It is conceivable that some extensions may not be compatible with other extensions, or may be rendered incompatible based on how they're implemented in a client. For example, suppose that there are multiple extensions that define how logs should be written to an object's log directory. You could declare that your objects are using multiple log extensions, but the result is undefined and up to the implementing client. 
It may only write one log format or the other, it may write all of them, or it may reject the configuration entirely. + +Because OCFL clients are not required to implement any or all extensions, it is also possible that a client may encounter an extension that it does not implement. In these cases, it is up to the client to decide how to proceed. A client may fail on unsupported extensions, or it may choose to ignore the extensions and carry on. + +### Optional Initial Extension + +A _root extension directory_ MAY optionally contain an _initial_ extension that, if it exists, SHOULD be applied before all other extensions in the directory. +An _initial extension_ is identified by the extension directory name "initial". + +An _initial extension_ could be used to address some of the [undefined behaviors](#undefined-behavior), define how extensions are applied, and answer questions such as: + +- Is an extension deactivated, only applying to earlier versions of the object? +- Should extensions be applied in a specific order? +- Does one extension depend on another? + +## Specifying Community Extensions + +### Layout + +Community extensions MUST be written as GitHub flavored markdown files in the `docs` directory of this repository. The +filename of an extension is based on its *Registered Name* with a `.md` extension. + +Extensions are numbered sequentially, and the *Registered Name* of an extension is prefixed with this 4-digit, zero-padded +decimal number. The *Registered Name* should be descriptive, use hyphens to separate words, and have a maximum of 250 +characters in total. + +New extensions should use `NNNN` as a place-holder for the next available prefix number at the time of merging. New extension pull-requests should not update the index document (`docs/index.md`), this will be done post-approval. + +Extensions are intended to be mostly static once published. 
Substantial revisions of content beyond simple fixes warrants publishing a new extension, and marking the old extension obsolete by updating the *Obsoletes/Obsoleted by* sections in each extension respectively. + +An example/template is available in this repository as "[OCFL Community Extension 0000: Example Extension](docs/0000-example-extension.md)" and is rendered +via GitHub pages as https://ocfl.github.io/extensions/0000-example-extension + +### Headers + +Extension definitions MUST contain a header section that defines the following fields: + +* **Extension Name**: The extension's unique *Registered Name* +* **Authors**: The names of the individuals who authored the extension +* **Minimum OCFL Version**: The minimum OCFL version that the extension requires, eg. *1.0* +* **OCFL Community Extensions Version**: The version of the OCFL Extensions Specification that the extension conforms to, eg. *1.0* +* **Obsoletes**: The *Registered Name* of the extension that this extension obsoletes, or *n/a* +* **Obsoleted by**: The *Registered Name* of the extension that obsoletes this extension, or *n/a* + +### Parameters + +Extension definitions MAY define parameters to enable configuration as needed. Extension parameters are serialized as JSON values, and therefore must conform to the [JSON specification](https://tools.ietf.org/html/rfc8259). Parameters MUST be defined in the following structure: + +* **Name**: A short, descriptive name for the parameter. The name is used as the parameter's key within its JSON representation. + * **Description**: A brief description of the function of the parameter. This should be expanded on in the main description of the extension which MUST reference all the parameters. + * **Type**: The JSON data type of the parameter value. One of `string`, `number`, `boolean`, `array`, or `object`. The structure of complex types MUST be further described. + * **Constraints**: A description of any constraints to apply to parameter values. 
Constraints may be plain text, regular expressions, [JSON Schema](https://www.ietf.org/archive/id/draft-handrews-json-schema-02.txt), or whatever makes the most sense for the extension. + * **Default**: The default value of parameter. If no default is specified, then the parameter is mandatory. + +### Body + +Each specification MUST thoroughly document how it is intended to be implemented and used, including detailed examples is helpful. If the extension uses parameters, the parameters MUST be described in detail in the body of the specification. + +## Review / Merge Policy + +1. A pull-request is submitted per the guidelines described in the "[Organization of this repository](https://github.com/OCFL/extensions#organization-of-this-repository)" section of this document +1. Authors of (legitimate) pull-requests will be added by an owner of the OCFL GitHub organization to the [extension-authors](https://github.com/orgs/OCFL/teams/extension-authors) team + - The purpose of being added to this team is to enable adding `labels` to their pull-request(s) +1. If a pull-request is submitted in order to facilitate discussion, the `draft` label should be applied by the author +1. If a pull-request is ready for review, it should have a title that is suitable for merge (i.e. not have a title indicating "draft"), and optionally have the `in-review` label applied by the author +1. A pull-request must be merged by an OCFL Editor if the following criteria are met: + 1. At least two OCFL Editors have "[Approved](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/approving-a-pull-request-with-required-reviews)" the pull-request + 1. At least one other community member has "[Approved](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/approving-a-pull-request-with-required-reviews)" the pull-request + 1. The approvers represent three distinct organizations +1. 
After the pull-request has been merged with `NNNN` as a placeholder for the extension number in the _Registered Name_, an OCFL Editor will determine the extension number based on the next sequentially available number. They will create an additional administrative pull-request to change `NNNN` to the appropriate number in the extension file name and the extension document itself, as well as adding an entry to the index page entry (`docs/index.md`). \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/ocfl_layout.json b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/ocfl_layout.json new file mode 100644 index 00000000..e2e09e8f --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/repo17/ocfl_layout.json @@ -0,0 +1,4 @@ +{ + "extension" : "0004-hashed-n-tuple-storage-layout", + "description" : "OCFL object identifiers are hashed and encoded as lowercase hex strings. These digests are then divided into N n-tuple segments, which are used to create nested paths under the OCFL storage root." 
+} \ No newline at end of file diff --git a/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/test.txt b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/test.txt new file mode 100644 index 00000000..588846e7 --- /dev/null +++ b/ocfl-java-itest/src/test/resources/expected/output/repo18/o1v4/test.txt @@ -0,0 +1 @@ +testtesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttest \ No newline at end of file From d52b346d0859fb7b133b2eb6ac94d947f549d29e Mon Sep 17 00:00:00 2001 From: Peter Winckles Date: Sat, 2 Mar 2024 14:12:17 -0600 Subject: [PATCH 13/21] tweak concurrent docs --- docs/USAGE.md | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/docs/USAGE.md b/docs/USAGE.md index 5854cf7d..2562fb8b 100644 --- a/docs/USAGE.md +++ b/docs/USAGE.md @@ -268,31 +268,28 @@ implementation. If your objects have a lot of files, then you _might_ get better performance by parallelizing file reads and writes. Parallel writes -are only supported as of `ocfl-java` 2.0.0 or later. `ocfl-java` does +are only supported as of `ocfl-java` 2.1.0 or later. 
`ocfl-java` does not do this for you automatically, but the following is some example code of one possible way that you could implement parallel writes to an object: ```java -repo.updateObject(ObjectVersionId.head(objectId), versionInfo, updater -> { - List> futures; - try (var files = Files.find( - objectPath, Integer.MAX_VALUE, (file, attrs) -> attrs.isRegularFile())) { - futures = files.map(file -> executor.submit(() -> { - var logical = objectPath - .relativize(file) - .toString(); - updater.addPath(file, logical); - })) - .toList(); +repo.updateObject(ObjectVersionId.head(objectId), null, updater -> { + List> futures; + + try (var files = Files.find(sourceDir, Integer.MAX_VALUE, (file, attrs) -> attrs.isRegularFile())) { + futures = files.map(file -> executor.submit(() -> updater.addPath( + file, sourceDir.relativize(file).toString()))) + .collect(Collectors.toList()); } catch (IOException e) { throw new UncheckedIOException(e); } + futures.forEach(future -> { try { future.get(); } catch (Exception e) { - throw new RuntimeException("Error adding file to object " + objectId, e); + throw new RuntimeException(e); } }); }); From e1791660d36dc3c930d71a529c33abe036587d4a Mon Sep 17 00:00:00 2001 From: Peter Winckles Date: Sun, 3 Mar 2024 12:53:59 -0600 Subject: [PATCH 14/21] update changelog --- CHANGELOG.md | 16 ++++++++++++++++ docs/USAGE.md | 2 +- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8616fd6a..b13451f3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,22 @@ ## [Unreleased] - ReleaseDate +### Fixed + +- Deleting an object in S3 that contains more than 1,000 files now works. +- Writing to files with identical content and writing the first file a second time to the same version no longer causes + the staged file to be erroneously deleted. + +### Changed + +- **Breaking:** An `S3AsyncClient` S3 client must now be used with ocfl-java-aws, and the sync version is no longer supported.
+- ocfl-java-aws now uses the [S3 Transfer Manager](https://docs.aws.amazon.com/sdk-for-java/latest/developer-guide/transfer-manager.html) + to upload and download files from S3. See the [usage guide](docs/USAGE.md#s3-transfer-manager) for more details. +- ocfl-java-aws now concurrently uploads files when writing an object to S3. This should improve object write performance. +- The `OcflObjectUpdater` was updated to be thread safe, enabling concurrently writing files to it. This _may_ speed up + writing a large number of files to an object. See the [usage guide](docs/USAGE.md#improving-write-performance) for + more details. + ## [2.0.1] - 2024-03-01 ### Fixed diff --git a/docs/USAGE.md b/docs/USAGE.md index 2562fb8b..c1a9430d 100644 --- a/docs/USAGE.md +++ b/docs/USAGE.md @@ -190,7 +190,7 @@ Unfortunately, from our testing, it appears that the CRT client only works with the official AWS S3, and it does not work with third party implementations. So, if you are using a third party implementation, please make sure you wrap your client in a `MultipartS3AsyncClient`. -Otherwise, you will experience performance degredation. +Otherwise, you will experience performance degradation. 
If you do not specify a transfer manager when constructing the `OcflS3Client`, then it will create the default transfer manager using From 403e4eb27e4b24e878e58c73a69ccd87dcdd9c06 Mon Sep 17 00:00:00 2001 From: Peter Winckles Date: Sun, 3 Mar 2024 15:50:51 -0600 Subject: [PATCH 15/21] fix race condition creating and deleting dirs when doing concurrent writes --- .../ocfl/core/DefaultMutableOcflRepository.java | 4 ++-- .../io/ocfl/core/DefaultOcflObjectUpdater.java | 17 +++++++++++++++-- .../io/ocfl/core/DefaultOcflRepository.java | 14 ++++++++++---- .../ocfl/core/inventory/AddFileProcessor.java | 15 ++++++++++++++- .../main/java/io/ocfl/core/util/FileUtil.java | 4 +++- .../java/io/ocfl/core/util/FileUtilTest.java | 3 +-- 6 files changed, 45 insertions(+), 12 deletions(-) diff --git a/ocfl-java-core/src/main/java/io/ocfl/core/DefaultMutableOcflRepository.java b/ocfl-java-core/src/main/java/io/ocfl/core/DefaultMutableOcflRepository.java index 64d33f76..6c4beb0f 100644 --- a/ocfl-java-core/src/main/java/io/ocfl/core/DefaultMutableOcflRepository.java +++ b/ocfl-java-core/src/main/java/io/ocfl/core/DefaultMutableOcflRepository.java @@ -143,7 +143,7 @@ public ObjectVersionId stageChanges( try { objectUpdater.accept(updater); var newInventory = buildNewInventory(inventoryUpdater, versionInfo); - writeNewVersion(newInventory, stagingDir, false); + writeNewVersion(newInventory, stagingDir, false, updater.checkForEmptyDirs()); return ObjectVersionId.version(objectVersionId.getObjectId(), newInventory.getHead()); } finally { FileUtil.safeDeleteDirectory(stagingDir); @@ -231,7 +231,7 @@ private Inventory createAndPersistEmptyVersion(ObjectVersionId objectId) { .build()) .build(); - writeNewVersion(inventory, stagingDir, false); + writeNewVersion(inventory, stagingDir, false, false); return inventory; } finally { FileUtil.safeDeleteDirectory(stagingDir); diff --git a/ocfl-java-core/src/main/java/io/ocfl/core/DefaultOcflObjectUpdater.java 
b/ocfl-java-core/src/main/java/io/ocfl/core/DefaultOcflObjectUpdater.java index 6797e7c6..ec73120f 100644 --- a/ocfl-java-core/src/main/java/io/ocfl/core/DefaultOcflObjectUpdater.java +++ b/ocfl-java-core/src/main/java/io/ocfl/core/DefaultOcflObjectUpdater.java @@ -48,6 +48,7 @@ import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicBoolean; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -64,6 +65,7 @@ public class DefaultOcflObjectUpdater implements OcflObjectUpdater { private final AddFileProcessor addFileProcessor; private final FileLocker fileLocker; private final Map stagedFileMap; + private final AtomicBoolean checkForEmptyDirs; public DefaultOcflObjectUpdater( Inventory inventory, @@ -77,6 +79,7 @@ public DefaultOcflObjectUpdater( this.addFileProcessor = Enforce.notNull(addFileProcessor, "addFileProcessor cannot be null"); this.fileLocker = Enforce.notNull(fileLocker, "fileLocker cannot be null"); this.stagedFileMap = new ConcurrentHashMap<>(); + this.checkForEmptyDirs = new AtomicBoolean(false); } @Override @@ -145,7 +148,7 @@ public OcflObjectUpdater writeFile(InputStream input, String destinationPath, Oc ((FixityCheckInputStream) input).checkFixity(); } catch (FixityCheckException e) { FileUtil.safeDelete(stagingFullPath); - FileUtil.deleteDirAndParentsIfEmpty(stagingFullPath.getParent(), stagingDir); + checkForEmptyDirs.set(true); throw e; } } @@ -168,7 +171,7 @@ public OcflObjectUpdater writeFile(InputStream input, String destinationPath, Oc stagingFullPath, digest); UncheckedFiles.delete(stagingFullPath); - FileUtil.deleteDirAndParentsIfEmpty(stagingFullPath.getParent(), stagingDir); + checkForEmptyDirs.set(true); } else { stagedFileMap.put(destinationPath, stagingFullPath); } @@ -315,6 +318,16 @@ public OcflObjectUpdater clearFixityBlock() { return this; } + /** + * Returns true if the processor deleted a file and thus we need to look for empty directories to 
delete prior to + * writing the version. + * + * @return true if we need to look for empty directories + */ + public boolean checkForEmptyDirs() { + return checkForEmptyDirs.get() || addFileProcessor.checkForEmptyDirs(); + } + private void removeUnneededStagedFiles(Set removeFiles) { removeFiles.forEach(remove -> { var stagingPath = stagingFullPath(remove.getPathUnderContentDir()); diff --git a/ocfl-java-core/src/main/java/io/ocfl/core/DefaultOcflRepository.java b/ocfl-java-core/src/main/java/io/ocfl/core/DefaultOcflRepository.java index 4ff35bd9..d82ddfc2 100644 --- a/ocfl-java-core/src/main/java/io/ocfl/core/DefaultOcflRepository.java +++ b/ocfl-java-core/src/main/java/io/ocfl/core/DefaultOcflRepository.java @@ -184,7 +184,7 @@ public ObjectVersionId putObject( var newInventory = buildNewInventory(inventoryUpdater, versionInfo); try { - writeNewVersion(newInventory, stagingDir, upgrade); + writeNewVersion(newInventory, stagingDir, upgrade, fileProcessor.checkForEmptyDirs()); return ObjectVersionId.version(objectVersionId.getObjectId(), newInventory.getHead()); } finally { FileUtil.safeDeleteDirectory(stagingDir); @@ -223,7 +223,7 @@ public ObjectVersionId updateObject( objectUpdater.accept(updater); var upgrade = inventoryUpdater.upgradeInventory(config); var newInventory = buildNewInventory(inventoryUpdater, versionInfo); - writeNewVersion(newInventory, stagingDir, upgrade); + writeNewVersion(newInventory, stagingDir, upgrade, updater.checkForEmptyDirs()); return ObjectVersionId.version(objectVersionId.getObjectId(), newInventory.getHead()); } finally { FileUtil.safeDeleteDirectory(stagingDir); @@ -400,7 +400,7 @@ public ObjectVersionId replicateVersionAsHead(ObjectVersionId objectVersionId, V createStagingContentDir(inventory, stagingDir); try { - writeNewVersion(newInventory, stagingDir, upgrade); + writeNewVersion(newInventory, stagingDir, upgrade, false); return ObjectVersionId.version(objectVersionId.getObjectId(), newInventory.getHead()); } finally { 
FileUtil.safeDeleteDirectory(stagingDir); @@ -633,10 +633,16 @@ private void getObjectInternal(Inventory inventory, VersionNum versionNum, Path } } - protected void writeNewVersion(Inventory inventory, Path stagingDir, boolean upgradedOcflVersion) { + protected void writeNewVersion( + Inventory inventory, Path stagingDir, boolean upgradedOcflVersion, boolean checkForEmptyDirs) { var finalInventory = writeInventory(inventory, stagingDir); var contentDir = stagingDir.resolve(inventory.resolveContentDirectory()); + + if (checkForEmptyDirs) { + FileUtil.deleteEmptyDirs(contentDir); + } + if (!FileUtil.hasChildren(contentDir)) { UncheckedFiles.delete(contentDir); } diff --git a/ocfl-java-core/src/main/java/io/ocfl/core/inventory/AddFileProcessor.java b/ocfl-java-core/src/main/java/io/ocfl/core/inventory/AddFileProcessor.java index b8e4349b..ec495a41 100644 --- a/ocfl-java-core/src/main/java/io/ocfl/core/inventory/AddFileProcessor.java +++ b/ocfl-java-core/src/main/java/io/ocfl/core/inventory/AddFileProcessor.java @@ -45,6 +45,7 @@ import java.util.ArrayList; import java.util.HashMap; import java.util.Map; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.locks.ReentrantLock; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -60,6 +61,7 @@ public class AddFileProcessor { private final FileLocker fileLocker; private final Path stagingDir; private final DigestAlgorithm digestAlgorithm; + private final AtomicBoolean checkForEmptyDirs; public static Builder builder() { return new Builder(); @@ -92,6 +94,7 @@ public AddFileProcessor( this.fileLocker = Enforce.notNull(fileLocker, "fileLocker cannot be null"); this.stagingDir = Enforce.notNull(stagingDir, "stagingDir cannot be null"); this.digestAlgorithm = Enforce.notNull(digestAlgorithm, "digestAlgorithm cannot be null"); + this.checkForEmptyDirs = new AtomicBoolean(false); } /** @@ -178,7 +181,7 @@ public Map processPath(Path sourcePath, String destinationPath, Oc stagingFullPath, 
digest); UncheckedFiles.delete(stagingFullPath); - FileUtil.deleteDirAndParentsIfEmpty(stagingFullPath.getParent(), stagingDir); + checkForEmptyDirs.set(true); } } } @@ -243,6 +246,16 @@ public Map processFileWithDigest( }); } + /** + * Returns true if the processor deleted a file and thus we need to look for empty directories to delete prior to + * writing the version. + * + * @return true if we need to look for empty directories + */ + public boolean checkForEmptyDirs() { + return checkForEmptyDirs.get(); + } + private String destinationPath(String path, Path sourcePath) { if (path.isBlank() && Files.isRegularFile(sourcePath)) { return sourcePath.getFileName().toString(); diff --git a/ocfl-java-core/src/main/java/io/ocfl/core/util/FileUtil.java b/ocfl-java-core/src/main/java/io/ocfl/core/util/FileUtil.java index a1408ebe..0c7fcb73 100644 --- a/ocfl-java-core/src/main/java/io/ocfl/core/util/FileUtil.java +++ b/ocfl-java-core/src/main/java/io/ocfl/core/util/FileUtil.java @@ -222,7 +222,9 @@ public static void deleteChildren(Path root) { */ public static void deleteEmptyDirs(Path root) { try (var files = Files.find(root, Integer.MAX_VALUE, (file, attrs) -> attrs.isDirectory())) { - files.filter(f -> !f.equals(root)).forEach(FileUtil::deleteDirIfEmpty); + files.filter(f -> !f.equals(root)) + .sorted(Comparator.reverseOrder()) + .forEach(FileUtil::deleteDirIfEmpty); } catch (NoSuchFileException e) { // ignore } catch (IOException e) { diff --git a/ocfl-java-core/src/test/java/io/ocfl/core/util/FileUtilTest.java b/ocfl-java-core/src/test/java/io/ocfl/core/util/FileUtilTest.java index 86e0e9f1..9e9c61f7 100644 --- a/ocfl-java-core/src/test/java/io/ocfl/core/util/FileUtilTest.java +++ b/ocfl-java-core/src/test/java/io/ocfl/core/util/FileUtilTest.java @@ -97,9 +97,8 @@ public void shouldDeleteAllEmptyDirectories() throws IOException { FileUtil.deleteEmptyDirs(tempRoot); - assertThat(tempRoot.resolve("a/b/c").toFile(), anExistingDirectory()); + 
assertThat(tempRoot.resolve("a/b/c").toFile(), not(anExistingDirectory())); assertThat(tempRoot.resolve("a/c/file3").toFile(), anExistingFile()); - assertThat(tempRoot.resolve("a/b/c/d").toFile(), not(anExistingDirectory())); assertThat(tempRoot.resolve("a/d").toFile(), not(anExistingDirectory())); } From 0f8cece69860d0caa17e9fbb28b5285746f7a778 Mon Sep 17 00:00:00 2001 From: Peter Winckles Date: Sun, 3 Mar 2024 15:55:13 -0600 Subject: [PATCH 16/21] spotless --- ocfl-java-core/src/main/java/io/ocfl/core/util/FileUtil.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/ocfl-java-core/src/main/java/io/ocfl/core/util/FileUtil.java b/ocfl-java-core/src/main/java/io/ocfl/core/util/FileUtil.java index 0c7fcb73..d84e9cf4 100644 --- a/ocfl-java-core/src/main/java/io/ocfl/core/util/FileUtil.java +++ b/ocfl-java-core/src/main/java/io/ocfl/core/util/FileUtil.java @@ -222,9 +222,7 @@ public static void deleteChildren(Path root) { */ public static void deleteEmptyDirs(Path root) { try (var files = Files.find(root, Integer.MAX_VALUE, (file, attrs) -> attrs.isDirectory())) { - files.filter(f -> !f.equals(root)) - .sorted(Comparator.reverseOrder()) - .forEach(FileUtil::deleteDirIfEmpty); + files.filter(f -> !f.equals(root)).sorted(Comparator.reverseOrder()).forEach(FileUtil::deleteDirIfEmpty); } catch (NoSuchFileException e) { // ignore } catch (IOException e) { From 0aace04ccfb584d085bd23b1b175d865be42c6ad Mon Sep 17 00:00:00 2001 From: Peter Winckles Date: Sun, 3 Mar 2024 18:51:52 -0600 Subject: [PATCH 17/21] add javadoc note that class is thread safe --- .../src/main/java/io/ocfl/api/OcflObjectUpdater.java | 5 ++++- .../src/main/java/io/ocfl/core/DefaultOcflObjectUpdater.java | 3 +++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/ocfl-java-api/src/main/java/io/ocfl/api/OcflObjectUpdater.java b/ocfl-java-api/src/main/java/io/ocfl/api/OcflObjectUpdater.java index a2221803..fe95e0d2 100644 --- 
a/ocfl-java-api/src/main/java/io/ocfl/api/OcflObjectUpdater.java +++ b/ocfl-java-api/src/main/java/io/ocfl/api/OcflObjectUpdater.java @@ -34,6 +34,9 @@ /** * Exposes methods for selectively updating a specific OCFL object. + *

+ * Implementations are thread safe, and you can concurrently use the same updater to add multiple files to the same + * object version. */ public interface OcflObjectUpdater { @@ -42,7 +45,7 @@ public interface OcflObjectUpdater { * it's a directory, the contents of the directory are inserted into the object's root. * *

By default, files are copied into the OCFL repository. If {@link OcflOption#MOVE_SOURCE} is specified, then - * files will be moved instead. Warning: If an exception occurs and the new version is not created, the files that were + * files will be moved instead. Warning: If an exception occurs and the new version is not created, the files that * will be lost. This operation is more efficient but less safe than the default copy. * *

By default, the change will be rejected if there is an existing file in an object at a logical path. diff --git a/ocfl-java-core/src/main/java/io/ocfl/core/DefaultOcflObjectUpdater.java b/ocfl-java-core/src/main/java/io/ocfl/core/DefaultOcflObjectUpdater.java index ec73120f..53668a73 100644 --- a/ocfl-java-core/src/main/java/io/ocfl/core/DefaultOcflObjectUpdater.java +++ b/ocfl-java-core/src/main/java/io/ocfl/core/DefaultOcflObjectUpdater.java @@ -54,6 +54,9 @@ /** * Default implementation of OcflObjectUpdater that is used by DefaultOcflRepository to provide write access to an object. + *

+ * This class is thread safe, and you can concurrently use the same updater to add multiple files to the same + * object version. */ public class DefaultOcflObjectUpdater implements OcflObjectUpdater { From 9332ace869a5d9d1fd9ffd97f8e0d38437283558 Mon Sep 17 00:00:00 2001 From: Peter Winckles Date: Wed, 20 Mar 2024 22:16:30 -0500 Subject: [PATCH 18/21] update docs with more s3 details Update the docs to describe how to configure the s3 client to work with 3rd party s3 implementations. --- docs/USAGE.md | 43 ++++++++++++++++--- .../main/java/io/ocfl/aws/OcflS3Client.java | 25 ++++++----- ocfl-java-itest/pom.xml | 5 +++ pom.xml | 4 +- 4 files changed, 60 insertions(+), 17 deletions(-) diff --git a/docs/USAGE.md b/docs/USAGE.md index c1a9430d..c329d986 100644 --- a/docs/USAGE.md +++ b/docs/USAGE.md @@ -186,17 +186,24 @@ multipart uploads and downloads with the CRT client. However, you can make multipart uploads work with the old client if it's wrapped in a `MultipartS3AsyncClient`, but multipart downloads will still not work. -Unfortunately, from our testing, it appears that the CRT client only -works with the official AWS S3, and it does not work with third party -implementations. So, if you are using a third party implementation, -please make sure you wrap your client in a `MultipartS3AsyncClient`. -Otherwise, you will experience performance degradation. +Additionally, if you are using a 3rd party S3 implementation, you will +likely need to disable [object integrity +checks](https://docs.aws.amazon.com/AmazonS3/latest/userguide/checking-object-integrity.html) +on the client that is used by the transfer manager. This is because +most/all 3rd party implementations do not support it, and it causes +the requests to fail. If you do not specify a transfer manager when constructing the `OcflS3Client`, then it will create the default transfer manager using the S3 client it was provided. 
When you use the default transfer manager, you need to be sure to close the `OcflRepository` when you are done with it, otherwise the transfer manager will not be closed. +Note that if you construct your own transfer manager, which is +advisable so that you can configure it to your specifications, it does +not need to use the same S3 client as the one already specified on +`OcflS3Client` but it can. For example, maybe you only want to use the +CRT client in the transfer manager, and you want to run everything +else through the regular client. If you are using the CRT client, then you need to add `software.amazon.awssdk.crt:aws-crt` to your project, and create the @@ -218,6 +225,32 @@ MultipartS3AsyncClient.create( Note the use of `MultipartS3AsyncClient`. Very important! +If you are using a 3rd party S3 implementation and need to disable the +object integrity check, then you can do so as follows: + +``` java +S3AsyncClient.crtBuilder().checksumValidationEnabled(false).build(); +``` + +Unfortunately, this is harder to do if you use the Netty client +wrapped in `MultipartS3AsyncClient`. 
As of this writing, it must be +disabled per-request as follows: + +``` java +OcflS3Client.builder() + .bucket(bucket) + .s3Client(MultipartS3AsyncClient.create( + S3AsyncClient.builder().build(), + MultipartConfiguration.builder().build())) + .putObjectModifier( + (key, builder) -> builder.overrideConfiguration(override -> override.putExecutionAttribute( + AwsSignerExecutionAttribute.SERVICE_CONFIG, + S3Configuration.builder() + .checksumValidationEnabled(false) + .build()))) + .build(); +``` + ### Configuration Use `OcflStorageBuilder.builder()` to create and configure an diff --git a/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Client.java b/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Client.java index ff7d2759..559ee817 100644 --- a/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Client.java +++ b/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Client.java @@ -198,10 +198,7 @@ public Future uploadFileAsync(Path srcPath, String dstPath, Stri putObjectModifier.accept(dstKey.getKey(), builder); var upload = transferManager.uploadFile(req -> req.source(srcPath) - .putObjectRequest(builder.bucket(bucket) - .key(dstKey.getKey()) - .contentLength(fileSize) - .build()) + .putObjectRequest(builder.bucket(bucket).key(dstKey.getKey()).build()) .build()); return new UploadFuture(upload, srcPath, dstKey); @@ -622,17 +619,25 @@ public static class Builder { /** * The AWS SDK S3 client. Required. *

- * Important: You MUST either use the CRT client - * or wrap the regular S3AsyncClient in {@link software.amazon.awssdk.services.s3.internal.multipart.MultipartS3AsyncClient} - * in order for multipart uploads to work. Otherwise, files will be uploaded in single PUT requests. + * If a {@link #transferManager(S3TransferManager)} is not specified, then the client specified here will be + * used to create a default transfer manager. If you specify a transfer manager, it does not need to use the + * same client as the one specified here. However, when creating a client to be used by the transfer manager, + * it is important to understand the following gotchas. *

- * Additionally, only the CRT client supports multipart downloads. However, from what I've seen, the CRT client - * only works with AWS, and it does not work with third party S3 implementations. In which case, - * you should use the regular S3AsyncClient with the MultipartS3AsyncClient wrapper. + * The client used by the transfer manager MUST either be the CRT client + * or the regular S3AsyncClient wrapped in {@link software.amazon.awssdk.services.s3.internal.multipart.MultipartS3AsyncClient} + * in order for multipart uploads to work. Otherwise, files will be uploaded in single PUT requests. Additionally, + * only the CRT client supports multipart downloads. + *

+ * If you are using a 3rd party S3 implementation, then you will likely additionally need to disable the + * object integrity check + * as most 3rd party implementations do not support it. This is easy to do on the CRT client builder by setting + * {@code checksumValidationEnabled()} to {@code false}. *

* This client is NOT closed when the repository is closed, and the user is responsible for closing it when appropriate. *

*

{@code
+         * // Please refer to the official documentation to properly configure your client.
          * // When using the CRT client, create it something like this:
          * S3AsyncClient.crtBuilder().build();
          *
diff --git a/ocfl-java-itest/pom.xml b/ocfl-java-itest/pom.xml
index c227458c..dfc3a0a7 100644
--- a/ocfl-java-itest/pom.xml
+++ b/ocfl-java-itest/pom.xml
@@ -126,6 +126,11 @@
             s3mock-junit5
             test
         
+        
+            software.amazon.awssdk.crt
+            aws-crt
+            test
+        
         
             io.micrometer
             micrometer-core
diff --git a/pom.xml b/pom.xml
index 1158d1e4..9abb4c57 100644
--- a/pom.xml
+++ b/pom.xml
@@ -340,14 +340,14 @@
             
                 software.amazon.awssdk
                 bom
-                2.24.13
+                2.25.13
                 pom
                 import
             
             
                 software.amazon.awssdk.crt
                 aws-crt
-                0.21.9
+                0.29.12
             
 
             

From e20978acc0fbcd3c0fba4ba98f505324ec816256 Mon Sep 17 00:00:00 2001
From: Peter Winckles 
Date: Sun, 24 Mar 2024 10:13:26 -0500
Subject: [PATCH 19/21] add spaces around secrets

---
 .github/workflows/build.yml                          | 12 ++++++------
 .../src/test/java/io/ocfl/aws/OcflS3ClientTest.java  |  4 ++--
 .../src/test/java/io/ocfl/aws/OcflS3Test.java        |  4 ++--
 .../test/java/io/ocfl/itest/s3/S3BadReposITest.java  |  4 ++--
 .../java/io/ocfl/itest/s3/S3MutableHeadITest.java    |  4 ++--
 .../src/test/java/io/ocfl/itest/s3/S3OcflITest.java  |  4 ++--
 6 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index e8dcbf4d..cab42377 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -60,9 +60,9 @@ jobs:
     runs-on: ubuntu-latest
     environment: ci
     env:
-      OCFL_TEST_AWS_ACCESS_KEY: ${{secrets.OCFL_TEST_AWS_ACCESS_KEY}}
-      OCFL_TEST_AWS_SECRET_KEY: ${{secrets.OCFL_TEST_AWS_SECRET_KEY}}
-      OCFL_TEST_S3_BUCKET: ${{secrets.OCFL_TEST_S3_BUCKET}}
+      OCFL_TEST_AWS_ACCESS_KEY: ${{ secrets.OCFL_TEST_AWS_ACCESS_KEY }}
+      OCFL_TEST_AWS_SECRET_KEY: ${{ secrets.OCFL_TEST_AWS_SECRET_KEY }}
+      OCFL_TEST_S3_BUCKET: ${{ secrets.OCFL_TEST_S3_BUCKET }}
     services:
       postgres:
         image: postgres:12
@@ -99,9 +99,9 @@ jobs:
     runs-on: ubuntu-latest
     environment: ci
     env:
-      OCFL_TEST_AWS_ACCESS_KEY: ${{secrets.OCFL_TEST_AWS_ACCESS_KEY}}
-      OCFL_TEST_AWS_SECRET_KEY: ${{secrets.OCFL_TEST_AWS_SECRET_KEY}}
-      OCFL_TEST_S3_BUCKET: ${{secrets.OCFL_TEST_S3_BUCKET}}
+      OCFL_TEST_AWS_ACCESS_KEY: ${{ secrets.OCFL_TEST_AWS_ACCESS_KEY }}
+      OCFL_TEST_AWS_SECRET_KEY: ${{ secrets.OCFL_TEST_AWS_SECRET_KEY }}
+      OCFL_TEST_S3_BUCKET: ${{ secrets.OCFL_TEST_S3_BUCKET }}
     services:
       mariadb:
         image: mariadb:10.6
diff --git a/ocfl-java-aws/src/test/java/io/ocfl/aws/OcflS3ClientTest.java b/ocfl-java-aws/src/test/java/io/ocfl/aws/OcflS3ClientTest.java
index 95a617c1..5f80030d 100644
--- a/ocfl-java-aws/src/test/java/io/ocfl/aws/OcflS3ClientTest.java
+++ b/ocfl-java-aws/src/test/java/io/ocfl/aws/OcflS3ClientTest.java
@@ -69,7 +69,7 @@ public static void beforeAll() {
         var bucket = System.getenv().get("OCFL_TEST_S3_BUCKET");
 
         if (StringUtils.isNotBlank(accessKey) && StringUtils.isNotBlank(secretKey) && StringUtils.isNotBlank(bucket)) {
-            LOG.info("Running tests against AWS");
+            LOG.warn("Running tests against AWS");
             awsS3Client = S3AsyncClient.crtBuilder()
                     .region(Region.US_EAST_2)
                     .credentialsProvider(
@@ -77,7 +77,7 @@ public static void beforeAll() {
                     .build();
             OcflS3ClientTest.bucket = bucket;
         } else {
-            LOG.info("Running tests against S3 Mock");
+            LOG.warn("Running tests against S3 Mock");
             awsS3Client = MultipartS3AsyncClient.create(
                     S3AsyncClient.builder()
                             .endpointOverride(URI.create(S3_MOCK.getServiceEndpoint()))
diff --git a/ocfl-java-aws/src/test/java/io/ocfl/aws/OcflS3Test.java b/ocfl-java-aws/src/test/java/io/ocfl/aws/OcflS3Test.java
index 027ee36e..7ec03c6b 100644
--- a/ocfl-java-aws/src/test/java/io/ocfl/aws/OcflS3Test.java
+++ b/ocfl-java-aws/src/test/java/io/ocfl/aws/OcflS3Test.java
@@ -75,7 +75,7 @@ public static void beforeAll() {
         var bucket = System.getenv().get("OCFL_TEST_S3_BUCKET");
 
         if (StringUtils.isNotBlank(accessKey) && StringUtils.isNotBlank(secretKey) && StringUtils.isNotBlank(bucket)) {
-            LOG.info("Running tests against AWS");
+            LOG.warn("Running tests against AWS");
             s3Client = S3AsyncClient.crtBuilder()
                     .region(Region.US_EAST_2)
                     .credentialsProvider(
@@ -83,7 +83,7 @@ public static void beforeAll() {
                     .build();
             OcflS3Test.bucket = bucket;
         } else {
-            LOG.info("Running tests against S3 Mock");
+            LOG.warn("Running tests against S3 Mock");
             s3Client = MultipartS3AsyncClient.create(
                     S3AsyncClient.builder()
                             .endpointOverride(URI.create(S3_MOCK.getServiceEndpoint()))
diff --git a/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3BadReposITest.java b/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3BadReposITest.java
index 06152d87..d4231726 100644
--- a/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3BadReposITest.java
+++ b/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3BadReposITest.java
@@ -51,11 +51,11 @@ public static void beforeAll() {
         var bucket = System.getenv().get("OCFL_TEST_S3_BUCKET");
 
         if (StringUtils.isNotBlank(accessKey) && StringUtils.isNotBlank(secretKey) && StringUtils.isNotBlank(bucket)) {
-            LOG.info("Running tests against AWS");
+            LOG.warn("Running tests against AWS");
             s3Client = S3ITestHelper.createS3Client(accessKey, secretKey);
             S3BadReposITest.bucket = bucket;
         } else {
-            LOG.info("Running tests against S3 Mock");
+            LOG.warn("Running tests against S3 Mock");
             s3Client = S3ITestHelper.createMockS3Client(S3_MOCK.getServiceEndpoint());
             S3BadReposITest.bucket = UUID.randomUUID().toString();
             s3Client.createBucket(request -> {
diff --git a/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3MutableHeadITest.java b/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3MutableHeadITest.java
index 0e7b503a..12944c90 100644
--- a/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3MutableHeadITest.java
+++ b/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3MutableHeadITest.java
@@ -55,11 +55,11 @@ public static void beforeAll() {
         var bucket = System.getenv().get("OCFL_TEST_S3_BUCKET");
 
         if (StringUtils.isNotBlank(accessKey) && StringUtils.isNotBlank(secretKey) && StringUtils.isNotBlank(bucket)) {
-            LOG.info("Running tests against AWS");
+            LOG.warn("Running tests against AWS");
             s3Client = S3ITestHelper.createS3Client(accessKey, secretKey);
             S3MutableHeadITest.bucket = bucket;
         } else {
-            LOG.info("Running tests against S3 Mock");
+            LOG.warn("Running tests against S3 Mock");
             s3Client = S3ITestHelper.createMockS3Client(S3_MOCK.getServiceEndpoint());
             S3MutableHeadITest.bucket = UUID.randomUUID().toString();
             s3Client.createBucket(request -> {
diff --git a/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3OcflITest.java b/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3OcflITest.java
index 9b0e57cd..b503da27 100644
--- a/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3OcflITest.java
+++ b/ocfl-java-itest/src/test/java/io/ocfl/itest/s3/S3OcflITest.java
@@ -84,11 +84,11 @@ public static void beforeAll() {
         var bucket = System.getenv().get(ENV_BUCKET);
 
         if (StringUtils.isNotBlank(accessKey) && StringUtils.isNotBlank(secretKey) && StringUtils.isNotBlank(bucket)) {
-            LOG.info("Running tests against AWS");
+            LOG.warn("Running tests against AWS");
             s3Client = S3ITestHelper.createS3Client(accessKey, secretKey);
             S3OcflITest.bucket = bucket;
         } else {
-            LOG.info("Running tests against S3 Mock");
+            LOG.warn("Running tests against S3 Mock");
             s3Client = S3ITestHelper.createMockS3Client(S3_MOCK.getServiceEndpoint());
             S3OcflITest.bucket = UUID.randomUUID().toString();
             s3Client.createBucket(request -> {

From 206cfe7bb3c2fa09c94fdb8ced016b169f976803 Mon Sep 17 00:00:00 2001
From: Peter Winckles 
Date: Sun, 24 Mar 2024 15:23:47 -0500
Subject: [PATCH 20/21] mess with load tests

---
 docs/USAGE.md                                 |  33 +++++
 ocfl-java-itest/pom.xml                       |  23 +---
 .../test/java/io/ocfl/itest/LoadITest.java    | 118 +++++-------------
 pom.xml                                       |   5 +
 4 files changed, 74 insertions(+), 105 deletions(-)

diff --git a/docs/USAGE.md b/docs/USAGE.md
index c329d986..3372bd1f 100644
--- a/docs/USAGE.md
+++ b/docs/USAGE.md
@@ -253,6 +253,39 @@ OcflS3Client.builder()
 
 ### Configuration
 
+#### AWS SDK
+
+If you are using the [CRT
+client](https://docs.aws.amazon.com/sdk-for-java/latest/developer-guide/crt-based-s3-client.html),
+remember to set `targetThroughputInGbps()` on the builder, which
+controls the client's concurrency.
+
+If you are using the regular async Netty client, you will likely want
+to set `connectionAcquisitionTimeout`, `writeTimeout`, `readTimeout`,
+and `maxConcurrency`. This is critical because `ocfl-java` queues
+concurrent writes, and Netty needs to be configured to handle your
+application's load. An example configuration looks something like:
+
+``` java
+S3AsyncClient.builder()
+        .region(Region.US_EAST_2)
+        .httpClientBuilder(NettyNioAsyncHttpClient.builder()
+                .connectionAcquisitionTimeout(Duration.ofSeconds(60))
+                .writeTimeout(Duration.ofSeconds(120))
+                .readTimeout(Duration.ofSeconds(60))
+                .maxConcurrency(100))
+        .build();
+```
+
+If you see failures related to acquiring a connection from the pool,
+then you either need to increase the concurrency, increase the
+acquisition timeout, or both.
+
+That said, it is generally recommended to use the CRT client. It is
+easier to configure and seems to have better performance.
+
+#### ocfl-java
+
 Use `OcflStorageBuilder.builder()` to create and configure an
 `OcflStorage` instance.
 
diff --git a/ocfl-java-itest/pom.xml b/ocfl-java-itest/pom.xml
index dfc3a0a7..07629f4e 100644
--- a/ocfl-java-itest/pom.xml
+++ b/ocfl-java-itest/pom.xml
@@ -132,27 +132,8 @@
             test
         
         
-            io.micrometer
-            micrometer-core
-            1.12.3
-            test
-        
-        
-            io.micrometer
-            micrometer-registry-prometheus
-            1.12.3
-            test
-        
-        
-            io.prometheus
-            simpleclient_httpserver
-            0.16.0
-            test
-        
-        
-            io.prometheus
-            simpleclient
-            0.16.0
+            org.hdrhistogram
+            HdrHistogram
             test
         
     
diff --git a/ocfl-java-itest/src/test/java/io/ocfl/itest/LoadITest.java b/ocfl-java-itest/src/test/java/io/ocfl/itest/LoadITest.java
index f271d4bd..f0123a52 100644
--- a/ocfl-java-itest/src/test/java/io/ocfl/itest/LoadITest.java
+++ b/ocfl-java-itest/src/test/java/io/ocfl/itest/LoadITest.java
@@ -1,11 +1,6 @@
 package io.ocfl.itest;
 
-import io.micrometer.core.instrument.Meter;
 import io.micrometer.core.instrument.Metrics;
-import io.micrometer.core.instrument.config.MeterFilter;
-import io.micrometer.core.instrument.distribution.DistributionStatisticConfig;
-import io.micrometer.prometheus.PrometheusConfig;
-import io.micrometer.prometheus.PrometheusMeterRegistry;
 import io.ocfl.api.MutableOcflRepository;
 import io.ocfl.api.OcflRepository;
 import io.ocfl.api.model.ObjectVersionId;
@@ -16,11 +11,9 @@
 import io.ocfl.core.extension.storage.layout.config.HashedNTupleLayoutConfig;
 import io.ocfl.core.util.FileUtil;
 import io.ocfl.core.util.UncheckedFiles;
-import io.prometheus.client.exporter.HTTPServer;
 import java.io.BufferedOutputStream;
 import java.io.IOException;
 import java.io.UncheckedIOException;
-import java.net.InetSocketAddress;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.time.Duration;
@@ -31,8 +24,7 @@
 import java.util.concurrent.ThreadLocalRandom;
 import java.util.concurrent.TimeUnit;
 import java.util.function.Consumer;
-import org.junit.jupiter.api.AfterAll;
-import org.junit.jupiter.api.BeforeAll;
+import org.HdrHistogram.Histogram;
 import org.junit.jupiter.api.Disabled;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.io.TempDir;
@@ -43,9 +35,6 @@
 @Disabled
 public class LoadITest {
 
-    // AVG: rate(putObject_seconds_sum[1m])/rate(putObject_seconds_count[1m])
-    // p99: histogram_quantile(0.99, sum(rate(putObject_seconds_bucket[1m])) by (le))
-
     private static final int KB = 1024;
     private static final long MB = 1024 * KB;
 
@@ -54,44 +43,7 @@ public class LoadITest {
     @TempDir
     public Path tempRoot;
 
-    private static HTTPServer prometheusServer;
-
-    @BeforeAll
-    public static void beforeAll() throws IOException {
-        var registry = new PrometheusMeterRegistry(new PrometheusConfig() {
-            @Override
-            public Duration step() {
-                return Duration.ofSeconds(30);
-            }
-
-            @Override
-            public String get(final String key) {
-                return null;
-            }
-        });
-        // Enables distribution stats for all timer metrics
-        registry.config().meterFilter(new MeterFilter() {
-            @Override
-            public DistributionStatisticConfig configure(final Meter.Id id, final DistributionStatisticConfig config) {
-                if (id.getType() == Meter.Type.TIMER) {
-                    return DistributionStatisticConfig.builder()
-                            .percentilesHistogram(true)
-                            .percentiles(0.5, 0.90, 0.99)
-                            .build()
-                            .merge(config);
-                }
-                return config;
-            }
-        });
-        Metrics.addRegistry(registry);
-
-        prometheusServer = new HTTPServer(new InetSocketAddress(1234), registry.getPrometheusRegistry());
-    }
-
-    @AfterAll
-    public static void afterAll() {
-        prometheusServer.stop();
-    }
+    private static final Histogram histogram = new Histogram(3600000000000L, 3);
 
     @Test
     public void fsPutObjectSmallFilesTest() throws InterruptedException {
@@ -307,6 +259,7 @@ private void runPutTest(
             boolean shouldPurge)
             throws InterruptedException {
         System.out.println("Starting putTest");
+        histogram.reset();
 
         System.out.println("Creating test object");
         var objectPath = createTestObject(fileCount, fileSize);
@@ -315,24 +268,14 @@ private void runPutTest(
         var versionInfo =
                 new VersionInfo().setUser("Peter", "pwinckles@example.com").setMessage("Testing");
 
-        var timer = Metrics.timer(
-                "putObject",
-                "files",
-                String.valueOf(fileCount),
-                "sizeBytes",
-                String.valueOf(fileSize),
-                "threads",
-                String.valueOf(threadCount),
-                "storage",
-                storageType);
-
         var threads = new ArrayList(threadCount);
 
         for (var i = 0; i < threadCount; i++) {
             threads.add(createThread(duration, objectId -> {
-                timer.record(() -> {
-                    repo.putObject(ObjectVersionId.head(objectId), objectPath, versionInfo);
-                });
+                var start = System.nanoTime();
+                repo.putObject(ObjectVersionId.head(objectId), objectPath, versionInfo);
+                var end = System.nanoTime();
+                histogram.recordValue(end - start);
                 if (shouldPurge) {
                     repo.purgeObject(objectId);
                 }
@@ -342,16 +285,18 @@ private void runPutTest(
         startThreads(threads);
         System.out.println("Waiting for threads to complete...");
         joinThreads(threads);
-
-        System.out.println("Finished. Waiting for metrics collection...");
-        TimeUnit.SECONDS.sleep(30);
         System.out.println("Done");
+
+        System.out.printf(
+                "putTest results for %s files=%d size=%s threads=%s%n", storageType, fileCount, fileSize, threadCount);
+        histogram.outputPercentileDistribution(System.out, 1_000_000.0);
     }
 
     private void runGetTest(
             OcflRepository repo, int fileCount, long fileSize, int threadCount, Duration duration, String storageType)
             throws InterruptedException {
         System.out.println("Starting getTest");
+        histogram.reset();
 
         System.out.println("Creating test object");
         var objectPath = createTestObject(fileCount, fileSize);
@@ -364,25 +309,15 @@ private void runGetTest(
 
         repo.putObject(ObjectVersionId.head(objectId), objectPath, versionInfo);
 
-        var timer = Metrics.timer(
-                "getObject",
-                "files",
-                String.valueOf(fileCount),
-                "sizeBytes",
-                String.valueOf(fileSize),
-                "threads",
-                String.valueOf(threadCount),
-                "storage",
-                storageType);
-
         var threads = new ArrayList(threadCount);
 
         for (var i = 0; i < threadCount; i++) {
             threads.add(createThread(duration, out -> {
                 var outDir = tempRoot.resolve(out);
-                timer.record(() -> {
-                    repo.getObject(ObjectVersionId.head(objectId), outDir);
-                });
+                var start = System.nanoTime();
+                repo.getObject(ObjectVersionId.head(objectId), outDir);
+                var end = System.nanoTime();
+                histogram.recordValue(end - start);
                 FileUtil.safeDeleteDirectory(outDir);
             }));
         }
@@ -390,10 +325,11 @@ private void runGetTest(
         startThreads(threads);
         System.out.println("Waiting for threads to complete...");
         joinThreads(threads);
-
-        System.out.println("Finished. Waiting for metrics collection...");
-        TimeUnit.SECONDS.sleep(30);
         System.out.println("Done");
+
+        System.out.printf(
+                "getTest results for %s files=%d size=%s threads=%s%n", storageType, fileCount, fileSize, threadCount);
+        histogram.outputPercentileDistribution(System.out, 1_000_000.0);
     }
 
     private Thread createThread(Duration duration, Consumer test) {
@@ -447,8 +383,22 @@ private OcflRepository createFsRepo() {
     }
 
     private MutableOcflRepository createS3Repo() {
+        //        var s3Client = S3AsyncClient.builder()
+        //                .region(Region.US_EAST_2)
+        //                .httpClientBuilder(NettyNioAsyncHttpClient.builder()
+        //                        .connectionAcquisitionTimeout(Duration.ofSeconds(60))
+        //                        .writeTimeout(Duration.ofSeconds(0))
+        //                        .readTimeout(Duration.ofSeconds(0))
+        //                        .maxConcurrency(100))
+        //                .build();
+        //        var transferManager = S3TransferManager.builder()
+        //                .s3Client(MultipartS3AsyncClient.create(
+        //                        s3Client, MultipartConfiguration.builder().build()))
+        //                .build();
+
         var s3Client = S3AsyncClient.crtBuilder().region(Region.US_EAST_2).build();
         var transferManager = S3TransferManager.builder().s3Client(s3Client).build();
+
         var prefix = UUID.randomUUID().toString();
         // Note this is NOT using a db, which an S3 setup would normally use
         return new OcflRepositoryBuilder()
diff --git a/pom.xml b/pom.xml
index 9abb4c57..9607cb52 100644
--- a/pom.xml
+++ b/pom.xml
@@ -397,6 +397,11 @@
                 
                 2.17.0
             
+            
+                org.hdrhistogram
+                HdrHistogram
+                2.1.12
+            
         
     
 

From a2a72ecb534ff2f3296b31677cc8c483f778b934 Mon Sep 17 00:00:00 2001
From: Peter Winckles 
Date: Sun, 24 Mar 2024 19:31:24 -0500
Subject: [PATCH 21/21] fix concurrent tests

---
 .../java/io/ocfl/itest/MutableHeadITest.java  | 29 +++++++++++--------
 .../test/java/io/ocfl/itest/OcflITest.java    | 15 ++++++----
 2 files changed, 26 insertions(+), 18 deletions(-)

diff --git a/ocfl-java-itest/src/test/java/io/ocfl/itest/MutableHeadITest.java b/ocfl-java-itest/src/test/java/io/ocfl/itest/MutableHeadITest.java
index 18e9fa47..33effe3f 100644
--- a/ocfl-java-itest/src/test/java/io/ocfl/itest/MutableHeadITest.java
+++ b/ocfl-java-itest/src/test/java/io/ocfl/itest/MutableHeadITest.java
@@ -29,6 +29,7 @@
 import java.nio.file.Paths;
 import java.util.concurrent.CompletableFuture;
 import java.util.concurrent.ExecutionException;
+import java.util.concurrent.Phaser;
 import java.util.concurrent.TimeUnit;
 import java.util.function.Consumer;
 import org.hamcrest.Matchers;
@@ -447,24 +448,26 @@ public void rejectUpdateWhenConcurrentChangeToPreviousVersionOfMutableHead() thr
             updater.writeFile(streamString("file2"), "file2.txt");
         });
 
+        var phaser = new Phaser(2);
+
         var future = CompletableFuture.runAsync(() -> {
             repo.stageChanges(ObjectVersionId.head(objectId), defaultVersionInfo, updater -> {
-                try {
-                    TimeUnit.SECONDS.sleep(3);
-                } catch (InterruptedException e) {
-                    throw new RuntimeException(e);
-                }
+                phaser.arriveAndAwaitAdvance();
+                phaser.arriveAndAwaitAdvance();
                 updater.writeFile(streamString("file3"), "file3.txt");
             });
         });
 
-        TimeUnit.MILLISECONDS.sleep(100);
+        phaser.arriveAndAwaitAdvance();
 
         repo.rollbackToVersion(ObjectVersionId.version(objectId, "v1"));
         repo.stageChanges(ObjectVersionId.head(objectId), defaultVersionInfo, updater -> {
             updater.writeFile(streamString("file4"), "file4.txt");
         });
 
+        phaser.arriveAndAwaitAdvance();
+        TimeUnit.MILLISECONDS.sleep(100);
+
         OcflAsserts.assertThrowsWithMessage(
                 ObjectOutOfSyncException.class,
                 "Cannot update object o1 because the update is out of sync with the current object state. The digest of the current inventory is ",
@@ -497,24 +500,26 @@ public void rejectUpdateWhenConcurrentChangeWhileCreatingMutableHead() throws In
             updater.writeFile(streamString("file2"), "file2.txt");
         });
 
+        var phaser = new Phaser(2);
+
         var future = CompletableFuture.runAsync(() -> {
             repo.stageChanges(ObjectVersionId.head(objectId), defaultVersionInfo, updater -> {
-                try {
-                    TimeUnit.SECONDS.sleep(3);
-                } catch (InterruptedException e) {
-                    throw new RuntimeException(e);
-                }
+                phaser.arriveAndAwaitAdvance();
+                phaser.arriveAndAwaitAdvance();
                 updater.writeFile(streamString("file3"), "file3.txt");
             });
         });
 
-        TimeUnit.MILLISECONDS.sleep(100);
+        phaser.arriveAndAwaitAdvance();
 
         repo.rollbackToVersion(ObjectVersionId.version(objectId, "v1"));
         repo.updateObject(ObjectVersionId.head(objectId), defaultVersionInfo, updater -> {
             updater.writeFile(streamString("file4"), "file4.txt");
         });
 
+        phaser.arriveAndAwaitAdvance();
+        TimeUnit.MILLISECONDS.sleep(100);
+
         OcflAsserts.assertThrowsWithMessage(
                 ObjectOutOfSyncException.class,
                 "Cannot update object o1 because the update is out of sync with the current object state. The digest of the current inventory is ",
diff --git a/ocfl-java-itest/src/test/java/io/ocfl/itest/OcflITest.java b/ocfl-java-itest/src/test/java/io/ocfl/itest/OcflITest.java
index b41ae09f..9f83e534 100644
--- a/ocfl-java-itest/src/test/java/io/ocfl/itest/OcflITest.java
+++ b/ocfl-java-itest/src/test/java/io/ocfl/itest/OcflITest.java
@@ -76,6 +76,7 @@
 import java.util.UUID;
 import java.util.concurrent.CompletableFuture;
 import java.util.concurrent.ExecutionException;
+import java.util.concurrent.Phaser;
 import java.util.concurrent.TimeUnit;
 import java.util.function.Consumer;
 import java.util.stream.Collectors;
@@ -2208,24 +2209,26 @@ public void rejectUpdateWhenConcurrentChangeToPreviousVersion() throws Interrupt
             updater.writeFile(ITestHelper.streamString("file2"), "file2.txt");
         });
 
+        var phaser = new Phaser(2);
+
         var future = CompletableFuture.runAsync(() -> {
             repo.updateObject(ObjectVersionId.head(objectId), defaultVersionInfo, updater -> {
-                try {
-                    TimeUnit.SECONDS.sleep(3);
-                } catch (InterruptedException e) {
-                    throw new RuntimeException(e);
-                }
+                phaser.arriveAndAwaitAdvance();
+                phaser.arriveAndAwaitAdvance();
                 updater.writeFile(ITestHelper.streamString("file3"), "file3.txt");
             });
         });
 
-        TimeUnit.MILLISECONDS.sleep(100);
+        phaser.arriveAndAwaitAdvance();
 
         repo.rollbackToVersion(ObjectVersionId.version(objectId, "v1"));
         repo.updateObject(ObjectVersionId.head(objectId), defaultVersionInfo, updater -> {
             updater.writeFile(ITestHelper.streamString("file4"), "file4.txt");
         });
 
+        phaser.arriveAndAwaitAdvance();
+        TimeUnit.MILLISECONDS.sleep(100);
+
         OcflAsserts.assertThrowsWithMessage(
                 ObjectOutOfSyncException.class,
                 "Cannot update object o1 because the update is out of sync with the current object state. The digest of the current inventory is ",