diff --git a/src/main/java/htsjdk/beta/io/IOPathUtils.java b/src/main/java/htsjdk/beta/io/IOPathUtils.java index 07e0582154..965c850a59 100644 --- a/src/main/java/htsjdk/beta/io/IOPathUtils.java +++ b/src/main/java/htsjdk/beta/io/IOPathUtils.java @@ -3,6 +3,7 @@ import htsjdk.beta.exception.HtsjdkIOException; import htsjdk.io.HtsPath; import htsjdk.io.IOPath; +import htsjdk.utils.ValidationUtils; import java.io.BufferedOutputStream; import java.io.File; @@ -10,7 +11,8 @@ import java.io.StringWriter; import java.nio.charset.StandardCharsets; import java.nio.file.Files; -import java.nio.file.Path; +import java.util.Optional; +import java.util.function.Function; public class IOPathUtils { @@ -69,4 +71,65 @@ public static void writeStringToPath(final IOPath ioPath, final String contents) e); } } + + /** + * Takes an IOPath and returns a new IOPath object that keeps the same basename as the original but with + * a new extension. Only the last component of the extension will be replaced, e.g. ("my.fasta.gz", ".tmp") -> + * "my.fasta.tmp". If the original path has no extension, an exception will be thrown. + * + * If the input IOPath was created from a rawInputString that specifies a relative local path, the new path will + * have a rawInputString that specifies an absolute path. + * + * Examples + * - ("test_na12878.bam", ".bai") -> "test_na12878.bai" + * - ("test_na12878.bam", "bai") -> "test_na12878.bai" + * - ("test_na12878.ext.bam", ".bai") -> "test_na12878.ext.bai" + * + * @param path The original path + * @param newExtension The new file extension. If no leading "." is provided as part of the new extension, one will be added. 
+ * @param ioPathConstructor a function that takes a string and returns an IOPath-derived class of type + * @return A new IOPath object with the new extension + * @throws RuntimeException if the original path has no extension to replace + */ + public static T replaceExtension( + final IOPath path, + final String newExtension, + final Function ioPathConstructor){ + final String extensionToUse = newExtension.startsWith(".") ? + newExtension : + "." + newExtension; + final Optional oldExtension = path.getExtension(); + if (oldExtension.isEmpty()){ + throw new RuntimeException("The original path has no extension to replace: " + path.getURIString()); + } + final String oldFileName = path.toPath().getFileName().toString(); + // Swap the extension as a literal suffix rather than as a regular expression, so that regex + // metacharacters in the old extension (such as its ".") and "$" or "\" in the new extension are + // never interpreted. A file name that does not end with the old extension is left unchanged. + final String extensionToReplace = oldExtension.get(); + final String newFileName = oldFileName.endsWith(extensionToReplace) ? + oldFileName.substring(0, oldFileName.length() - extensionToReplace.length()) + extensionToUse : + oldFileName; + return ioPathConstructor.apply(path.toPath().resolveSibling(newFileName).toUri().toString()); + } + + /** + * Takes an IOPath and returns a new IOPath object that keeps the same name as the original, but with + * the new extension added. If no leading "." is provided as part of the new extension, one will be added. + * + * If the input IOPath was created from a rawInputString that specifies a relative local path, the new path will + * have a rawInputString that specifies an absolute path. + * + * Examples: + * - ("test_na12878.bam", ".bai") -> "test_na12878.bam.bai" + * - ("test_na12878.bam", "md5") -> "test_na12878.bam.md5" + * + * @param path The original path + * @param extension The file extension to add. If no leading "." is provided as part of the extension, one will be added. + * @param ioPathConstructor a function that takes a string and returns an IOPath-derived class of type + * @return A new IOPath object with the new extension + */ + public static T appendExtension( + final IOPath path, + final String extension, + final Function ioPathConstructor){ + final String oldFileName = path.toPath().getFileName().toString(); + final String newExtension = extension.startsWith(".") ? + extension : + "." 
+ extension; + return ioPathConstructor.apply(path.toPath().resolveSibling(oldFileName + newExtension).toUri().toString()); + } } diff --git a/src/main/java/htsjdk/beta/io/bundle/BundleJSON.java b/src/main/java/htsjdk/beta/io/bundle/BundleJSON.java index 1dfc5e2396..95d4d7aaac 100644 --- a/src/main/java/htsjdk/beta/io/bundle/BundleJSON.java +++ b/src/main/java/htsjdk/beta/io/bundle/BundleJSON.java @@ -25,6 +25,7 @@ public class BundleJSON { public static final String BUNDLE_EXTENSION = ".json"; private static final Log LOG = Log.getInstance(BundleJSON.class); + public static final String JSON_PROPERTY_SCHEMA = "schema"; public static final String JSON_PROPERTY_SCHEMA_NAME = "schemaName"; public static final String JSON_PROPERTY_SCHEMA_VERSION = "schemaVersion"; public static final String JSON_PROPERTY_PRIMARY = "primary"; @@ -34,8 +35,7 @@ public class BundleJSON { public static final String JSON_SCHEMA_NAME = "htsbundle"; public static final String JSON_SCHEMA_VERSION = "0.1.0"; // TODO: bump this to 1.0.0 - final private static Set TOP_LEVEL_PROPERTIES = - Set.of(JSON_PROPERTY_SCHEMA_NAME, JSON_PROPERTY_SCHEMA_VERSION, JSON_PROPERTY_PRIMARY); + final private static Set TOP_LEVEL_PROPERTIES = Set.of(JSON_PROPERTY_SCHEMA, JSON_PROPERTY_PRIMARY); /** * Serialize a bundle to a JSON string representation. All resources in the bundle must @@ -46,9 +46,14 @@ public class BundleJSON { * @throws IllegalArgumentException if any resource in bundle is not an IOPathResources. 
*/ public static String toJSON(final Bundle bundle) { + ValidationUtils.validateArg( + !bundle.getPrimaryContentType().equals(JSON_PROPERTY_PRIMARY), + "Primary content type cannot be named 'primary'"); + final JSONObject schemaMap = new JSONObject() + .put(JSON_PROPERTY_SCHEMA_NAME, JSON_SCHEMA_NAME) + .put(JSON_PROPERTY_SCHEMA_VERSION, JSON_SCHEMA_VERSION); final JSONObject outerJSON = new JSONObject() - .put(JSON_PROPERTY_SCHEMA_NAME, JSON_SCHEMA_NAME) - .put(JSON_PROPERTY_SCHEMA_VERSION, JSON_SCHEMA_VERSION) + .put(JSON_PROPERTY_SCHEMA, schemaMap) .put(JSON_PROPERTY_PRIMARY, bundle.getPrimaryContentType()); bundle.forEach(bundleResource -> { @@ -120,7 +125,7 @@ public static Bundle toBundle( String.format("A JSON string with more than one bundle was provided but only a single bundle is allowed in this context (%s)", e.getMessage())); } - return bundles.stream().findFirst().get(); + return bundles.get(0); } catch (JSONException | UnsupportedOperationException e2) { throw new IllegalArgumentException( String.format("The JSON can be interpreted neither as an individual bundle (%s) nor as a bundle collection (%s)", @@ -188,15 +193,17 @@ private static Bundle toBundle( final JSONObject jsonObject, // must be a single Bundle object final Function ioPathConstructor) { try { - // validate the schema name - final String schemaName = getRequiredPropertyAsString(jsonObject, JSON_PROPERTY_SCHEMA_NAME); + // validate the schema name and version + final JSONObject schemaMap = jsonObject.getJSONObject(JSON_PROPERTY_SCHEMA); + if (schemaMap == null) { + throw new IllegalArgumentException("JSON bundle is missing the required schema property"); + } + final String schemaName = getRequiredPropertyAsString(schemaMap, JSON_PROPERTY_SCHEMA_NAME); if (!schemaName.equals(JSON_SCHEMA_NAME)) { throw new IllegalArgumentException( String.format("Expected bundle schema name %s but found %s", JSON_SCHEMA_NAME, schemaName)); } - - // validate the schema version - final String schemaVersion = 
getRequiredPropertyAsString(jsonObject, JSON_PROPERTY_SCHEMA_VERSION); + final String schemaVersion = getRequiredPropertyAsString(schemaMap, JSON_PROPERTY_SCHEMA_VERSION); if (!schemaVersion.equals(JSON_SCHEMA_VERSION)) { throw new IllegalArgumentException(String.format("Expected bundle schema version %s but found %s", JSON_SCHEMA_VERSION, schemaVersion)); diff --git a/src/main/java/htsjdk/beta/io/bundle/BundleResourceType.java b/src/main/java/htsjdk/beta/io/bundle/BundleResourceType.java index 823b3c58c3..d6987c9561 100644 --- a/src/main/java/htsjdk/beta/io/bundle/BundleResourceType.java +++ b/src/main/java/htsjdk/beta/io/bundle/BundleResourceType.java @@ -5,8 +5,7 @@ import htsjdk.beta.plugin.variants.VariantsFormats; /** - * Constants for specifying tandard primary content types, secondary content types, and (optional) formats - * for resources contained in a {@link Bundle}. + * Constants for specifying standard content types and formats for resources contained in a {@link Bundle}. * * Bundles generally contain one primary resource, plus one or more secondary resources such as an index or md5 file. * Each resource has an associated content type, and optionally a format. The bundle itself has a primary content @@ -17,8 +16,8 @@ * correspond to one of the enum values in {@link htsjdk.beta.plugin.HtsContentType}, since each of these has a * corresponding {@link htsjdk.beta.plugin.HtsCodec} that handles that type of resource, such as reads or variants. * - * Secondary resources can also be any string, but the standard secondary content types are defined here, i.e., for - * primary content type "READS", a secondary content type might be "READS_INDEX". + * Secondary resource content types can also be any string, but the standard secondary content types are defined + * here, i.e., for primary content type "READS", a secondary content type might be "READS_INDEX". 
* * Finally, each resource in a bundle can have an optional format, which is a string that corresponds to the format * for that resource. For example, a primary content type of "READS" might have a format of "BAM". diff --git a/src/main/java/htsjdk/beta/plugin/reads/ReadsBundle.java b/src/main/java/htsjdk/beta/plugin/reads/ReadsBundle.java index 2c6d9574e0..3d2f244f14 100644 --- a/src/main/java/htsjdk/beta/plugin/reads/ReadsBundle.java +++ b/src/main/java/htsjdk/beta/plugin/reads/ReadsBundle.java @@ -47,9 +47,7 @@ public class ReadsBundle extends Bundle implements Serializabl * @param reads An {@link IOPath}-derived object that represents a source of reads. */ public ReadsBundle(final T reads) { - this(Arrays.asList(toInputResource( - BundleResourceType.CT_ALIGNED_READS, - ValidationUtils.nonNull(reads, BundleResourceType.CT_ALIGNED_READS)))); + this(Arrays.asList(toInputResource(BundleResourceType.CT_ALIGNED_READS, reads))); } /** @@ -59,10 +57,8 @@ public ReadsBundle(final T reads) { */ public ReadsBundle(final T reads, final T index) { this(Arrays.asList( - toInputResource(BundleResourceType.CT_ALIGNED_READS, ValidationUtils.nonNull(reads, BundleResourceType.CT_ALIGNED_READS)), - toInputResource( - BundleResourceType.CT_READS_INDEX, - ValidationUtils.nonNull(index, BundleResourceType.CT_READS_INDEX)))); + toInputResource(BundleResourceType.CT_ALIGNED_READS, reads), + toInputResource(BundleResourceType.CT_READS_INDEX, index))); } /** @@ -174,10 +170,6 @@ public static ReadsBundle resolveIndex( return new ReadsBundle<>(reads); } - public static boolean looksLikeAReadsBundle(final IOPath rawReadPath) { - return rawReadPath.getURI().getPath().endsWith(BundleJSON.BUNDLE_EXTENSION); - } - private static IOPathResource toInputResource(final String providedContentType, final T ioPath) { ValidationUtils.nonNull(ioPath, "ioPath"); final Optional> typePair = getInferredContentTypes(ioPath); diff --git a/src/main/java/htsjdk/beta/plugin/variants/VariantsBundle.java 
b/src/main/java/htsjdk/beta/plugin/variants/VariantsBundle.java new file mode 100644 index 0000000000..a8bce40cc5 --- /dev/null +++ b/src/main/java/htsjdk/beta/plugin/variants/VariantsBundle.java @@ -0,0 +1,215 @@ +package htsjdk.beta.plugin.variants; + +import htsjdk.beta.io.IOPathUtils; +import htsjdk.beta.io.bundle.*; +import htsjdk.io.HtsPath; +import htsjdk.io.IOPath; +import htsjdk.samtools.util.FileExtensions; +import htsjdk.samtools.util.Log; +import htsjdk.samtools.util.Tuple; +import htsjdk.utils.ValidationUtils; + +import java.io.Serial; +import java.io.Serializable; +import java.nio.file.Files; +import java.util.*; +import java.util.function.Function; + +/** + * A {@link Bundle} for variants and variants-related resources that are backed by on disk files. A {@link + * htsjdk.beta.plugin.variants.VariantsBundle} has a primary resource with content type {@link + * BundleResourceType#PRIMARY_CT_VARIANT_CONTEXTS}; and an optional index resource. A VariantsBundle can also + * contain additional resources. + * + * Note that this class is merely a convenience class for the case where the variants are backed by files on disk. + * A bundle that contains variants and related resources can be created manually using the {@link Bundle} class. + * This class provides convenient constructors, and validation for JSON interconversions. To create a VariantsBundle + * for variants sources that are backed by streams or other {@link BundleResource} types, the {@link Bundle} and + * {@link BundleBuilder} classes can be used to construct such bundles directly. + */ +public class VariantsBundle extends Bundle implements Serializable { + @Serial + private static final long serialVersionUID = 1L; + private static final Log LOG = Log.getInstance(VariantsBundle.class); + /** + * Create a {@link htsjdk.beta.plugin.variants.VariantsBundle} containing only a variants resource. + * + * @param vcfPath An {@link IOPath}-derived object that represents a source of variants. 
+ */ + public VariantsBundle(final IOPath vcfPath) { + this(List.of(toInputResource(BundleResourceType.CT_VARIANT_CONTEXTS, vcfPath))); + } + + /** + * Create a {@link htsjdk.beta.plugin.variants.VariantsBundle} containing only variants and an index. + * + * @param vcfPath An {@link IOPath}-derived object that represents a source of variants. + * @param indexPath An {@link IOPath}-derived object that represents the companion index for {@code vcfPath}. + */ + public VariantsBundle(final IOPath vcfPath, final IOPath indexPath) { + this(List.of( + toInputResource(BundleResourceType.CT_VARIANT_CONTEXTS, vcfPath), + toInputResource(BundleResourceType.CT_VARIANTS_INDEX, indexPath))); + } + + /** + * Create a {@link htsjdk.beta.plugin.variants.VariantsBundle} using the resources in an existing bundle. A + * resource with content type {@link BundleResourceType#CT_VARIANT_CONTEXTS} must be present in the + * resources, or this constructor will throw. + * + * @param resources collection of {@link BundleResource}. the collection must include a resource with + * content type {@link BundleResourceType#CT_VARIANT_CONTEXTS}. + * @throws IllegalArgumentException if no resource with content type + * {@link BundleResourceType#CT_VARIANT_CONTEXTS} is included in the input {@link BundleResource} + * collection. + */ + public VariantsBundle(final Collection resources) { + super(BundleResourceType.CT_VARIANT_CONTEXTS, resources); + } + + /** + * @return the {@link BundleResourceType#CT_VARIANT_CONTEXTS} {@link BundleResource} for this + * {@link htsjdk.beta.plugin.variants.VariantsBundle} + */ + public BundleResource getVariants() { + return getOrThrow(BundleResourceType.CT_VARIANT_CONTEXTS); + } + + /** + * Get the optional {@link BundleResourceType#CT_VARIANTS_INDEX} resource for this + * {@link htsjdk.beta.plugin.variants.VariantsBundle}. 
+ * + * @return the optional {@link BundleResourceType#CT_VARIANTS_INDEX} resource for this + * {@link htsjdk.beta.plugin.variants.VariantsBundle}, or Optional.empty() if no index resource is present in + * the bundle. + */ + public Optional getIndex() { + return get(BundleResourceType.CT_VARIANTS_INDEX); + } + + /** + * Create a {@link VariantsBundle} from a JSON string contained in jsonPath. + * + * @param jsonPath the path to a file that contains a {@link Bundle} serialized to JSON. The bundle + * must contain a resource with content type VARIANT_CONTEXTS. + * @return a {@link VariantsBundle} created from jsonPath + */ + public static VariantsBundle getVariantsBundleFromPath(final IOPath jsonPath) { + return getVariantsBundleFromString(IOPathUtils.getStringFromPath(jsonPath)); + } + + /** + * Create a {@link VariantsBundle} from a JSON string contained in jsonPath. + * + * @param the IOPath-derived type of the IOPathResources to be used in the new bundle + * @param jsonPath the path to a file that contains a {@link Bundle} serialized to JSON. The bundle + * must contain a resource with content type VARIANT_CONTEXTS. + * @param ioPathConstructor a function that takes a string and returns an IOPath-derived class of type {@code T} + * @return a {@link VariantsBundle} created from jsonPath + */ + public static VariantsBundle getVariantsBundleFromPath(final IOPath jsonPath, + final Function ioPathConstructor) { + return getVariantsBundleFromString(IOPathUtils.getStringFromPath(jsonPath), ioPathConstructor); + } + + /** + * Create a {@link htsjdk.beta.plugin.variants.VariantsBundle} from a JSON string. 
+ * + * @param jsonString the jsonString to use to create the {@link htsjdk.beta.plugin.variants.VariantsBundle} + * @return a {@link htsjdk.beta.plugin.variants.VariantsBundle} + */ + public static VariantsBundle getVariantsBundleFromString(final String jsonString) { + return getVariantsBundleFromString(jsonString, HtsPath::new); + } + + /** + * Create a {@link htsjdk.beta.plugin.variants.VariantsBundle} from a JSON string with all IOPathResources using + * an IOPath-derived class of type {@code T}. + * + * @param the IOPath-derived type of the IOPathResources to be used in the new bundle + * @param jsonString the string to use to create the {@link htsjdk.beta.plugin.variants.VariantsBundle} + * @param ioPathConstructor a function that takes a string and returns an IOPath-derived class of type {@code T} + * @return a newly created {@link htsjdk.beta.plugin.variants.VariantsBundle} + */ + public static VariantsBundle getVariantsBundleFromString( + final String jsonString, + final Function ioPathConstructor) { + return new VariantsBundle(BundleJSON.toBundle(jsonString, ioPathConstructor).getResources()); + } + + /** + * Find the companion index for a variants source, and return the path of that index, if one can be found. + * The returned index path is created using {@link HtsPath}; no new bundle is created. + * + * @param variants the variants source to use + * @return an Optional containing the path of the companion index for {@code variants}, or Optional.empty() if + * no companion index can be found. + */ + public static Optional resolveIndex(final IOPath variants) { + return resolveIndex(variants, HtsPath::new); + } + + /** + * Attempts to find the companion index for a variants source based on commonly used file extensions, and + * returns the path of that index (not a {@link htsjdk.beta.plugin.variants.VariantsBundle}) if a file with + * one of those extensions exists. 
+ * + * An index can only be resolved for an IOPath that represents a file on a file system for which an NIO + * provider is installed. Remote paths that use a protocol scheme for which no NIO file system is + * available will (silently) not be resolved. + * + * @param the IOPath-derived type of the IOPath to be returned + * @param variantsHtsPath the IOPath-derived object representing the variants source to use + * @param ioPathConstructor a function that takes a string and returns an IOPath-derived class of type + * @return a {@link IOPath}-derived object of type T containing the companion index for {@code variantsPath}, + * if it can be found + */ + public static Optional resolveIndex( + final T variantsHtsPath, + final Function ioPathConstructor) { + final Set indexExtensions = Set.of(FileExtensions.TRIBBLE_INDEX, FileExtensions.TABIX_INDEX); + for (final String extension : indexExtensions) { + final T putativeIndexPath = IOPathUtils.appendExtension(variantsHtsPath, extension, ioPathConstructor); + if (Files.exists(putativeIndexPath.toPath())) { + return Optional.of(putativeIndexPath); + } + } + return Optional.empty(); + } + + private static IOPathResource toInputResource(final String providedContentType, final T ioPath) { + ValidationUtils.nonNull(ioPath, "ioPath"); + final Optional> typePair = getInferredContentTypes(ioPath); + if (typePair.isPresent()) { + if (providedContentType != null && !typePair.get().a.equals(providedContentType)) { + LOG.warn(String.format( + "Provided content type \"%s\" for \"%s\" doesn't match derived content type \"%s\"", + providedContentType, + ioPath.getRawInputString(), + typePair.get().a)); + } + } + return new IOPathResource(ioPath, providedContentType); + } + + // Try to infer the contentType/format, i.e., variants from an IOPath. Currently this + // exists purely to check for logical inconsistencies. 
It can detect cases that are illogical + // (an IOPath that has format CRAM, but file extension BAM), but it can't deterministically + // and correctly infer the types in all cases without reproducing all the logic embedded in all the + // codecs (e.g., an htsget IOPath ends in ".bam", but has format HTSGET_BAM, not BAM - detecting + // that here would require parsing the entire IOPath structure, which is best left to the codecs + // themselves). So for now it's advisory, but maybe it should be abandoned altogether. + private static Optional> getInferredContentTypes(final T ioPath) { + ValidationUtils.nonNull(ioPath, "ioPath"); + final Optional extension = ioPath.getExtension(); + if (extension.isPresent()) { + final String ext = extension.get(); + if (ext.equals(FileExtensions.VCF)) { + return Optional.of(new Tuple<>(BundleResourceType.CT_VARIANT_CONTEXTS, BundleResourceType.FMT_VARIANTS_VCF)); + } else if (ext.equals(FileExtensions.COMPRESSED_VCF)) { + return Optional.of(new Tuple<>(BundleResourceType.CT_VARIANT_CONTEXTS, BundleResourceType.FMT_VARIANTS_VCF)); + } + } + return Optional.empty(); + } +} diff --git a/src/test/java/htsjdk/beta/io/bundle/BundleJSONTest.java b/src/test/java/htsjdk/beta/io/bundle/BundleJSONTest.java index c43374beed..159959dedc 100644 --- a/src/test/java/htsjdk/beta/io/bundle/BundleJSONTest.java +++ b/src/test/java/htsjdk/beta/io/bundle/BundleJSONTest.java @@ -34,8 +34,10 @@ public Object[][] getSingleBundleTestCases() { { """ { - "schemaName":"htsbundle", - "schemaVersion":"%s", + "schema":{ + "schemaName":"htsbundle", + "schemaVersion":"%s", + }, "primary":"%s", "%s":{"path":"%s","format":"BAM"} }""".formatted( @@ -49,8 +51,10 @@ BundleResourceType.CT_ALIGNED_READS, getURIStringFromIOPath(BundleResourceTestDa { """ { - "schemaName":"htsbundle", - "schemaVersion":"%s", + "schema":{ + "schemaName":"htsbundle", + "schemaVersion":"%s", + }, "primary":"%s", "%s":{"path":"%s"} }""".formatted( @@ -63,8 +67,10 @@ 
BundleResourceType.CT_ALIGNED_READS, getURIStringFromIOPath(BundleResourceTestDa { """ { - "schemaName":"htsbundle", - "schemaVersion":"%s", + "schema":{ + "schemaName":"htsbundle", + "schemaVersion":"%s", + }, "primary":"%s", "%s":{"path":"%s","format":"%s"}, "%s":{"path":"%s","format":"%s"} @@ -81,8 +87,10 @@ BundleResourceType.CT_ALIGNED_READS, getURIStringFromIOPath(BundleResourceTestDa { """ { - "schemaName":"htsbundle", - "schemaVersion":"%s", + "schema":{ + "schemaName":"htsbundle", + "schemaVersion":"%s", + }, "primary":"%s", "%s":{"path":"%s","format":"%s"}, "%s":{"path":"%s"} @@ -99,8 +107,10 @@ BundleResourceType.CT_ALIGNED_READS, getURIStringFromIOPath(BundleResourceTestDa { """ { - "schemaName":"htsbundle", - "schemaVersion":"%s", + "schema":{ + "schemaName":"htsbundle", + "schemaVersion":"%s", + }, "primary":"%s", "%s":{"path":"%s"}, "%s":{"path":"%s","format":"%s"} @@ -117,8 +127,10 @@ BundleResourceType.CT_ALIGNED_READS, getURIStringFromIOPath(BundleResourceTestDa { """ { - "schemaName":"htsbundle", - "schemaVersion":"%s", + "schema":{ + "schemaName":"htsbundle", + "schemaVersion":"%s", + }, "primary":"%s", "%s":{"path":"%s"}, "%s":{"path":"%s"} @@ -137,8 +149,10 @@ BundleResourceType.CT_ALIGNED_READS, getURIStringFromIOPath(BundleResourceTestDa { """ { - "schemaName":"htsbundle", - "schemaVersion":"%s", + "schema":{ + "schemaName":"htsbundle", + "schemaVersion":"%s", + }, "primary":"CUSTOM", "CUSTOM":{"path":"%s"} }""".formatted( @@ -152,12 +166,14 @@ BundleResourceType.CT_ALIGNED_READS, getURIStringFromIOPath(BundleResourceTestDa { """ { + "schema":{ "schemaName":"htsbundle", "schemaVersion":"%s", - "primary":"%s", - "%s":{"path":"%s"}, - "%s":{"path":"%s"}, - "CUSTOM":{"path":"%s"} + }, + "primary":"%s", + "%s":{"path":"%s"}, + "%s":{"path":"%s"}, + "CUSTOM":{"path":"%s"} }""".formatted( BundleJSON.JSON_SCHEMA_VERSION, BundleResourceType.CT_ALIGNED_READS, @@ -230,14 +246,18 @@ public void testAcceptSingleBundleJSONAsCollection( public void 
testRejectMultipleBundlesAsSingleBundle() { final String multipleBundles = """ [{ - "schemaName":"htsbundle", - "schemaVersion":"%s", - "primary":"%s", - "%s":{"path":"%s","format":"%s"} + "schema":{ + "schemaName":"htsbundle", + "schemaVersion":"%s", + }, + "primary":"%s", + "%s":{"path":"%s","format":"%s"} }, { - "schemaName":"htsbundle", - "schemaVersion":"%s", + "schema":{ + "schemaName":"htsbundle", + "schemaVersion":"%s", + }, "primary":"%s", "%s":{"path":"%s"} }]""".formatted( @@ -258,13 +278,17 @@ public Object[][] getInvalidBundleJSON() { {null, "cannot be null"}, {"", "The string is empty"}, - // missing schema name - {"{}", "missing the required property schemaName"}, + // missing schema entirely + {"{}", "[\"schema\"] not found"}, - // still missing schema name + // missing schema name { """ - {"schemaVersion":"%s"} + { + "schema":{ + "schemaVersion":"%s", + } + } """.formatted(BundleJSON.JSON_SCHEMA_VERSION), "missing the required property schemaName" }, @@ -272,7 +296,12 @@ public Object[][] getInvalidBundleJSON() { // incorrect schema name { """ - {"schemaName":"bogusname", "schemaVersion":"%s"} + { + "schema":{ + "schemaName":"bogusname", + "schemaVersion":"%s", + } + } """.formatted(BundleJSON.JSON_SCHEMA_VERSION), "Expected bundle schema name" }, @@ -280,7 +309,11 @@ public Object[][] getInvalidBundleJSON() { // missing schema version { """ - { "schemaName":"htsbundle" } + { + "schema":{ + "schemaName":"htsbundle", + } + } """, "missing the required property schemaVersion" }, @@ -288,7 +321,12 @@ public Object[][] getInvalidBundleJSON() { // incorrect schema version { """ - {"schemaName":"htsbundle", "schemaVersion":"99.99.99"} + { + "schema":{ + "schemaName":"htsbundle", + "schemaVersion":"99.99.99", + } + } """, "Expected bundle schema version" }, @@ -297,8 +335,10 @@ public Object[][] getInvalidBundleJSON() { { """ { - "schemaVersion":"%s", - "schemaName":"htsbundle", + "schema":{ + "schemaName":"htsbundle", + "schemaVersion":"%s", + }, 
"%s":{"path":"myreads.bam","format":"%s"} } """.formatted( @@ -309,12 +349,14 @@ public Object[][] getInvalidBundleJSON() { // primary property is present, but the resource it specifies is not in the bundle { """ - { - "schemaVersion":"%s", + { + "schema":{ "schemaName":"htsbundle", - "%s":{"path":"myreads.bam","format":"%s"}, - "primary":"MISSING_RESOURCE" - } + "schemaVersion":"%s", + }, + "%s":{"path":"myreads.bam","format":"%s"}, + "primary":"MISSING_RESOURCE" + } """.formatted( BundleJSON.JSON_SCHEMA_VERSION, BundleResourceType.CT_ALIGNED_READS, BundleResourceType.FMT_READS_BAM), @@ -324,26 +366,30 @@ public Object[][] getInvalidBundleJSON() { // syntax error (missing quote before schemaName) { """ - { - "schemaVersion":"%s", + { + "schema":{ schemaName":"htsbundle", - "%s":{"path":"myreads.bam","format":"%s"}, - "primary":"%s" - } + "schemaVersion":"%s", + }, + "%s":{"path":"myreads.bam","format":"%s"}, + "primary":"%s" + } """.formatted( BundleJSON.JSON_SCHEMA_VERSION, BundleResourceType.CT_ALIGNED_READS, BundleResourceType.FMT_READS_BAM, BundleResourceType.CT_ALIGNED_READS), - "Expected a ':' after a key at 58 [character 19 line 3]" + "Expected a ':' after a key at 36 [character 19 line 3]" }, // missing enclosing {} -> UnsupportedOperationException (no text message) { """ + "schema":{ "schemaName":"htsbundle", "schemaVersion":"%s", + } """.formatted(BundleJSON.JSON_SCHEMA_VERSION), - "A JSONObject text must begin with '{' at 5 [character 6 line 1]", + "A JSONObject text must begin with '{' at 1 [character 2 line 1]", }, }; } @@ -389,8 +435,10 @@ public void testJAcceptSONCollectionWithOneBundleAsSingleBundle() { final String jsonCollectionWithOneBundle = """ [{ - "schemaName":"htsbundle", - "schemaVersion":"%s", + "schema":{ + "schemaName":"htsbundle", + "schemaVersion":"%s", + }, "primary":"%s", "%s":{"path":"%s","format":"BAM"} }]""".formatted( @@ -420,8 +468,10 @@ public Object[][] getBundleCollectionTestCases() { // 1 bundle """ [{ - 
"schemaName":"htsbundle", - "schemaVersion":"%s", + "schema":{ + "schemaName":"htsbundle", + "schemaVersion":"%s", + }, "primary":"%s", "%s":{"path":"%s","format":"%s"} }]""".formatted( @@ -434,14 +484,18 @@ BundleResourceType.CT_ALIGNED_READS, getURIStringFromIOPath(BundleResourceTestDa // 2 bundles """ [{ - "schemaName":"htsbundle", - "schemaVersion":"%s", + "schema":{ + "schemaName":"htsbundle", + "schemaVersion":"%s", + }, "primary":"%s", "%s":{"path":"%s","format":"%s"} }, { - "schemaName":"htsbundle", - "schemaVersion":"%s", + "schema":{ + "schemaName":"htsbundle", + "schemaVersion":"%s", + }, "primary":"%s", "%s":{"path":"%s"} }]""".formatted( @@ -459,20 +513,26 @@ BundleResourceType.CT_ALIGNED_READS, getURIStringFromIOPath(BundleResourceTestDa // 3 bundles """ [{ - "schemaName":"htsbundle", - "schemaVersion":"%s", + "schema":{ + "schemaName":"htsbundle", + "schemaVersion":"%s", + }, "primary":"%s", "%s":{"path":"%s","format":"%s"} }, { - "schemaName":"htsbundle", - "schemaVersion":"%s", + "schema":{ + "schemaName":"htsbundle", + "schemaVersion":"%s", + }, "primary":"%s", "%s":{"path":"%s"} }, { - "schemaName":"htsbundle", - "schemaVersion":"%s", + "schema":{ + "schemaName":"htsbundle", + "schemaVersion":"%s", + }, "primary":"%s", "%s":{"path":"%s","format":"%s"} }]""".formatted( diff --git a/src/test/java/htsjdk/beta/io/bundle/BundleTest.java b/src/test/java/htsjdk/beta/io/bundle/BundleTest.java index 2e63057c52..cc93beef2d 100644 --- a/src/test/java/htsjdk/beta/io/bundle/BundleTest.java +++ b/src/test/java/htsjdk/beta/io/bundle/BundleTest.java @@ -7,6 +7,7 @@ import java.util.Arrays; import java.util.Collections; +import java.util.HashSet; import java.util.Iterator; // Example JSON : diff --git a/src/test/java/htsjdk/beta/plugin/reads/ReadsBundleTest.java b/src/test/java/htsjdk/beta/plugin/reads/ReadsBundleTest.java index fbccc72ba2..6846d26426 100644 --- a/src/test/java/htsjdk/beta/plugin/reads/ReadsBundleTest.java +++ 
b/src/test/java/htsjdk/beta/plugin/reads/ReadsBundleTest.java
@@ -43,7 +43,11 @@ public void testReadsBundleReadsAndIndex() {
     public void testNoReadsInSerializedBundle() {
         final String vcfJSON = """
             {
-                "schemaVersion":"0.1.0",
+                schema: {
+                    schemaVersion: "%s",
+                    schemaName: "htsbundle"
+                },
+                "schemaVersion":"%s",
                 "schemaName":"htsbundle",
                 "%s":{"path":"my.vcf","format":"%s"},
                 "primary":"%s"
@@ -82,8 +86,10 @@ public Object[][] getRoundTripJSONTestData() {
                 // reads only, without format included
                 """
                 {
-                    "schemaVersion":"%s",
-                    "schemaName":"htsbundle",
+                    schema: {
+                        schemaVersion: "%s",
+                        schemaName: "htsbundle"
+                    },
                     "%s":{"path":"%s"},
                     "primary":"%s"
                 }""".formatted(
@@ -97,8 +103,10 @@ public Object[][] getRoundTripJSONTestData() {
                 // reads only, with format included
                 """
                 {
-                    "schemaVersion":"%s",
-                    "schemaName":"htsbundle",
+                    schema: {
+                        schemaVersion: "%s",
+                        schemaName: "htsbundle"
+                    },
                     "%s":{"path":"%s", "format":"%s"},
                     "primary":"%s"
                 }""".formatted(
@@ -119,8 +127,10 @@ public Object[][] getRoundTripJSONTestData() {
                 // reads with index, with format included
                 """
                 {
-                    "schemaVersion":"%s",
-                    "schemaName":"htsbundle",
+                    schema: {
+                        schemaVersion: "%s",
+                        schemaName: "htsbundle"
+                    },
                     "%s":{"path":"%s", "format":"%s"},
                     "%s":{"path":"%s", "format":"%s"},
                     "primary":"%s"
diff --git a/src/test/java/htsjdk/beta/plugin/variants/VariantsBundleTest.java b/src/test/java/htsjdk/beta/plugin/variants/VariantsBundleTest.java
new file mode 100644
index 0000000000..f5a7df07d5
--- /dev/null
+++ b/src/test/java/htsjdk/beta/plugin/variants/VariantsBundleTest.java
@@ -0,0 +1,203 @@
+package htsjdk.beta.plugin.variants;
+
+import htsjdk.HtsjdkTest;
+import htsjdk.beta.io.IOPathUtils;
+import htsjdk.beta.io.bundle.*;
+import htsjdk.io.HtsPath;
+import htsjdk.io.IOPath;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.util.Optional;
+
+public class VariantsBundleTest extends HtsjdkTest { // tests VariantsBundle construction, JSON parsing and index resolution
+
+    private final static String VCF_FILE = "a.vcf"; // variants path shared by the tests below
+    private final static String VCF_INDEX_FILE = "a.vcf.idx"; // companion index path used with VCF_FILE
+
+    @Test // a bundle built from only a variants path exposes that path and reports no index
+    public void testVariantsBundleVCFOnly() {
+        final IOPath variantsPath = new HtsPath(VCF_FILE);
+        final VariantsBundle variantsBundle = new VariantsBundle(variantsPath);
+
+        Assert.assertTrue(variantsBundle.getVariants().getIOPath().isPresent());
+        Assert.assertEquals(variantsBundle.getVariants().getIOPath().get(), variantsPath);
+        Assert.assertFalse(variantsBundle.getIndex().isPresent());
+    }
+
+    @Test // a bundle built from variants + index paths exposes both paths unchanged
+    public void testVariantsBundleVCFWithIndex() {
+        final IOPath variantsPath = new HtsPath(VCF_FILE);
+        final IOPath indexPath = new HtsPath(VCF_INDEX_FILE);
+        final VariantsBundle variantsBundle = new VariantsBundle(variantsPath, indexPath);
+
+        Assert.assertTrue(variantsBundle.getVariants().getIOPath().isPresent());
+        Assert.assertEquals(variantsBundle.getVariants().getIOPath().get(), variantsPath);
+
+        Assert.assertTrue(variantsBundle.getIndex().isPresent());
+        Assert.assertTrue(variantsBundle.getIndex().get().getIOPath().isPresent());
+        final IOPath actualIndexPath = variantsBundle.getIndex().get().getIOPath().get();
+        Assert.assertEquals(actualIndexPath, indexPath);
+    }
+
+    @Test(expectedExceptions = IllegalArgumentException.class) // JSON whose only resource is aligned reads must not parse as a variants bundle
+    public void testNoVCFInSerializedBundle() {
+        final String vcfJSON = """
+            {
+                schema: {
+                    schemaVersion: "%s",
+                    schemaName: "htsbundle"
+                },
+                "%s":{"path":"my.cram","format":"%s"},
+                "primary":"%s"
+            }
+            """.formatted(
+                BundleJSON.JSON_SCHEMA_VERSION,
+                BundleResourceType.CT_ALIGNED_READS,
+                BundleResourceType.FMT_READS_CRAM,
+                BundleResourceType.CT_ALIGNED_READS
+        );
+        try {
+            VariantsBundle.getVariantsBundleFromString(vcfJSON);
+        } catch (final IllegalArgumentException e) { // check the reason, then rethrow so expectedExceptions is satisfied
+            Assert.assertTrue(e.getMessage().contains("not present in the bundle's resources"));
+            throw e;
+        }
+    }
+
+    @Test(expectedExceptions = IllegalArgumentException.class) // neither resource below uses the variants content type, so an exception is expected
+    public void testNoVCFInResources() {
+        final Bundle bundleWithNoVariants = new BundleBuilder()
+                .addPrimary(new IOPathResource(new HtsPath("notVariants.txt"), "NOT_VARIANTS"))
+                .addSecondary(new IOPathResource(new HtsPath("alsoNotVariants.txt"), "ALSO_NOT_VARIANTS"))
+                .build();
+        new VariantsBundle(bundleWithNoVariants.getResources());
+    }
+
+    @DataProvider(name = "roundTripJSONTestData")
+    public Object[][] getRoundTripJSONTestData() {
+        return new Object[][]{
+                // json string, expected VariantsBundle that the json string should parse to
+                {
+                        // vcf only, without format included
+                        """
+                        {
+                            schema: {
+                                schemaVersion: "%s",
+                                schemaName: "htsbundle"
+                            },
+                            "%s":{"path":"%s"},
+                            "primary":"%s"
+                        }
+                        """.formatted(
+                                BundleJSON.JSON_SCHEMA_VERSION,
+                                BundleResourceType.CT_VARIANT_CONTEXTS,
+                                VCF_FILE,
+                                BundleResourceType.CT_VARIANT_CONTEXTS),
+                        new VariantsBundle(new HtsPath(VCF_FILE))
+                },
+                {
+                        // vcf only, with format included
+                        """
+                        {
+                            schema: {
+                                schemaVersion: "%s",
+                                schemaName: "htsbundle"
+                            },
+                            "%s":{"path":"%s","format":"%s"},
+                            "primary":"%s"
+                        }
+                        """.formatted(
+                                BundleJSON.JSON_SCHEMA_VERSION,
+                                BundleResourceType.CT_VARIANT_CONTEXTS, VCF_FILE, BundleResourceType.FMT_VARIANTS_VCF,
+                                BundleResourceType.CT_VARIANT_CONTEXTS),
+                        // VariantsBundle doesn't automatically infer format, so create one manually
+                        new VariantsBundle(
+                                new BundleBuilder().addPrimary(
+                                        new IOPathResource(
+                                                new HtsPath(VCF_FILE),
+                                                BundleResourceType.CT_VARIANT_CONTEXTS,
+                                                BundleResourceType.FMT_VARIANTS_VCF))
+                                        .build().getResources())
+                },
+                {
+                        // vcf with an index, with format included
+                        """
+                        {
+                            schema: {
+                                schemaVersion: "%s",
+                                schemaName: "htsbundle"
+                            },
+                            "%s":{"path":"%s","format":"%s"},
+                            "%s":{"path":"%s"},
+                            "primary":"%s"
+                        }
+                        """.formatted(
+                                BundleJSON.JSON_SCHEMA_VERSION,
+                                BundleResourceType.CT_VARIANT_CONTEXTS, VCF_FILE, BundleResourceType.FMT_VARIANTS_VCF,
+                                BundleResourceType.CT_VARIANTS_INDEX, VCF_INDEX_FILE,
+                                BundleResourceType.CT_VARIANT_CONTEXTS),
+                        // VariantsBundle doesn't automatically infer format, so create one manually
+                        new VariantsBundle(
+                                new BundleBuilder()
+                                        .addPrimary(
+                                                new IOPathResource(
+                                                        new HtsPath(VCF_FILE),
+                                                        BundleResourceType.CT_VARIANT_CONTEXTS,
+                                                        BundleResourceType.FMT_VARIANTS_VCF))
+                                        .addSecondary(
+                                                new IOPathResource(
+                                                        new HtsPath(VCF_INDEX_FILE),
+                                                        BundleResourceType.CT_VARIANTS_INDEX))
+                                        .build().getResources())
+                },
+        };
+    }
+
+    @Test(dataProvider = "roundTripJSONTestData") // parses each JSON string and compares it, ignoring resource order, with the expected bundle
+    public void testVariantsWriteRoundTrip(
+            final String jsonString,
+            final VariantsBundle expectedVariantsBundle) {
+        final VariantsBundle bundleFromJSON = VariantsBundle.getVariantsBundleFromString(jsonString);
+        Assert.assertTrue(Bundle.equalsIgnoreOrder(bundleFromJSON, expectedVariantsBundle));
+    }
+
+    @Test(dataProvider = "roundTripJSONTestData") // same comparison, but the JSON is first written to a temp path and loaded from there
+    public void testGetVariantsBundleFromPath(
+            final String jsonString,
+            final VariantsBundle expectedVariantsBundle) {
+        final IOPath jsonFilePath = IOPathUtils.createTempPath("variants", BundleJSON.BUNDLE_EXTENSION);
+        IOPathUtils.writeStringToPath(jsonFilePath, jsonString);
+        final VariantsBundle bundleFromPath = VariantsBundle.getVariantsBundleFromPath(jsonFilePath);
+
+        Assert.assertTrue(Bundle.equalsIgnoreOrder(bundleFromPath, expectedVariantsBundle));
+        Assert.assertTrue(bundleFromPath.getVariants().getIOPath().isPresent());
+        Assert.assertEquals(bundleFromPath.getVariants().getIOPath().get(), expectedVariantsBundle.getVariants().getIOPath().get());
+    }
+
+    @DataProvider(name = "resolveIndexTestData") // rows are {variants path, index path expected from VariantsBundle.resolveIndex}
+    public Object[][] getResolveIndexTestData() {
+        return new Object[][]{ // NOTE(review): relative build/resources/test paths - presumably needs the project root as working dir; confirm
+                {
+                        "build/resources/test/htsjdk/tribble/AbstractFeatureReaderTest/baseVariants.vcf.gz",
+                        "build/resources/test/htsjdk/tribble/AbstractFeatureReaderTest/baseVariants.vcf.gz.tbi"
+                },
+                {
+                        "build/resources/test/htsjdk/tribble/AbstractFeatureReaderTest/baseVariants.vcf",
+                        "build/resources/test/htsjdk/tribble/AbstractFeatureReaderTest/baseVariants.vcf.idx"
+                }
+        };
+
+    }
+
+    @Test(dataProvider = "resolveIndexTestData") // the resolved index must be present and equal to the expected path
+    public void testResolveIndex(
+            final String baseVCF,
+            final String expectedIndex) {
+        final Optional resolvedIndex = VariantsBundle.resolveIndex(new HtsPath(baseVCF));
+        Assert.assertTrue(resolvedIndex.isPresent());
+        Assert.assertEquals(resolvedIndex.get(), new HtsPath(expectedIndex));
+    }
+
+}
\ No newline at end of file
diff --git a/src/test/java/htsjdk/io/IOPathUtilsTest.java b/src/test/java/htsjdk/io/IOPathUtilsTest.java
new file mode 100644
index 0000000000..09a7c42ab5
--- /dev/null
+++ b/src/test/java/htsjdk/io/IOPathUtilsTest.java
@@ -0,0 +1,66 @@
+package htsjdk.io;
+
+import htsjdk.HtsjdkTest;
+import htsjdk.beta.io.IOPathUtils;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+public class IOPathUtilsTest extends HtsjdkTest { // tests IOPathUtils.replaceExtension and IOPathUtils.appendExtension
+
+    @DataProvider(name = "replaceExtensionTestData")
+    public Object[][] getReplaceExtensionTestData() {
+        return new Object[][]{ // rows are {original path, new extension, expected path}
+                {"file:///somepath/a.vcf", ".idx", "file:///somepath/a.idx"}, // new extension given with a leading "."
+                {"file:///somepath/a.vcf", "idx", "file:///somepath/a.idx"}, // new extension given without a leading "."
+                {"file:///a.vcf/a.vcf", ".idx", "file:///a.vcf/a.idx"}, // parent directory is also named a.vcf; only the file name changes
+                {"file:///a.vcf/a.vcf", "idx", "file:///a.vcf/a.idx"},
+                {"file:///somepath/a.vcf.gz", ".tbi", "file:///somepath/a.vcf.tbi"}, // only the last extension component (.gz) is replaced
+                {"file:///somepath/a.vcf.gz", "tbi", "file:///somepath/a.vcf.tbi"},
+        };
+    }
+
+    @Test(dataProvider = "replaceExtensionTestData") // replaceExtension output must equal an HtsPath built from the expected string
+    public void testReplaceExtension(
+            final String basePath,
+            final String extension,
+            final String resolvedPath) {
+        Assert.assertEquals(
+                IOPathUtils.replaceExtension(new HtsPath(basePath), extension, HtsPath::new),
+                new HtsPath(resolvedPath));
+    }
+
+    @Test(expectedExceptions = {RuntimeException.class}) // a path with no extension has nothing to replace
+    public void testThrowOnMissingExtension() {
+        try {
+            IOPathUtils.replaceExtension(new HtsPath("file:///somepath/a"), "idx", HtsPath::new);
+            Assert.fail("Expected exception"); // reached only if replaceExtension did not throw
+        } catch (final RuntimeException e) { // check the message, then rethrow so expectedExceptions is satisfied
+            Assert.assertTrue(e.getMessage().contains("The original path has no extension to replace"));
+            throw e;
+        }
+    }
+
+    @DataProvider(name = "appendExtensionTestData")
+    public Object[][] getAppendExtensionTestData() {
+        return new Object[][]{ // rows are {original path, extension to append, expected path}
+                {"file:///somepath/a.vcf", ".idx", "file:///somepath/a.vcf.idx"}, // extension given with a leading "."
+                {"file:///somepath/a.vcf", "idx", "file:///somepath/a.vcf.idx"}, // extension given without a leading "."
+                {"file:///a.vcf/a.vcf", ".idx", "file:///a.vcf/a.vcf.idx"}, // parent directory is also named a.vcf; only the file name changes
+                {"file:///a.vcf/a.vcf", "idx", "file:///a.vcf/a.vcf.idx"},
+                {"file:///somepath/a.vcf.gz", ".tbi", "file:///somepath/a.vcf.gz.tbi"}, // existing multi-part extension is kept in full
+                {"file:///somepath/a.vcf.gz", "tbi", "file:///somepath/a.vcf.gz.tbi"},
+        };
+    }
+
+    @Test(dataProvider = "appendExtensionTestData") // appendExtension output must equal an HtsPath built from the expected string
+    public void testAppendExtension(
+            final String basePath,
+            final String extension,
+            final String resolvedPath) {
+        Assert.assertEquals(
+                IOPathUtils.appendExtension(new HtsPath(basePath), extension, HtsPath::new),
+                new HtsPath(resolvedPath));
+    }
+
+}