From e72d53d38a3ce4f0c140797c6981a788b6815c9c Mon Sep 17 00:00:00 2001 From: David Roazen Date: Wed, 6 May 2015 11:58:13 -0400 Subject: [PATCH] Make VCFHeader Serializable This was missed in the previous pull request to make SAM/Variant records Serializable. We want to be able to serialize VCFHeaders just as we do SAMFileHeaders --- .../variant/vcf/VCFCompoundHeaderLine.java | 30 ++++++++++++---- .../variant/vcf/VCFContigHeaderLine.java | 22 ++++++++++++ src/java/htsjdk/variant/vcf/VCFHeader.java | 4 ++- .../htsjdk/variant/vcf/VCFHeaderLine.java | 26 +++++++++++--- .../variant/vcf/VCFSimpleHeaderLine.java | 28 +++++++++------ .../htsjdk/variant/vcf/VCFHeaderUnitTest.java | 34 +++++++++++++++++++ 6 files changed, 123 insertions(+), 21 deletions(-) diff --git a/src/java/htsjdk/variant/vcf/VCFCompoundHeaderLine.java b/src/java/htsjdk/variant/vcf/VCFCompoundHeaderLine.java index 1a2d773612..48e0cdf0db 100644 --- a/src/java/htsjdk/variant/vcf/VCFCompoundHeaderLine.java +++ b/src/java/htsjdk/variant/vcf/VCFCompoundHeaderLine.java @@ -249,16 +249,34 @@ protected String toStringEncoding() { } /** - * returns true if we're equal to another compounder header line + * returns true if we're equal to another compound header line * @param o a compound header line * @return true if equal */ - public boolean equals(Object o) { - if (!(o instanceof VCFCompoundHeaderLine)) + @Override + public boolean equals(final Object o) { + if ( this == o ) { + return true; + } + if ( o == null || getClass() != o.getClass() || ! 
super.equals(o) ) { return false; - VCFCompoundHeaderLine other = (VCFCompoundHeaderLine) o; - return equalsExcludingDescription(other) && - description.equals(other.description); + } + + final VCFCompoundHeaderLine that = (VCFCompoundHeaderLine) o; + return equalsExcludingDescription(that) && + description.equals(that.description); + } + + @Override + public int hashCode() { + int result = super.hashCode(); + result = 31 * result + name.hashCode(); + result = 31 * result + count; + result = 31 * result + (countType != null ? countType.hashCode() : 0); // only nullable field according to validate() + result = 31 * result + description.hashCode(); + result = 31 * result + type.hashCode(); + result = 31 * result + lineType.hashCode(); + return result; } public boolean equalsExcludingDescription(VCFCompoundHeaderLine other) { diff --git a/src/java/htsjdk/variant/vcf/VCFContigHeaderLine.java b/src/java/htsjdk/variant/vcf/VCFContigHeaderLine.java index 64c5d7f8f5..12e400c95c 100644 --- a/src/java/htsjdk/variant/vcf/VCFContigHeaderLine.java +++ b/src/java/htsjdk/variant/vcf/VCFContigHeaderLine.java @@ -34,6 +34,8 @@ /** * A special class representing a contig VCF header line. Knows the true contig order and sorts on that * + * Note: this class has a natural ordering that is inconsistent with equals() + * * @author mdepristo */ public class VCFContigHeaderLine extends VCFSimpleHeaderLine { @@ -82,6 +84,26 @@ public SAMSequenceRecord getSAMSequenceRecord() { return record; } + @Override + public boolean equals(final Object o) { + if ( this == o ) { + return true; + } + if ( o == null || getClass() != o.getClass() || ! 
super.equals(o) ) { + return false; + } + + final VCFContigHeaderLine that = (VCFContigHeaderLine) o; + return contigIndex.equals(that.contigIndex); + } + + @Override + public int hashCode() { + int result = super.hashCode(); + result = 31 * result + contigIndex.hashCode(); + return result; + } + /** * IT IS CRITICAL THAT THIS BE OVERRIDDEN SO WE SORT THE CONTIGS IN THE CORRECT ORDER */ diff --git a/src/java/htsjdk/variant/vcf/VCFHeader.java b/src/java/htsjdk/variant/vcf/VCFHeader.java index f763729885..f0c432b3a0 100644 --- a/src/java/htsjdk/variant/vcf/VCFHeader.java +++ b/src/java/htsjdk/variant/vcf/VCFHeader.java @@ -32,6 +32,7 @@ import htsjdk.variant.utils.GeneralUtils; import htsjdk.variant.variantcontext.VariantContextComparator; +import java.io.Serializable; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -57,7 +58,8 @@ *

* A class representing the VCF header */ -public class VCFHeader { +public class VCFHeader implements Serializable { + private static final long serialVersionUID = 1L; // the mandatory header fields public enum HEADER_FIELDS { diff --git a/src/java/htsjdk/variant/vcf/VCFHeaderLine.java b/src/java/htsjdk/variant/vcf/VCFHeaderLine.java index 284f0d4520..00d0f45f0f 100644 --- a/src/java/htsjdk/variant/vcf/VCFHeaderLine.java +++ b/src/java/htsjdk/variant/vcf/VCFHeaderLine.java @@ -27,6 +27,7 @@ import htsjdk.tribble.TribbleException; +import java.io.Serializable; import java.util.Map; @@ -37,7 +38,9 @@ *

* A class representing a key=value entry in the VCF header */ -public class VCFHeaderLine implements Comparable { +public class VCFHeaderLine implements Comparable, Serializable { + private static final long serialVersionUID = 1L; + protected static final boolean ALLOW_UNBOUND_DESCRIPTIONS = true; protected static final String UNBOUND_DESCRIPTION = "Not provided in original VCF header"; @@ -101,10 +104,25 @@ protected String toStringEncoding() { return mKey + "=" + mValue; } - public boolean equals(Object o) { - if ( !(o instanceof VCFHeaderLine) ) + @Override + public boolean equals(final Object o) { + if ( this == o ) { + return true; + } + if ( o == null || getClass() != o.getClass() ) { return false; - return mKey.equals(((VCFHeaderLine)o).getKey()) && mValue.equals(((VCFHeaderLine)o).getValue()); + } + + final VCFHeaderLine that = (VCFHeaderLine) o; + return mKey.equals(that.mKey) && // key not nullable + (mValue != null ? mValue.equals(that.mValue) : that.mValue == null); // value is nullable + } + + @Override + public int hashCode() { + int result = mKey.hashCode(); + result = 31 * result + (mValue != null ? mValue.hashCode() : 0); + return result; } public int compareTo(Object other) { diff --git a/src/java/htsjdk/variant/vcf/VCFSimpleHeaderLine.java b/src/java/htsjdk/variant/vcf/VCFSimpleHeaderLine.java index 16e90e3409..3401d08315 100644 --- a/src/java/htsjdk/variant/vcf/VCFSimpleHeaderLine.java +++ b/src/java/htsjdk/variant/vcf/VCFSimpleHeaderLine.java @@ -98,18 +98,26 @@ protected String toStringEncoding() { return getKey() + "=" + VCFHeaderLine.toStringEncoding(map); } - public boolean equals(Object o) { - if ( !(o instanceof VCFSimpleHeaderLine) ) - return false; - VCFSimpleHeaderLine other = (VCFSimpleHeaderLine)o; - if ( !name.equals(other.name) || genericFields.size() != other.genericFields.size() ) + @Override + public boolean equals( final Object o ) { + if ( this == o ) { + return true; + } + if ( o == null || getClass() != o.getClass() || ! 
super.equals(o) ) { return false; - for ( Map.Entry entry : genericFields.entrySet() ) { - if ( !entry.getValue().equals(other.genericFields.get(entry.getKey())) ) - return false; } - - return true; + + final VCFSimpleHeaderLine that = (VCFSimpleHeaderLine) o; + return name.equals(that.name) && + genericFields.equals(that.genericFields); + } + + @Override + public int hashCode() { + int result = super.hashCode(); + result = 31 * result + name.hashCode(); + result = 31 * result + genericFields.hashCode(); + return result; } public String getID() { diff --git a/src/tests/java/htsjdk/variant/vcf/VCFHeaderUnitTest.java b/src/tests/java/htsjdk/variant/vcf/VCFHeaderUnitTest.java index 339d203284..bc97b918c7 100644 --- a/src/tests/java/htsjdk/variant/vcf/VCFHeaderUnitTest.java +++ b/src/tests/java/htsjdk/variant/vcf/VCFHeaderUnitTest.java @@ -38,11 +38,15 @@ import org.testng.Assert; import org.testng.annotations.Test; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; import java.io.PrintWriter; import java.io.StringReader; import java.math.BigInteger; @@ -51,6 +55,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.Set; /** * Created by IntelliJ IDEA. 
@@ -192,6 +197,35 @@ public void testVCFHeaderAddDuplicateHeaderLine() { Assert.assertEquals(numHeaderLinesBefore, numHeaderLinesAfter); } + @Test + public void testVCFHeaderSerialization() throws Exception { + final VCFFileReader reader = new VCFFileReader(new File("testdata/htsjdk/variant/HiSeq.10000.vcf"), false); + final VCFHeader originalHeader = reader.getFileHeader(); + reader.close(); + + final ByteArrayOutputStream byteArrayStream = new ByteArrayOutputStream(); + final ObjectOutputStream out = new ObjectOutputStream(byteArrayStream); + out.writeObject(originalHeader); + out.close(); + + final ObjectInputStream in = new ObjectInputStream(new ByteArrayInputStream(byteArrayStream.toByteArray())); + final VCFHeader deserializedHeader = (VCFHeader)in.readObject(); + in.close(); + + Assert.assertEquals(deserializedHeader.getMetaDataInInputOrder(), originalHeader.getMetaDataInInputOrder(), "Header metadata does not match before/after serialization"); + Assert.assertEquals(deserializedHeader.getContigLines(), originalHeader.getContigLines(), "Contig header lines do not match before/after serialization"); + Assert.assertEquals(deserializedHeader.getFilterLines(), originalHeader.getFilterLines(), "Filter header lines do not match before/after serialization"); + Assert.assertEquals(deserializedHeader.getFormatHeaderLines(), originalHeader.getFormatHeaderLines(), "Format header lines do not match before/after serialization"); + Assert.assertEquals(deserializedHeader.getIDHeaderLines(), originalHeader.getIDHeaderLines(), "ID header lines do not match before/after serialization"); + Assert.assertEquals(deserializedHeader.getInfoHeaderLines(), originalHeader.getInfoHeaderLines(), "Info header lines do not match before/after serialization"); + Assert.assertEquals(deserializedHeader.getOtherHeaderLines(), originalHeader.getOtherHeaderLines(), "Other header lines do not match before/after serialization"); + Assert.assertEquals(deserializedHeader.getGenotypeSamples(), 
originalHeader.getGenotypeSamples(), "Genotype samples not the same before/after serialization"); + Assert.assertEquals(deserializedHeader.samplesWereAlreadySorted(), originalHeader.samplesWereAlreadySorted(), "Sortedness of samples not the same before/after serialization"); + Assert.assertEquals(deserializedHeader.getSampleNamesInOrder(), originalHeader.getSampleNamesInOrder(), "Sorted list of sample names in header not the same before/after serialization"); + Assert.assertEquals(deserializedHeader.getSampleNameToOffset(), originalHeader.getSampleNameToOffset(), "Sample name to offset map not the same before/after serialization"); + Assert.assertEquals(deserializedHeader.toString(), originalHeader.toString(), "String representation of header not the same before/after serialization"); + } + /** * a little utility function for all tests to md5sum a file * Shameless taken from: