From bf8d8da66213f5f044b4d3e4fd75a0fbbcf0ef19 Mon Sep 17 00:00:00 2001
From: Chris Norman <cnorman@broadinstitute.org>
Date: Mon, 8 Nov 2021 17:11:25 -0500
Subject: [PATCH 01/22] Commit with raw GATK SequenceDictionaryUtils and
 SequenceDictionaryUtilsTest.

---
 .../samtools/SAMSequenceDictionaryUtils.java  | 505 ++++++++++++++++++
 .../SAMSequenceDictionaryUtilsTest.java       | 357 +++++++++++++
 2 files changed, 862 insertions(+)
 create mode 100644 src/main/java/htsjdk/samtools/SAMSequenceDictionaryUtils.java
 create mode 100644 src/test/java/htsjdk/samtools/SAMSequenceDictionaryUtilsTest.java

diff --git a/src/main/java/htsjdk/samtools/SAMSequenceDictionaryUtils.java b/src/main/java/htsjdk/samtools/SAMSequenceDictionaryUtils.java
new file mode 100644
index 0000000000..7f1db9fd94
--- /dev/null
+++ b/src/main/java/htsjdk/samtools/SAMSequenceDictionaryUtils.java
@@ -0,0 +1,505 @@
+package org.broadinstitute.hellbender.utils;
+
+import htsjdk.samtools.SAMSequenceDictionary;
+import htsjdk.samtools.SAMSequenceRecord;
+import org.broadinstitute.hellbender.exceptions.GATKException;
+import org.broadinstitute.hellbender.exceptions.UserException;
+
+import java.util.*;
+import java.util.stream.Collectors;
+
+/**
+ *
+ * A series of utility functions that enable the GATK to compare two sequence dictionaries -- from the reference,
+ * from BAMs, or from feature sources -- for consistency.  The system supports two basic modes: get an enum state that
+ * describes at a high level the consistency between two dictionaries, or call validateDictionaries, which will
+ * throw a UserException if the dictionaries are too incompatible.
+ *
+ * Dictionaries are tested for contig name overlaps, consistency of ordering within the overlapping set, and length,
+ * if available.
+ */
+public final class SequenceDictionaryUtils {
+
+    private SequenceDictionaryUtils() {} // static utility holder; never instantiated
+
+    /**
+     * Orders sequence records by ascending sequence index.
+     */
+    private static final Comparator<SAMSequenceRecord> SEQUENCE_INDEX_ORDER = Comparator.comparingInt(SAMSequenceRecord::getSequenceIndex);
+
+    // Known human contig records used by the non-canonical human ordering check.
+    // That check ensures that the order is 1, 2, 3, ... instead of the lexicographic 1, 10, 11, 12, ... 2, 20, 21, ...
+
+    // hg18 ("chr"-prefixed names; same lengths as the b36 records below)
+    protected static final SAMSequenceRecord CHR1_HG18 = new SAMSequenceRecord("chr1", 247249719);
+    protected static final SAMSequenceRecord CHR2_HG18 = new SAMSequenceRecord("chr2", 242951149);
+    protected static final SAMSequenceRecord CHR10_HG18 = new SAMSequenceRecord("chr10", 135374737);
+
+    // hg19 ("chr"-prefixed names; same lengths as the b37 records below)
+    protected static final SAMSequenceRecord CHR1_HG19 = new SAMSequenceRecord("chr1", 249250621);
+    protected static final SAMSequenceRecord CHR2_HG19 = new SAMSequenceRecord("chr2", 243199373);
+    protected static final SAMSequenceRecord CHR10_HG19 = new SAMSequenceRecord("chr10", 135534747);
+
+    // b36 (unprefixed names; same lengths as the hg18 records above)
+    protected static final SAMSequenceRecord CHR1_B36 = new SAMSequenceRecord("1", 247249719);
+    protected static final SAMSequenceRecord CHR2_B36 = new SAMSequenceRecord("2", 242951149);
+    protected static final SAMSequenceRecord CHR10_B36 = new SAMSequenceRecord("10", 135374737);
+
+    // b37 (unprefixed names; same lengths as the hg19 records above)
+    protected static final SAMSequenceRecord CHR1_B37 = new SAMSequenceRecord("1", 249250621);
+    protected static final SAMSequenceRecord CHR2_B37 = new SAMSequenceRecord("2", 243199373);
+    protected static final SAMSequenceRecord CHR10_B37 = new SAMSequenceRecord("10", 135534747);
+
+
+    public enum SequenceDictionaryCompatibility {
+        IDENTICAL,                      // every contig is common to both dictionaries, equivalent, and in the same order
+        COMMON_SUBSET,                  // there exists a common subset of equivalent contigs
+        SUPERSET,                       // the first dict's set of contigs is a superset of the second dict's set
+        NO_COMMON_CONTIGS,              // no overlap between dictionaries
+        UNEQUAL_COMMON_CONTIGS,         // common subset has contigs that have the same name but different lengths
+        NON_CANONICAL_HUMAN_ORDER,      // human reference detected but the order of the contigs is non-standard (lexicographic, for example)
+        OUT_OF_ORDER,                   // the two dictionaries overlap but the overlapping contigs occur in different
+                                        // orders with respect to each other
+        DIFFERENT_INDICES               // the two dictionaries overlap and the overlapping contigs occur in the same
+                                        // order with respect to each other, but one or more of them have different
+                                        // indices in the two dictionaries. E.g., { chrM, chr1, chr2 } vs. { chr1, chr2 }
+    }
+
+    /**
+     * Validates that two sequence dictionaries are compatible under the standard GATK settings, throwing a
+     * UserException with a detailed error message when they are not.
+     *
+     * The standard validation settings used by this method are:
+     *
+     * -The dictionaries must share a common subset of equivalent contigs.
+     *
+     * -dict1 is not required to be a superset of dict2.
+     *
+     * -No contig-ordering checks are made: common contigs may appear in different relative orders or at
+     *  different absolute indices, and lexicographically sorted human dictionaries are accepted. GATK
+     *  uses contig names rather than contig indices, and so should not be sensitive to ordering issues.
+     *
+     * To compare a CRAM dictionary against a reference dictionary, call
+     * {@link #validateCRAMDictionaryAgainstReference(SAMSequenceDictionary, SAMSequenceDictionary)} instead.
+     *
+     * @param name1 name associated with dict1
+     * @param dict1 the sequence dictionary dict1
+     * @param name2 name associated with dict2
+     * @param dict2 the sequence dictionary dict2
+     */
+    public static void validateDictionaries( final String name1,
+                                             final SAMSequenceDictionary dict1,
+                                             final String name2,
+                                             final SAMSequenceDictionary dict2) {
+        // Delegate to the general overload with both optional checks switched off: a common subset of
+        // equivalent contigs is sufficient, and contig ordering is not examined.
+        validateDictionaries(name1, dict1, name2, dict2,
+                false,   // requireSuperset
+                false);  // checkContigOrdering
+    }
+
+    /**
+     * Validates a CRAM dictionary against a reference dictionary, throwing a UserException with a
+     * detailed error message when the two are incompatible.
+     *
+     * The validation settings used by this method are:
+     *
+     * -The reference dictionary must be a superset of the CRAM dictionary.
+     *
+     * -No contig-ordering checks are made: common contigs may appear in different relative orders or at
+     *  different absolute indices, and lexicographically sorted human dictionaries are accepted. GATK
+     *  uses contig names rather than contig indices, and so should not be sensitive to ordering issues.
+     *
+     * The two dictionaries are passed on under the names "reference" and "reads" respectively.
+     *
+     * @param referenceDictionary the sequence dictionary for the reference
+     * @param cramDictionary sequence dictionary from a CRAM file
+     */
+    public static void validateCRAMDictionaryAgainstReference( final SAMSequenceDictionary referenceDictionary,
+                                                               final SAMSequenceDictionary cramDictionary ) {
+        // The reference is passed as dict1 because the superset requirement applies to dict1.
+        validateDictionaries(
+                "reference", referenceDictionary, "reads", cramDictionary,
+                true,    // requireSuperset: for CRAM, the reference must be a superset of the reads dictionary
+                false);  // checkContigOrdering
+    }
+
+
+    /**
+     * Validates that two sequence dictionaries are compatible, throwing a UserException with a detailed
+     * error message when they are not.
+     *
+     * Two sequence dictionaries are compatible when they share a common subset of equivalent contigs,
+     * where equivalent contigs are defined as having the same name and length.
+     *
+     * @param name1 name associated with dict1
+     * @param dict1 the sequence dictionary dict1
+     * @param name2 name associated with dict2
+     * @param dict2 the sequence dictionary dict2
+     * @param requireSuperset if true, dict1 must be a superset of dict2; merely sharing a common subset is an error
+     * @param checkContigOrdering if true, common contigs must be in the same relative order and at the same
+     *                            absolute indices, and lexicographically-sorted human dictionaries are rejected
+     */
+    public static void validateDictionaries( final String name1,
+                                             final SAMSequenceDictionary dict1,
+                                             final String name2,
+                                             final SAMSequenceDictionary dict2,
+                                             final boolean requireSuperset,
+                                             final boolean checkContigOrdering ) {
+        Utils.nonNull(dict1, "Something went wrong with sequence dictionary detection, check that "+name1+" has a valid sequence dictionary");
+        Utils.nonNull(dict2, "Something went wrong with sequence dictionary detection, check that "+name2+" has a valid sequence dictionary");
+
+        final SequenceDictionaryCompatibility compatibility = compareDictionaries(dict1, dict2, checkContigOrdering);
+
+        switch ( compatibility ) {
+            case IDENTICAL:
+            case SUPERSET:
+                return;
+            case COMMON_SUBSET: {
+                if ( ! requireSuperset ) {
+                    return;
+                }
+                // A superset was required: report exactly which dict2 contigs are absent from dict1.
+                final Set<String> namesInDict1 = dict1.getSequences().stream()
+                        .map(SAMSequenceRecord::getSequenceName)
+                        .collect(Collectors.toSet());
+                final List<String> absentFromDict1 = dict2.getSequences().stream()
+                        .map(SAMSequenceRecord::getSequenceName)
+                        .filter(contigName -> ! namesInDict1.contains(contigName))
+                        .collect(Collectors.toList());
+                final String message = String.format(
+                        "Dictionary %s is missing contigs found in dictionary %s.  Missing contigs: \n %s \n",
+                        name1, name2, String.join(", ", absentFromDict1));
+                throw new UserException.IncompatibleSequenceDictionaries(message, name1, dict1, name2, dict2);
+            }
+
+            case NO_COMMON_CONTIGS:
+                throw new UserException.IncompatibleSequenceDictionaries("No overlapping contigs found", name1, dict1, name2, dict2);
+
+            case UNEQUAL_COMMON_CONTIGS: {
+                // The helper returns the offending pair as (record from dict1, record from dict2).
+                final List<SAMSequenceRecord> mismatch = findDisequalCommonContigs(getCommonContigsByName(dict1, dict2), dict1, dict2);
+                final SAMSequenceRecord record1 = mismatch.get(0);
+                final SAMSequenceRecord record2 = mismatch.get(1);
+                final String message = String.format(
+                        "Found contigs with the same name but different lengths:\n  contig %s = %s / %d\n  contig %s = %s / %d",
+                        name1, record1.getSequenceName(), record1.getSequenceLength(),
+                        name2, record2.getSequenceName(), record2.getSequenceLength());
+                throw new UserException.IncompatibleSequenceDictionaries(message, name1, dict1, name2, dict2);
+            }
+
+            case NON_CANONICAL_HUMAN_ORDER: {
+                // This result is only produced when the caller asked for contig-ordering checks, so it is
+                // always an error. Report dict1 if it is the non-canonically ordered one, otherwise dict2.
+                final boolean dict1IsNonCanonical = nonCanonicalHumanContigOrder(dict1);
+                throw new UserException.LexicographicallySortedSequenceDictionary(
+                        dict1IsNonCanonical ? name1 : name2,
+                        dict1IsNonCanonical ? dict1 : dict2);
+            }
+
+            case OUT_OF_ORDER: {
+                // Only produced when contig-ordering checks were requested, so it is always an error.
+                final String message = "The relative ordering of the common contigs in " + name1 + " and " + name2 +
+                        " is not the same; to fix this please see: "
+                        + "(https://www.broadinstitute.org/gatk/guide/article?id=1328), "
+                        + " which describes reordering contigs in BAM and VCF files.";
+                throw new UserException.IncompatibleSequenceDictionaries(message, name1, dict1, name2, dict2);
+            }
+
+            case DIFFERENT_INDICES: {
+                // Only produced when contig-ordering checks were requested, so it is always an error.
+                throw new UserException.IncompatibleSequenceDictionaries(
+                        "One or more contigs common to both dictionaries have " +
+                        "different indices (ie., absolute positions) in each dictionary. Code " +
+                        "that is sensitive to contig ordering can fail when this is the case. " +
+                        "You should fix the sequence dictionaries so that all shared contigs " +
+                        "occur at the same absolute positions in both dictionaries.",
+                        name1, dict1, name2, dict2);
+            }
+            default:
+                throw new GATKException("Unexpected SequenceDictionaryComparison type: " + compatibility);
+        }
+    }
+
+    /**
+     * Core comparison routine: classifies the relationship between two sequence dictionaries.
+     *
+     * @param dict1 first sequence dictionary
+     * @param dict2 second sequence dictionary
+     * @param checkContigOrdering if true, also perform the contig-ordering checks: forbid lexicographically-sorted
+     *                            dictionaries, and require common contigs to be in the same relative order and at
+     *                            the same absolute indices
+     * @return A SequenceDictionaryCompatibility enum value describing the compatibility of the two dictionaries
+     */
+    public static SequenceDictionaryCompatibility compareDictionaries( final SAMSequenceDictionary dict1, final SAMSequenceDictionary dict2, final boolean checkContigOrdering ) {
+        // This check examines each dictionary on its own, before any contigs are matched up.
+        if ( checkContigOrdering && (nonCanonicalHumanContigOrder(dict1) || nonCanonicalHumanContigOrder(dict2)) ) {
+            return SequenceDictionaryCompatibility.NON_CANONICAL_HUMAN_ORDER;
+        }
+
+        final Set<String> sharedNames = getCommonContigsByName(dict1, dict2);
+        if ( sharedNames.isEmpty() ) {
+            return SequenceDictionaryCompatibility.NO_COMMON_CONTIGS;
+        }
+        if ( ! commonContigsHaveSameLengths(sharedNames, dict1, dict2) ) {
+            return SequenceDictionaryCompatibility.UNEQUAL_COMMON_CONTIGS;
+        }
+
+        final boolean sameRelativeOrder = commonContigsAreInSameRelativeOrder(sharedNames, dict1, dict2);
+        if ( checkContigOrdering && ! sameRelativeOrder ) {
+            return SequenceDictionaryCompatibility.OUT_OF_ORDER;
+        }
+
+        // Identical: every contig of both dictionaries is common, and the common contigs are in the same order.
+        final int sharedCount = sharedNames.size();
+        if ( sameRelativeOrder && sharedCount == dict1.size() && sharedCount == dict2.size() ) {
+            return SequenceDictionaryCompatibility.IDENTICAL;
+        }
+        if ( checkContigOrdering && ! commonContigsAreAtSameIndices(sharedNames, dict1, dict2) ) {
+            return SequenceDictionaryCompatibility.DIFFERENT_INDICES;
+        }
+
+        return supersets(dict1, dict2)
+                ? SequenceDictionaryCompatibility.SUPERSET
+                : SequenceDictionaryCompatibility.COMMON_SUBSET;
+    }
+
+
+    /**
+     * Tests whether every contig of dict2 has an equivalent contig of the same name in dict1, i.e. whether
+     * dict1's set of contigs is a superset of dict2's.
+     *
+     * @param dict1 first sequence dictionary
+     * @param dict2 second sequence dictionary
+     * @return true if dict1's set of contigs supersets dict2's
+     */
+    private static boolean supersets( SAMSequenceDictionary dict1, SAMSequenceDictionary dict2 ) {
+        // Cannot rely on SAMSequenceRecord.equals() as it's too strict (takes extended attributes into
+        // account), so records are looked up by name and compared with sequenceRecordsAreEquivalent().
+        // A dict2 without any sequences is vacuously covered.
+        return dict2.getSequences().stream()
+                .allMatch(dict2Contig -> {
+                    final SAMSequenceRecord dict1Contig = dict1.getSequence(dict2Contig.getSequenceName());
+                    return dict1Contig != null && sequenceRecordsAreEquivalent(dict2Contig, dict1Contig);
+                });
+    }
+
+
+
+    /**
+     * Tests whether all common contigs are equivalent in both dicts, i.e. have the same length unless a length is unknown.
+     *
+     * @param commonContigs names of the contigs present in both dictionaries
+     * @param dict1 first sequence dictionary
+     * @param dict2 second sequence dictionary
+     * @return true if all of the common contigs are equivalent
+     */
+    private static boolean commonContigsHaveSameLengths(Set<String> commonContigs, SAMSequenceDictionary dict1, SAMSequenceDictionary dict2) {
+        final List<SAMSequenceRecord> firstMismatch = findDisequalCommonContigs(commonContigs, dict1, dict2);
+        return firstMismatch == null;
+    }
+
+    /**
+     * Finds the first common contig whose records are not equivalent in the two dictionaries.
+     *
+     * @param commonContigs names of the contigs present in both dictionaries
+     * @param dict1 first sequence dictionary
+     * @param dict2 second sequence dictionary
+     * @return a List(dict1 record, dict2 record) for the first disequal contig, or null if all are equivalent
+     */
+    private static List<SAMSequenceRecord> findDisequalCommonContigs(Set<String> commonContigs, SAMSequenceDictionary dict1, SAMSequenceDictionary dict2) {
+        for ( final String contigName : commonContigs ) {
+            final SAMSequenceRecord fromDict1 = dict1.getSequence(contigName);
+            final SAMSequenceRecord fromDict2 = dict2.getSequence(contigName);
+            if ( sequenceRecordsAreEquivalent(fromDict1, fromDict2) ) {
+                continue;
+            }
+            return Arrays.asList(fromDict1, fromDict2);
+        }
+        return null;
+    }
+
+    /**
+     * Decides whether two sequence records are equivalent, i.e. have the same name and compatible lengths.
+     *
+     * Lengths are compatible when they are equal, or when one or both are UNKNOWN_SEQUENCE_LENGTH.
+     * Two null records (or the same instance twice) are equivalent; a null and a non-null record are not.
+     *
+     * @param first first sequence record to compare
+     * @param second second sequence record to compare
+     * @return true if first and second are equivalent as described above, otherwise false
+     */
+    public static boolean sequenceRecordsAreEquivalent(final SAMSequenceRecord first, final SAMSequenceRecord second) {
+        if ( first == null || second == null ) {
+            // With a null involved, only two nulls count as equivalent.
+            return first == second;
+        }
+        if ( first == second ) {
+            return true;
+        }
+
+        final int firstLength = first.getSequenceLength();
+        final int secondLength = second.getSequenceLength();
+        final boolean lengthsCompatible = firstLength == secondLength
+                || firstLength == SAMSequenceRecord.UNKNOWN_SEQUENCE_LENGTH
+                || secondLength == SAMSequenceRecord.UNKNOWN_SEQUENCE_LENGTH;
+
+        // The names are only compared when the lengths have not already ruled the pair out.
+        return lengthsCompatible && first.getSequenceName().equals(second.getSequenceName());
+    }
+
+    /**
+     * A very simple (and naive) check for whether the dict (1) is a human reference (hg18, hg19, b36, or b37) and
+     * (2) is in non-canonical (e.g. lexicographic) order.  Works by matching records against the static chr1, chr2,
+     * and chr10 records by name and length and, if all three are matched, requiring the order chr1, chr2, chr10.
+     *
+     * @param dict the sequence dictionary to examine
+     * @return true if chr1, chr2 and chr10 are all matched but do not occur in that order; otherwise false
+     */
+    private static boolean nonCanonicalHumanContigOrder(SAMSequenceDictionary dict) {
+        SAMSequenceRecord chr1Record = null, chr2Record = null, chr10Record = null;
+        for ( final SAMSequenceRecord candidate : dict.getSequences() ) {
+            if ( isHumanSeqRecord(candidate, CHR1_HG18, CHR1_HG19, CHR1_B36, CHR1_B37) ) { chr1Record = candidate; }
+            if ( isHumanSeqRecord(candidate, CHR2_HG18, CHR2_HG19, CHR2_B36, CHR2_B37) ) { chr2Record = candidate; }
+            if ( isHumanSeqRecord(candidate, CHR10_HG18, CHR10_HG19, CHR10_B36, CHR10_B37) ) { chr10Record = candidate; }
+        }
+        if ( chr1Record == null || chr2Record == null || chr10Record == null ) {
+            return false; // not all three marker contigs were matched, so no ordering verdict is made
+        }
+        final boolean canonical = chr1Record.getSequenceIndex() < chr2Record.getSequenceIndex() && chr2Record.getSequenceIndex() < chr10Record.getSequenceIndex();
+        return ! canonical;
+    }
+
+    /**
+     * Trivial helper that returns true if elt has the same name and length as any of the given records
+     *
+     * @param elt record to test
+     * @param recs the list of records to check for name and length equivalence
+     * @return true if elt has the same name and length as any of the recs
+     */
+    private static boolean isHumanSeqRecord(SAMSequenceRecord elt, SAMSequenceRecord... recs) {
+        // Unlike sequenceRecordsAreEquivalent(), the lengths must match exactly here; an unknown
+        // length is not treated as a wildcard.
+        return Arrays.stream(recs).anyMatch(known ->
+                elt.getSequenceLength() == known.getSequenceLength()
+                        && elt.getSequenceName().equals(known.getSequenceName()));
+    }
+
+    /**
+     * Returns true if the common contigs in dict1 and dict2 are in the same relative order, without regard to
+     * absolute index position. For each dictionary, the records of the common contigs are sorted by their
+     * index in that dictionary; the two resulting sequences of contig names must then match position by
+     * position.
+     *
+     * @param commonContigs names of the contigs common to both dictionaries
+     * @param dict1 first SAMSequenceDictionary
+     * @param dict2 second SAMSequenceDictionary
+     * @return true if the common contigs occur in the same relative order in both dict1 and dict2, otherwise false
+     */
+    private static boolean commonContigsAreInSameRelativeOrder(Set<String> commonContigs, SAMSequenceDictionary dict1, SAMSequenceDictionary dict2) {
+        // Names of the common contigs, ordered by their index within dict1 ...
+        final List<String> namesInDict1Order = getSequencesOfName(commonContigs, dict1).stream()
+                .sorted(SEQUENCE_INDEX_ORDER)
+                .map(SAMSequenceRecord::getSequenceName)
+                .collect(Collectors.toList());
+        // ... and ordered by their index within dict2.
+        final List<String> namesInDict2Order = getSequencesOfName(commonContigs, dict2).stream()
+                .sorted(SEQUENCE_INDEX_ORDER)
+                .map(SAMSequenceRecord::getSequenceName)
+                .collect(Collectors.toList());
+
+        // Both lists hold one entry per common contig, so list equality is a position-by-position name comparison.
+        return namesInDict1Order.equals(namesInDict2Order);
+    }
+
+    /**
+     * Looks up the SAMSequenceRecord in dict for each name in commonContigs.
+     *
+     * @param commonContigs names of the contigs to look up
+     * @param dict the sequence dictionary to look them up in
+     * @return a new mutable list holding the result of each lookup, in the iteration order of
+     *         commonContigs
+     */
+    private static List<SAMSequenceRecord> getSequencesOfName(Set<String> commonContigs, SAMSequenceDictionary dict) {
+        // Collect into an explicit ArrayList so that the result is guaranteed to be mutable
+        // (Collectors.toList() makes no such promise).
+        return commonContigs.stream()
+                .map(contigName -> dict.getSequence(contigName))
+                .collect(Collectors.toCollection(ArrayList::new));
+    }
+
+    /**
+     * Checks whether every common contig occurs at the same absolute index in both of the given
+     * sequence dictionaries.
+     *
+     * An empty set of common contigs trivially passes the check.
+     *
+     * @param commonContigs Set of names of the contigs that occur in both dictionaries
+     * @param dict1 first sequence dictionary
+     * @param dict2 second sequence dictionary
+     * @return true if the contigs common to dict1 and dict2 occur at the same indices in both dictionaries,
+     *         otherwise false
+     */
+    private static boolean commonContigsAreAtSameIndices( final Set<String> commonContigs, final SAMSequenceDictionary dict1, final SAMSequenceDictionary dict2 ) {
+        // The names are expected to be common to both dictionaries, so the lookups below are not
+        // null-checked.
+        return commonContigs.stream()
+                .allMatch(contigName -> {
+                    final SAMSequenceRecord inDict1 = dict1.getSequence(contigName);
+                    final SAMSequenceRecord inDict2 = dict2.getSequence(contigName);
+                    // Each common contig must have the same index in both dictionaries
+                    return inDict1.getSequenceIndex() == inDict2.getSequenceIndex();
+                });
+    }
+
+    /**
+     * Returns the set of contig names found in both dicts, iterating in the order the names occur in dict1.
+     * @param dict1 first sequence dictionary
+     * @param dict2 second sequence dictionary
+     * @return a new mutable set holding the names that occur in both dictionaries
+     */
+    public static Set<String> getCommonContigsByName(SAMSequenceDictionary dict1, SAMSequenceDictionary dict2) {
+        final Set<String> namesInDict2 = getContigNames(dict2);
+        return getContigNames(dict1).stream().filter(namesInDict2::contains)
+                .collect(Collectors.toCollection(LinkedHashSet::new));
+    }
+
+    /** Returns the contig names of dict as a new mutable set that iterates in the order of dict.getSequences(). */
+    public static Set<String> getContigNames(SAMSequenceDictionary dict) {
+        final Set<String> orderedNames = new LinkedHashSet<>(Utils.optimumHashSize(dict.size()));
+        dict.getSequences().forEach(entry -> orderedNames.add(entry.getSequenceName()));
+        return orderedNames;
+    }
+    /** Returns the contig names of refSeqDict as a list, in the order of refSeqDict.getSequences(). */
+    public static List<String> getContigNamesList(final SAMSequenceDictionary refSeqDict) {
+        Utils.nonNull(refSeqDict, "provided reference sequence dictionary is null");
+        return refSeqDict.getSequences().stream().map(SAMSequenceRecord::getSequenceName).collect(Collectors.toList());
+    }
+
+    /**
+     * Returns a compact String representation of the sequence dictionary it's passed.
+     *
+     * The format of the returned String is:
+     * [ contig1Name(length:contig1Length) contig2Name(length:contig2Length) ... ]
+     *
+     * Every entry, including the last, is followed by a single space, so a dictionary without
+     * any sequences is rendered as "[ ]".
+     *
+     * @param dict a non-null SAMSequenceDictionary
+     * @return A String containing all of the contig names and lengths from the sequence dictionary it's passed
+     */
+    public static String getDictionaryAsString( final SAMSequenceDictionary dict ) {
+        Utils.nonNull(dict, "Sequence dictionary must be non-null");
+
+        final StringBuilder description = new StringBuilder("[ ");
+        for ( final SAMSequenceRecord entry : dict.getSequences() ) {
+            description.append(entry.getSequenceName())
+                    .append("(length:")
+                    .append(entry.getSequenceLength())
+                    .append(") ");
+        }
+
+        return description.append("]").toString();
+    }
+
+}
diff --git a/src/test/java/htsjdk/samtools/SAMSequenceDictionaryUtilsTest.java b/src/test/java/htsjdk/samtools/SAMSequenceDictionaryUtilsTest.java
new file mode 100644
index 0000000000..37842f8a9a
--- /dev/null
+++ b/src/test/java/htsjdk/samtools/SAMSequenceDictionaryUtilsTest.java
@@ -0,0 +1,357 @@
+package org.broadinstitute.hellbender.utils;
+
+import htsjdk.samtools.SAMSequenceDictionary;
+import htsjdk.samtools.SAMSequenceRecord;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.broadinstitute.hellbender.exceptions.UserException;
+import org.broadinstitute.hellbender.GATKBaseTest;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import static org.broadinstitute.hellbender.utils.SequenceDictionaryUtils.*;
+import static org.broadinstitute.hellbender.utils.SequenceDictionaryUtils.SequenceDictionaryCompatibility.*;
+
+public final class SequenceDictionaryUtilsUnitTest extends GATKBaseTest {
+
+    private static Logger logger = LogManager.getLogger(SequenceDictionaryUtilsUnitTest.class);
+
+    @DataProvider( name = "testSequenceRecordsAreEquivalentDataProvider" )
+    public Object[][] testSequenceRecordsAreEquivalentDataProvider() {
+        final SAMSequenceRecord chrM = new SAMSequenceRecord("chrM", 16571);
+        final SAMSequenceRecord nonStandard = new SAMSequenceRecord("NonStandard1", 8675309);
+        final SAMSequenceRecord chr1UnknownLength = new SAMSequenceRecord(CHR1_HG19.getSequenceName(), SAMSequenceRecord.UNKNOWN_SEQUENCE_LENGTH);
+        final SAMSequenceRecord chr1OtherLength = new SAMSequenceRecord(CHR1_HG19.getSequenceName(), 123456);
+        return new Object[][]{ // each row: first record, second record, expected equivalence
+                {CHR1_HG19, CHR1_HG19, true},                  // a record compared with itself
+                {CHR1_HG19, chrM, false},
+                {CHR1_HG19, nonStandard, false},
+                {null, null, true},                            // two nulls are expected to be equivalent
+                {CHR1_HG19, null, false},
+                {null, CHR1_HG19, false},
+                {CHR1_HG19, chr1UnknownLength, true},          // an unknown length is expected to match a known one
+                {CHR1_HG19, chr1OtherLength, false},
+                {chr1UnknownLength, CHR1_HG19, true},
+                {chr1OtherLength, CHR1_HG19, false},
+        };
+    }
+
+    @Test(dataProvider = "testSequenceRecordsAreEquivalentDataProvider")
+    public void testSequenceRecordsAreEquivalent(final SAMSequenceRecord one, final SAMSequenceRecord two, final boolean expected){
+        // The reported equivalence must equal the expectation carried in the provider row.
+        Assert.assertEquals(SequenceDictionaryUtils.sequenceRecordsAreEquivalent(one, two), expected);
+    }
+
+    /**
+     * Supplies the dictionary pairs shared by testSequenceDictionaryValidation and
+     * testSequenceDictionaryComparison. Each row holds, in order:
+     *   1-2. the contigs of the first and of the second dictionary
+     *   3.   the compatibility that compareDictionaries() is expected to report
+     *   4.   the exception class validateDictionaries() is expected to throw, or null for none
+     *   5.   the requireSuperset flag passed to validateDictionaries()
+     *   6.   the checkContigOrdering flag passed to both methods
+     */
+    @DataProvider( name = "SequenceDictionaryDataProvider" )
+    public Object[][] generateSequenceDictionaryTestData() {
+        final SAMSequenceRecord CHRM_HG19 = new SAMSequenceRecord("chrM", 16571);
+        final SAMSequenceRecord CHR_NONSTANDARD1 = new SAMSequenceRecord("NonStandard1", 8675309);
+        final SAMSequenceRecord CHR_NONSTANDARD2 = new SAMSequenceRecord("NonStandard2", 8675308);
+        final SAMSequenceRecord CHR1_HG19_WITH_UNKNOWN_LENGTH = new SAMSequenceRecord(CHR1_HG19.getSequenceName(), SAMSequenceRecord.UNKNOWN_SEQUENCE_LENGTH);
+        final SAMSequenceRecord CHR1_HG19_WITH_DIFFERENT_LENGTH = new SAMSequenceRecord(CHR1_HG19.getSequenceName(), 123456);
+
+        final SAMSequenceRecord CHR1_HG19_WITH_ATTRIBUTES = new SAMSequenceRecord(CHR1_HG19.getSequenceName(), CHR1_HG19.getSequenceLength());
+        CHR1_HG19_WITH_ATTRIBUTES.setAttribute("M5", "0dec9660ec1efaaf33281c0d5ea2560f");
+        CHR1_HG19_WITH_ATTRIBUTES.setAttribute("UR", "file:/foo/bar");
+
+        final Class<UserException.IncompatibleSequenceDictionaries> NO_COMMON_CONTIGS_EXCEPTION = UserException.IncompatibleSequenceDictionaries.class;
+        final Class<UserException.IncompatibleSequenceDictionaries> UNEQUAL_COMMON_CONTIGS_EXCEPTION = UserException.IncompatibleSequenceDictionaries.class;
+        final Class<UserException.LexicographicallySortedSequenceDictionary> NON_CANONICAL_HUMAN_ORDER_EXCEPTION = UserException.LexicographicallySortedSequenceDictionary.class;
+        final Class<UserException.IncompatibleSequenceDictionaries> OUT_OF_ORDER_EXCEPTION = UserException.IncompatibleSequenceDictionaries.class;
+        final Class<UserException.IncompatibleSequenceDictionaries> DIFFERENT_INDICES_EXCEPTION = UserException.IncompatibleSequenceDictionaries.class;
+
+        return new Object[][]  {
+                // Identical dictionaries:
+                {Arrays.asList(CHR1_HG19),                         Arrays.asList(CHR1_HG19),                        IDENTICAL, null, false, false},
+                { Arrays.asList(CHR1_HG19),                        Arrays.asList(CHR1_HG19),                        IDENTICAL, null, false, false},
+                { Arrays.asList(CHR1_HG19),                        Arrays.asList(CHR1_HG19),                        IDENTICAL, null, true,  false},
+                { Arrays.asList(CHR1_HG19),                        Arrays.asList(CHR1_HG19),                        IDENTICAL, null, false, true},
+                { Arrays.asList(CHR1_HG19),                        Arrays.asList(CHR1_HG19),                        IDENTICAL, null, true,  true},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), IDENTICAL, null, false, false},
+                { Arrays.asList(CHR1_B37),                         Arrays.asList(CHR1_B37),                         IDENTICAL, null, false, false},
+                { Arrays.asList(CHR1_B37, CHR2_B37, CHR10_B37),    Arrays.asList(CHR1_B37, CHR2_B37, CHR10_B37),    IDENTICAL, null, false, false},
+                { Arrays.asList(CHR1_HG19),                        Arrays.asList(CHR1_HG19_WITH_UNKNOWN_LENGTH),    IDENTICAL, null, false, false},
+                { Arrays.asList(CHR1_HG19_WITH_UNKNOWN_LENGTH),    Arrays.asList(CHR1_HG19),                        IDENTICAL, null, false, false},
+
+                // Dictionaries with a common subset:
+                { Arrays.asList(CHR1_HG19),                                          Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1),                                   COMMON_SUBSET, null, false, false},
+                { Arrays.asList(CHR1_HG19),                                          Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1),                                   COMMON_SUBSET, null, false, true},
+                // If requireSuperset == true, we should get an exception upon COMMON_SUBSET:
+                { Arrays.asList(CHRM_HG19, CHR1_HG19, CHR2_HG19),                    Arrays.asList(CHRM_HG19, CHR1_HG19, CHR10_HG19),                              COMMON_SUBSET, UserException.IncompatibleSequenceDictionaries.class, true, false},
+                { Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1),                        Arrays.asList(CHR1_HG19, CHR_NONSTANDARD2),                                   COMMON_SUBSET, null, false, false},
+                { Arrays.asList(CHR_NONSTANDARD1, CHR1_HG19),                        Arrays.asList(CHR_NONSTANDARD2, CHR1_HG19),                                   COMMON_SUBSET, null, false, false},
+                { Arrays.asList(CHR_NONSTANDARD1, CHR1_HG19),                        Arrays.asList(CHR_NONSTANDARD2, CHR1_HG19, CHRM_HG19),                        COMMON_SUBSET, null, false, false},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19, CHR_NONSTANDARD1), Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19, CHR_NONSTANDARD2),            COMMON_SUBSET, null, false, false},
+                { Arrays.asList(CHR_NONSTANDARD1, CHR1_HG19, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR_NONSTANDARD2, CHR1_HG19, CHR2_HG19, CHR10_HG19),            COMMON_SUBSET, null, false, false},
+                { Arrays.asList(CHR_NONSTANDARD1, CHR1_HG19, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR_NONSTANDARD2, CHR1_HG19, CHR2_HG19, CHR10_HG19, CHRM_HG19), COMMON_SUBSET, null, false, false},
+                { Arrays.asList(CHR1_B37, CHR2_B37, CHR10_B37, CHR_NONSTANDARD1),    Arrays.asList(CHR1_B37, CHR2_B37, CHR10_B37, CHR_NONSTANDARD2),               COMMON_SUBSET, null, false, false},
+                // If requireSuperset == true, we should get an exception upon COMMON_SUBSET:
+                { Arrays.asList(CHR1_HG19, CHR2_HG19),                               Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19),                              COMMON_SUBSET, UserException.IncompatibleSequenceDictionaries.class, true, false},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19),                   Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19, CHR_NONSTANDARD1),            COMMON_SUBSET, null, false, false},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19),                               Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19),                              COMMON_SUBSET, null, false, false},
+                // If checkContigOrdering == false, ordering of the common contigs should not matter:
+                { Arrays.asList(CHR1_HG19, CHR2_HG19),                               Arrays.asList(CHR2_HG19, CHR1_HG19, CHR10_HG19),                              COMMON_SUBSET, null, false, false},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19),                               Arrays.asList(CHR10_HG19, CHR2_HG19, CHR1_HG19),                              COMMON_SUBSET, null, false, false},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19),                               Arrays.asList(CHR2_HG19, CHR10_HG19, CHR1_HG19),                              COMMON_SUBSET, null, false, false},
+
+                // Dictionaries with no common contigs:
+                { Arrays.asList(CHR1_HG19),                        Arrays.asList(CHR2_HG19),                     NO_COMMON_CONTIGS, NO_COMMON_CONTIGS_EXCEPTION, false, false},
+                { Arrays.asList(CHR1_HG19),                        Arrays.asList(CHR2_HG19),                     NO_COMMON_CONTIGS, NO_COMMON_CONTIGS_EXCEPTION, false, true},
+                { Arrays.asList(CHR1_HG19),                        Arrays.asList(CHR2_HG19),                     NO_COMMON_CONTIGS, NO_COMMON_CONTIGS_EXCEPTION, true,  false},
+                { Arrays.asList(CHR1_HG19),                        Arrays.asList(CHR2_HG19),                     NO_COMMON_CONTIGS, NO_COMMON_CONTIGS_EXCEPTION, true,  true},
+                { Arrays.asList(CHR1_HG19),                        Arrays.asList(CHR1_B37),                      NO_COMMON_CONTIGS, NO_COMMON_CONTIGS_EXCEPTION, false, false},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR1_B37, CHR2_B37, CHR10_B37), NO_COMMON_CONTIGS, NO_COMMON_CONTIGS_EXCEPTION, false, false},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19),             Arrays.asList(CHR1_B37, CHR2_B37, CHR10_B37), NO_COMMON_CONTIGS, NO_COMMON_CONTIGS_EXCEPTION, false, false},
+
+                // Dictionaries with unequal common contigs:
+                { Arrays.asList(CHR1_HG19),                                          Arrays.asList(CHR1_HG19_WITH_DIFFERENT_LENGTH),                    UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, false},
+                { Arrays.asList(CHR1_HG19_WITH_DIFFERENT_LENGTH),                    Arrays.asList(CHR1_HG19),                                          UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, false},
+                { Arrays.asList(CHR1_HG19),                                          Arrays.asList(CHR1_HG18),                                          UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, false},
+                { Arrays.asList(CHR1_HG19),                                          Arrays.asList(CHR1_HG18),                                          UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, true,  false},
+                { Arrays.asList(CHR1_HG19),                                          Arrays.asList(CHR1_HG18),                                          UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, true},
+                { Arrays.asList(CHR1_HG19),                                          Arrays.asList(CHR1_HG18),                                          UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, true,  true},
+                { Arrays.asList(CHR1_B36),                                           Arrays.asList(CHR1_B37),                                           UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, false},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19),                   Arrays.asList(CHR1_HG18, CHR2_HG18, CHR10_HG18),                   UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, false},
+                { Arrays.asList(CHR1_B37, CHR2_B37, CHR10_B37),                      Arrays.asList(CHR1_B36, CHR2_B36, CHR10_B36),                      UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, false},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19, CHR_NONSTANDARD1), Arrays.asList(CHR1_HG18, CHR2_HG18, CHR10_HG18, CHR_NONSTANDARD2), UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, false},
+                { Arrays.asList(CHR_NONSTANDARD1, CHR1_HG19, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR_NONSTANDARD2, CHR1_HG18, CHR2_HG18, CHR10_HG18), UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, false},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19),                               Arrays.asList(CHR1_HG18, CHR2_HG18, CHR10_HG18),                   UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, false},
+
+                // One or both dictionaries in non-canonical human order:
+                { Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), NON_CANONICAL_HUMAN_ORDER, NON_CANONICAL_HUMAN_ORDER_EXCEPTION, false, true},
+                { Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), NON_CANONICAL_HUMAN_ORDER, NON_CANONICAL_HUMAN_ORDER_EXCEPTION, true,  true},
+                { Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), NON_CANONICAL_HUMAN_ORDER, NON_CANONICAL_HUMAN_ORDER_EXCEPTION, false, true},
+                { Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), NON_CANONICAL_HUMAN_ORDER, NON_CANONICAL_HUMAN_ORDER_EXCEPTION, true,  true},
+                { Arrays.asList(CHR1_HG18, CHR10_HG18, CHR2_HG18), Arrays.asList(CHR1_HG18, CHR10_HG18, CHR2_HG18), NON_CANONICAL_HUMAN_ORDER, NON_CANONICAL_HUMAN_ORDER_EXCEPTION, false, true},
+                { Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), NON_CANONICAL_HUMAN_ORDER, NON_CANONICAL_HUMAN_ORDER_EXCEPTION, false, true},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), NON_CANONICAL_HUMAN_ORDER, NON_CANONICAL_HUMAN_ORDER_EXCEPTION, false, true},
+                { Arrays.asList(CHR1_B37, CHR10_B37, CHR2_B37),    Arrays.asList(CHR1_B37, CHR10_B37, CHR2_B37),    NON_CANONICAL_HUMAN_ORDER, NON_CANONICAL_HUMAN_ORDER_EXCEPTION, false, true},
+                { Arrays.asList(CHR1_B36, CHR10_B36, CHR2_B36),    Arrays.asList(CHR1_B36, CHR10_B36, CHR2_B36),    NON_CANONICAL_HUMAN_ORDER, NON_CANONICAL_HUMAN_ORDER_EXCEPTION, false, true},
+                // If checkContigOrdering == false, we should not get NON_CANONICAL_HUMAN_ORDER:
+                { Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), IDENTICAL, null, false, false},
+                { Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19, CHR_NONSTANDARD1), COMMON_SUBSET, null, false, false},
+
+                // Dictionaries with a common subset, but different relative ordering within that subset
+                { Arrays.asList(CHR1_HG19, CHR2_HG19),            Arrays.asList(CHR2_HG19, CHR1_HG19),                              OUT_OF_ORDER, OUT_OF_ORDER_EXCEPTION, false, true},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19),            Arrays.asList(CHR2_HG19, CHR1_HG19),                              OUT_OF_ORDER, OUT_OF_ORDER_EXCEPTION, true,  true},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19),            Arrays.asList(CHR2_HG19, CHR1_HG19),                              OUT_OF_ORDER, OUT_OF_ORDER_EXCEPTION, false, true},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19),            Arrays.asList(CHR2_HG19, CHR1_HG19),                              OUT_OF_ORDER, OUT_OF_ORDER_EXCEPTION, true,  true},
+                { Arrays.asList(CHRM_HG19, CHR1_HG19, CHR2_HG19), Arrays.asList(CHR2_HG19, CHR1_HG19, CHRM_HG19),                   OUT_OF_ORDER, OUT_OF_ORDER_EXCEPTION, false, true},
+                { Arrays.asList(CHRM_HG19, CHR1_HG19, CHR2_HG19), Arrays.asList(CHRM_HG19, CHR2_HG19, CHR1_HG19),                   OUT_OF_ORDER, OUT_OF_ORDER_EXCEPTION, false, true},
+                { Arrays.asList(CHRM_HG19, CHR1_HG19, CHR2_HG19), Arrays.asList(CHR2_HG19, CHRM_HG19, CHR1_HG19),                   OUT_OF_ORDER, OUT_OF_ORDER_EXCEPTION, false, true},
+                { Arrays.asList(CHR1_B37, CHR2_B37),              Arrays.asList(CHR2_B37, CHR1_B37),                                OUT_OF_ORDER, OUT_OF_ORDER_EXCEPTION, false, true},
+                // If checkContigOrdering == false, we should not get OUT_OF_ORDER:
+                { Arrays.asList(CHR1_HG19, CHR2_HG19),            Arrays.asList(CHR2_HG19, CHR1_HG19),                              SUPERSET,      null, false, false},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19),            Arrays.asList(CHR2_HG19, CHR1_HG19, CHR_NONSTANDARD1),            COMMON_SUBSET, null, false, false},
+
+                // Dictionaries with a common subset in the same relative order, but with different indices.
+                // This will only throw an exception during validation if checkContigOrdering is true
+
+                // These have checkContigOrdering == true, so we expect DIFFERENT_INDICES and an exception:
+                { Arrays.asList(CHRM_HG19, CHR1_HG19),                                                 Arrays.asList(CHR1_HG19),                                          DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, false, true},
+                // Setting requireSuperset == true should make no difference here (we should still get DIFFERENT_INDICES and an exception):
+                { Arrays.asList(CHRM_HG19, CHR1_HG19),                                                 Arrays.asList(CHR1_HG19),                                          DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, true,  true},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19),                                                 Arrays.asList(CHRM_HG19, CHR1_HG19, CHR2_HG19),                    DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, false, true},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19),                                                 Arrays.asList(CHRM_HG19, CHR1_HG19, CHR2_HG19, CHR_NONSTANDARD1),  DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, false, true},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19),                                                 Arrays.asList(CHRM_HG19, CHR_NONSTANDARD1, CHR1_HG19, CHR2_HG19),  DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, false, true},
+                { Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1, CHR2_HG19),                               Arrays.asList(CHR1_HG19, CHR2_HG19),                               DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, false, true},
+                { Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1, CHR2_HG19, CHR10_HG19),                   Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19),                   DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, false, true},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR_NONSTANDARD1, CHRM_HG19 ),                   Arrays.asList(CHR1_HG19, CHR2_HG19, CHRM_HG19),                    DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, false, true},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR_NONSTANDARD1, CHRM_HG19, CHR_NONSTANDARD2 ), Arrays.asList(CHR1_HG19, CHR2_HG19, CHRM_HG19, CHR_NONSTANDARD2 ), DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, false, true},
+                { Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1, CHR2_HG19, CHRM_HG19, CHR_NONSTANDARD2 ), Arrays.asList(CHR1_HG19, CHR2_HG19, CHRM_HG19, CHR_NONSTANDARD2 ), DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, false, true},
+
+                // Same test cases as above, but these have checkContigOrdering == false, so we expect SUPERSET or COMMON_SUBSET instead of DIFFERENT_INDICES, and no exception:
+                { Arrays.asList(CHRM_HG19, CHR1_HG19),                                                 Arrays.asList(CHR1_HG19),                                          SUPERSET,      null, false, false},
+                { Arrays.asList(CHRM_HG19, CHR1_HG19),                                                 Arrays.asList(CHR1_HG19),                                          SUPERSET,      null, true,  false},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19),                                                 Arrays.asList(CHRM_HG19, CHR1_HG19, CHR2_HG19),                    COMMON_SUBSET, null, false, false},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19),                                                 Arrays.asList(CHRM_HG19, CHR1_HG19, CHR2_HG19, CHR_NONSTANDARD1),  COMMON_SUBSET, null, false, false},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19),                                                 Arrays.asList(CHRM_HG19, CHR_NONSTANDARD1, CHR1_HG19, CHR2_HG19),  COMMON_SUBSET, null, false, false},
+                { Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1, CHR2_HG19),                               Arrays.asList(CHR1_HG19, CHR2_HG19),                               SUPERSET,      null, false, false},
+                { Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1, CHR2_HG19, CHR10_HG19),                   Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19),                   SUPERSET,      null, false, false},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR_NONSTANDARD1, CHRM_HG19 ),                   Arrays.asList(CHR1_HG19, CHR2_HG19, CHRM_HG19),                    SUPERSET,      null, false, false},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR_NONSTANDARD1, CHRM_HG19, CHR_NONSTANDARD2 ), Arrays.asList(CHR1_HG19, CHR2_HG19, CHRM_HG19, CHR_NONSTANDARD2 ), SUPERSET,      null, false, false},
+                { Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1, CHR2_HG19, CHRM_HG19, CHR_NONSTANDARD2 ), Arrays.asList(CHR1_HG19, CHR2_HG19, CHRM_HG19, CHR_NONSTANDARD2 ), SUPERSET,      null, false, false},
+
+                // tests for SUPERSET
+                { Arrays.asList(CHR1_HG19, CHR2_HG19),                                  Arrays.asList(CHR1_HG19),                                                       SUPERSET, null, false, false},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19),                                  Arrays.asList(CHR1_HG19),                                                       SUPERSET, null, false, true},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19),                                  Arrays.asList(CHR1_HG19),                                                       SUPERSET, null, true,  false},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19),                                  Arrays.asList(CHR1_HG19),                                                       SUPERSET, null, true,  true},
+                { Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1),                           Arrays.asList(CHR1_HG19),                                                       SUPERSET, null, false, false},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19, CHR_NONSTANDARD1),    Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19),                                SUPERSET, null, false, false},
+                { Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1, CHR2_HG19, CHR10_HG19),    Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19),                                SUPERSET, null, false, false},
+                { Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1, CHR2_HG19, CHR10_HG19),    Arrays.asList(CHR1_HG19, CHR2_HG19),                                            SUPERSET, null, false, false},
+                // Extended attributes should be ignored when determining whether a superset exists:
+                { Arrays.asList(CHR1_HG19, CHR2_HG19),                                  Arrays.asList(CHR1_HG19_WITH_ATTRIBUTES),                                       SUPERSET, null, false, false},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19, CHRM_HG19),           Arrays.asList(CHR1_HG19_WITH_ATTRIBUTES, CHR10_HG19),                           SUPERSET, null, false, false}
+        };
+    }
+
+    /**
+     * Checks that validateDictionaries() throws the exception class named by the data provider,
+     * or completes normally when the provider supplies null. The expected compatibility value is
+     * part of the shared provider rows but is not used here.
+     */
+    @Test( dataProvider = "SequenceDictionaryDataProvider" )
+    public void testSequenceDictionaryValidation( final List<SAMSequenceRecord> firstDictionaryContigs,
+                                                  final List<SAMSequenceRecord> secondDictionaryContigs,
+                                                  final SequenceDictionaryUtils.SequenceDictionaryCompatibility dictionaryCompatibility, //not needed by this test
+                                                  final Class<? extends UserException> expectedExceptionUponValidation,
+                                                  final boolean requireSuperset,
+                                                  final boolean checkContigOrdering) {
+        final SAMSequenceDictionary first = createSequenceDictionary(firstDictionaryContigs);
+        final SAMSequenceDictionary second = createSequenceDictionary(secondDictionaryContigs);
+        final String context = String.format("First dictionary: %s  Second dictionary: %s",
+                SequenceDictionaryUtils.getDictionaryAsString(first),
+                SequenceDictionaryUtils.getDictionaryAsString(second));
+
+        // Capture whatever validation throws so the failure message can name it.
+        Exception caught = null;
+        try {
+            SequenceDictionaryUtils.validateDictionaries(
+                    "firstDictionary", first, "secondDictionary", second, requireSuperset, checkContigOrdering);
+        } catch ( Exception e ) {
+            caught = e;
+        }
+
+        if ( expectedExceptionUponValidation == null ) {
+            Assert.assertTrue(caught == null,
+                    String.format("Expected no exception but saw exception %s instead. %s",
+                            caught != null ? caught.getClass().getSimpleName() : "none",
+                            context));
+            return;
+        }
+        final String seen = caught == null ? "no exception" : caught.getClass().getSimpleName();
+        Assert.assertTrue(expectedExceptionUponValidation.isInstance(caught),
+                String.format("Expected exception %s but saw %s instead. %s",
+                        expectedExceptionUponValidation.getSimpleName(), seen, context));
+    }
+
+    /** Checks that compareDictionaries() reports the compatibility listed in the provider row. */
+    @Test( dataProvider = "SequenceDictionaryDataProvider" )
+    public void testSequenceDictionaryComparison( final List<SAMSequenceRecord> firstDictionaryContigs,
+                                                  final List<SAMSequenceRecord> secondDictionaryContigs,
+                                                  final SequenceDictionaryUtils.SequenceDictionaryCompatibility dictionaryCompatibility,
+                                                  final Class<? extends UserException> expectedExceptionUponValidation,
+                                                  final boolean requireSuperset,
+                                                  final boolean checkContigOrdering) {
+        final SAMSequenceDictionary first = createSequenceDictionary(firstDictionaryContigs);
+        final SAMSequenceDictionary second = createSequenceDictionary(secondDictionaryContigs);
+        final String context = String.format("First dictionary: %s  Second dictionary: %s",
+                SequenceDictionaryUtils.getDictionaryAsString(first),
+                SequenceDictionaryUtils.getDictionaryAsString(second));
+
+        // Only checkContigOrdering feeds the comparison; the other provider columns are unused here.
+        final SequenceDictionaryUtils.SequenceDictionaryCompatibility actual =
+                SequenceDictionaryUtils.compareDictionaries(first, second, checkContigOrdering);
+        Assert.assertTrue(actual == dictionaryCompatibility,
+                String.format("Dictionary comparison should have returned %s but instead returned %s. %s",
+                        dictionaryCompatibility, actual, context));
+    }
+
+    @DataProvider(name = "StandardValidationIgnoresContigOrderData")
+    public Object[][] getStandardValidationIgnoresContigOrderData() {
+        return new Object[][] { // each row: first dictionary's contigs, second dictionary's contigs
+                { Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHR2_HG19, CHR1_HG19) }, // same contigs, swapped
+                { Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHR2_HG19, CHR1_HG19, CHR10_HG19) }, // swapped, extra contig last
+                { Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHR10_HG19, CHR2_HG19, CHR1_HG19) }, // swapped, extra contig first
+                { Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHR2_HG19, CHR10_HG19, CHR1_HG19) }, // swapped, extra contig between
+
+        };
+    }
+
+    @Test(dataProvider = "StandardValidationIgnoresContigOrderData")
+    public void testStandardValidationIgnoresContigOrder( final List<SAMSequenceRecord> firstDictionaryContigs, final List<SAMSequenceRecord> secondDictionaryContigs ) {
+        // The boolean-free overload of validateDictionaries() is expected to tolerate common contigs
+        // that appear in a different order, so the test passes as long as no exception escapes.
+        final SAMSequenceDictionary first = createSequenceDictionary(firstDictionaryContigs);
+        final SAMSequenceDictionary second = createSequenceDictionary(secondDictionaryContigs);
+
+        SequenceDictionaryUtils.validateDictionaries("first", first, "second", second);
+    }
+
+    @DataProvider(name = "NonSupersetData")
+    public Object[][] getNonSupersetData() {
+        return new Object[][] { // each row: first dictionary's contigs, second dictionary's contigs
+                { Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHR2_HG19, CHR1_HG19, CHR10_HG19) }, // second adds CHR10_HG19
+                { Arrays.asList(CHR1_HG19),            Arrays.asList(CHR10_HG19, CHR2_HG19, CHR1_HG19) } // second adds CHR10_HG19 and CHR2_HG19
+        };
+    }
+
+    @Test(dataProvider = "NonSupersetData")
+    public void testStandardValidationDoesNotRequireSuperset( final List<SAMSequenceRecord> firstDictionaryContigs, final List<SAMSequenceRecord> secondDictionaryContigs ) {
+        // The first dictionary is not a superset of the second in these rows, yet the boolean-free
+        // overload of validateDictionaries() is expected to accept the pair without throwing.
+        final SAMSequenceDictionary first = createSequenceDictionary(firstDictionaryContigs);
+        final SAMSequenceDictionary second = createSequenceDictionary(secondDictionaryContigs);
+
+        SequenceDictionaryUtils.validateDictionaries("first", first, "second", second);
+    }
+
+    @Test(dataProvider = "NonSupersetData", expectedExceptions = UserException.IncompatibleSequenceDictionaries.class)
+    public void testCRAMValidationDoesRequireSuperset( final List<SAMSequenceRecord> refDictionaryContigs, final List<SAMSequenceRecord> cramDictionaryContigs ) {
+        final SAMSequenceDictionary refDictionary = createSequenceDictionary(refDictionaryContigs);
+        final SAMSequenceDictionary cramDictionary = createSequenceDictionary(cramDictionaryContigs);
+
+        // CRAM validation against the reference SHOULD require a superset relationship, so we should
+        // get an exception here
+        SequenceDictionaryUtils.validateCRAMDictionaryAgainstReference(refDictionary, cramDictionary);
+    }
+
+    @DataProvider(name = "SupersetData")
+    public Object[][] getSupersetData() {
+        return new Object[][] {
+                { Arrays.asList(CHR2_HG19, CHR1_HG19, CHR10_HG19), Arrays.asList(CHR2_HG19, CHR1_HG19, CHR10_HG19)}, //exactly same
+                { Arrays.asList(CHR2_HG19, CHR1_HG19, CHR10_HG19), Arrays.asList(CHR1_HG19, CHR2_HG19) },
+                { Arrays.asList(CHR10_HG19, CHR2_HG19, CHR1_HG19), Arrays.asList(CHR1_HG19) }
+        };
+    }
+
+    @Test(dataProvider = "SupersetData")
+    public void testCRAMValidationDoesAcceptSuperset( final List<SAMSequenceRecord> refDictionaryContigs, final List<SAMSequenceRecord> cramDictionaryContigs ) {
+        final SAMSequenceDictionary refDictionary = createSequenceDictionary(refDictionaryContigs);
+        final SAMSequenceDictionary cramDictionary = createSequenceDictionary(cramDictionaryContigs);
+
+        //In these inputs , cram contigs are subsets of ref contigs and so it should be accepted
+        SequenceDictionaryUtils.validateCRAMDictionaryAgainstReference(refDictionary, cramDictionary);
+    }
+
+    private SAMSequenceDictionary createSequenceDictionary( final List<SAMSequenceRecord> contigs ) {
+        final List<SAMSequenceRecord> clonedContigs = new ArrayList<SAMSequenceRecord>(contigs.size());
+
+        // Clone the individual SAMSequenceRecords to avoid contig-index issues with shared objects
+        // across multiple dictionaries in tests
+        for ( SAMSequenceRecord contig : contigs ) {
+            clonedContigs.add(contig.clone());
+        }
+
+        return new SAMSequenceDictionary(clonedContigs);
+    }
+
+    @Test(expectedExceptions = IllegalArgumentException.class)
+    public void testGetContigNamesListExpectingException() {
+        getContigNamesList(null);
+    }
+
+    @Test
+    public void testGetContigNamesList() {
+
+        final SAMSequenceDictionary samSequenceDictionary = new SAMSequenceDictionary(Arrays.asList(CHR1_B37, CHR2_B37, CHR10_B37));
+
+        Assert.assertEquals(getContigNamesList(samSequenceDictionary), Arrays.asList("1", "2", "10"));
+    }
+}
\ No newline at end of file

From f66252a46001952589d7ec1b69ac31613bdc030f Mon Sep 17 00:00:00 2001
From: Chris Norman <cnorman@broadinstitute.org>
Date: Mon, 8 Nov 2021 17:09:17 -0500
Subject: [PATCH 02/22] VCFHeader and VCFHeaderLine refactoring to enable
 support for VCF4.3/BCF2.2 and bug fixes.

---
 src/main/java/htsjdk/samtools/Defaults.java   |   6 +
 .../samtools/SAMSequenceDictionary.java       |  15 +
 .../samtools/SAMSequenceDictionaryUtils.java  | 181 +---
 .../java/htsjdk/tribble/TribbleException.java |   6 +
 .../java/htsjdk/variant/bcf2/BCF2Utils.java   |  29 +-
 .../variantcontext/writer/VCFWriter.java      |  54 +-
 .../htsjdk/variant/vcf/AbstractVCFCodec.java  | 548 +++++++----
 .../java/htsjdk/variant/vcf/VCF3Codec.java    |  69 +-
 .../htsjdk/variant/vcf/VCFAltHeaderLine.java  |  40 +-
 .../java/htsjdk/variant/vcf/VCFCodec.java     | 127 +--
 .../variant/vcf/VCFCompoundHeaderLine.java    | 580 ++++++------
 .../java/htsjdk/variant/vcf/VCFConstants.java |  26 +-
 .../variant/vcf/VCFContigHeaderLine.java      | 165 +++-
 .../variant/vcf/VCFFilterHeaderLine.java      |  48 +-
 .../variant/vcf/VCFFormatHeaderLine.java      |  61 +-
 .../java/htsjdk/variant/vcf/VCFHeader.java    | 643 +++++++------
 .../htsjdk/variant/vcf/VCFHeaderLine.java     | 125 ++-
 .../variant/vcf/VCFHeaderLineCount.java       |  69 ++
 .../variant/vcf/VCFHeaderLineTranslator.java  | 127 +--
 .../htsjdk/variant/vcf/VCFHeaderLineType.java |  30 +-
 .../htsjdk/variant/vcf/VCFHeaderMerger.java   | 286 ++++++
 .../htsjdk/variant/vcf/VCFHeaderVersion.java  |  43 +-
 .../htsjdk/variant/vcf/VCFInfoHeaderLine.java |  72 +-
 .../htsjdk/variant/vcf/VCFMetaDataLines.java  | 525 +++++++++++
 .../htsjdk/variant/vcf/VCFMetaHeaderLine.java |  32 +-
 .../variant/vcf/VCFPedigreeHeaderLine.java    |  42 +-
 .../htsjdk/variant/vcf/VCFRecordCodec.java    |   3 +-
 .../variant/vcf/VCFSampleHeaderLine.java      |  33 +-
 .../variant/vcf/VCFSimpleHeaderLine.java      | 216 +++--
 .../variant/vcf/VCFStandardHeaderLines.java   |  50 +-
 .../java/htsjdk/variant/vcf/VCFUtils.java     | 150 +--
 .../variant/vcf/VCFValidationFailure.java     |  63 ++
 .../SAMSequenceDictionaryUtilsTest.java       | 345 +++----
 .../variant/bcf2/BCF2UtilsUnitTest.java       |  26 +-
 .../variant/bcf2/BCF2WriterUnitTest.java      |   1 +
 .../VariantContextTestProvider.java           |   1 +
 .../AsyncVariantContextWriterUnitTest.java    |   3 +-
 .../writer/VCFWriterUnitTest.java             |  11 +-
 .../variant/vcf/AbstractVCFCodecTest.java     |  69 +-
 .../variant/vcf/VCFAltHeaderLineUnitTest.java |  43 +
 .../variant/vcf/VCFCodec43FeaturesTest.java   |  34 +-
 .../vcf/VCFCompoundHeaderLineUnitTest.java    | 237 ++++-
 .../vcf/VCFContigHeaderLineUnitTest.java      | 184 ++++
 .../htsjdk/variant/vcf/VCFEncoderTest.java    |   1 +
 .../vcf/VCFFormatHeaderLineUnitTest.java      |  19 +
 .../vcf/VCFHeaderLineTranslatorUnitTest.java  |  25 +-
 .../variant/vcf/VCFHeaderLineUnitTest.java    | 123 ++-
 .../variant/vcf/VCFHeaderMergerUnitTest.java  | 554 +++++++++++
 .../htsjdk/variant/vcf/VCFHeaderUnitTest.java | 875 +++++++++++-------
 .../variant/vcf/VCFHeaderUnitTestData.java    | 203 ++++
 .../vcf/VCFInfoHeaderLineUnitTest.java        |  86 ++
 .../variant/vcf/VCFMetaDataLinesUnitTest.java | 354 +++++++
 .../vcf/VCFMetaHeaderLineUnitTest.java        |  44 +
 .../vcf/VCFPedigreeHeaderLineUnitTest.java    |  50 +
 .../vcf/VCFSampleHeaderLineUnitTest.java      |  43 +
 .../vcf/VCFSimpleHeaderLineUnitTest.java      | 151 +++
 .../vcf/VCFStandardHeaderLinesUnitTest.java   |   9 +-
 .../java/htsjdk/variant/vcf/VCFUtilsTest.java |  55 +-
 .../resources/htsjdk/variant/HiSeq.10000.vcf  |   1 -
 .../htsjdk/variant/VCF4HeaderTest.vcf         |   1 -
 60 files changed, 5905 insertions(+), 2107 deletions(-)
 create mode 100644 src/main/java/htsjdk/variant/vcf/VCFHeaderMerger.java
 create mode 100644 src/main/java/htsjdk/variant/vcf/VCFMetaDataLines.java
 create mode 100644 src/main/java/htsjdk/variant/vcf/VCFValidationFailure.java
 create mode 100644 src/test/java/htsjdk/variant/vcf/VCFAltHeaderLineUnitTest.java
 create mode 100644 src/test/java/htsjdk/variant/vcf/VCFContigHeaderLineUnitTest.java
 create mode 100644 src/test/java/htsjdk/variant/vcf/VCFFormatHeaderLineUnitTest.java
 create mode 100644 src/test/java/htsjdk/variant/vcf/VCFHeaderMergerUnitTest.java
 create mode 100644 src/test/java/htsjdk/variant/vcf/VCFHeaderUnitTestData.java
 create mode 100644 src/test/java/htsjdk/variant/vcf/VCFInfoHeaderLineUnitTest.java
 create mode 100644 src/test/java/htsjdk/variant/vcf/VCFMetaDataLinesUnitTest.java
 create mode 100644 src/test/java/htsjdk/variant/vcf/VCFMetaHeaderLineUnitTest.java
 create mode 100644 src/test/java/htsjdk/variant/vcf/VCFPedigreeHeaderLineUnitTest.java
 create mode 100644 src/test/java/htsjdk/variant/vcf/VCFSampleHeaderLineUnitTest.java
 create mode 100644 src/test/java/htsjdk/variant/vcf/VCFSimpleHeaderLineUnitTest.java

diff --git a/src/main/java/htsjdk/samtools/Defaults.java b/src/main/java/htsjdk/samtools/Defaults.java
index e2ecf3d1f7..b3db211e20 100644
--- a/src/main/java/htsjdk/samtools/Defaults.java
+++ b/src/main/java/htsjdk/samtools/Defaults.java
@@ -110,6 +110,11 @@ public class Defaults {
      */
     public static final boolean DISABLE_SNAPPY_COMPRESSOR;
 
+    /**
+     * Strict VCF version validation.  Default = true.
+     */
+    public static final boolean STRICT_VCF_VERSION_VALIDATION;
+
 
     public static final String SAMJDK_PREFIX = "samjdk.";
     static {
@@ -134,6 +139,7 @@ public class Defaults {
         SAM_FLAG_FIELD_FORMAT = SamFlagField.valueOf(getStringProperty("sam_flag_field_format", SamFlagField.DECIMAL.name()));
         SRA_LIBRARIES_DOWNLOAD = getBooleanProperty("sra_libraries_download", false);
         DISABLE_SNAPPY_COMPRESSOR = getBooleanProperty(DISABLE_SNAPPY_PROPERTY_NAME, false);
+        STRICT_VCF_VERSION_VALIDATION = getBooleanProperty("strict_version_validation", true);
     }
 
     /**
diff --git a/src/main/java/htsjdk/samtools/SAMSequenceDictionary.java b/src/main/java/htsjdk/samtools/SAMSequenceDictionary.java
index cf40fe6532..1e6cb764e0 100644
--- a/src/main/java/htsjdk/samtools/SAMSequenceDictionary.java
+++ b/src/main/java/htsjdk/samtools/SAMSequenceDictionary.java
@@ -53,6 +53,13 @@ public SAMSequenceDictionary(final List<SAMSequenceRecord> list) {
         setSequences(list);
     }
 
+    //TODO: this returns sequences in the internal list order instead of
+    // honoring each sequence's contigIndex
+    /**
+     * Get a list of sequences for this dictionary.
+     * @return the list of sequences for this dictionary in internal order (the order in which the sequences
+     * were added to this dictionary)
+     */
     public List<SAMSequenceRecord> getSequences() {
         return Collections.unmodifiableList(mSequences);
     }
@@ -75,6 +82,14 @@ public void setSequences(final List<SAMSequenceRecord> list) {
         list.forEach(this::addSequence);
     }
 
+    /**
+     * Add a sequence to the dictionary.
+     * @param sequenceRecord the sequence record to add - note that this method mutates the contig
+     *                       index of the sequenceRecord to match the newly added record's relative
+     *                       order in the list
+     */
+    //TODO: this method ignores (and actually mutates) the sequenceRecord's contig index to make it match
+    // the record's relative placement in the dictionary's internal list
     public void addSequence(final SAMSequenceRecord sequenceRecord) {
         if (mSequenceMap.containsKey(sequenceRecord.getSequenceName())) {
             throw new IllegalArgumentException("Cannot add sequence that already exists in SAMSequenceDictionary: " +
diff --git a/src/main/java/htsjdk/samtools/SAMSequenceDictionaryUtils.java b/src/main/java/htsjdk/samtools/SAMSequenceDictionaryUtils.java
index 7f1db9fd94..0d5073a0ba 100644
--- a/src/main/java/htsjdk/samtools/SAMSequenceDictionaryUtils.java
+++ b/src/main/java/htsjdk/samtools/SAMSequenceDictionaryUtils.java
@@ -1,16 +1,13 @@
-package org.broadinstitute.hellbender.utils;
+package htsjdk.samtools;
 
-import htsjdk.samtools.SAMSequenceDictionary;
-import htsjdk.samtools.SAMSequenceRecord;
-import org.broadinstitute.hellbender.exceptions.GATKException;
-import org.broadinstitute.hellbender.exceptions.UserException;
+import htsjdk.utils.ValidationUtils;
 
 import java.util.*;
 import java.util.stream.Collectors;
 
 /**
  *
- * A series of utility functions that enable the GATK to compare two sequence dictionaries -- from the reference,
+ * A series of utility functions that enable comparison of two sequence dictionaries -- from the reference,
  * from BAMs, or from feature sources -- for consistency.  The system supports two basic modes: get an enum state that
  * describes at a high level the consistency between two dictionaries, or a validateDictionaries that will
  * blow up with a UserException if the dicts are too incompatible.
@@ -18,9 +15,9 @@
  * Dictionaries are tested for contig name overlaps, consistency in ordering in these overlap set, and length,
  * if available.
  */
-public final class SequenceDictionaryUtils {
+public final class SAMSequenceDictionaryUtils {
 
-    private SequenceDictionaryUtils(){}
+    private SAMSequenceDictionaryUtils(){}
 
     /**
      * Compares sequence records by their order
@@ -59,166 +56,10 @@ public enum SequenceDictionaryCompatibility {
         UNEQUAL_COMMON_CONTIGS,         // common subset has contigs that have the same name but different lengths
         NON_CANONICAL_HUMAN_ORDER,      // human reference detected but the order of the contigs is non-standard (lexicographic, for example)
         OUT_OF_ORDER,                   // the two dictionaries overlap but the overlapping contigs occur in different
-                                        // orders with respect to each other
+        // orders with respect to each other
         DIFFERENT_INDICES               // the two dictionaries overlap and the overlapping contigs occur in the same
-                                        // order with respect to each other, but one or more of them have different
-                                        // indices in the two dictionaries. Eg., { chrM, chr1, chr2 } vs. { chr1, chr2 }
-    }
-
-    /**
-     * Tests for compatibility between two sequence dictionaries, using standard validation settings appropriate
-     * for the GATK. If the dictionaries are incompatible, then UserExceptions are thrown with detailed error messages.
-     *
-     * The standard validation settings used by this method are:
-     *
-     * -Require the dictionaries to share a common subset of equivalent contigs
-     *
-     * -Do not require dict1 to be a superset of dict2.
-     *
-     * -Do not perform checks related to contig ordering: don't throw if the common contigs are in
-     *  different orders with respect to each other, occur at different absolute indices, or are
-     *  lexicographically sorted human dictionaries. GATK uses contig names rather than contig
-     *  indices, and so should not be sensitive to contig ordering issues.
-     *
-     * For comparing a CRAM dictionary against a reference dictionary, call
-     * {@link #validateCRAMDictionaryAgainstReference(SAMSequenceDictionary, SAMSequenceDictionary)} instead.
-     *
-     * @param name1 name associated with dict1
-     * @param dict1 the sequence dictionary dict1
-     * @param name2 name associated with dict2
-     * @param dict2 the sequence dictionary dict2
-     */
-    public static void validateDictionaries( final String name1,
-                                             final SAMSequenceDictionary dict1,
-                                             final String name2,
-                                             final SAMSequenceDictionary dict2) {
-        final boolean requireSuperset = false;
-        final boolean checkContigOrdering = false;
-
-        validateDictionaries(name1, dict1, name2, dict2, requireSuperset, checkContigOrdering);
-    }
-
-    /**
-     * Tests for compatibility between a reference dictionary and a CRAM dictionary, using appropriate
-     * validation settings. If the dictionaries are incompatible, then UserExceptions are thrown with
-     * detailed error messages.
-     *
-     * The standard validation settings used by this method are:
-     *
-     * -Require the reference dictionary to be a superset of the cram dictionary
-     *
-     * -Do not perform checks related to contig ordering: don't throw if the common contigs are in
-     *  different orders with respect to each other, occur at different absolute indices, or are
-     *  lexicographically sorted human dictionaries. GATK uses contig names rather than contig
-     *  indices, and so should not be sensitive to contig ordering issues.
-     *
-     * @param referenceDictionary the sequence dictionary for the reference
-     * @param cramDictionary sequence dictionary from a CRAM file
-     */
-    public static void validateCRAMDictionaryAgainstReference( final SAMSequenceDictionary referenceDictionary,
-                                                               final SAMSequenceDictionary cramDictionary ) {
-        // For CRAM, we require the reference dictionary to be a superset of the reads dictionary
-        final boolean requireSuperset = true;
-        final boolean checkContigOrdering = false;
-
-        validateDictionaries("reference", referenceDictionary, "reads", cramDictionary, requireSuperset, checkContigOrdering);
-    }
-
-
-    /**
-     * Tests for compatibility between two sequence dictionaries.  If the dictionaries are incompatible, then
-     * UserExceptions are thrown with detailed error messages.
-     *
-     * Two sequence dictionaries are compatible if they share a common subset of equivalent contigs,
-     * where equivalent contigs are defined as having the same name and length.
-     *
-     * @param name1 name associated with dict1
-     * @param dict1 the sequence dictionary dict1
-     * @param name2 name associated with dict2
-     * @param dict2 the sequence dictionary dict2
-     * @param requireSuperset if true, require that dict1 be a superset of dict2, rather than dict1 and dict2 sharing a common subset
-     * @param checkContigOrdering if true, require common contigs to be in the same relative order with respect to each other
-     *                            and occur at the same absolute indices, and forbid lexicographically-sorted human dictionaries
-     */
-    public static void validateDictionaries( final String name1,
-                                             final SAMSequenceDictionary dict1,
-                                             final String name2,
-                                             final SAMSequenceDictionary dict2,
-                                             final boolean requireSuperset,
-                                             final boolean checkContigOrdering ) {
-        Utils.nonNull(dict1, "Something went wrong with sequence dictionary detection, check that "+name1+" has a valid sequence dictionary");
-        Utils.nonNull(dict2, "Something went wrong with sequence dictionary detection, check that "+name2+" has a valid sequence dictionary");
-
-        final SequenceDictionaryCompatibility type = compareDictionaries(dict1, dict2, checkContigOrdering);
-
-        switch ( type ) {
-            case IDENTICAL:
-                return;
-            case SUPERSET:
-                return;
-            case COMMON_SUBSET:
-                if ( requireSuperset ) {
-                    final Set<String> contigs1 = dict1.getSequences().stream().map(SAMSequenceRecord::getSequenceName).collect(Collectors.toSet());
-                    final List<String> missingContigs = dict2.getSequences().stream()
-                            .map(SAMSequenceRecord::getSequenceName)
-                            .filter(contig -> !contigs1.contains(contig))
-                            .collect(Collectors.toList());
-                    throw new UserException.IncompatibleSequenceDictionaries(String.format("Dictionary %s is missing contigs found in dictionary %s.  Missing contigs: \n %s \n", name1, name2, String.join(", ", missingContigs)), name1, dict1, name2, dict2);
-                }
-                return;
-            case NO_COMMON_CONTIGS:
-                throw new UserException.IncompatibleSequenceDictionaries("No overlapping contigs found", name1, dict1, name2, dict2);
-
-            case UNEQUAL_COMMON_CONTIGS: {
-                final List<SAMSequenceRecord> x = findDisequalCommonContigs(getCommonContigsByName(dict1, dict2), dict1, dict2);
-                final SAMSequenceRecord elt1 = x.get(0);
-                final SAMSequenceRecord elt2 = x.get(1);
-                throw new UserException.IncompatibleSequenceDictionaries(
-                        String.format("Found contigs with the same name but different lengths:\n  contig %s = %s / %d\n  contig %s = %s / %d",
-                        name1, elt1.getSequenceName(), elt1.getSequenceLength(),
-                        name2, elt2.getSequenceName(), elt2.getSequenceLength()),
-                        name1, dict1, name2, dict2
-                );
-            }
-
-            case NON_CANONICAL_HUMAN_ORDER: {
-                // We only get NON_CANONICAL_HUMAN_ORDER if the caller explicitly requested that we check contig ordering,
-                // so we should always throw when we see it.
-                final UserException ex;
-                if ( nonCanonicalHumanContigOrder(dict1) ) {
-                    ex = new UserException.LexicographicallySortedSequenceDictionary(name1, dict1);
-                }
-                else {
-                    ex = new UserException.LexicographicallySortedSequenceDictionary(name2, dict2);
-                }
-
-                throw ex;
-            }
-
-            case OUT_OF_ORDER: {
-                // We only get OUT_OF_ORDER if the caller explicitly requested that we check contig ordering,
-                // so we should always throw when we see it.
-                throw new UserException.IncompatibleSequenceDictionaries(
-                                "The relative ordering of the common contigs in " + name1 + " and " + name2 +
-                                " is not the same; to fix this please see: "
-                                + "(https://www.broadinstitute.org/gatk/guide/article?id=1328), "
-                                + " which describes reordering contigs in BAM and VCF files.",
-                                name1, dict1, name2, dict2);
-            }
-
-            case DIFFERENT_INDICES: {
-                // We only get DIFFERENT_INDICES if the caller explicitly requested that we check contig ordering,
-                // so we should always throw when we see it.
-                final String msg = "One or more contigs common to both dictionaries have " +
-                        "different indices (ie., absolute positions) in each dictionary. Code " +
-                        "that is sensitive to contig ordering can fail when this is the case. " +
-                        "You should fix the sequence dictionaries so that all shared contigs " +
-                        "occur at the same absolute positions in both dictionaries.";
-                throw new UserException.IncompatibleSequenceDictionaries(msg, name1, dict1, name2, dict2);
-            }
-            default:
-                throw new GATKException("Unexpected SequenceDictionaryComparison type: " + type);
-        }
+        // order with respect to each other, but one or more of them have different
+        // indices in the two dictionaries. Eg., { chrM, chr1, chr2 } vs. { chr1, chr2 }
     }
 
     /**
@@ -465,14 +306,14 @@ public static Set<String> getCommonContigsByName(SAMSequenceDictionary dict1, SA
     }
 
     public static Set<String> getContigNames(SAMSequenceDictionary dict) {
-        Set<String> contigNames = new LinkedHashSet<String>(Utils.optimumHashSize(dict.size()));
+        Set<String> contigNames = new LinkedHashSet<String>(dict.size());
         for (SAMSequenceRecord dictionaryEntry : dict.getSequences())
             contigNames.add(dictionaryEntry.getSequenceName());
         return contigNames;
     }
 
     public static List<String> getContigNamesList(final SAMSequenceDictionary refSeqDict) {
-        Utils.nonNull(refSeqDict, "provided reference sequence ditionary is null");
+        ValidationUtils.nonNull(refSeqDict, "provided reference sequence ditionary is null");
         return refSeqDict.getSequences().stream().map(SAMSequenceRecord::getSequenceName).collect(Collectors.toList());
     }
 
@@ -486,7 +327,7 @@ public static List<String> getContigNamesList(final SAMSequenceDictionary refSeq
      * @return A String containing all of the contig names and lengths from the sequence dictionary it's passed
      */
     public static String getDictionaryAsString( final SAMSequenceDictionary dict ) {
-        Utils.nonNull(dict, "Sequence dictionary must be non-null");
+        ValidationUtils.nonNull(dict, "Sequence dictionary must be non-null");
 
         StringBuilder s = new StringBuilder("[ ");
 
diff --git a/src/main/java/htsjdk/tribble/TribbleException.java b/src/main/java/htsjdk/tribble/TribbleException.java
index abcbc25ca0..4e2651640b 100644
--- a/src/main/java/htsjdk/tribble/TribbleException.java
+++ b/src/main/java/htsjdk/tribble/TribbleException.java
@@ -86,6 +86,12 @@ public static class InternalCodecException extends TribbleException {
         public InternalCodecException(String message) { super (message); }
     }
 
+    public static class VersionValidationFailure extends TribbleException {
+        public VersionValidationFailure(final String message) {
+            super(String.format("Version validation failure: %s", message));
+        }
+    }
+
     // //////////////////////////////////////////////////////////////////////
     // Index exceptions
     // //////////////////////////////////////////////////////////////////////
diff --git a/src/main/java/htsjdk/variant/bcf2/BCF2Utils.java b/src/main/java/htsjdk/variant/bcf2/BCF2Utils.java
index 39478bf069..545ede7497 100644
--- a/src/main/java/htsjdk/variant/bcf2/BCF2Utils.java
+++ b/src/main/java/htsjdk/variant/bcf2/BCF2Utils.java
@@ -27,7 +27,11 @@
 
 import htsjdk.samtools.util.FileExtensions;
 import htsjdk.tribble.TribbleException;
-import htsjdk.variant.vcf.*;
+import htsjdk.variant.vcf.VCFConstants;
+import htsjdk.variant.vcf.VCFHeader;
+import htsjdk.variant.vcf.VCFHeaderLine;
+import htsjdk.variant.vcf.VCFIDHeaderLine;
+import htsjdk.variant.vcf.VCFSimpleHeaderLine;
 
 import java.io.File;
 import java.io.FileNotFoundException;
@@ -93,10 +97,15 @@ public static ArrayList<String> makeDictionary(final VCFHeader header) {
         // set up the strings dictionary
         for ( VCFHeaderLine line : header.getMetaDataInInputOrder() ) {
             if ( line.shouldBeAddedToDictionary() ) {
-                final VCFIDHeaderLine idLine = (VCFIDHeaderLine)line;
-                if ( ! seen.contains(idLine.getID())) {
-                    dict.add(idLine.getID());
-                    seen.add(idLine.getID());
+                if (!line.isIDHeaderLine()) {
+                    //TODO: is there a better way to ensure that shouldBeAddedToDictionary==true only when isIDHeaderLine==true?
+                    throw new TribbleException(String.format(
+                            "The header line %s cannot be added to the BCF dictionary since its not an ID header line",
+                            line));
+                }
+                if ( ! seen.contains(line.getID())) {
+                    dict.add(line.getID());
+                    seen.add(line.getID());
                 }
             }
         }
@@ -291,7 +300,7 @@ else if ( o.getClass().isArray() ) {
      * Are the elements and their order in the output and input headers consistent so that
      * we can write out the raw genotypes block without decoding and recoding it?
      *
-     * If the order of INFO, FILTER, or contrig elements in the output header is different than
+     * If the order of INFO, FILTER, or contig elements in the output header is different than
      * in the input header we must decode the blocks using the input header and then recode them
      * based on the new output order.
      *
@@ -308,15 +317,15 @@ public static boolean headerLinesAreOrderedConsistently(final VCFHeader outputHe
         if ( ! nullAsEmpty(outputHeader.getSampleNamesInOrder()).equals(nullAsEmpty(genotypesBlockHeader.getSampleNamesInOrder())) )
             return false;
 
-        final Iterator<? extends VCFIDHeaderLine> outputLinesIt = outputHeader.getIDHeaderLines().iterator();
-        final Iterator<? extends VCFIDHeaderLine> inputLinesIt = genotypesBlockHeader.getIDHeaderLines().iterator();
+        final Iterator<VCFSimpleHeaderLine> outputLinesIt = outputHeader.getIDHeaderLines().iterator();
+        final Iterator<VCFSimpleHeaderLine> inputLinesIt = genotypesBlockHeader.getIDHeaderLines().iterator();
 
         while ( inputLinesIt.hasNext() ) {
             if ( ! outputLinesIt.hasNext() ) // missing lines in output
                 return false;
 
-            final VCFIDHeaderLine outputLine = outputLinesIt.next();
-            final VCFIDHeaderLine inputLine = inputLinesIt.next();
+            final VCFSimpleHeaderLine outputLine = outputLinesIt.next();
+            final VCFSimpleHeaderLine inputLine = inputLinesIt.next();
 
             if ( ! inputLine.getClass().equals(outputLine.getClass()) || ! inputLine.getID().equals(outputLine.getID()) )
                 return false;
diff --git a/src/main/java/htsjdk/variant/variantcontext/writer/VCFWriter.java b/src/main/java/htsjdk/variant/variantcontext/writer/VCFWriter.java
index 21f1453fbb..1b6edae1d8 100644
--- a/src/main/java/htsjdk/variant/variantcontext/writer/VCFWriter.java
+++ b/src/main/java/htsjdk/variant/variantcontext/writer/VCFWriter.java
@@ -27,8 +27,11 @@
 
 import htsjdk.samtools.SAMSequenceDictionary;
 import htsjdk.samtools.util.IOUtil;
+import htsjdk.samtools.util.Log;
 import htsjdk.samtools.util.RuntimeIOException;
+import htsjdk.tribble.TribbleException;
 import htsjdk.tribble.index.IndexCreator;
+import htsjdk.utils.ValidationUtils;
 import htsjdk.variant.variantcontext.VariantContext;
 import htsjdk.variant.variantcontext.VariantContextBuilder;
 import htsjdk.variant.vcf.VCFConstants;
@@ -36,6 +39,7 @@
 import htsjdk.variant.vcf.VCFHeader;
 import htsjdk.variant.vcf.VCFHeaderLine;
 import htsjdk.variant.vcf.VCFHeaderVersion;
+import htsjdk.variant.vcf.VCFUtils;
 
 import java.io.BufferedWriter;
 import java.io.ByteArrayOutputStream;
@@ -45,14 +49,15 @@
 import java.io.OutputStreamWriter;
 import java.io.Writer;
 import java.nio.file.Path;
+import java.util.stream.Collectors;
 
 /**
  * this class writes VCF files
  */
 class VCFWriter extends IndexingVariantContextWriter {
+    protected final static Log logger = Log.getInstance(VCFWriter.class);
 
-    private static final String VERSION_LINE =
-            VCFHeader.METADATA_INDICATOR + VCFHeaderVersion.VCF4_2.getFormatString() + "=" + VCFHeaderVersion.VCF4_2.getVersionString();
+    private static final String DEFAULT_VERSION_LINE = VCFHeader.DEFAULT_VCF_VERSION.toHeaderVersionLine();
 
 	// Initialized when the header is written to the output stream
 	private VCFEncoder vcfEncoder = null;
@@ -164,7 +169,7 @@ public void writeHeader(final VCFHeader header) {
     }
 
     public static String getVersionLine() {
-        return VERSION_LINE;
+        return DEFAULT_VERSION_LINE;
     }
 
     public static VCFHeader writeHeader(VCFHeader header,
@@ -175,12 +180,18 @@ public static VCFHeader writeHeader(VCFHeader header,
         try {
             rejectVCFV43Headers(header);
 
-            // the file format field needs to be written first
+            // Validate that the file version we're writing is version-compatible with this header's version.
+            validateHeaderVersion(header, versionLine);
+
+            // The file format field needs to be written first; any file format lines embedded in
+            // the header are skipped in the loop below
             writer.write(versionLine + "\n");
 
             for (final VCFHeaderLine line : header.getMetaDataInSortedOrder() ) {
-                if ( VCFHeaderVersion.isFormatString(line.getKey()) )
+                // Skip any fileformat header lines embedded in the header (the version line was written above)
+                if ( VCFHeaderVersion.isFormatString(line.getKey()) ) {
                     continue;
+                }
 
                 writer.write(VCFHeader.METADATA_INDICATOR);
                 writer.write(line.toString());
@@ -189,14 +200,9 @@ public static VCFHeader writeHeader(VCFHeader header,
 
             // write out the column line
             writer.write(VCFHeader.HEADER_INDICATOR);
-            boolean isFirst = true;
-            for (final VCFHeader.HEADER_FIELDS field : header.getHeaderFields() ) {
-                if ( isFirst )
-                    isFirst = false; // don't write out a field separator
-                else
-                    writer.write(VCFConstants.FIELD_SEPARATOR);
-                writer.write(field.toString());
-            }
+            writer.write(header.getHeaderFields().stream()
+                    .map(f -> f.name())
+                    .collect(Collectors.joining(VCFConstants.FIELD_SEPARATOR)).toString());
 
             if ( header.hasGenotypingData() ) {
                 writer.write(VCFConstants.FIELD_SEPARATOR);
@@ -274,6 +280,28 @@ private static void rejectVCFV43Headers(final VCFHeader targetHeader) {
         if (targetHeader.getVCFHeaderVersion() != null && targetHeader.getVCFHeaderVersion().isAtLeastAsRecentAs(VCFHeaderVersion.VCF4_3)) {
             throw new IllegalArgumentException(String.format("Writing VCF version %s is not implemented", targetHeader.getVCFHeaderVersion()));
         }
+    }
 
+    // Check that a header can be written under the requested output version. The header's own version must
+    // either equal the version named by requestedVersionLine, or be reported as compatible with it by
+    // VCFHeaderVersion.versionsAreCompatible; otherwise throw (strict validation) or log a warning.
+    private static void validateHeaderVersion(final VCFHeader header, final String requestedVersionLine) {
+        ValidationUtils.nonNull(header);
+        ValidationUtils.nonNull(requestedVersionLine);
+
+        final VCFHeaderVersion vcfCurrentVersion = header.getVCFHeaderVersion();
+        final VCFHeaderVersion vcfRequestedVersion = VCFHeaderVersion.fromHeaderVersionLine(requestedVersionLine);
+        if (!vcfCurrentVersion.equals(vcfRequestedVersion)) {
+            if (!VCFHeaderVersion.versionsAreCompatible(vcfRequestedVersion, vcfCurrentVersion)) {
+                // the header carries the current version; the writer was asked for the requested version
+                final String message = String.format("Attempting to write a %s VCF header to a %s VCFWriter",
+                        vcfCurrentVersion.getVersionString(), vcfRequestedVersion.getVersionString());
+                if (VCFUtils.isStrictVCFVersionValidation()) {
+                    throw new TribbleException(message);
+                }
+                logger.warn(message);
+            }
+        }
     }
+
 }
diff --git a/src/main/java/htsjdk/variant/vcf/AbstractVCFCodec.java b/src/main/java/htsjdk/variant/vcf/AbstractVCFCodec.java
index bfa718453e..1a1267e5c8 100644
--- a/src/main/java/htsjdk/variant/vcf/AbstractVCFCodec.java
+++ b/src/main/java/htsjdk/variant/vcf/AbstractVCFCodec.java
@@ -26,12 +26,14 @@
 package htsjdk.variant.vcf;
 
 import htsjdk.samtools.util.BlockCompressedInputStream;
+import htsjdk.samtools.util.Log;
 import htsjdk.samtools.util.IOUtil;
 import htsjdk.tribble.AsciiFeatureCodec;
 import htsjdk.tribble.Feature;
 import htsjdk.tribble.NameAwareCodec;
 import htsjdk.tribble.TribbleException;
 import htsjdk.tribble.index.tabix.TabixFormat;
+import htsjdk.tribble.readers.LineIterator;
 import htsjdk.tribble.util.ParsingUtils;
 import htsjdk.utils.ValidationUtils;
 import htsjdk.variant.utils.GeneralUtils;
@@ -46,6 +48,8 @@
 import java.util.zip.GZIPInputStream;
 
 public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext> implements NameAwareCodec {
+    protected final static Log logger = Log.getInstance(AbstractVCFCodec.class);
+
     public final static int MAX_ALLELE_SIZE_BEFORE_WARNING = (int)Math.pow(2, 20);
 
     protected final static int NUM_STANDARD_FIELDS = 8;  // INFO is the 8th
@@ -60,26 +64,22 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
     private VCFTextTransformer vcfTextTransformer = passThruTextTransformer;
 
     // a mapping of the allele
-    protected Map<String, List<Allele>> alleleMap = new HashMap<String, List<Allele>>(3);
-    
-    // for performance testing purposes
-    public static boolean validate = true;
+    protected final Map<String, List<Allele>> alleleMap = new HashMap<>(3);
 
     // a key optimization -- we need a per thread string parts array, so we don't allocate a big array over and over
     // todo: make this thread safe?
     protected String[] parts = null;
     protected String[] genotypeParts = null;
-    protected final String[] locParts = new String[6];
 
     // for performance we cache the hashmap of filter encodings for quick lookup
-    protected HashMap<String,List<String>> filterHash = new HashMap<String,List<String>>();
+    protected final HashMap<String, Set<String>> filterHash = new HashMap<>();
 
     // we store a name to give to each of the variant contexts we emit
     protected String name = "Unknown";
 
     protected int lineNo = 0;
 
-    protected Map<String, String> stringCache = new HashMap<String, String>();
+    protected final Map<String, String> stringCache = new HashMap<>();
 
     protected boolean warnedAboutNoEqualsForNonFlag = false;
 
@@ -117,17 +117,72 @@ class LazyVCFGenotypesParser implements LazyGenotypesContext.LazyParser {
 
         @Override
         public LazyGenotypesContext.LazyData parse(final Object data) {
-            //System.out.printf("Loading genotypes... %s:%d%n", contig, start);
             return createGenotypeMap((String) data, alleles, contig, start);
         }
     }
 
     /**
-     * parse the filter string, first checking to see if we already have parsed it in a previous attempt
-     * @param filterString the string to parse
-     * @return a set of the filters applied
+     * Return true if this codec can decode files with the target version
+     * @param targetVersion the target version to consider
+     * @return true if this codec can handle targetVersion
+     */
+    public abstract boolean canDecodeVersion(final VCFHeaderVersion targetVersion);
+
+    /**
+     * Reads all of the header from the provided iterator, but reads no further.
+     * @param lineIterator the line reader to take header lines from
+     * @return The parsed header
      */
-    protected abstract List<String> parseFilters(String filterString);
+    @Override
+    public Object readActualHeader(final LineIterator lineIterator) {
+        final List<String> headerStrings = new ArrayList<>();
+
+        // The file format/version line must be the first line: consume it, then add back the
+        // version line generated from the parsed version.
+        final VCFHeaderVersion fileFormatVersion = readFormatVersionLine(lineIterator);
+        headerStrings.add(fileFormatVersion.toHeaderVersionLine());
+
+        // Collect "##" metadata lines until the single-"#" column header line completes the header.
+        while (lineIterator.hasNext()) {
+            final String line = lineIterator.peek();
+            if (line.startsWith(VCFHeader.METADATA_INDICATOR)) {
+                lineNo++;
+                headerStrings.add(lineIterator.next());
+            } else if (line.startsWith(VCFHeader.HEADER_INDICATOR)) {
+                lineNo++;
+                headerStrings.add(lineIterator.next());
+                this.header = parseHeaderFromLines(headerStrings, fileFormatVersion);
+                return this.header;
+            } else {
+                break; // this line is never consumed, so looping again would peek it forever
+            }
+        }
+        throw new TribbleException.InvalidHeader(
+                "The required header line (starting with one #) is missing in the input VCF file");
+    }
+
+    /**
+     * Consume the next line from the iterator and interpret it as the VCF file format/version line.
+     *
+     * @param headerLineIterator iterator from which the version line is read; one line is consumed if available
+     * @return the VCFHeaderVersion parsed from that line
+     * @throws TribbleException if there is no next line, the line is not a metadata line, or the parsed
+     * version can't be handled by this codec
+     */
+    protected VCFHeaderVersion readFormatVersionLine(final LineIterator headerLineIterator) {
+        if (!headerLineIterator.hasNext()) {
+            throw new TribbleException.InvalidHeader("The VCF version header line is missing");
+        }
+        final String firstLine = headerLineIterator.next();
+        if (!firstLine.startsWith(VCFHeader.METADATA_INDICATOR)) {
+            throw new TribbleException.InvalidHeader("The VCF version header line is missing");
+        }
+        final VCFHeaderVersion parsedVersion = VCFHeaderVersion.fromHeaderVersionLine(firstLine);
+        if (canDecodeVersion(parsedVersion)) {
+            return parsedVersion;
+        }
+        throw new TribbleException.InvalidHeader(
+                String.format("The \"(%s)\" codec does not support VCF version: %s", getName(), parsedVersion));
+    }
 
     /**
      * create a VCF header from a set of header record lines
@@ -135,180 +190,306 @@ public LazyGenotypesContext.LazyData parse(final Object data) {
      * @param headerStrings a list of strings that represent all the ## and # entries
      * @return a VCFHeader object
      */
-    protected VCFHeader parseHeaderFromLines( final List<String> headerStrings, final VCFHeaderVersion version ) {
-        this.version = version;
+    protected VCFHeader parseHeaderFromLines( final List<String> headerStrings, final VCFHeaderVersion sourceVersion ) {
+        this.version = sourceVersion;
 
-        Set<VCFHeaderLine> metaData = new LinkedHashSet<VCFHeaderLine>();
-        Set<String> sampleNames = new LinkedHashSet<String>();
+        final Set<VCFHeaderLine> metaData = new LinkedHashSet<>();
+        Set<String> sampleNames = new LinkedHashSet<>();
         int contigCounter = 0;
-        // iterate over all the passed in strings
-        for ( String str : headerStrings ) {
-            if ( !str.startsWith(VCFHeader.METADATA_INDICATOR) ) {
-                String[] strings = str.substring(1).split(VCFConstants.FIELD_SEPARATOR);
-                if ( strings.length < VCFHeader.HEADER_FIELDS.values().length )
-                    throw new TribbleException.InvalidHeader("there are not enough columns present in the header line: " + str);
-
-                int arrayIndex = 0;
-                for (VCFHeader.HEADER_FIELDS field : VCFHeader.HEADER_FIELDS.values()) {
-                    try {
-                        if (field != VCFHeader.HEADER_FIELDS.valueOf(strings[arrayIndex]))
-                            throw new TribbleException.InvalidHeader("we were expecting column name '" + field + "' but we saw '" + strings[arrayIndex] + "'");
-                    } catch (IllegalArgumentException e) {
-                        throw new TribbleException.InvalidHeader("unknown column name '" + strings[arrayIndex] + "'; it does not match a legal column header name.");
-                    }
-                    arrayIndex++;
-                }
-
-                boolean sawFormatTag = false;
-                if ( arrayIndex < strings.length ) {
-                    if ( !strings[arrayIndex].equals("FORMAT") )
-                        throw new TribbleException.InvalidHeader("we were expecting column name 'FORMAT' but we saw '" + strings[arrayIndex] + "'");
-                    sawFormatTag = true;
-                    arrayIndex++;
-                }
-
-                while ( arrayIndex < strings.length )
-                    sampleNames.add(strings[arrayIndex++]);
-
-                if ( sawFormatTag && sampleNames.isEmpty())
-                    throw new TribbleException.InvalidHeader("The FORMAT field was provided but there is no genotype/sample data");
-
-                // If we're performing sample name remapping and there is exactly one sample specified in the header, replace
-                // it with the remappedSampleName. Throw an error if there are 0 or multiple samples and remapping was requested
-                // for this file.
-                if ( remappedSampleName != null ) {
-                    // We currently only support on-the-fly sample name remapping for single-sample VCFs
-                    if ( sampleNames.isEmpty() || sampleNames.size() > 1 ) {
-                        throw new TribbleException(String.format("Cannot remap sample name to %s because %s samples are specified in the VCF header, and on-the-fly sample name remapping is only supported for single-sample VCFs",
-                                                                 remappedSampleName, sampleNames.isEmpty() ? "no" : "multiple"));
-                    }
-
-                    sampleNames.clear();
-                    sampleNames.add(remappedSampleName);
-                }
 
+        for ( String headerLine : headerStrings ) {
+            if ( !headerLine.startsWith(VCFHeader.METADATA_INDICATOR) ) {
+                sampleNames = parsePrimaryHeaderLine(headerLine);
             } else {
-                if ( str.startsWith(VCFConstants.INFO_HEADER_START) ) {
-                    final VCFInfoHeaderLine info = new VCFInfoHeaderLine(str.substring(7), version);
-                    metaData.add(info);
-                } else if ( str.startsWith(VCFConstants.FILTER_HEADER_START) ) {
-                    final VCFFilterHeaderLine filter = new VCFFilterHeaderLine(str.substring(9), version);
-                    metaData.add(filter);
-                } else if ( str.startsWith(VCFConstants.FORMAT_HEADER_START) ) {
-                    final VCFFormatHeaderLine format = new VCFFormatHeaderLine(str.substring(9), version);
-                    metaData.add(format);
-                } else if ( str.startsWith(VCFConstants.CONTIG_HEADER_START) ) {
-                    final VCFContigHeaderLine contig = new VCFContigHeaderLine(str.substring(9), version, VCFConstants.CONTIG_HEADER_START.substring(2), contigCounter++);
-                    metaData.add(contig);
-                } else if ( str.startsWith(VCFConstants.ALT_HEADER_START) ) {
-                    metaData.add(getAltHeaderLine(str.substring(VCFConstants.ALT_HEADER_OFFSET), version));
-                } else if ( str.startsWith(VCFConstants.PEDIGREE_HEADER_START) && version.isAtLeastAsRecentAs(VCFHeaderVersion.VCF4_3)) {
-                    // only model pedigree header lines as structured header lines starting with v4.3
-                    metaData.add(getPedigreeHeaderLine(str.substring(VCFConstants.PEDIGREE_HEADER_OFFSET), version));
-                } else if ( str.startsWith(VCFConstants.META_HEADER_START) ) {
-                    metaData.add(getMetaHeaderLine(str.substring(VCFConstants.META_HEADER_OFFSET), version));
-                } else if ( str.startsWith(VCFConstants.SAMPLE_HEADER_START) ) {
-                    metaData.add(getSampleHeaderLine(str.substring(VCFConstants.SAMPLE_HEADER_OFFSET), version));
+                if ( headerLine.startsWith(VCFConstants.INFO_HEADER_START) ) {
+                    metaData.add(getInfoHeaderLine(headerLine.substring(VCFConstants.INFO_HEADER_OFFSET), sourceVersion));
+                } else if ( headerLine.startsWith(VCFConstants.FILTER_HEADER_START) ) {
+                    metaData.add(getFilterHeaderLine(headerLine.substring(VCFConstants.FILTER_HEADER_OFFSET), sourceVersion));
+                } else if ( headerLine.startsWith(VCFConstants.FORMAT_HEADER_START) ) {
+                    metaData.add(getFormatHeaderLine(headerLine.substring(VCFConstants.FORMAT_HEADER_OFFSET), sourceVersion));
+                } else if ( headerLine.startsWith(VCFConstants.CONTIG_HEADER_START) ) {
+                    metaData.add(getContigHeaderLine(headerLine.substring(VCFConstants.CONTIG_HEADER_OFFSET), sourceVersion, contigCounter++));
+                } else if ( headerLine.startsWith(VCFConstants.ALT_HEADER_START) ) {
+                    metaData.add(getAltHeaderLine(headerLine.substring(VCFConstants.ALT_HEADER_OFFSET), sourceVersion));
+                } else if ( headerLine.startsWith(VCFConstants.PEDIGREE_HEADER_START) ) {
+                    metaData.add(getPedigreeHeaderLine(headerLine.substring(VCFConstants.PEDIGREE_HEADER_OFFSET), sourceVersion));
+                } else if ( headerLine.startsWith(VCFConstants.META_HEADER_START) ) {
+                    metaData.add(getMetaHeaderLine(headerLine.substring(VCFConstants.META_HEADER_OFFSET), sourceVersion));
+                } else if ( headerLine.startsWith(VCFConstants.SAMPLE_HEADER_START) ) {
+                    metaData.add(getSampleHeaderLine(headerLine.substring(VCFConstants.SAMPLE_HEADER_OFFSET), sourceVersion));
                 } else {
-                    int equals = str.indexOf('=');
-                    if ( equals != -1 )
-                        metaData.add(new VCFHeaderLine(str.substring(2, equals), str.substring(equals+1)));
+                    final VCFHeaderLine otherHeaderLine = getOtherHeaderLine(
+                            headerLine.substring(VCFHeader.METADATA_INDICATOR.length()),
+                            sourceVersion);
+                    if (otherHeaderLine != null)
+                        metaData.add(otherHeaderLine);
                 }
             }
         }
-
-        setVCFHeader(new VCFHeader(version, metaData, sampleNames), version);
-        return this.header;
+        // Return the header that setVCFHeader returns rather than the one created here; the two
+        // can differ because setVCFHeader may call
+        // {@link VCFStandardHeaderLines#repairStandardHeaderLines(VCFHeader)}, which can create an
+        // entirely new "repaired" header.
+        final VCFHeader vcfHeader = new VCFHeader(metaData, sampleNames);
+        return setVCFHeader(vcfHeader);
     }
 
     /**
-     * @return the header that was either explicitly set on this codec, or read from the file. May be null.
-     * The returned value should not be modified.
+     * Create and return a VCFInfoHeaderLine object from a header line string that conforms to the {@code sourceVersion}
+     * @param headerLineString VCF header line being parsed without the leading "##"
+     * @param sourceVersion the VCF header version of the source from which the line was retrieved. The resulting header
+     *                      line object should be valid for this header version.
+     * @return a VCFInfoHeaderLine object
      */
-    public VCFHeader getHeader() {
-        return header;
+    protected VCFInfoHeaderLine getInfoHeaderLine(final String headerLineString, final VCFHeaderVersion sourceVersion) {
+        return new VCFInfoHeaderLine(headerLineString, sourceVersion);
     }
 
     /**
-     * @return the version number that was either explicitly set on this codec, or read from the file. May be null.
+     * Create and return a VCFFormatHeaderLine object from a header line string that conforms to the {@code sourceVersion}
+     * @param headerLineString VCF header line being parsed without the leading "##"
+     * @param sourceVersion the VCF header version of the source from which the line was retrieved. The resulting header
+     *                      line object should be valid for this header version.
+     * @return a VCFFormatHeaderLine object
      */
-    public VCFHeaderVersion getVersion() {
-        return version;
+    protected VCFFormatHeaderLine getFormatHeaderLine(final String headerLineString, final VCFHeaderVersion sourceVersion) {
+        return new VCFFormatHeaderLine(headerLineString, sourceVersion);
     }
 
     /**
-     * Explicitly set the VCFHeader on this codec. This will overwrite the header read from the file
-     * and the version state stored in this instance; conversely, reading the header from a file will
-     * overwrite whatever is set here.
-     *
-     * @param newHeader
-     * @param newVersion
-     * @return the actual header for this codec. The returned header may not be identical to the header
-     * argument since the header lines may be "repaired" (i.e., rewritten) if doOnTheFlyModifications is set.
-     * @throws TribbleException if the requested header version is not compatible with the existing version
+     * Create and return a VCFFilterHeaderLine object from a header line string that conforms to the {@code sourceVersion}
+     * @param headerLineString VCF header line being parsed without the leading "##"
+     * @param sourceVersion the VCF header version of the source from which the line was retrieved. The resulting header
+     *                      line object should be valid for this header version.
+     * @return a VCFFilterHeaderLine object
      */
-    public VCFHeader setVCFHeader(final VCFHeader newHeader, final VCFHeaderVersion newVersion) {
-        validateHeaderVersionTransition(newHeader, newVersion);
-        if (this.doOnTheFlyModifications) {
-            final VCFHeader repairedHeader = VCFStandardHeaderLines.repairStandardHeaderLines(newHeader);
-            // validate the new header after repair to ensure the resulting header version is
-            // still compatible with the current version
-            validateHeaderVersionTransition(repairedHeader, newVersion);
-            this.header = repairedHeader;
-        } else {
-            this.header = newHeader;
-        }
-
-        this.version = newVersion;
-        this.vcfTextTransformer = getTextTransformerForVCFVersion(newVersion);
+    protected VCFFilterHeaderLine getFilterHeaderLine(final String headerLineString, final VCFHeaderVersion sourceVersion) {
+        return new VCFFilterHeaderLine(headerLineString, sourceVersion);
+    }
 
-        return this.header;
+    /**
+     * Create and return a VCFContigHeaderLine object from a header line string that conforms to the {@code sourceVersion}
+     * @param headerLineString VCF header line being parsed without the leading "##"
+     * @param sourceVersion the VCF header version of the source; the resulting line should be valid for this version
+     * @param contigIndex index handed to the VCFContigHeaderLine constructor for this contig line
+     * @return a VCFContigHeaderLine object
+     */
+    protected VCFContigHeaderLine getContigHeaderLine(
+            final String headerLineString,
+            final VCFHeaderVersion sourceVersion,
+            final int contigIndex) {
+        return new VCFContigHeaderLine(headerLineString, sourceVersion, contigIndex);
     }
 
     /**
      * Create and return a VCFAltHeaderLine object from a header line string that conforms to the {@code sourceVersion}
-     * @param headerLineString VCF header line being parsed without the leading "##ALT="
+     * @param headerLineString VCF header line being parsed without the leading "##"
      * @param sourceVersion the VCF header version derived from which the source was retrieved. The resulting header
      *                      line object should be validate for this header version.
      * @return a VCFAltHeaderLine object
      */
-    public VCFAltHeaderLine getAltHeaderLine(final String headerLineString, final VCFHeaderVersion sourceVersion) {
+    protected VCFAltHeaderLine getAltHeaderLine(final String headerLineString, final VCFHeaderVersion sourceVersion) {
         return new VCFAltHeaderLine(headerLineString, sourceVersion);
     }
 
     /**
      * Create and return a VCFPedigreeHeaderLine object from a header line string that conforms to the {@code sourceVersion}
-     * @param headerLineString VCF header line being parsed without the leading "##PEDIGREE="
+     * @param headerLineString VCF header line being parsed without the leading "##"
      * @param sourceVersion the VCF header version derived from which the source was retrieved. The resulting header
      *                      line object should be validate for this header version.
      * @return a VCFPedigreeHeaderLine object
+     *
+     * NOTE: this can't be declared to return a VCFPedigreeHeaderLine, since pre-v4.3 PEDIGREE lines must be modeled
+     * as plain VCFHeaderLine due to the lack of a requirement for an ID field
      */
-    public VCFPedigreeHeaderLine getPedigreeHeaderLine(final String headerLineString, final VCFHeaderVersion sourceVersion) {
-        return new VCFPedigreeHeaderLine(headerLineString, sourceVersion);
+    protected VCFHeaderLine getPedigreeHeaderLine(final String headerLineString, final VCFHeaderVersion sourceVersion) {
+        // only v4.3 and later PEDIGREE lines become VCFPedigreeHeaderLine; older ones stay plain key/value lines
+        if (!sourceVersion.isAtLeastAsRecentAs(VCFHeaderVersion.VCF4_3)) {
+            return new VCFHeaderLine(VCFConstants.PEDIGREE_HEADER_KEY, headerLineString);
+        }
+        return new VCFPedigreeHeaderLine(headerLineString, sourceVersion);
     }
 
     /**
      * Create and return a VCFMetaHeaderLine object from a header line string that conforms to the {@code sourceVersion}
-     * @param headerLineString VCF header line being parsed without the leading "##META="
+     * @param headerLineString VCF header line being parsed without the leading "##"
      * @param sourceVersion the VCF header version derived from which the source was retrieved. The resulting header
      *                      line object should be validate for this header version.
      * @return a VCFMetaHeaderLine object
      */
-    public VCFMetaHeaderLine getMetaHeaderLine(final String headerLineString, final VCFHeaderVersion sourceVersion) {
+    protected VCFMetaHeaderLine getMetaHeaderLine(final String headerLineString, final VCFHeaderVersion sourceVersion) {
         return new VCFMetaHeaderLine(headerLineString, sourceVersion);
     }
 
     /**
      * Create and return a VCFSampleHeaderLine object from a header line string that conforms to the {@code sourceVersion}
-     * @param headerLineString VCF header line being parsed without the leading "##SAMPLE="
+     * @param headerLineString VCF header line being parsed without the leading "##"
      * @param sourceVersion the VCF header version derived from which the source was retrieved. The resulting header
      *                      line object should be validate for this header version.
      * @return a VCFSampleHeaderLine object
      */
-    public VCFSampleHeaderLine getSampleHeaderLine(final String headerLineString, final VCFHeaderVersion sourceVersion) {
+    protected VCFSampleHeaderLine getSampleHeaderLine(final String headerLineString, final VCFHeaderVersion sourceVersion) {
         return new VCFSampleHeaderLine(headerLineString, sourceVersion);
     }
 
+    /**
+     * Create and return a header line for metadata that is not modeled by a specific VCFHeaderLine subclass,
+     * i.e., anything that is not an info/format/contig/alt/pedigree/meta/sample line. The result is either a
+     * VCFSimpleHeaderLine or a plain VCFHeaderLine; a line with no usable "key=" prefix is rejected (strict
+     * validation) or dropped with a warning.
+     *
+     * @param headerLineString VCF header line being parsed without the leading "##"
+     * @param sourceVersion VCFHeaderVersion being parsed
+     * @return a VCFHeaderLine, or null if the line was dropped
+     */
+    protected VCFHeaderLine getOtherHeaderLine(final String headerLineString, final VCFHeaderVersion sourceVersion) {
+        final int keyEnd = headerLineString.indexOf('=');
+        if (keyEnd < 1) { // must at least have "?="
+            if (VCFUtils.isStrictVCFVersionValidation()) {
+                throw new TribbleException.InvalidHeader("Unrecognized metadata line type: " + headerLineString);
+            }
+            logger.warn("Dropping unrecognized VCFHeader metadata line type: " + headerLineString);
+            return null;
+        }
+
+        final String key = headerLineString.substring(0, keyEnd);
+        final String rawValue = headerLineString.substring(keyEnd + 1);
+        final String trimmedValue = rawValue.trim();
+        final boolean looksStructured = trimmedValue.startsWith("<") && trimmedValue.endsWith(">");
+
+        // Structured syntax (delimited by "<...>") becomes a VCFSimpleHeaderLine for v4.3 and later, or
+        // whenever the line contains an ID attribute. For pre-v4.3 input with no ID, fall back to a plain
+        // VCFHeaderLine in order to accommodate older files, i.e., GATK Funcotator uses v4.1 ClinVar test
+        // files with lines that look like this:
+        //
+        //      "ID=<Description=\"ClinVar Variation ID\">"
+        //
+        // where the key is "ID", and no ID attribute is present.
+        final boolean useSimpleLine = looksStructured
+                && (sourceVersion.isAtLeastAsRecentAs(VCFHeaderVersion.VCF4_3) || headerLineString.contains("<ID="));
+        if (useSimpleLine) {
+            return new VCFSimpleHeaderLine(key, rawValue, sourceVersion);
+        }
+        return new VCFHeaderLine(key, rawValue);
+    }
+
+    // Parse the primary header line of the form:
+    //
+    // #CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  ...
+    //
+    // and return the sample names that follow FORMAT (or the single remapped sample name when remapping
+    // is in effect). The string passed in is the first non-metadata line we've seen, so it should conform.
+    private Set<String> parsePrimaryHeaderLine(final String headerLine) {
+        final Set<String> sampleNames = new LinkedHashSet<>();
+
+        final String[] columns = headerLine.substring(1).split(VCFConstants.FIELD_SEPARATOR);
+        if ( columns.length < VCFHeader.HEADER_FIELDS.values().length ) {
+            throw new TribbleException.InvalidHeader("not enough columns present in header line: " + headerLine);
+        }
+
+        int col = 0;
+        for (VCFHeader.HEADER_FIELDS field : VCFHeader.HEADER_FIELDS.values()) {
+            try {
+                if (field != VCFHeader.HEADER_FIELDS.valueOf(columns[col])) {
+                    throw new TribbleException.InvalidHeader("expected column name '" + field + "' but saw '" + columns[col] + "'");
+                }
+            } catch (IllegalArgumentException e) {
+                throw new TribbleException.InvalidHeader("column name '" + columns[col] + "' is not a legal column header name.");
+            }
+            col++;
+        }
+
+        boolean sawFormatTag = false;
+        if ( col < columns.length ) {
+            if ( !columns[col].equals("FORMAT") ) {
+                throw new TribbleException.InvalidHeader("expected column name 'FORMAT' but saw '" + columns[col] + "'");
+            }
+            sawFormatTag = true;
+            col++;
+        }
+
+        while ( col < columns.length ) {
+            sampleNames.add(columns[col++]);
+        }
+
+        if ( sawFormatTag && sampleNames.isEmpty()) {
+            throw new TribbleException.InvalidHeader("The FORMAT field was provided but there is no genotype/sample data");
+        }
+
+        // On-the-fly sample name remapping is only supported for single-sample VCFs: replace the one sample
+        // name with remappedSampleName, or throw if the header lists zero or multiple samples.
+        if ( remappedSampleName != null ) {
+            if ( sampleNames.size() != 1 ) {
+                throw new TribbleException(
+                        String.format("Cannot remap sample name to %s because %s samples are specified in the VCF header, " +
+                                        "and on-the-fly sample name remapping is only supported for single-sample VCFs",
+                                remappedSampleName, sampleNames.isEmpty() ? "no" : "multiple"));
+            }
+
+            sampleNames.clear();
+            sampleNames.add(remappedSampleName);
+        }
+
+        return sampleNames;
+    }
+
+    /**
+     * @return the header most recently established on this codec, either explicitly via setVCFHeader or by
+     * reading it from the file. May be null. The returned value should not be modified.
+     */
+    public VCFHeader getHeader() {
+        return header;
+    }
+
+    /**
+     * @return the VCFHeaderVersion that was either established via setVCFHeader, or read from the file. May be null.
+     */
+    public VCFHeaderVersion getVersion() {
+        return version;
+    }
+
+    @Deprecated // starting after version 2.24.1; use setVCFHeader(VCFHeader), to which this overload delegates
+    // Note: this is currently used by Disq. newVersion must equal newHeader's own version (checked below).
+    public VCFHeader setVCFHeader(final VCFHeader newHeader, final VCFHeaderVersion newVersion) {
+        ValidationUtils.nonNull(newHeader);
+        ValidationUtils.nonNull(newVersion);
+        ValidationUtils.validateArg(
+                newHeader.getVCFHeaderVersion().equals(newVersion),
+                "new version must equal the newHeader's version");
+        return setVCFHeader(newHeader);
+    }
+
+    /**
+     * Establish the VCFHeader used by this codec, along with the version and text transformer derived
+     * from it. When doOnTheFlyModifications is set, the provided header is first passed through
+     * {@link VCFStandardHeaderLines#repairStandardHeaderLines(VCFHeader)} (header lines are "repaired",
+     * i.e., upgraded to vcf v4.2), so the header that ends up on the codec may be a complete replacement
+     * for the input header.
+     *
+     * @param newHeader the new header to be used by this codec
+     * @return the actual header that is established for this codec. See {@link
+     * VCFStandardHeaderLines#repairStandardHeaderLines(VCFHeader)}.
+     */
+    public VCFHeader setVCFHeader(final VCFHeader newHeader) {
+        ValidationUtils.nonNull(newHeader);
+
+        // Repairing a header that has any pre-v4.3 version always results in a header with version
+        // vcfV4.2, no matter what the header version originally was, since the "repair" operation is
+        // essentially a transform of the header so that it conforms with header line rules as of 4.2.
+        this.header = this.doOnTheFlyModifications
+                ? VCFStandardHeaderLines.repairStandardHeaderLines(newHeader)
+                : newHeader;
+        this.version = this.header.getVCFHeaderVersion();
+
+        // Technically the text transformer should be based on the ORIGINAL header version rather than
+        // the updated version after repairStandardHeaderLines is called, but it doesn't matter in
+        // practice since the transformer only differs starting with 4.3.
+        final VCFHeaderVersion establishedVersion = this.version;
+        this.vcfTextTransformer = getTextTransformerForVCFVersion(establishedVersion);
+
+        return this.header;
+    }
+
     /**
      * the fast decode function
      * @param line the line of text for the record
@@ -328,28 +509,6 @@ public VariantContext decode(String line) {
         return decodeLine(line, true);
     }
 
-    /**
-     * Throw if new a version/header are not compatible with the existing version/header. Generally, any version
-     * before v4.2 can be up-converted to v4.2, but not to v4.3. Once a header is established as v4.3, it cannot
-     * can not be up or down converted, and it must remain at v4.3.
-     * @param newHeader
-     * @param newVersion
-     * @throws TribbleException if the header conversion is not valid
-     */
-    private void validateHeaderVersionTransition(final VCFHeader newHeader, final VCFHeaderVersion newVersion) {
-        ValidationUtils.nonNull(newHeader);
-        ValidationUtils.nonNull(newVersion);
-
-        VCFHeader.validateVersionTransition(version, newVersion);
-
-        // If this codec currently has no header (this happens when the header is being established for
-        // the first time during file parsing), establish an initial header and version, and bypass
-        // validation.
-        if (header != null && newHeader.getVCFHeaderVersion() != null) {
-            VCFHeader.validateVersionTransition(header.getVCFHeaderVersion(), newHeader.getVCFHeaderVersion());
-        }
-    }
-
     /**
      * For v4.3 up, attribute values can contain embedded percent-encoded characters which must be decoded
      * on read. Return a version-aware text transformer that can decode encoded text.
@@ -421,7 +580,7 @@ else if ( parts[2].equals(VCFConstants.EMPTY_ID_FIELD) )
         final String alts = parts[4];
         builder.log10PError(parseQual(parts[5]));
 
-        final List<String> filters = parseFilters(getCachedString(parts[6]));
+        final Set<String> filters = parseFilters(getCachedString(parts[6]));
         if ( filters != null ) {
             builder.filters(new HashSet<>(filters));
         }
@@ -432,7 +591,7 @@ else if ( parts[2].equals(VCFConstants.EMPTY_ID_FIELD) )
             // update stop with the end key if provided
             try {
                 builder.stop(Integer.parseInt(attrs.get(VCFConstants.END_KEY).toString()));
-            } catch (Exception e) {
+            } catch (NumberFormatException e) {
                 generateException("the END value in the INFO field is not valid");
             }
         } else {
@@ -499,20 +658,64 @@ protected String getCachedString(String str) {
         return internedString;
     }
 
+    /**
+     * parse the filter string, first checking to see if we already have parsed it in a previous attempt
+     * @param filterString the string to parse
+     * @return an unmodifiable set of the filters applied (empty for PASS), or null if unfiltered
+     */
+    protected Set<String> parseFilters(final String filterString) {
+        // null for unfiltered
+        if ( filterString.equals(VCFConstants.UNFILTERED) )
+            return null;
+
+        // empty set for passes filters
+        if ( filterString.equals(VCFConstants.PASSES_FILTERS_v4) )
+            return Collections.emptySet();
+        if ( filterString.equals(VCFConstants.PASSES_FILTERS_v3) )
+            generateException(VCFConstants.PASSES_FILTERS_v3 + " is an invalid filter name in vcf4", lineNo);
+        if (filterString.isEmpty())
+            generateException("The VCF specification requires a valid filter status: filter was " + filterString, lineNo);
+
+        // do we have the filter string cached?
+        if ( filterHash.containsKey(filterString) )
+            return filterHash.get(filterString);
+
+        // otherwise we have to parse and cache the value
+        final Set<String> fFields = new HashSet<>();
+        if ( !filterString.contains(VCFConstants.FILTER_CODE_SEPARATOR) )
+            fFields.add(filterString);
+        else {
+            // Variant context uses a Set to store these, so duplicates have historically been
+            // dropped in previous versions. Delegate handling of warning for these to the
+            // specific codec subclass.
+            for (final String filter : filterString.split(VCFConstants.FILTER_CODE_SEPARATOR)) {
+                if (!fFields.add(filter)) {
+                    reportDuplicateFilterIDs(filter, lineNo);
+                }
+            }
+        }
+
+        // return the same unmodifiable view that is cached, so no caller can mutate the cached entry
+        final Set<String> unmodifiableFilters = Collections.unmodifiableSet(fFields);
+        filterHash.put(filterString, unmodifiableFilters);
+        return unmodifiableFilters;
+    }
+
     /**
      * parse out the info fields
      * @param infoField the fields
      * @return a mapping of keys to objects
      */
-    private Map<String, Object> parseInfo(String infoField) {
-        Map<String, Object> attributes = new HashMap<String, Object>();
+    protected Map<String, Object> parseInfo(String infoField) {
+        Map<String, Object> attributes = new HashMap<>();
 
         if ( infoField.isEmpty() )
             generateException("The VCF specification requires a valid (non-zero length) info field");
 
         if ( !infoField.equals(VCFConstants.EMPTY_INFO_FIELD) ) {
-            if ( infoField.indexOf('\t') != -1 || infoField.indexOf(' ') != -1 )
-                generateException("The VCF specification does not allow for whitespace in the INFO field. Offending field value was \"" + infoField + "\"");
+            if ( infoField.indexOf('\t') != -1 ) {
+                generateException("The VCF specification does not allow for tab characters in the INFO field. Offending field value was \"" + infoField + "\"");
+            }
 
             List<String> infoFields = ParsingUtils.split(infoField, VCFConstants.INFO_FIELD_SEPARATOR_CHAR);
             for (int i = 0; i < infoFields.size(); i++) {
@@ -540,8 +743,8 @@ private Map<String, Object> parseInfo(String infoField) {
                     key = infoFields.get(i);
                     final VCFInfoHeaderLine headerLine = header.getInfoHeaderLine(key);
                     if ( headerLine != null && headerLine.getType() != VCFHeaderLineType.Flag ) {
-                        if ( GeneralUtils.DEBUG_MODE_ENABLED && ! warnedAboutNoEqualsForNonFlag ) {
-                            System.err.println("Found info key " + key + " without a = value, but the header says the field is of type "
+                        if ( !warnedAboutNoEqualsForNonFlag ) { // warn only the first time this is seen
+                            logger.warn("Found info key " + key + " without a = value, but the header says the field is of type "
                                                + headerLine.getType() + " but this construct is only value for FLAG type fields");
                             warnedAboutNoEqualsForNonFlag = true;
                         }
@@ -555,6 +758,10 @@ private Map<String, Object> parseInfo(String infoField) {
                 // this line ensures that key/value pairs that look like key=; are parsed correctly as MISSING
                 if ( "".equals(value) ) value = VCFConstants.MISSING_VALUE_v4;
 
+                if (attributes.containsKey(key)) {
+                    reportDuplicateInfoKeyValue(key, infoField, lineNo);
+                }
+
                 attributes.put(key, value);
             }
         }
@@ -562,6 +769,23 @@ private Map<String, Object> parseInfo(String infoField) {
         return attributes;
     }
 
+    /**
+     * Handle reporting of duplicate filter IDs. This base implementation does nothing.
+     *
+     * @param duplicateFilterString the duplicate filter string
+     * @param lineNo line number of the offending line
+     */
+    protected void reportDuplicateFilterIDs(final String duplicateFilterString, final int lineNo) {}
+
+    /**
+     * Handle reporting of duplicate info line field values. This base implementation does nothing.
+     *
+     * @param duplicateKey the key name of the field that is duplicated
+     * @param infoField the entire info field line
+     * @param lineNo line number of the offending line
+     */
+    protected void reportDuplicateInfoKeyValue(final String duplicateKey, final String infoField, final int lineNo) { }
+
     /**
      * create a an allele from an index and an array of alleles
      * @param index the index
@@ -796,8 +1020,8 @@ public LazyGenotypesContext.LazyData createGenotypeMap(final String str,
                     } else if ( missing ) {
                         // if its truly missing (there no provided value) skip adding it to the attributes
                     } else if (gtKey.equals(VCFConstants.GENOTYPE_FILTER_KEY)) {
-                        final List<String> filters = parseFilters(getCachedString(genotypeValues.get(i)));
-                        if ( filters != null ) gb.filters(filters);
+                        final Set<String> filters = parseFilters(getCachedString(genotypeValues.get(i)));
+                        if ( filters != null ) gb.filters(new ArrayList<>(filters));
                     } else if ( genotypeValues.get(i).equals(VCFConstants.MISSING_VALUE_v4) ) {
                         // don't add missing values to the map
                     } else {
@@ -880,11 +1104,11 @@ public void setRemappedSampleName( final String remappedSampleName ) {
     }
 
     protected void generateException(String message) {
-        throw new TribbleException(String.format("The provided VCF file is malformed at approximately line number %d: %s", lineNo, message));
+        throw new TribbleException(String.format("Failure parsing VCF file at (approximately) line number %d: %s", lineNo, message));
     }
 
     protected static void generateException(String message, int lineNo) {
-        throw new TribbleException(String.format("The provided VCF file is malformed at approximately line number %d: %s", lineNo, message));
+        throw new TribbleException(String.format("Failure parsing VCF file at (approximately) line number %d: %s", lineNo, message));
     }
 
     @Override
diff --git a/src/main/java/htsjdk/variant/vcf/VCF3Codec.java b/src/main/java/htsjdk/variant/vcf/VCF3Codec.java
index e9ca3abdf7..3c19a7f051 100644
--- a/src/main/java/htsjdk/variant/vcf/VCF3Codec.java
+++ b/src/main/java/htsjdk/variant/vcf/VCF3Codec.java
@@ -25,12 +25,9 @@
 
 package htsjdk.variant.vcf;
 
-import htsjdk.tribble.TribbleException;
-import htsjdk.tribble.readers.LineIterator;
-
-import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.List;
+import java.util.HashSet;
+import java.util.Set;
 
 
 /**
@@ -53,45 +50,19 @@ public class VCF3Codec extends AbstractVCFCodec {
     public final static String VCF3_MAGIC_HEADER = "##fileformat=VCFv3";
 
     /**
-     * @param reader the line reader to take header lines from
-     * @return the number of header lines
+     * Return true if this codec can handle the target version (VCF v3.2 or v3.3)
+     * @param targetHeaderVersion the VCF header version to test
+     * @return true if this codec can handle this version
      */
     @Override
-    public Object readActualHeader(final LineIterator reader) {
-        final List<String> headerStrings = new ArrayList<String>();
-
-        VCFHeaderVersion version = null;
-        boolean foundHeaderVersion = false;
-        while (reader.hasNext()) {
-            lineNo++;
-            final String line = reader.peek();
-            if (line.startsWith(VCFHeader.METADATA_INDICATOR)) {
-                final String[] lineFields = line.substring(2).split("=");
-                if (lineFields.length == 2 && VCFHeaderVersion.isFormatString(lineFields[0]) ) {
-                    if ( !VCFHeaderVersion.isVersionString(lineFields[1]) )
-                        throw new TribbleException.InvalidHeader(lineFields[1] + " is not a supported version");
-                    foundHeaderVersion = true;
-                    version = VCFHeaderVersion.toHeaderVersion(lineFields[1]);
-                    if ( version != VCFHeaderVersion.VCF3_3 && version != VCFHeaderVersion.VCF3_2 )
-                        throw new TribbleException.InvalidHeader("This codec is strictly for VCFv3 and does not support " + lineFields[1]);
-                }
-                headerStrings.add(reader.next());
-            }
-            else if (line.startsWith(VCFHeader.HEADER_INDICATOR)) {
-                if (!foundHeaderVersion) {
-                    throw new TribbleException.InvalidHeader("We never saw a header line specifying VCF version");
-                }
-                headerStrings.add(reader.next());
-                return super.parseHeaderFromLines(headerStrings, version);
-            }
-            else {
-                throw new TribbleException.InvalidHeader("We never saw the required CHROM header line (starting with one #) for the input VCF file");
-            }
-
-        }
-        throw new TribbleException.InvalidHeader("We never saw the required CHROM header line (starting with one #) for the input VCF file");
+    public boolean canDecodeVersion(final VCFHeaderVersion targetHeaderVersion) {
+        return targetHeaderVersion == VCFHeaderVersion.VCF3_3 || targetHeaderVersion == VCFHeaderVersion.VCF3_2;
     }
 
+    @Override
+    public boolean canDecode(final String potentialInputFile) {
+        return canDecodeFile(potentialInputFile, VCF3_MAGIC_HEADER);
+    }
 
     /**
      * parse the filter string, first checking to see if we already have parsed it in a previous attempt
@@ -99,24 +70,24 @@ else if (line.startsWith(VCFHeader.HEADER_INDICATOR)) {
      * @return a set of the filters applied
      */
     @Override
-    protected List<String> parseFilters(String filterString) {
+    protected Set<String> parseFilters(String filterString) {
 
         // null for unfiltered
         if ( filterString.equals(VCFConstants.UNFILTERED) )
             return null;
 
         // empty set for passes filters
-        List<String> fFields = new ArrayList<String>();
+        HashSet<String> fFields = new HashSet<>();
 
         if ( filterString.equals(VCFConstants.PASSES_FILTERS_v3) )
-            return new ArrayList<String>(fFields);
+            return new HashSet<>(fFields);
 
         if (filterString.isEmpty())
             generateException("The VCF specification requires a valid filter status");
 
         // do we have the filter string cached?
         if ( filterHash.containsKey(filterString) )
-            return new ArrayList<String>(filterHash.get(filterString));
+            return new HashSet<>(filterHash.get(filterString));
 
         // otherwise we have to parse and cache the value
         if ( filterString.indexOf(VCFConstants.FILTER_CODE_SEPARATOR) == -1 )
@@ -130,7 +101,13 @@ protected List<String> parseFilters(String filterString) {
     }
 
     @Override
-    public boolean canDecode(final String potentialInput) {
-        return canDecodeFile(potentialInput, VCF3_MAGIC_HEADER);
+    protected void reportDuplicateFilterIDs(final String duplicateFilterString, final int lineNo) {
+        // no-op since this codec historically doesn't report duplicates
     }
+
+    @Override
+    protected void reportDuplicateInfoKeyValue(final String duplicateKey, final String infoField, final int lineNo) {
+        // no-op since this codec historically doesn't report duplicates
+    }
+
 }
diff --git a/src/main/java/htsjdk/variant/vcf/VCFAltHeaderLine.java b/src/main/java/htsjdk/variant/vcf/VCFAltHeaderLine.java
index 71c4850f07..37ac9874e9 100644
--- a/src/main/java/htsjdk/variant/vcf/VCFAltHeaderLine.java
+++ b/src/main/java/htsjdk/variant/vcf/VCFAltHeaderLine.java
@@ -1,5 +1,7 @@
 package htsjdk.variant.vcf;
 
+import htsjdk.samtools.util.Log;
+
 import java.util.*;
 
 /**
@@ -7,16 +9,46 @@
  */
 public class VCFAltHeaderLine extends VCFSimpleHeaderLine {
     private static final long serialVersionUID = 1L;
+    protected final static Log logger = Log.getInstance(VCFAltHeaderLine.class);
 
     private static List<String> expectedTags = Collections.unmodifiableList(
             new ArrayList<String>(2) {{
-                add(ID_ATTRIBUTE);
-                add(DESCRIPTION_ATTRIBUTE);
-            }}
+            add(ID_ATTRIBUTE);
+            add(DESCRIPTION_ATTRIBUTE);
+        }}
     );
 
     public VCFAltHeaderLine(final String line, final VCFHeaderVersion version) {
-        super(VCFConstants.ALT_HEADER_KEY, new VCF4Parser().parseLine(line, expectedTags));
+        // Honor the requested version to choose the parser, and let validateForVersion figure out
+        // whether that version is valid for this line (for example, if this is called with a pre-4.0 version)
+        super(VCFConstants.ALT_HEADER_KEY, VCFHeaderLineTranslator.parseLine(version, line, expectedTags));
+        validateForVersion(version);
     }
 
+    public VCFAltHeaderLine(final String id, final String description) {
+        super(VCFConstants.ALT_HEADER_KEY,
+            new LinkedHashMap<String, String>() {{
+                put(ID_ATTRIBUTE, id);
+                put(DESCRIPTION_ATTRIBUTE, description);
+            }}
+        );
+    }
+
+    @Override
+    public Optional<VCFValidationFailure<VCFHeaderLine>> getValidationFailure(final VCFHeaderVersion vcfTargetVersion) {
+        //TODO: Should we validate/constrain these to match the 4.3 spec constraints ?
+        // for v4.0 and later there is nothing to check here, so defer to the superclass
+        if (vcfTargetVersion.isAtLeastAsRecentAs(VCFHeaderVersion.VCF4_0)) {
+            return super.getValidationFailure(vcfTargetVersion);
+        }
+        final String message = String.format(
+                "%s header lines are not allowed in VCF version %s headers", getKey(), vcfTargetVersion);
+        final VCFValidationFailure<VCFHeaderLine> failure = new VCFValidationFailure<>(vcfTargetVersion, this, message);
+        if (VCFUtils.isStrictVCFVersionValidation()) {
+            return Optional.of(failure);
+        }
+        // not strict: only warn, then continue with the superclass validation
+        logger.warn(failure.getFailureMessage());
+        return super.getValidationFailure(vcfTargetVersion);
+    }
 }
diff --git a/src/main/java/htsjdk/variant/vcf/VCFCodec.java b/src/main/java/htsjdk/variant/vcf/VCFCodec.java
index 42f07150d1..3ebf47c02a 100644
--- a/src/main/java/htsjdk/variant/vcf/VCFCodec.java
+++ b/src/main/java/htsjdk/variant/vcf/VCFCodec.java
@@ -1,6 +1,6 @@
 /*
 * Copyright (c) 2012 The Broad Institute
-* 
+*
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
@@ -25,17 +25,10 @@
 
 package htsjdk.variant.vcf;
 
-import htsjdk.tribble.TribbleException;
-import htsjdk.tribble.readers.LineIterator;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.LinkedList;
-import java.util.List;
+import java.util.*;
 
 /**
- * A feature codec for the VCF 4 specification
+ * A feature codec for the VCF 4.0, 4.1, 4.2, and 4.3 specification versions
  *
  * <p>
  * VCF is a text file format (most likely stored in a compressed manner). It contains meta-information lines, a
@@ -45,7 +38,7 @@
  * of related samples. Recently the format for storing next-generation read alignments has been
  * standardised by the SAM/BAM file format specification. This has significantly improved the
  * interoperability of next-generation tools for alignment, visualisation, and variant calling.
- * We propose the Variant Call Format (VCF) as a standarised format for storing the most prevalent
+ * We propose the Variant Call Format (VCF) as a standardised format for storing the most prevalent
  * types of sequence variation, including SNPs, indels and larger structural variants, together
  * with rich annotations. VCF is usually stored in a compressed manner and can be indexed for
  * fast data retrieval of variants from a range of positions on the reference genome.
@@ -72,91 +65,55 @@
  * @since 2010
  */
 public class VCFCodec extends AbstractVCFCodec {
-    // Our aim is to read in the records and convert to VariantContext as quickly as possible, relying on VariantContext to do the validation of any contradictory (or malformed) record parameters.
+    // Our aim is to read in the records and convert to VariantContext as quickly as possible, relying
+    // on VariantContext to do the validation of any contradictory (or malformed) record parameters.
     public final static String VCF4_MAGIC_HEADER = "##fileformat=VCFv4";
 
     /**
-     * Reads all of the header from the provided iterator, but no reads no further.
-     * @param lineIterator the line reader to take header lines from
-     * @return The parsed header
+     * Return true if this codec can handle the target version (VCF v4.0, v4.1, v4.2 or v4.3)
+     * @param targetHeaderVersion the VCF header version to test
+     * @return true if this codec can handle this version
      */
     @Override
-    public Object readActualHeader(final LineIterator lineIterator) {
-        final List<String> headerStrings = new ArrayList<String>();
-
-        String line;
-        boolean foundHeaderVersion = false;
-        while (lineIterator.hasNext()) {
-            line = lineIterator.peek();
-            lineNo++;
-            if (line.startsWith(VCFHeader.METADATA_INDICATOR)) {
-                final String[] lineFields = line.substring(2).split("=");
-                if (lineFields.length == 2 && VCFHeaderVersion.isFormatString(lineFields[0]) ) {
-                    if ( !VCFHeaderVersion.isVersionString(lineFields[1]) )
-                        throw new TribbleException.InvalidHeader(lineFields[1] + " is not a supported version");
-                    foundHeaderVersion = true;
-                    version = VCFHeaderVersion.toHeaderVersion(lineFields[1]);
-                    if ( ! version.isAtLeastAsRecentAs(VCFHeaderVersion.VCF4_0) )
-                        throw new TribbleException.InvalidHeader("This codec is strictly for VCFv4; please use the VCF3 codec for " + lineFields[1]);
-                    if ( version != VCFHeaderVersion.VCF4_0 && version != VCFHeaderVersion.VCF4_1 && version != VCFHeaderVersion.VCF4_2 && version != VCFHeaderVersion.VCF4_3)
-                        throw new TribbleException.InvalidHeader("This codec is strictly for VCFv4 and does not support " + lineFields[1]);
-                }
-                headerStrings.add(lineIterator.next());
-            }
-            else if (line.startsWith(VCFHeader.HEADER_INDICATOR)) {
-                if (!foundHeaderVersion) {
-                    throw new TribbleException.InvalidHeader("We never saw a header line specifying VCF version");
-                }
-                headerStrings.add(lineIterator.next());
-                super.parseHeaderFromLines(headerStrings, version);
-                return this.header;
-            }
-            else {
-                throw new TribbleException.InvalidHeader("We never saw the required CHROM header line (starting with one #) for the input VCF file");
-            }
-
-        }
-        throw new TribbleException.InvalidHeader("We never saw the required CHROM header line (starting with one #) for the input VCF file");
+    public boolean canDecodeVersion(final VCFHeaderVersion targetHeaderVersion) {
+        return targetHeaderVersion == VCFHeaderVersion.VCF4_0 ||
+                targetHeaderVersion == VCFHeaderVersion.VCF4_1 ||
+                targetHeaderVersion == VCFHeaderVersion.VCF4_2 ||
+                targetHeaderVersion == VCFHeaderVersion.VCF4_3;
     }
 
-    /**
-     * parse the filter string, first checking to see if we already have parsed it in a previous attempt
-     *
-     * @param filterString the string to parse
-     * @return a set of the filters applied or null if filters were not applied to the record (e.g. as per the missing value in a VCF)
-     */
     @Override
-    protected List<String> parseFilters(final String filterString) {
-        // null for unfiltered
-        if ( filterString.equals(VCFConstants.UNFILTERED) )
-            return null;
-
-        if ( filterString.equals(VCFConstants.PASSES_FILTERS_v4) )
-            return Collections.emptyList();
-        if ( filterString.equals(VCFConstants.PASSES_FILTERS_v3) )
-            generateException(VCFConstants.PASSES_FILTERS_v3 + " is an invalid filter name in vcf4", lineNo);
-        if (filterString.isEmpty())
-            generateException("The VCF specification requires a valid filter status: filter was " + filterString, lineNo);
-
-        // do we have the filter string cached?
-        if ( filterHash.containsKey(filterString) )
-            return filterHash.get(filterString);
-
-        // empty set for passes filters
-        final List<String> fFields = new LinkedList<String>();
-        // otherwise we have to parse and cache the value
-        if ( !filterString.contains(VCFConstants.FILTER_CODE_SEPARATOR) )
-            fFields.add(filterString);
-        else
-            fFields.addAll(Arrays.asList(filterString.split(VCFConstants.FILTER_CODE_SEPARATOR)));
-
-        filterHash.put(filterString, Collections.unmodifiableList(fFields));
+    public boolean canDecode(final String potentialInput) {
+        return canDecodeFile(potentialInput, VCF4_MAGIC_HEADER);
+    }
 
-        return fFields;
+    @Override
+    protected void reportDuplicateFilterIDs(final String duplicateFilterString, final int lineNo) {
+        // older versions of htsjdk have been silently dropping these for a while, but we can at least warn
+        logger.warn(String.format("Duplicate filter %s found on line %d", duplicateFilterString, lineNo));
     }
 
     @Override
-    public boolean canDecode(final String potentialInput) {
-        return canDecodeFile(potentialInput, VCF4_MAGIC_HEADER);
+    protected void reportDuplicateInfoKeyValue(final String duplicateKey, final String infoField, final int lineNo) {
+        logger.warn(String.format("Duplicate key %s found in %s on line %d", duplicateKey, infoField, lineNo));
     }
+
+    /**
+     * parse out the info fields, rejecting (via generateException) any info field that contains a space
+     * @param infoField the fields
+     * @return a mapping of keys to objects
+     */
+    @Override
+    protected Map<String, Object> parseInfo(String infoField) {
+        // spaces are checked here, before the remaining parsing is delegated to the superclass
+        if (infoField.indexOf(' ') != -1) {
+            generateException(
+                    String.format("Whitespace is not allowed in the INFO field in VCF version %s: %s",
+                            version == null ? "unknown" : version.getVersionString(),
+                            infoField)
+            );
+        }
+        return super.parseInfo(infoField);
+    }
+
 }
diff --git a/src/main/java/htsjdk/variant/vcf/VCFCompoundHeaderLine.java b/src/main/java/htsjdk/variant/vcf/VCFCompoundHeaderLine.java
index f955a434e1..60eb4fc90f 100644
--- a/src/main/java/htsjdk/variant/vcf/VCFCompoundHeaderLine.java
+++ b/src/main/java/htsjdk/variant/vcf/VCFCompoundHeaderLine.java
@@ -25,60 +25,185 @@
 
 package htsjdk.variant.vcf;
 
+import htsjdk.samtools.util.Log;
 import htsjdk.tribble.TribbleException;
-import htsjdk.variant.utils.GeneralUtils;
+import htsjdk.utils.ValidationUtils;
 import htsjdk.variant.variantcontext.GenotypeLikelihoods;
 import htsjdk.variant.variantcontext.VariantContext;
 
+import java.util.Optional;
+import java.util.function.BiFunction;
+import java.util.regex.Pattern;
+
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collections;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 
 /**
- * a base class for compound header lines, which include info lines and format lines (so far)
+ * Abstract base class for compound header lines, which include INFO lines and FORMAT lines.
+ *
+ * Compound header lines are distinguished only in that they are required to have TYPE and NUMBER attributes
+ * (VCFHeaderLineCount, a VCFHeaderLineType, and a count).
  */
-public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCFIDHeaderLine {
+public abstract class VCFCompoundHeaderLine extends VCFSimpleHeaderLine {
+    private static final long serialVersionUID = 1L;
+    protected static final Log logger = Log.getInstance(VCFCompoundHeaderLine.class);
+
+    // regex pattern corresponding to legal info/format field keys
+    protected static final Pattern VALID_HEADER_ID_PATTERN = Pattern.compile("^[A-Za-z_][0-9A-Za-z_.]*$");
+    protected static final String UNBOUND_DESCRIPTION = "Not provided in original VCF header";
+
+    protected static final String NUMBER_ATTRIBUTE = "Number";
+    protected static final String TYPE_ATTRIBUTE = "Type";
+
+    // List of expected tags that have a predefined order (used by the parser to verify order only). The
+    // header line class itself should verify that all required tags are present.
+    protected static final List<String> expectedTagOrder = Collections.unmodifiableList(
+            new ArrayList<String>(4) {{
+            add(ID_ATTRIBUTE);
+            add(NUMBER_ATTRIBUTE);
+            add(TYPE_ATTRIBUTE);
+            add(DESCRIPTION_ATTRIBUTE);
+        }}
+    );
+
+    // immutable, cached binary representations of compound header line attributes
+    private final VCFHeaderLineType type;
+    private final VCFHeaderLineCount countType;
+    private final int count;
 
-    public enum SupportedHeaderLineType {
-        INFO(true), FORMAT(false);
+    /**
+     * create a VCF compound header line with count type = VCFHeaderLineCount.INTEGER
+     *
+     * @param key          the key (header line type) for this header line
+     * @param headerLineID the id for this header line
+     * @param count        the count for this header line, sets countType type as VCFHeaderLineCount.INTEGER
+     * @param type         the type for this header line
+     * @param description  the description for this header line
+     */
+    protected VCFCompoundHeaderLine(
+            final String key,
+            final String headerLineID,
+            final int count,
+            final VCFHeaderLineType type,
+            final String description)
+    {
+        this(key, createAttributeMap(headerLineID, VCFHeaderLineCount.INTEGER, count, type, description), VCFHeader.DEFAULT_VCF_VERSION);
+    }
 
-        public final boolean allowFlagValues;
-        SupportedHeaderLineType(boolean flagValues) {
-            allowFlagValues = flagValues;
-        }
+    /**
+     * create a VCF compound header line
+     *
+     * @param key          the key (header line type) for this header line
+     * @param headerLineID the id for this header line
+     * @param countType    the count type for this header line
+     * @param type         the type for this header line
+     * @param description  the description for this header line
+     */
+    protected VCFCompoundHeaderLine(
+            final String key,
+            final String headerLineID,
+            final VCFHeaderLineCount countType,
+            final VCFHeaderLineType type,
+            final String description) {
+        this(key, createAttributeMap(headerLineID, countType, VCFHeaderLineCount.VARIABLE_COUNT, type, description), VCFHeader.DEFAULT_VCF_VERSION);
     }
 
-    // the field types
-    private String name;
-    private int count = -1;
-    private VCFHeaderLineCount countType;
-    private String description;
-    private VCFHeaderLineType type;
-    private String source;
-    private String version;
+    /**
+     * create a VCF compound header line from an attribute map
+     *
+     * @param key       the key (header line type) for this header line
+     * @param mapping   the header line attribute map
+     * @param vcfVersion   the VCF header version used to decode the count type and validate the line; must not be null
+     */
+    protected VCFCompoundHeaderLine(final String key, final Map<String, String> mapping, final VCFHeaderVersion vcfVersion) {
+        super(key, mapping);
+        ValidationUtils.nonNull(vcfVersion);
+        // decode and cache the Type and Number attribute values, then validate against the given version
+        this.type = decodeLineType(getGenericFieldValue(TYPE_ATTRIBUTE));
+        final String countString = getGenericFieldValue(NUMBER_ATTRIBUTE);
+        this.countType = decodeCountType(countString, vcfVersion);
+        this.count = decodeCount(countString, this.countType);
+        validateForVersion(vcfVersion);
+    }
+
+    /**
+     * Get the description for this header line.
+     *
+     * @return the value of the Description attribute, or UNBOUND_DESCRIPTION if the attribute
+     * value is null
+     */
+    public String getDescription() {
+        return Optional.ofNullable(getGenericFieldValue(DESCRIPTION_ATTRIBUTE))
+                .orElse(UNBOUND_DESCRIPTION);
+    }
 
-    // access methods
-    @Override
-    public String getID() { return name; }
-    public String getDescription() { return description; }
     public VCFHeaderLineType getType() { return type; }
+
     public VCFHeaderLineCount getCountType() { return countType; }
-    public boolean isFixedCount() { return countType == VCFHeaderLineCount.INTEGER; }
+
+    /**
+     * @return true if this header line has a fixed integer count type ({@link #getCountType()}
+     * equals {@link VCFHeaderLineCount#INTEGER})
+     */
+    public boolean isFixedCount() { return countType.isFixedCount(); }
+
+    /**
+     * @return the integer count for this header line if the header has a fixed integer
+     * count type ({@link #isFixedCount()} is true). A TribbleException is thrown if the
+     * header line does not have a fixed integer count type ({@link #getCountType()} equals
+     * {@link VCFHeaderLineCount#INTEGER}).
+     *
+     * @throws TribbleException if the {@link VCFHeaderLineCount} is not a fixed integer
+     */
     public int getCount() {
-        if (!isFixedCount())
-            throw new TribbleException("Asking for header line count when type is not an integer");
+        if (!isFixedCount()) {
+            throw new TribbleException("Header line count request when count type is not an integer");
+        }
         return count;
     }
 
     public String getSource() {
-        return source;
+        return getGenericFieldValue(SOURCE_ATTRIBUTE);
     }
 
     public String getVersion() {
-        return version;
+        return getGenericFieldValue(VERSION_ATTRIBUTE);
+    }
+
+    @Override
+    public Optional<VCFValidationFailure<VCFHeaderLine>> getValidationFailure(final VCFHeaderVersion vcfTargetVersion) {
+        // The VCF 4.3 spec states this restriction in terms of the INFO/FORMAT fixed field key values
+        // (c.f. section 1.6.1) rather than the ID value of INFO/FORMAT header lines. However, those key
+        // values correspond to the INFO/FORMAT header lines defining the attribute and its type, so the
+        // ID is validated here
+        final boolean idViolatesV43Rules =
+                vcfTargetVersion.isAtLeastAsRecentAs(VCFHeaderVersion.VCF4_3) && !validHeaderID(getID());
+        if (idViolatesV43Rules) {
+            final String failureMessage =
+                    String.format("ID tag \"%s\" does not conform to tag restrictions", getID());
+            final VCFValidationFailure<VCFHeaderLine> idFailure =
+                    new VCFValidationFailure<>(vcfTargetVersion, this, failureMessage);
+            if (VCFUtils.isStrictVCFVersionValidation()) {
+                return Optional.of(idFailure);
+            }
+            // strict validation is off, so only warn and carry on with the superclass checks;
+            // this line can't be used as a v4.3 line
+            logger.warn(idFailure.getFailureMessage());
+        }
+
+        return super.getValidationFailure(vcfTargetVersion);
+    }
+
+    /**
+     * @param id the candidate ID
+     * @return true if ID conforms to header line id requirements, otherwise false
+     */
+    //TODO: the existing VCFHeaderLine.validateKeyOrID method should be refactored so it can be used instead of this
+    protected boolean validHeaderID(final String id) {
+        return VALID_HEADER_ID_PATTERN.matcher(id).matches();
     }
 
     /**
@@ -113,278 +238,209 @@ public int getCount(final VariantContext vc) {
         }
     }
 
-    public void setNumberToUnbounded() {
-        countType = VCFHeaderLineCount.UNBOUNDED;
-        count = -1;
-    }
-
-    // our type of line, i.e. format, info, etc
-    private final SupportedHeaderLineType lineType;
-
     /**
-     * create a VCF format header line
+     * Specify annotation source
+     * <p>
+     * This value is optional starting with VCFv4.2.
      *
-     * @param name         the name for this header line
-     * @param count        the count for this header line
-     * @param type         the type for this header line
-     * @param description  the description for this header line
-     * @param lineType     the header line type
+     * @param source  annotation source (case-insensitive, e.g. "dbsnp")
      */
-    protected VCFCompoundHeaderLine(String name, int count, VCFHeaderLineType type, String description, SupportedHeaderLineType lineType) {
-        this(name, count, type, description, lineType, null, null);
+    @Deprecated // after 2.24.1
+    public void setSource(final String source) {
+        updateGenericField(SOURCE_ATTRIBUTE, source);
     }
 
     /**
-     * create a VCF format header line
+     * Specify annotation version
+     * <p>
+     * This value is optional starting with VCFv4.2.
      *
-     * @param name         the name for this header line
-     * @param count        the count type for this header line
-     * @param type         the type for this header line
-     * @param description  the description for this header line
-     * @param lineType     the header line type
+     * @param version exact version (e.g. "138")
      */
-    protected VCFCompoundHeaderLine(String name, VCFHeaderLineCount count, VCFHeaderLineType type, String description, SupportedHeaderLineType lineType) {
-        this(name, count, type, description, lineType, null, null);
+    @Deprecated // after version 2.24.1
+    public void setVersion(final String version) {
+        updateGenericField(VERSION_ATTRIBUTE, version);
     }
 
-    /**
-     * create a VCF format header line
-     *
-     * @param name         the name for this header line
-     * @param count        the count for this header line
-     * @param type         the type for this header line
-     * @param description  the description for this header line
-     * @param lineType     the header line type
-     * @param source       annotation source (case-insensitive, e.g. "dbsnp")
-     * @param version      exact version (e.g. "138")
-     */
-    protected VCFCompoundHeaderLine(String name, int count, VCFHeaderLineType type, String description, SupportedHeaderLineType lineType, String source, String version) {
-        super(lineType.toString(), "");
-        this.name = name;
-        this.countType = VCFHeaderLineCount.INTEGER;
-        this.count = count;
-        this.type = type;
-        this.description = description;
-        this.lineType = lineType;
-        this.source = source;
-        this.version = version;
-        validate();
-    }
+    @Override
+    public boolean equals(final Object o) {
+        if (this == o) return true;
+        if (!(o instanceof VCFCompoundHeaderLine)) return false;
+        if (!super.equals(o)) return false;
 
-    /**
-     * create a VCF format header line
-     *
-     * @param name         the name for this header line
-     * @param count        the count type for this header line
-     * @param type         the type for this header line
-     * @param description  the description for this header line
-     * @param lineType     the header line type
-     * @param source       annotation source (case-insensitive, e.g. "dbsnp")
-     * @param version      exact version (e.g. "138")
-     */
-    protected VCFCompoundHeaderLine(String name, VCFHeaderLineCount count, VCFHeaderLineType type, String description, SupportedHeaderLineType lineType, String source, String version) {
-        super(lineType.toString(), "");
-        this.name = name;
-        this.countType = count;
-        this.type = type;
-        this.description = description;
-        this.lineType = lineType;
-        this.source = source;
-        this.version = version;
-        validate();
+        final VCFCompoundHeaderLine that = (VCFCompoundHeaderLine) o;
+
+        if (count != that.count) return false;
+        if (type != that.type) return false;
+        return countType == that.countType;
     }
 
-    /**
-     * create a VCF format header line
-     *
-     * @param line   the header line
-     * @param version      the VCF header version
-     * @param lineType     the header line type
-     *
-     */
-    protected VCFCompoundHeaderLine(String line, VCFHeaderVersion version, SupportedHeaderLineType lineType) {
-        super(lineType.toString(), "");
+    @Override
+    public int hashCode() {
+        // fold in the fields that equals() compares in addition to the superclass state: type, count type, count
+        int hash = 31 * super.hashCode() + type.hashCode();
+        hash = 31 * hash + countType.hashCode();
+        hash = 31 * hash + count;
+        return hash;
+    }
 
-        final ArrayList<String> expectedTags = new ArrayList(Arrays.asList("ID", "Number", "Type", "Description"));
-        final List<String> recommendedTags;
-        if (version.isAtLeastAsRecentAs(VCFHeaderVersion.VCF4_2)) {
-            recommendedTags = Arrays.asList("Source", "Version");
-        } else {
-            recommendedTags = Collections.emptyList();
-        }
-        final Map<String, String> mapping = VCFHeaderLineTranslator.parseLine(version, line, expectedTags, recommendedTags);
-        name = mapping.get("ID");
-        count = -1;
-        final String numberStr = mapping.get("Number");
-        if (numberStr.equals(VCFConstants.PER_ALTERNATE_COUNT)) {
-            countType = VCFHeaderLineCount.A;
-        } else if (numberStr.equals(VCFConstants.PER_ALLELE_COUNT)) {
-            countType = VCFHeaderLineCount.R;
-        } else if (numberStr.equals(VCFConstants.PER_GENOTYPE_COUNT)) {
-            countType = VCFHeaderLineCount.G;
-        } else if ((version.isAtLeastAsRecentAs(VCFHeaderVersion.VCF4_0) && numberStr.equals(VCFConstants.UNBOUNDED_ENCODING_v4)) ||
-                   (!version.isAtLeastAsRecentAs(VCFHeaderVersion.VCF4_0) && numberStr.equals(VCFConstants.UNBOUNDED_ENCODING_v3))) {
-            countType = VCFHeaderLineCount.UNBOUNDED;
+    private VCFHeaderLineType decodeLineType(final String lineTypeString) {
+        if (lineTypeString == null) {
+            throw new TribbleException(String.format("A line type attribute is required for %s header lines", getKey()));
         } else {
-            countType = VCFHeaderLineCount.INTEGER;
-            count = Integer.parseInt(numberStr);
-
-        }
-
-        if (count < 0 && countType == VCFHeaderLineCount.INTEGER)
-            throw new TribbleException.InvalidHeader("Count < 0 for fixed size VCF header field " + name);
-
-        try {
-            type = VCFHeaderLineType.valueOf(mapping.get("Type"));
-        } catch (Exception e) {
-            throw new TribbleException(mapping.get("Type") + " is not a valid type in the VCF specification (note that types are case-sensitive)");
+            try {
+                return VCFHeaderLineType.valueOf(lineTypeString);
+            } catch (IllegalArgumentException e) {
+                throw new TribbleException(String.format(
+                        "\"%s\" is not a valid type for %s header lines (note that types are case-sensitive)",
+                        lineTypeString,
+                        getKey()));
+            }
         }
-        if (type == VCFHeaderLineType.Flag && !allowFlagValues())
-            throw new IllegalArgumentException("Flag is an unsupported type for this kind of field at line - " + line);
-
-        description = mapping.get("Description");
-        if (description == null && ALLOW_UNBOUND_DESCRIPTIONS) // handle the case where there's no description provided
-            description = UNBOUND_DESCRIPTION;
-
-        this.lineType = lineType;
+    }
 
-        if (version.isAtLeastAsRecentAs(VCFHeaderVersion.VCF4_2)) {
-            this.source = mapping.get("Source");
-            this.version = mapping.get("Version");
+    private VCFHeaderLineCount decodeCountType(final String countString, final VCFHeaderVersion vcfVersion) {
+        if (countString == null) {
+            throw new TribbleException.InvalidHeader(
+                    String.format("A count type/value must be provided for %s header lines.", getID()));
         }
-
-        validate();
+        return VCFHeaderLineCount.decode(vcfVersion, countString);
     }
 
-    private void validate() {
-        if (type != VCFHeaderLineType.Flag && countType == VCFHeaderLineCount.INTEGER && count <= 0)
-            throw new IllegalArgumentException(String.format("Invalid count number, with fixed count the number should be 1 or higher: key=%s name=%s type=%s desc=%s lineType=%s count=%s",
-                getKey(), name, type, description, lineType, count));
-        if (name == null || type == null || description == null || lineType == null)
-            throw new IllegalArgumentException(String.format("Invalid VCFCompoundHeaderLine: key=%s name=%s type=%s desc=%s lineType=%s",
-                getKey(), name, type, description, lineType));
-        if (name.contains("<") || name.contains(">"))
-            throw new IllegalArgumentException("VCFHeaderLine: ID cannot contain angle brackets");
-        if (name.contains("="))
-            throw new IllegalArgumentException("VCFHeaderLine: ID cannot contain an equals sign");
-
-        if (type == VCFHeaderLineType.Flag && count != 0) {
-            count = 0;
-            if (GeneralUtils.DEBUG_MODE_ENABLED) {
-                System.err.println("FLAG fields must have a count value of 0, but saw " + count + " for header line " + getID() + ". Changing it to 0 inside the code");
+    // Decode the count for a fixed-count line, validating it; returns VARIABLE_COUNT for any other count type.
+    private int decodeCount(final String countString, final VCFHeaderLineCount requestedCountType) {
+        int lineCount = VCFHeaderLineCount.VARIABLE_COUNT;
+        if (requestedCountType.isFixedCount()) {
+            if (countString == null) {
+                throw new TribbleException.InvalidHeader(String.format("Missing count value in VCF header field %s", getID()));
+            }
+            try {
+                lineCount = Integer.parseInt(countString);
+            } catch (NumberFormatException e) {
+                throw new TribbleException.InvalidHeader(String.format("Invalid count value %s in VCF header field %s", countString, getID()));
+            }
+            if (getType() == VCFHeaderLineType.Flag) {
+                if (lineCount != 0) {
+                    // This check is here on behalf of INFO lines (which are the only header line type allowed to have Flag
+                    // type). A Flag type with a count value other than 0 violates the spec (at least v4.2 and v4.3), but
+                    // to retain backward compatibility with previous implementations, we accept (and repair) the line here.
+                    // Warn before resetting lineCount, so that the message reports the count that was actually seen.
+                    logger.warn(String.format("FLAG fields must have a count value of 0, but saw count %d for header line %s. A value of 0 will be used",
+                            lineCount, getID()));
+                    updateGenericField(NUMBER_ATTRIBUTE, "0");
+                    lineCount = 0;
+                }
+            } else if (lineCount <= 0) {
+                throw new TribbleException.InvalidHeader(
+                        String.format("Invalid count number %d for fixed count in header line with ID %s. For fixed count, the count number must be 1 or higher.",
+                                lineCount, getID()));
             }
         }
+        return lineCount;
     }
 
-    /**
-     * make a string representation of this header line
-     * @return a string representation
-     */
-    @Override
-    protected String toStringEncoding() {
-        Map<String, Object> map = new LinkedHashMap<String, Object>();
-        map.put("ID", name);
-        Object number;
-        switch (countType) {
-            case A:
-                number = VCFConstants.PER_ALTERNATE_COUNT;
-                break;
-            case R:
-                number = VCFConstants.PER_ALLELE_COUNT;
-                break;
-            case G:
-                number = VCFConstants.PER_GENOTYPE_COUNT;
-                break;
-            case UNBOUNDED:
-                number = VCFConstants.UNBOUNDED_ENCODING_v4;
-                break;
-            case INTEGER:
-            default:
-                number = count;
-        }
-        map.put("Number", number);
-        map.put("Type", type);
-        map.put("Description", description);
-        if (source != null) {
-            map.put("Source", source);
-        }
-        if (version != null) {
-            map.put("Version", version);
-        }
-        return lineType.toString() + "=" + VCFHeaderLine.toStringEncoding(map);
+    // Create a backing attribute map out of VCFCompoundHeaderLine elements
+    private static LinkedHashMap<String, String> createAttributeMap(
+            final String headerLineID,
+            final VCFHeaderLineCount countType,
+            final int count,
+            final VCFHeaderLineType type,
+            final String description) {
+        // A plain LinkedHashMap keeps the insertion order (ID, Number, Type, Description) without creating
+        // the anonymous LinkedHashMap subclass that double-brace initialization would introduce.
+        final LinkedHashMap<String, String> attributes = new LinkedHashMap<>();
+        attributes.put(ID_ATTRIBUTE, headerLineID);
+        attributes.put(NUMBER_ATTRIBUTE, countType.encode(count));
+        attributes.put(TYPE_ATTRIBUTE, type.encode());
+        // Handle the case where there's no description provided, ALLOW_UNBOUND_DESCRIPTIONS is the default
+        // note: if no description was provided, don't cache it, which means we don't round trip it
+        if (description != null) {
+            attributes.put(DESCRIPTION_ATTRIBUTE, description);
+        }
+        return attributes;
     }
 
     /**
-     * returns true if we're equal to another compound header line
-     * @param o a compound header line
-     * @return true if equal
+     * Compare two VCFCompoundHeaderLine (FORMAT or INFO) lines to determine if they have compatible number types,
+     * and return a VCFCompoundHeaderLine that can be used to represent the result of merging these lines. In the
+     * case where the merged line requires "promoting" one of the types to the other, a new line of the appropriate
+     * type is created by calling the {@code compoundHeaderLineResolver} to produce a new line of the correct
+     * subclass (INFO or FORMAT).
+     *
+     * @param line1 first line to merge
+     * @param line2 second line to merge
+     * @param conflictWarner conflict warning manager
+     * @param compoundHeaderLineResolver function that accepts two compound header lines of the same type (info or
+     *                                   format), and returns a new header line representing the combination of the
+     *                                   two input header lines
+     * @param <T> type of VCFCompoundHeaderLine to merge (subclass of VCFCompoundHeaderLine)
+     * @return the merged line if one can be created
      */
-    @Override
-    public boolean equals(final Object o) {
-        if ( this == o ) {
-            return true;
+    static <T extends VCFCompoundHeaderLine> T getMergedCompoundHeaderLine(
+            final T line1,
+            final T line2,
+            final VCFHeaderMerger.HeaderMergeConflictWarnings conflictWarner,
+            BiFunction<T, T, T> compoundHeaderLineResolver)
+    {
+        ValidationUtils.nonNull(line1);
+        ValidationUtils.nonNull(line2);
+        ValidationUtils.validateArg(line1.getKey().equals(line2.getKey()) && line1.getID().equals(line2.getID()),
+                "header lines must have the same type to merge");
+        T mergedLine = line1;
+
+        if (!line1.equalsExcludingExtraAttributes(line2)) {
+            if (getCompoundLineDifferenceScore(line1, line2) > 1) {
+                // merge lines if they have zero or one mergeable differences, but if there are multiple
+                // differences, call the headers incompatible and bail, since we need to choose one line
+                // or the other as the merge line (we can't do generic field-level resolution)
+                throw new TribbleException(
+                        String.format("Incompatible header merge, can't merge lines with multiple attribute differences %s/%s.",
+                                line1, line2));
+            }
+            if (line1.getType().equals(line2.getType())) {
+                // The lines have a common type.
+                // The Number entry is an Integer that describes the number of values that can be
+                // included with the INFO field. For example, if the INFO field contains a single
+                // number, then this value should be 1. However, if the INFO field describes a pair
+                // of numbers, then this value should be 2 and so on. If the number of possible
+                // values varies, is unknown, or is unbounded, then this value should be '.'.
+                conflictWarner.warn("Promoting header field Number to . due to number differences in header lines: " + line1 + " " + line2);
+                mergedLine = compoundHeaderLineResolver.apply(line1, line2);
+            } else if (line1.getType() == VCFHeaderLineType.Integer && line2.getType() == VCFHeaderLineType.Float) {
+                // promote key to Float
+                conflictWarner.warn("Promoting Integer to Float in header: " + line2);
+                mergedLine = line2;
+            } else if (line1.getType() == VCFHeaderLineType.Float && line2.getType() == VCFHeaderLineType.Integer) {
+                // promote key to Float
+                conflictWarner.warn("Promoting Integer to Float in header: " + line2);
+            } else {
+                throw new IllegalStateException("Attempt to merge incompatible headers, can't merge these lines: " + line1 + " " + line2);
+            }
         }
-        if ( o == null || getClass() != o.getClass() || ! super.equals(o) ) {
-            return false;
+        if (!line1.getDescription().equals(line2.getDescription())) {
+            conflictWarner.warn("Allowing unequal description fields through: keeping " + line2 + " excluding " + line1);
         }
 
-        final VCFCompoundHeaderLine that = (VCFCompoundHeaderLine) o;
-        return equalsExcludingDescription(that) &&
-               description.equals(that.description);
-    }
-
-    @Override
-    public int hashCode() {
-        int result = super.hashCode();
-        result = 31 * result + name.hashCode();
-        result = 31 * result + count;
-        result = 31 * result + (countType != null ? countType.hashCode() : 0);  // only nullable field according to validate()
-        result = 31 * result + description.hashCode();
-        result = 31 * result + type.hashCode();
-        result = 31 * result + lineType.hashCode();
-        result = 31 * result + (source != null ? source.hashCode() : 0);
-        result = 31 * result + (version != null ? version.hashCode() : 0);
-        return result;
+        return mergedLine;
     }
 
-    public boolean equalsExcludingDescription(VCFCompoundHeaderLine other) {
+    boolean equalsExcludingExtraAttributes(final VCFCompoundHeaderLine other) {
         return count == other.count &&
                 countType == other.countType &&
                 type == other.type &&
-                lineType == other.lineType &&
-                name.equals(other.name);
-    }
-
-    public boolean sameLineTypeAndName(VCFCompoundHeaderLine other) {
-        return lineType == other.lineType &&
-                name.equals(other.name);
+                getKey().equals(other.getKey()) &&
+                getID().equals(other.getID());
     }
 
-    /**
-     * do we allow flag (boolean) values? (i.e. booleans where you don't have specify the value, AQ means AQ=true)
-     * @return true if we do, false otherwise
-     */
-    abstract boolean allowFlagValues();
-
-    /**
-     * Specify annotation source
-     * <p>
-     * This value is optional starting with VCFv4.2. 
-     * 
-     * @param source  annotation source (case-insensitive, e.g. "dbsnp")
-     */
-    public void setSource(final String source) {
-        this.source = source;
-    }
-
-    /**
-     * Specify annotation version
-     * <p>
-     * This value is optional starting with VCFv4.2. 
-     *
-     * @param version exact version (e.g. "138")
-     */
-    public void setVersion(final String version) {
-        this.version = version;
+    // Score the differences between two lines: one point each for data type, count type, and fixed count value.
+    private static <T extends VCFCompoundHeaderLine> int getCompoundLineDifferenceScore(final T line1, final T line2) {
+        final boolean sameCountType = line1.getCountType().equals(line2.getCountType());
+        // getCount is only valid if the getCountType==Integer
+        final boolean bothFixedCounts = sameCountType && line1.getCountType().equals(VCFHeaderLineCount.INTEGER);
+        int differenceScore = 0;
+        if (!line1.getType().equals(line2.getType())) { differenceScore++; } // data type
+        if (!sameCountType) { differenceScore++; } // count type
+        if (bothFixedCounts && line1.getCount() != line2.getCount()) { differenceScore++; } // count value
+        return differenceScore;
     }
 }
diff --git a/src/main/java/htsjdk/variant/vcf/VCFConstants.java b/src/main/java/htsjdk/variant/vcf/VCFConstants.java
index 64fdf2bc8e..11f12cf07c 100644
--- a/src/main/java/htsjdk/variant/vcf/VCFConstants.java
+++ b/src/main/java/htsjdk/variant/vcf/VCFConstants.java
@@ -45,7 +45,7 @@ public final class VCFConstants {
     public static final String GENOTYPE_KEY = "GT";
     public static final String GENOTYPE_POSTERIORS_KEY = "GP";
     public static final String GENOTYPE_QUALITY_KEY = "GQ";
-    public static final String GENOTYPE_ALLELE_DEPTHS = "AD"; //AD isn't reserved, but is specifically handled by VariantContext
+    public static final String GENOTYPE_ALLELE_DEPTHS = "AD"; //AD is now reserved
     public static final String GENOTYPE_PL_KEY = "PL";   // phred-scaled genotype likelihoods
     public static final String EXPECTED_ALLELE_COUNT_KEY = "EC";
     @Deprecated public static final String GENOTYPE_LIKELIHOODS_KEY = "GL";         // log10 scaled genotype likelihoods
@@ -86,14 +86,20 @@ public final class VCFConstants {
     public static final String PHASING_TOKENS = "/|\\";
 
     // header lines
-    public static final String FILTER_HEADER_START = "##FILTER";
-    public static final String FORMAT_HEADER_START = "##FORMAT";
-    public static final String INFO_HEADER_START = "##INFO";
-    public static final String ALT_HEADER_KEY = "ALT";
-    public static final String ALT_HEADER_START = VCFHeader.METADATA_INDICATOR + ALT_HEADER_KEY ;
-    public static final String CONTIG_HEADER_KEY = "contig";
-    public static final String CONTIG_HEADER_START = "##" + CONTIG_HEADER_KEY;
+    public static final String FILTER_HEADER_KEY = "FILTER";
+    public static final String FILTER_HEADER_START = VCFHeader.METADATA_INDICATOR + FILTER_HEADER_KEY;
+    public static final int FILTER_HEADER_OFFSET = FILTER_HEADER_START.length() + 1;
+
+    public static final String FORMAT_HEADER_KEY = "FORMAT";
+    public static final String FORMAT_HEADER_START = VCFHeader.METADATA_INDICATOR + FORMAT_HEADER_KEY;
+    public static final int FORMAT_HEADER_OFFSET = FORMAT_HEADER_START.length() + 1;
+
+    public static final String INFO_HEADER_KEY = "INFO";
+    public static final String INFO_HEADER_START = VCFHeader.METADATA_INDICATOR + INFO_HEADER_KEY;
+    public static final int INFO_HEADER_OFFSET = INFO_HEADER_START.length() + 1;
 
+    public static final String ALT_HEADER_KEY = "ALT";
+    public static final String ALT_HEADER_START = VCFHeader.METADATA_INDICATOR + ALT_HEADER_KEY;
     public static final int ALT_HEADER_OFFSET = ALT_HEADER_START.length() + 1;
 
     public static final String PEDIGREE_HEADER_KEY = "PEDIGREE";
@@ -108,6 +114,10 @@ public final class VCFConstants {
     public static final String META_HEADER_START = VCFHeader.METADATA_INDICATOR + META_HEADER_KEY;
     public static final int META_HEADER_OFFSET = META_HEADER_START.length() + 1;
 
+    public static final String CONTIG_HEADER_KEY = "contig";
+    public static final String CONTIG_HEADER_START = VCFHeader.METADATA_INDICATOR + CONTIG_HEADER_KEY;
+    public static final int CONTIG_HEADER_OFFSET = CONTIG_HEADER_START.length() + 1;
+
     // old indel alleles
     public static final char DELETION_ALLELE_v3 = 'D';
     public static final char INSERTION_ALLELE_v3 = 'I';
diff --git a/src/main/java/htsjdk/variant/vcf/VCFContigHeaderLine.java b/src/main/java/htsjdk/variant/vcf/VCFContigHeaderLine.java
index 9ec50681b4..d8a19e2fa5 100644
--- a/src/main/java/htsjdk/variant/vcf/VCFContigHeaderLine.java
+++ b/src/main/java/htsjdk/variant/vcf/VCFContigHeaderLine.java
@@ -26,11 +26,14 @@
 package htsjdk.variant.vcf;
 
 import htsjdk.samtools.SAMSequenceRecord;
+import htsjdk.samtools.util.Log;
 import htsjdk.tribble.TribbleException;
 
 import java.util.Collections;
 import java.util.LinkedHashMap;
 import java.util.Map;
+import java.util.Optional;
+import java.util.regex.Pattern;
 
 /**
  * A special class representing a contig VCF header line.  Knows the true contig order and sorts on that
@@ -40,42 +43,111 @@
  * @author mdepristo
  */
 public class VCFContigHeaderLine extends VCFSimpleHeaderLine {
+    private static final long serialVersionUID = 1L;
+    protected final static Log logger = Log.getInstance(VCFContigHeaderLine.class);
+
+    final static Pattern VALID_CONTIG_ID_PATTERN = Pattern.compile("[0-9A-Za-z!#$%&+./:;?@^_|~-][0-9A-Za-z!#$%&*+./:;=?@^_|~-]*");
     final Integer contigIndex;
 
+    public static final String LENGTH_ATTRIBUTE = "length";
+    public static final String ASSEMBLY_ATTRIBUTE = "assembly";
+    public static final String MD5_ATTRIBUTE = "md5";
+    public static final String URL_ATTRIBUTE = "URL";
+    public static final String SPECIES_ATTRIBUTE = "species";
+
     /**
      * create a VCF contig header line
      *
+     * NOTE: This is retained for backward compatibility, but is deprecated and should not be used.
+     *
      * @param line      the header line
      * @param version   the vcf header version
      * @param key            the key for this header line
+     * @param contigIndex the contig index for this contig
      */
+    @Deprecated // starting after version 2.24.1
     public VCFContigHeaderLine(final String line, final VCFHeaderVersion version, final String key, final int contigIndex) {
-        super(line, version, key, null, Collections.emptyList());
-	    if (contigIndex < 0) throw new TribbleException("The contig index is less than zero.");
-        this.contigIndex = contigIndex;
+        // deprecated because this constructor has a parameter to specify the key (??), but for
+        // contig lines the key has to be "contig"
+        this(line, version, contigIndex);
+        if (!VCFHeader.CONTIG_KEY.equals(key)) {
+            logger.warn(String.format(
+                    "Found key \"%s\". The key for contig header lines must be %s.",
+                    key,
+                    VCFHeader.CONTIG_KEY));
+        }
+    }
+
+    /**
+     * create a VCF contig header line
+     *
+     * @param line      the header line
+     * @param version   the vcf header version
+     * @param contigIndex the contig index for this contig
+     */
+    public VCFContigHeaderLine(final String line, final VCFHeaderVersion version, final int contigIndex) {
+        this(VCFHeaderLineTranslator.parseLine(
+                version, line, Collections.singletonList(VCFSimpleHeaderLine.ID_ATTRIBUTE)), contigIndex);
+        if (!VCFHeader.CONTIG_KEY.equals(getKey())) {
+            logger.warn(String.format(
+                    "Found key \"%s\". The key for contig header lines must be %s.",
+                    getKey(),
+                    VCFHeader.CONTIG_KEY));
+        }
+        if (contigIndex < 0) {
+            throw new TribbleException(String.format("The contig index (%d) is less than zero.", contigIndex));
+        }
+        validateForVersion(version);
     }
 
     public VCFContigHeaderLine(final Map<String, String> mapping, final int contigIndex) {
         super(VCFHeader.CONTIG_KEY, mapping);
-	    if (contigIndex < 0) throw new TribbleException("The contig index is less than zero.");
+	    if (contigIndex < 0) {
+            throw new TribbleException(String.format("The contig index (%d) is less than zero.", contigIndex));
+        }
         this.contigIndex = contigIndex;
     }
 
-	VCFContigHeaderLine(final SAMSequenceRecord sequenceRecord, final String assembly) {
-        // Using LinkedHashMap to preserve order of keys in contig line (ID, length, assembly)
-        super(VCFHeader.CONTIG_KEY, new LinkedHashMap<String, String>() {{
-			// Now inside an init block in an anon HashMap subclass
-			this.put("ID", sequenceRecord.getSequenceName());
-			this.put("length", Integer.toString(sequenceRecord.getSequenceLength()));
-			if ( assembly != null ) this.put("assembly", assembly);
-		}});
-		this.contigIndex = sequenceRecord.getSequenceIndex();
+    /**
+     * Create a VCFContigHeaderLine representing a SAMSequenceRecord.
+     *
+     * NOTE: round-tripping between VCFContigHeaderLines and SAMSequenceRecords can be lossy since they
+     * don't necessarily have equivalent attributes, i.e., SAMSequenceRecord can have a species attribute
+     * that isn't defined by the VCF spec.
+     * @param sequenceRecord the sequence record supplying the contig ID, index, and any optional attributes
+     * @param assembly the assembly value to write to the line; if null, no assembly attribute is written
+     */
+    public VCFContigHeaderLine(final SAMSequenceRecord sequenceRecord, final String assembly) {
+        // preserve order of keys in contig line (ID, length, assembly)
+        this(new LinkedHashMap<String, String>() {{
+                this.put(ID_ATTRIBUTE, sequenceRecord.getSequenceName());
+                if (sequenceRecord.getSequenceLength() != 0) {
+                    this.put(LENGTH_ATTRIBUTE, Integer.toString(sequenceRecord.getSequenceLength()));
+                }
+                if (assembly != null) {
+                    if (!assembly.equals(sequenceRecord.getAssembly())) {
+                        logger.warn(String.format(
+                                        "Inconsistent \"assembly\" attribute values found while creating VCFContigLine " +
+                                        "(with assembly \"%s\") from SAMSequenceRecord (with assembly \"%s\")",
+                                        assembly,
+                                        sequenceRecord.getAssembly()));
+                    }
+                    this.put(ASSEMBLY_ATTRIBUTE, assembly);
+                }
+                if (sequenceRecord.getMd5() != null) {
+                     this.put(MD5_ATTRIBUTE, sequenceRecord.getMd5());
+                }
+                if (sequenceRecord.getAttribute(SAMSequenceRecord.URI_TAG) != null) {
+                    this.put(URL_ATTRIBUTE, sequenceRecord.getAttribute(SAMSequenceRecord.URI_TAG));
+                }
+                if (sequenceRecord.getAttribute(SAMSequenceRecord.SPECIES_TAG) != null) {
+                    this.put(SPECIES_ATTRIBUTE, sequenceRecord.getAttribute(SAMSequenceRecord.SPECIES_TAG));
+                }
+            }},
+            sequenceRecord.getSequenceIndex()
+        );
 	}
 
-    public Integer getContigIndex() {
-        return contigIndex;
-    }
-
     /**
      * Get the SAMSequenceRecord that corresponds to this VCF header line.
      * If the VCF header line does not have a length tag, the SAMSequenceRecord returned will be set to have a length of
@@ -85,20 +157,56 @@ public Integer getContigIndex() {
      * contig header line does not have a length.
      */
 	public SAMSequenceRecord getSAMSequenceRecord() {
-		final String lengthString = this.getGenericFieldValue("length");
-		final int length;
-		if (lengthString == null) {
-		    length = SAMSequenceRecord.UNKNOWN_SEQUENCE_LENGTH;
+        final String lengthString = this.getGenericFieldValue(LENGTH_ATTRIBUTE);
+        final int length;
+        if (lengthString == null) {
+            length = SAMSequenceRecord.UNKNOWN_SEQUENCE_LENGTH;
         } else {
-		    length = Integer.parseInt(lengthString);
+            length = Integer.parseInt(lengthString);
+        }
+        final SAMSequenceRecord record = new SAMSequenceRecord(this.getID(), length);
+        final String assemblyString = this.getGenericFieldValue(ASSEMBLY_ATTRIBUTE);
+        if (assemblyString != null) {
+            record.setAssembly(assemblyString);
         }
-		final SAMSequenceRecord record = new SAMSequenceRecord(this.getID(), length);
-        record.setAssembly(this.getGenericFieldValue("assembly"));
-		record.setSequenceIndex(this.contigIndex);
-		return record;
+        record.setSequenceIndex(this.contigIndex);
+        final String md5 = getGenericFieldValue(MD5_ATTRIBUTE);
+        if (md5 != null) {
+            record.setMd5(md5);
+        }
+        final String url = getGenericFieldValue(URL_ATTRIBUTE);
+        if (url != null) {
+            record.setAttribute(SAMSequenceRecord.URI_TAG, url);
+        }
+        final String species = getGenericFieldValue(SPECIES_ATTRIBUTE);
+        if (species != null) {
+            record.setSpecies(species);
+        }
+        return record;
 	}
 
     @Override
+    public Optional<VCFValidationFailure<VCFHeaderLine>> getValidationFailure(final VCFHeaderVersion vcfTargetVersion) {
+        // the contig ID pattern is only checked when the target version is VCF 4.3 or later
+        final boolean checkContigId = vcfTargetVersion.isAtLeastAsRecentAs(VCFHeaderVersion.VCF4_3);
+        if (checkContigId && !VALID_CONTIG_ID_PATTERN.matcher(getID()).matches()) {
+            final String failureMessage =
+                    String.format("Contig headerLine ID \"%s\" doesn't conform to contig ID restrictions", getID());
+            return Optional.of(new VCFValidationFailure<>(vcfTargetVersion, this, failureMessage));
+        }
+
+        // no contig-specific failure: defer to the superclass validation
+        return super.getValidationFailure(vcfTargetVersion);
+    }
+
+    public Integer getContigIndex() {
+        return contigIndex;
+    }
+
+    /**
+     * Note: this class has a natural ordering that is inconsistent with equals()
+     */
+    @Override
     public boolean equals(final Object o) {
         if ( this == o ) {
             return true;
@@ -120,6 +228,11 @@ public int hashCode() {
 
     /**
      * IT IS CRITICAL THAT THIS BE OVERRIDDEN SO WE SORT THE CONTIGS IN THE CORRECT ORDER
+     *
+     * NOTE: this class has a natural ordering that is inconsistent with equals(). This results
+     * in inconsistent behavior when these lines are used in the sets that are created/accepted
+     * by VCFHeader (ie., getMetaDataInSortedOrder will filter out VCFContigHeaderLines that are
+     * returned by getMetaDataInInputOrder or getContigheaderLines).
      */
     @Override
     public int compareTo(final Object other) {
diff --git a/src/main/java/htsjdk/variant/vcf/VCFFilterHeaderLine.java b/src/main/java/htsjdk/variant/vcf/VCFFilterHeaderLine.java
index 6ca8f3f532..1b890db1b1 100644
--- a/src/main/java/htsjdk/variant/vcf/VCFFilterHeaderLine.java
+++ b/src/main/java/htsjdk/variant/vcf/VCFFilterHeaderLine.java
@@ -25,26 +25,40 @@
 
 package htsjdk.variant.vcf;
 
-import java.util.Arrays;
-import java.util.Collections;
+import htsjdk.tribble.TribbleException;
+
+import java.util.*;
 
 /**
  * @author ebanks
  * 
- * A class representing a key=value entry for FILTER fields in the VCF header
+ * A class representing FILTER fields in the VCF header
  */
-public class VCFFilterHeaderLine extends VCFSimpleHeaderLine  {
-    
+public class VCFFilterHeaderLine extends VCFSimpleHeaderLine {
+
     private static final long serialVersionUID = 1L;
 
+    private static final List<String> requiredTagOrder = Collections.unmodifiableList(
+            new ArrayList<String>(2) {{ // tag order handed to VCFHeaderLineTranslator.parseLine for FILTER lines
+            add(ID_ATTRIBUTE);
+            add(VCFSimpleHeaderLine.DESCRIPTION_ATTRIBUTE);
+        }}
+    );
+
     /**
      * create a VCF filter header line
      *
-     * @param name         the name for this header line
+     * @param id         the headerLineID for this header line
      * @param description  the description for this header line
      */
-    public VCFFilterHeaderLine(final String name, final String description) {
-        super("FILTER", name, description);
+    public VCFFilterHeaderLine(final String id, final String description) {
+        super(VCFConstants.FILTER_HEADER_KEY,
+            new LinkedHashMap<String, String>(2) {{
+                put(ID_ATTRIBUTE, id);
+                put(DESCRIPTION_ATTRIBUTE, description);
+            }}
+        );
+        validate();
     }
 
     /**
@@ -52,29 +66,37 @@ public VCFFilterHeaderLine(final String name, final String description) {
      * @param name
      */
     public VCFFilterHeaderLine(final String name) {
-        super("FILTER", name, name);
+        this(name, name);
     }
 
     /**
-     * create a VCF info header line
+     * create a VCF filter header line
      *
      * @param line      the header line
      * @param version   the vcf header version
      */
     public VCFFilterHeaderLine(final String line, final VCFHeaderVersion version) {
-        super(line, version, "FILTER", Arrays.asList("ID", "Description"), Collections.emptyList());
+        super(VCFConstants.FILTER_HEADER_KEY, VCFHeaderLineTranslator.parseLine(version, line, requiredTagOrder));
+        validate();
+        validateForVersion(version);
+    }
+
+    private void validate() {
+        if (getDescription() == null) {
+            throw new TribbleException.InvalidHeader("Missing Description attribute in filter header line");
+        }
     }
 
     @Override
     public boolean shouldBeAddedToDictionary() {
         return true;
     }
-    
+
     /**
      * get the "Description" field
      * @return the "Description" field
      */
     public String getDescription() {
-        return getGenericFieldValue("Description");
+        return getGenericFieldValue(DESCRIPTION_ATTRIBUTE);
     }
 }
diff --git a/src/main/java/htsjdk/variant/vcf/VCFFormatHeaderLine.java b/src/main/java/htsjdk/variant/vcf/VCFFormatHeaderLine.java
index 74f4d5e5e3..fc75ee5291 100644
--- a/src/main/java/htsjdk/variant/vcf/VCFFormatHeaderLine.java
+++ b/src/main/java/htsjdk/variant/vcf/VCFFormatHeaderLine.java
@@ -26,34 +26,75 @@
 package htsjdk.variant.vcf;
 
 
+import htsjdk.samtools.util.Log;
+import htsjdk.tribble.TribbleException;
+import htsjdk.utils.ValidationUtils;
+
 /**
  * @author ebanks
  *         <p>
  *         Class VCFFormatHeaderLine
  *         </p>
  *         <p>
- *         A class representing a key=value entry for genotype FORMAT fields in the VCF header</p>
+ *         A class representing genotype FORMAT fields in the VCF header</p>
  */
 public class VCFFormatHeaderLine extends VCFCompoundHeaderLine {
+    private static final long serialVersionUID = 1L;
+    protected final static Log logger = Log.getInstance(VCFFormatHeaderLine.class);
 
     public VCFFormatHeaderLine(String name, int count, VCFHeaderLineType type, String description) {
-        super(name, count, type, description, SupportedHeaderLineType.FORMAT);
-        if (type == VCFHeaderLineType.Flag)
-            throw new IllegalArgumentException("Flag is an unsupported type for format fields");
+        super(VCFConstants.FORMAT_HEADER_KEY, name, count, type, description);
+        validate();
     }
 
     public VCFFormatHeaderLine(String name, VCFHeaderLineCount count, VCFHeaderLineType type, String description) {
-        super(name, count, type, description, SupportedHeaderLineType.FORMAT);
+        super(VCFConstants.FORMAT_HEADER_KEY, name, count, type, description);
+        validate();
     }
 
     public VCFFormatHeaderLine(String line, VCFHeaderVersion version) {
-        super(line, version, SupportedHeaderLineType.FORMAT);
+        super(VCFConstants.FORMAT_HEADER_KEY,
+              VCFHeaderLineTranslator.parseLine(version, line, expectedTagOrder),
+              version);
+        validate();
+        validateForVersion(version);
     }
 
-    // format fields do not allow flag values (that wouldn't make much sense, how would you encode this in the genotype).
-    @Override
-    boolean allowFlagValues() {
-        return false;
+    /**
+     * Compare two VCFFormatHeaderLine objects to determine if they have compatible number types, and return a
+     * VCFFormatHeaderLine that represents the result of merging these two lines.
+     *
+     * @param formatLine1 first format line to merge
+     * @param formatLine2 second format line to merge
+     * @param conflictWarner conflict warning emitter
+     * @return a merged VCFFormatHeaderLine
+     */
+    public static VCFFormatHeaderLine getMergedFormatHeaderLine(
+            final VCFFormatHeaderLine formatLine1,
+            final VCFFormatHeaderLine formatLine2,
+            final VCFHeaderMerger.HeaderMergeConflictWarnings conflictWarner)
+    {
+        ValidationUtils.nonNull(formatLine1);
+        ValidationUtils.nonNull(formatLine2);
+        ValidationUtils.nonNull(conflictWarner);
+
+        // delegate to the generic VCFCompoundHeaderLine merger, passing a resolver lambda
+        return VCFCompoundHeaderLine.getMergedCompoundHeaderLine(
+                formatLine1,
+                formatLine2,
+                conflictWarner,
+                (l1, l2) -> new VCFFormatHeaderLine( // resolver ignores l2: ID, type and description come from l1
+                        l1.getID(),
+                        VCFHeaderLineCount.UNBOUNDED, // the resolved line always gets an UNBOUNDED count
+                        l1.getType(),
+                        l1.getDescription())
+        );
+    }
+
+    private void validate() { // throws TribbleException when this FORMAT line is declared with type Flag
+        if (this.getType() == VCFHeaderLineType.Flag) { // a flag cannot be encoded in a genotype field
+            throw new TribbleException(String.format("Flag is an unsupported type for format fields: %s", this.toStringEncoding()));
+        }
     }
 
     @Override
diff --git a/src/main/java/htsjdk/variant/vcf/VCFHeader.java b/src/main/java/htsjdk/variant/vcf/VCFHeader.java
index c39bef5684..637c04c4fc 100644
--- a/src/main/java/htsjdk/variant/vcf/VCFHeader.java
+++ b/src/main/java/htsjdk/variant/vcf/VCFHeader.java
@@ -27,60 +27,52 @@
 
 import htsjdk.beta.plugin.HtsHeader;
 import htsjdk.samtools.SAMSequenceDictionary;
-import htsjdk.samtools.SAMSequenceRecord;
+import htsjdk.samtools.util.Log;
 import htsjdk.tribble.TribbleException;
 import htsjdk.tribble.util.ParsingUtils;
 import htsjdk.utils.ValidationUtils;
-import htsjdk.variant.utils.GeneralUtils;
 import htsjdk.variant.variantcontext.VariantContextComparator;
 
 import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.LinkedHashMap;
-import java.util.LinkedHashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.TreeSet;
-
+import java.util.*;
+import java.util.stream.Collectors;
 
 /**
- * A class to represent a VCF header
+ * A class to represent a VCF header.
+ *
+ * A VCFHeader has a "current" VCFHeaderVersion that is established when the header is constructed. If
+ * metadata lines are provided to the constructor, a ##fileformat line must be included, and all lines
+ * that are provided must be valid for the specified version. If no metadata lines are initially
+ * provided, the default version {@link VCFHeader#DEFAULT_VCF_VERSION} will be used.
+ *
+ * Each line in the list is always guaranteed to be valid for the current version, and any line added must
+ * conform to the current version (as defined by the VCF specification). If a new line is added that fails to
+ * validate against the current version, or a new line that changes the current version, and an existing line
+ * in the list fails to validate against the new version, an exception will be thrown.
  *
- * @author aaron
- * NOTE: This class stores header lines in lots of places. The original author noted that this should
- * be cleaned up at some point in the future (jgentry - 5/2013)
+ * Once a header version is established, it can be changed by adding a new file format/version line (see
+ * {@link VCFHeader#makeHeaderVersionLine}) (the new version line will replace any existing line), but only
+ * if the new version is newer than the previous version. Attempts to move the version to an older version
+ * will result in an exception.
  */
 public class VCFHeader implements HtsHeader, Serializable {
     public static final long serialVersionUID = 1L;
+    protected static final Log logger = Log.getInstance(VCFHeader.class);
+    public static final VCFHeaderVersion DEFAULT_VCF_VERSION = VCFHeaderVersion.VCF4_2;
 
     // the mandatory header fields
     public enum HEADER_FIELDS {
         CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO
     }
 
-    /**
-     * The VCF version for this header; once a header version is established, it can only be
-     * changed subject to version transition rules defined by
-     * {@link #validateVersionTransition(VCFHeaderVersion, VCFHeaderVersion)}
-     */
+    // the VCF version for this header
     private VCFHeaderVersion vcfHeaderVersion;
 
-    // the associated meta data
-    private final Set<VCFHeaderLine> mMetaData = new LinkedHashSet<VCFHeaderLine>();
-    private final Map<String, VCFInfoHeaderLine> mInfoMetaData = new LinkedHashMap<String, VCFInfoHeaderLine>();
-    private final Map<String, VCFFormatHeaderLine> mFormatMetaData = new LinkedHashMap<String, VCFFormatHeaderLine>();
-    private final Map<String, VCFFilterHeaderLine> mFilterMetaData = new LinkedHashMap<String, VCFFilterHeaderLine>();
-    private final Map<String, VCFHeaderLine> mOtherMetaData = new LinkedHashMap<String, VCFHeaderLine>();
-    private final Map<String, VCFContigHeaderLine> contigMetaData = new LinkedHashMap<>();
+    // header meta data
+    private final VCFMetaDataLines mMetaData = new VCFMetaDataLines();
 
-    // the list of auxillary tags
-    private final List<String> mGenotypeSampleNames = new ArrayList<String>();
+    // the list of auxiliary tags
+    private final List<String> mGenotypeSampleNames = new ArrayList<>();
 
     // the character string that indicates meta data
     public static final String METADATA_INDICATOR = "##";
@@ -108,59 +100,74 @@ public enum HEADER_FIELDS {
     private boolean writeCommandLine = true;
 
     /**
-     * Create an empty VCF header with no header lines and no samples
+     * Create an empty VCF header with no header lines and no samples. Defaults to
+     * VCF version {@link VCFHeader#DEFAULT_VCF_VERSION}.
      */
     public VCFHeader() {
-        this(Collections.<VCFHeaderLine>emptySet(), Collections.<String>emptySet());
+        this(makeHeaderVersionLineSet(DEFAULT_VCF_VERSION), Collections.emptySet());
     }
 
     /**
-     * create a VCF header, given a list of meta data and auxiliary tags
+     * Create a VCF header, given a list of meta data and auxiliary tags. The provided metadata
+     * header line list MUST contain a version (fileformat) line in order to establish the version
+     * for the header, and each metadata line must be valid for that version.
      *
-     * @param metaData     the meta data associated with this header
+     * @param metaData the meta data associated with this header
+     * @throws TribbleException if the provided header line metadata does not include a header line that
+     * establishes the VCF version for the lines, or if any line does not conform to the established
+     * version
      */
     public VCFHeader(final Set<VCFHeaderLine> metaData) {
-        mMetaData.addAll(metaData);
-        removeVCFVersionLines(mMetaData);
-        createLookupEntriesForAllHeaderLines();
-        checkForDeprecatedGenotypeLikelihoodsKey();
+        this(metaData, Collections.emptySet());
     }
 
     /**
-     * Creates a deep copy of the given VCFHeader, duplicating all its metadata and
+     * Creates a copy of the given VCFHeader, duplicating all its metadata and
      * sample names.
      */
     public VCFHeader(final VCFHeader toCopy) {
-        this(toCopy.mMetaData, toCopy.mGenotypeSampleNames);
+        this(toCopy.getMetaDataInInputOrder(), toCopy.mGenotypeSampleNames);
     }
 
     /**
-     * create a VCF header, given a list of meta data and auxiliary tags
+     * Create a VCF header, given a set of meta data and auxiliary tags. The provided metadata
+     * list MUST contain a version (fileformat) line in order to establish the version
+     * for this header, and each metadata line must be valid for that version.
      *
-     * @param metaData            the meta data associated with this header
+     * @param metaData            set of meta data associated with this header
      * @param genotypeSampleNames the sample names
+     * @throws TribbleException if the provided header line metadata does not include a header line that
+     * establishes the VCF version for the lines, or if any line does not conform to the established
+     * version
      */
     public VCFHeader(final Set<VCFHeaderLine> metaData, final Set<String> genotypeSampleNames) {
-        this(metaData, new ArrayList<String>(genotypeSampleNames));
+        this(metaData, new ArrayList<>(genotypeSampleNames));
     }
 
     /**
-     * create a VCF header, given a target version, a list of meta data and auxiliary tags
+     * Create a versioned VCF header.
      *
-     * @param vcfHeaderVersion    the vcf header version for this header, can not be null
-     * @param metaData            the meta data associated with this header
-     * @param genotypeSampleNames the sample names
+     * @param metaData The metadata lines for this header. The provided metadata
+     * header line list MUST contain a version (fileformat) line in order to establish the version
+     * for this header, and each metadata line must be valid for that version.
+     * @param genotypeSampleNames Sample names for this header.
+     * @throws TribbleException if the provided header line metadata does not include a header line that
+     * establishes the VCF version for the lines, or if any line does not conform to the established
+     * version
      */
-    public VCFHeader(final VCFHeaderVersion vcfHeaderVersion, final Set<VCFHeaderLine> metaData, final Set<String> genotypeSampleNames) {
-        this(metaData, new ArrayList(genotypeSampleNames));
-        ValidationUtils.nonNull(vcfHeaderVersion);
-        setVCFHeaderVersion(vcfHeaderVersion);
-    }
-
     public VCFHeader(final Set<VCFHeaderLine> metaData, final List<String> genotypeSampleNames) {
-        this(metaData);
+        ValidationUtils.nonNull(metaData);
+        ValidationUtils.nonNull(genotypeSampleNames);
 
-        if ( genotypeSampleNames.size() != new HashSet<String>(genotypeSampleNames).size() )
+        // propagate the lines and establish the version for this header; note that if multiple version
+        // lines are presented in the set, a warning will be issued, only the last one will be retained,
+        // and the header version will be established using the last version line encountered
+        mMetaData.addMetaDataLines(metaData);
+        vcfHeaderVersion = initializeHeaderVersion();
+        mMetaData.validateMetaDataLines(vcfHeaderVersion);
+
+        checkForDeprecatedGenotypeLikelihoodsKey();
+        if ( genotypeSampleNames.size() != new HashSet<>(genotypeSampleNames).size() )
             throw new TribbleException.InvalidHeader("BUG: VCF header has duplicate sample names");
 
         mGenotypeSampleNames.addAll(genotypeSampleNames);
@@ -168,50 +175,34 @@ public VCFHeader(final Set<VCFHeaderLine> metaData, final List<String> genotypeS
         buildVCFReaderMaps(genotypeSampleNames);
     }
 
-    /**
-     * Establish the header version for this header. If the header version has already been established
-     * for this header, the new version will be subject to version transition validation.
-     * @param vcfHeaderVersion
-     * @throws TribbleException if the requested header version is not compatible with the existing version
-     */
-    public void setVCFHeaderVersion(final VCFHeaderVersion vcfHeaderVersion) {
-        validateVersionTransition(this.vcfHeaderVersion, vcfHeaderVersion);
-        this.vcfHeaderVersion = vcfHeaderVersion;
+   /**
+    * Get the header version for this header.
+    * @return the VCFHeaderVersion for this header. will not be null
+    */
+    public VCFHeaderVersion getVCFHeaderVersion() {
+        return vcfHeaderVersion;
     }
 
     /**
-     * Throw if {@code fromVersion} is not compatible with a {@code toVersion}. Generally, any version before
-     * version 4.2 can be up-converted to version 4.2, but not to version 4.3. Once a header is established as
-     * version 4.3, it cannot be up or down converted, and it must remain at version 4.3.
-     * @param fromVersion current version. May be null, in which case {@code toVersion} can be any version
-     * @param toVersion new version. Cannot be null.
-     * @throws TribbleException if {@code fromVersion} is not compatible with {@code toVersion}
+     * Adds a new line to the VCFHeader. If a duplicate line already exists (same key/ID pair for
+     * structured lines, or duplicate content for unstructured lines with identical keys), the new
+     * line will replace the existing line.
+     *
+     * @param headerLine header line to attempt to add
      */
-    public static void validateVersionTransition(final VCFHeaderVersion fromVersion, final VCFHeaderVersion toVersion) {
-        ValidationUtils.nonNull(toVersion);
-
-        final String errorMessageFormatString = "VCF cannot be automatically promoted from %s to %s";
-
-        // fromVersion can be null, in which case anything goes (any transition from null is legal)
-        if (fromVersion != null) {
-            if (toVersion.isAtLeastAsRecentAs(VCFHeaderVersion.VCF4_3)) {
-                if (!fromVersion.isAtLeastAsRecentAs(VCFHeaderVersion.VCF4_3)) {
-                    // we're trying to go from pre-v4.3 to v4.3+
-                    throw new TribbleException(String.format(errorMessageFormatString, fromVersion, toVersion));
-                }
+    public void addMetaDataLine(final VCFHeaderLine headerLine) {
+        // propagate the new line to the metadata lines object
+        mMetaData.addMetaDataLine(headerLine);
 
-            } else if (fromVersion.equals(VCFHeaderVersion.VCF4_3)) {
-                // we're trying to go from v4.3 to pre-v4.3
-                throw new TribbleException(String.format(errorMessageFormatString, fromVersion, toVersion));
-            }
+        // update the current version in case this line triggered a version change
+        final VCFHeaderVersion newHeaderVersion = mMetaData.getVCFVersion();
+        if (!newHeaderVersion.equals(vcfHeaderVersion)) {
+            validateVersionTransition(vcfHeaderVersion, newHeaderVersion);
         }
-    }
+        vcfHeaderVersion = newHeaderVersion;
+        headerLine.validateForVersion(vcfHeaderVersion);
 
-    /**
-     * @return the VCFHeaderVersion for this header. Can be null.
-     */
-    public VCFHeaderVersion getVCFHeaderVersion() {
-        return vcfHeaderVersion;
+        checkForDeprecatedGenotypeLikelihoodsKey();
     }
 
     /**
@@ -220,81 +211,58 @@ public VCFHeaderVersion getVCFHeaderVersion() {
      * using this header (i.e., read by the VCFCodec) will have genotypes
      * occurring in the same order
      *
-     * @param genotypeSampleNamesInAppearenceOrder genotype sample names, must iterator in order of appearance
+     * @param genotypeSampleNamesInAppearanceOrder genotype sample names, must iterate in order of appearance
      */
-    private void buildVCFReaderMaps(final Collection<String> genotypeSampleNamesInAppearenceOrder) {
-        sampleNamesInOrder = new ArrayList<String>(genotypeSampleNamesInAppearenceOrder.size());
-        sampleNameToOffset = new HashMap<String, Integer>(genotypeSampleNamesInAppearenceOrder.size());
+    private void buildVCFReaderMaps(final Collection<String> genotypeSampleNamesInAppearanceOrder) {
+        sampleNamesInOrder = new ArrayList<>(genotypeSampleNamesInAppearanceOrder.size());
+        sampleNameToOffset = new HashMap<>(genotypeSampleNamesInAppearanceOrder.size());
 
         int i = 0;
-        for (final String name : genotypeSampleNamesInAppearenceOrder) {
+        for (final String name : genotypeSampleNamesInAppearanceOrder) {
             sampleNamesInOrder.add(name);
             sampleNameToOffset.put(name, i++);
         }
         Collections.sort(sampleNamesInOrder);
     }
 
-
     /**
-     * Adds a new line to the VCFHeader. If there is an existing header line of the
-     * same type with the same key, the new line is not added and the existing line
-     * is preserved.
+     * Return all contig lines in SORTED order, where the sort order is determined by contig index.
+     * Note that this behavior differs from other VCFHeader methods that return lines in input order.
      *
-     * @param headerLine header line to attempt to add
-     */
-    public void addMetaDataLine(final VCFHeaderLine headerLine) {
-        // Try to create a lookup entry for the new line. If this succeeds (because there was
-        // no line of this type with the same key), add the line to our master list of header
-        // lines in mMetaData.
-        if ( addMetadataLineLookupEntry(headerLine) ) {
-            mMetaData.add(headerLine);
-            checkForDeprecatedGenotypeLikelihoodsKey();
-        }
-    }
-
-    /**
-     * @return all of the VCF header lines of the ##contig form in order, or an empty list if none were present
+     * @return all of the VCF header lines of the ##contig form in SORTED order, or an empty list if none were present
      */
     public List<VCFContigHeaderLine> getContigLines() {
-        // this must preserve input order
-        return Collections.unmodifiableList(new ArrayList<>(contigMetaData.values()));
-    }
+        // this must return lines in SORTED order
+        return mMetaData.getContigLines();
+   }
 
     /**
-     * Returns the contigs in this VCF file as a SAMSequenceDictionary. Returns null if contigs lines are
-     * not present in the header. If contig lines are missing length tags, they will be created with
-     * length set to SAMSequenceRecord.UNKNOWN_SEQUENCE_LENGTH. Records with unknown length will match any record with
-     * the same name when evaluated by SAMSequenceRecord.isSameSequence.
+     * Returns the contigs in this VCF Header as a SAMSequenceDictionary.
+     *
+     * @return Returns null if contig lines are not present in the header.
+     * @throws TribbleException if one or more contig lines do not have length
+     * information.
      */
     public SAMSequenceDictionary getSequenceDictionary() {
+        // this must ensure that the lines used to create the dictionary are sorted by contig index
         final List<VCFContigHeaderLine> contigHeaderLines = this.getContigLines();
-        if (contigHeaderLines.isEmpty()) return null;
-
-        final List<SAMSequenceRecord> sequenceRecords = new ArrayList<SAMSequenceRecord>(contigHeaderLines.size());
-        for (final VCFContigHeaderLine contigHeaderLine : contigHeaderLines) {
-            final SAMSequenceRecord samSequenceRecord = contigHeaderLine.getSAMSequenceRecord();
-            sequenceRecords.add(samSequenceRecord);
-        }
-
-        return new SAMSequenceDictionary(sequenceRecords);
+        return contigHeaderLines.isEmpty() ? null  :
+                new SAMSequenceDictionary(
+                    contigHeaderLines.stream()
+                            .map(contigLine -> contigLine.getSAMSequenceRecord())
+                            .collect(Collectors.toCollection(ArrayList::new))
+                );
     }
 
     /**
-     * Completely replaces the contig records in this header with those in the given SAMSequenceDictionary.
+     * Completely replaces all contig header lines in this header with ones derived from the given SAMSequenceDictionary.
+     *
+     * @param dictionary SAMSequenceDictionary to use to create VCFContigHeaderLines for this header
      */
     public void setSequenceDictionary(final SAMSequenceDictionary dictionary) {
-        this.contigMetaData.clear();
-
-        // Also need to remove contig record lines from mMetaData
-        final List<VCFHeaderLine> toRemove = new ArrayList<VCFHeaderLine>();
-        for (final VCFHeaderLine line : mMetaData) {
-            if (line instanceof VCFContigHeaderLine) {
-                toRemove.add(line);
-            }
-        }
-        mMetaData.removeAll(toRemove);
-        for (final SAMSequenceRecord record : dictionary.getSequences()) {
-            addMetaDataLine(new VCFContigHeaderLine(record, record.getAssembly()));
+        getContigLines().forEach(hl -> mMetaData.removeMetaDataLine(hl));
+        if (dictionary != null) {
+            dictionary.getSequences().forEach(r -> addMetaDataLine(new VCFContigHeaderLine(r, r.getAssembly())));
         }
     }
 
@@ -305,128 +273,12 @@ public VariantContextComparator getVCFRecordComparator() {
     /**
      * @return all of the VCF FILTER lines in their original file order, or an empty list if none were present
      */
-    public List<VCFFilterHeaderLine> getFilterLines() {
-        final List<VCFFilterHeaderLine> filters = new ArrayList<VCFFilterHeaderLine>();
-        for (final VCFHeaderLine line : mMetaData) {
-            if ( line instanceof VCFFilterHeaderLine )  {
-                filters.add((VCFFilterHeaderLine)line);
-            }
-        }
-        return filters;
-    }
-
-    /**
-     * @return all of the VCF ID-based header lines in their original file order, or an empty list if none were present
-     */
-    public List<VCFIDHeaderLine> getIDHeaderLines() {
-        final List<VCFIDHeaderLine> lines = new ArrayList<VCFIDHeaderLine>();
-        for (final VCFHeaderLine line : mMetaData) {
-            if (line instanceof VCFIDHeaderLine)  {
-                lines.add((VCFIDHeaderLine)line);
-            }
-        }
-        return lines;
-    }
-
-    /**
-     * Remove all lines with a VCF version tag from the provided set of header lines
-     */
-    private void removeVCFVersionLines( final Set<VCFHeaderLine> headerLines ) {
-        final List<VCFHeaderLine> toRemove = new ArrayList<VCFHeaderLine>();
-        for (final VCFHeaderLine line : headerLines) {
-            if (VCFHeaderVersion.isFormatString(line.getKey())) {
-                toRemove.add(line);
-            }
-        }
-        headerLines.removeAll(toRemove);
-    }
+    public List<VCFFilterHeaderLine> getFilterLines() { return mMetaData.getFilterLines(); }
 
     /**
-     * Creates lookup table entries for all header lines in mMetaData.
+     * @return all of the VCFSimpleHeaderLine (ID)  lines in their original file order, or an empty list if none are present
      */
-    private void createLookupEntriesForAllHeaderLines() {
-        for (final VCFHeaderLine line : mMetaData) {
-            addMetadataLineLookupEntry(line);
-        }
-    }
-
-    /**
-     * Add a single header line to the appropriate type-specific lookup table (but NOT to the master
-     * list of lines in mMetaData -- this must be done separately if desired).
-     *
-     * If a header line is present that has the same key as an existing line, it will not be added.  A warning
-     * will be shown if this occurs when GeneralUtils.DEBUG_MODE_ENABLED is true, otherwise this will occur
-     * silently.
-     *
-     * @param line header line to attempt to add to its type-specific lookup table
-     * @return true if the line was added to the appropriate lookup table, false if there was an existing
-     *         line with the same key and the new line was not added
-     */
-    private boolean addMetadataLineLookupEntry(final VCFHeaderLine line) {
-        if ( line instanceof VCFInfoHeaderLine )  {
-            final VCFInfoHeaderLine infoLine = (VCFInfoHeaderLine)line;
-            return addMetaDataLineMapLookupEntry(mInfoMetaData, infoLine.getID(), infoLine);
-        } else if ( line instanceof VCFFormatHeaderLine ) {
-            final VCFFormatHeaderLine formatLine = (VCFFormatHeaderLine)line;
-            return addMetaDataLineMapLookupEntry(mFormatMetaData, formatLine.getID(), formatLine);
-        } else if ( line instanceof VCFFilterHeaderLine ) {
-            final VCFFilterHeaderLine filterLine = (VCFFilterHeaderLine)line;
-            return addMetaDataLineMapLookupEntry(mFilterMetaData, filterLine.getID(), filterLine);
-        } else if ( line instanceof VCFContigHeaderLine ) {
-            return addContigMetaDataLineLookupEntry((VCFContigHeaderLine) line);
-        } else {
-            return addMetaDataLineMapLookupEntry(mOtherMetaData, line.getKey(), line);
-        }
-    }
-
-    /**
-     * Add a contig header line to the lookup list for contig lines (contigMetaData). If there's
-     * already a contig line with the same ID, does not add the line.
-     *
-     * Note: does not add the contig line to the master list of header lines in mMetaData --
-     *       this must be done separately if desired.
-     *
-     * @param line contig header line to add
-     * @return true if line was added to the list of contig lines, otherwise false
-     */
-    private boolean addContigMetaDataLineLookupEntry(final VCFContigHeaderLine line) {
-        // if we are trying to add a contig for the same ID
-        if (contigMetaData.containsKey(line.getID())) {
-            if ( GeneralUtils.DEBUG_MODE_ENABLED ) {
-                System.err.println("Found duplicate VCF contig header lines for " + line.getID() + "; keeping the first only" );
-            }
-            // do not add this contig if it exists
-            return false;
-        }
-        contigMetaData.put(line.getID(), line);
-        return true;
-    }
-
-    /**
-     * Add a header line to the provided map at a given key.  If the key already exists, it will not be replaced.
-     * If it does already exist and GeneralUtils.DEBUG_MODE_ENABLED is true, it will issue warnings about duplicates,
-     * otherwise it will silently leave the existing key/line pair as is.
-     *
-     * Note: does not add the header line to the master list of header lines in mMetaData --
-     *       this must be done separately if desired.
-     *
-     * @param map a map from each key to the associated VCFHeaderLine
-     * @param key the key to insert this line at
-     * @param line the line to insert at this key
-     * @param <T> a type of vcf header line that extends VCFHeaderLine
-     * @return true if the line was added to the map, false if it was not added because there's already a line with that key
-     */
-    private <T extends VCFHeaderLine> boolean addMetaDataLineMapLookupEntry(final Map<String, T> map, final String key, final T line) {
-        if ( map.containsKey(key) ) {
-            if ( GeneralUtils.DEBUG_MODE_ENABLED ) {
-                System.err.println("Found duplicate VCF header lines for " + key + "; keeping the first only" );
-            }
-            return false;
-        }
-
-        map.put(key, line);
-        return true;
-    }
+    public List<VCFSimpleHeaderLine> getIDHeaderLines() { return mMetaData.getIDHeaderLines(); }
 
     /**
      * Check for the presence of a format line with the deprecated key {@link VCFConstants#GENOTYPE_LIKELIHOODS_KEY}.
@@ -435,12 +287,14 @@ private <T extends VCFHeaderLine> boolean addMetaDataLineMapLookupEntry(final Ma
      */
     private void checkForDeprecatedGenotypeLikelihoodsKey() {
         if ( hasFormatLine(VCFConstants.GENOTYPE_LIKELIHOODS_KEY) && ! hasFormatLine(VCFConstants.GENOTYPE_PL_KEY) ) {
-            if ( GeneralUtils.DEBUG_MODE_ENABLED ) {
-                System.err.println("Found " + VCFConstants.GENOTYPE_LIKELIHOODS_KEY + " format, but no "
-                        + VCFConstants.GENOTYPE_PL_KEY + " field.  We now only manage PL fields internally"
-                        + " automatically adding a corresponding PL field to your VCF header");
-            }
-            addMetaDataLine(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_PL_KEY, VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"));
+            logger.warn("Found " + VCFConstants.GENOTYPE_LIKELIHOODS_KEY + " format, but no "
+                    + VCFConstants.GENOTYPE_PL_KEY + " field.  We now only manage PL fields internally"
+                    + " automatically adding a corresponding PL field to your VCF header");
+            addMetaDataLine(new VCFFormatHeaderLine(
+                    VCFConstants.GENOTYPE_PL_KEY,
+                    VCFHeaderLineCount.G,
+                    VCFHeaderLineType.Integer,
+                    "Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"));
         }
     }
 
@@ -451,48 +305,44 @@ private void checkForDeprecatedGenotypeLikelihoodsKey() {
      * @return a set of the header fields, in order
      */
     public Set<HEADER_FIELDS> getHeaderFields() {
-        return new LinkedHashSet<HEADER_FIELDS>(Arrays.asList(HEADER_FIELDS.values()));
+        return new LinkedHashSet<>(Arrays.asList(HEADER_FIELDS.values()));
     }
 
     /**
-     * get the meta data, associated with this header, in sorted order
+     * get the meta data, associated with this header, in input order
      *
      * @return a set of the meta data
      */
-    public Set<VCFHeaderLine> getMetaDataInInputOrder() {
-        return makeGetMetaDataSet(mMetaData);
-    }
-
-    public Set<VCFHeaderLine> getMetaDataInSortedOrder() {
-        return makeGetMetaDataSet(new TreeSet<VCFHeaderLine>(mMetaData));
-    }
+    public Set<VCFHeaderLine> getMetaDataInInputOrder() { return mMetaData.getMetaDataInInputOrder(); }
 
-    private Set<VCFHeaderLine> makeGetMetaDataSet(final Set<VCFHeaderLine> headerLinesInSomeOrder) {
-        final Set<VCFHeaderLine> lines = new LinkedHashSet<VCFHeaderLine>();
-        if (vcfHeaderVersion != null && vcfHeaderVersion.isAtLeastAsRecentAs(VCFHeaderVersion.VCF4_3)) {
-            // always propagate version 4.3+ to prevent these header lines from magically being back-versioned to < 4.3
-            lines.add(new VCFHeaderLine(VCFHeaderVersion.VCF4_3.getFormatString(), VCFHeaderVersion.VCF4_3.getVersionString()));
-        } else {
-            lines.add(new VCFHeaderLine(VCFHeaderVersion.VCF4_2.getFormatString(), VCFHeaderVersion.VCF4_2.getVersionString()));
-        }
-        lines.addAll(headerLinesInSomeOrder);
-        return Collections.unmodifiableSet(lines);
-    }
+    /**
+     * Get the metadata associated with this header in sorted order.
+     *
+     * @return Metadata lines in sorted order (based on lexicographical sort of string encodings).
+     */
+    public Set<VCFHeaderLine> getMetaDataInSortedOrder() { return mMetaData.getMetaDataInSortedOrder(); }
 
     /**
      * Get the VCFHeaderLine whose key equals key.  Returns null if no such line exists
-     * @param key
-     * @return
+     * 
+     * Deprecated. Use {@link #getMetaDataLines(String)}. see https://github.com/samtools/hts-specs/issues/602
+     * 
+     * @param key the key to use to find header lines to return
+     * @return the header line with key "key", or null if none is present
      */
+    @Deprecated // starting after version 2.24.1
     public VCFHeaderLine getMetaDataLine(final String key) {
-        for (final VCFHeaderLine line: mMetaData) {
-            if ( line.getKey().equals(key) )
-                return line;
-        }
-
-        return null;
+        return mMetaData.getMetaDataLines(key).stream().findFirst().orElse(null);
     }
 
+    /**
+     * Get the VCFHeaderLines whose key equals key.  Returns an empty list if no such lines exist.
+     *
+     * @param key the key to use to find header lines to return
+     * @return the header lines with key "key"
+     */
+    public Collection<VCFHeaderLine> getMetaDataLines(final String key) { return mMetaData.getMetaDataLines(key); }
+    
     /**
      * get the genotyping sample names
      *
@@ -532,40 +382,32 @@ public int getColumnCount() {
     /**
      * Returns the INFO HeaderLines in their original ordering
      */
-    public Collection<VCFInfoHeaderLine> getInfoHeaderLines() {
-        return mInfoMetaData.values();
-    }
+    public Collection<VCFInfoHeaderLine> getInfoHeaderLines() { return mMetaData.getInfoHeaderLines(); }
 
     /**
      * Returns the FORMAT HeaderLines in their original ordering
      */
-    public Collection<VCFFormatHeaderLine> getFormatHeaderLines() {
-        return mFormatMetaData.values();
-    }
+    public Collection<VCFFormatHeaderLine> getFormatHeaderLines() { return mMetaData.getFormatHeaderLines(); }
 
     /**
-     * @param id the header key name
+     * @param id the id of the requested header line
      * @return the meta data line, or null if there is none
      */
     public VCFInfoHeaderLine getInfoHeaderLine(final String id) {
-        return mInfoMetaData.get(id);
+        return mMetaData.getInfoHeaderLine(id);
     }
 
     /**
-     * @param id    the header key name
+     * @param id  the id of the requested header line
      * @return the meta data line, or null if there is none
      */
-    public VCFFormatHeaderLine getFormatHeaderLine(final String id) {
-        return mFormatMetaData.get(id);
-    }
+    public VCFFormatHeaderLine getFormatHeaderLine(final String id) { return mMetaData.getFormatHeaderLine(id); }
 
     /**
-     * @param id    the header key name
+     * @param id the id of the requested header line
      * @return the meta data line, or null if there is none
      */
-    public VCFFilterHeaderLine getFilterHeaderLine(final String id) {
-        return mFilterMetaData.get(id);
-    }
+    public VCFFilterHeaderLine getFilterHeaderLine(final String id) { return mMetaData.getFilterHeaderLine(id); }
 
     public boolean hasInfoLine(final String id) {
         return getInfoHeaderLine(id) != null;
@@ -580,24 +422,82 @@ public boolean hasFilterLine(final String id) {
     }
 
     /**
-     * @param key    the header key name
+     * Deprecated. Use {@link #getOtherHeaderLines(String)}. See https://github.com/samtools/hts-specs/issues/602
+     *
+     * @param key the key of the requested header line
      * @return the meta data line, or null if there is none
      */
+    @Deprecated // starting after version 2.24.1 (this selects one from what can be many)
     public VCFHeaderLine getOtherHeaderLine(final String key) {
-        return mOtherMetaData.get(key);
+        final Collection<VCFHeaderLine> otherLines = mMetaData.getOtherHeaderLines();
+        for (final VCFHeaderLine next: otherLines) {
+            if (next.getKey().equals(key)) {
+                // note that this returns the first match it finds, which is why this method is deprecated
+                return next;
+            }
+        }
+        return null;
     }
 
     /**
-     * Returns the other HeaderLines in their original ordering
+     * Returns all "other" VCFHeaderLines, in their original (input) order, where "other" means any
+     * VCFHeaderLine that is not a contig, info, format or filter header line.
+     */
+    public Collection<VCFHeaderLine> getOtherHeaderLines() { return mMetaData.getOtherHeaderLines(); }
+
+    /**
+     * Returns "other" HeaderLines that have the key "key", in their original ordering, where "other"
+     * means any VCFHeaderLine that is not a contig, info, format or filter header line.
+     */
+    public List<VCFHeaderLine> getOtherHeaderLines(final String key) {
+        return mMetaData.getOtherHeaderLines().stream().filter(hl -> hl.getKey().equals(key)).collect(Collectors.toList());
+    }
+
+    /**
+     * Adds a single "other" VCFHeaderLine that has key "key". Any lines with that key that already exist
+     * in the header will be removed. This method can only be used to set unique non-structured (non-ID)
+     * header lines.
+     *
+     * @param uniqueLine the unique line to add
+     * @throws TribbleException if the line to be added is an ID line.
      */
-    public Collection<VCFHeaderLine> getOtherHeaderLines() {
-        return mOtherMetaData.values();
+    public void addOtherHeaderLineUnique(final VCFHeaderLine uniqueLine) {
+        if (uniqueLine.isIDHeaderLine()) {
+            throw new TribbleException(String.format("Only non-ID header lines can be added using this method: %s", uniqueLine));
+        }
+        getOtherHeaderLines(uniqueLine.getKey()).forEach(hl -> mMetaData.removeMetaDataLine(hl));
+        addMetaDataLine(uniqueLine);
+    }
+
+    /**
+     * Returns a single "other" VCFHeaderLine that has the key "key", where "other"
+     * means any VCFHeaderLine that is not a contig, info, format or filter header line. If more than
+     * one such line is available, throws a TribbleException that lists the matching lines.
+     *
+     * @param key the key to match
+     * @return a single VCFHeaderLine, or null if none
+     * @throws TribbleException if more than one other line matches the key
+     */
+    public VCFHeaderLine getOtherHeaderLineUnique(final String key) {
+        final List<VCFHeaderLine> lineList = getOtherHeaderLines(key);
+        if (lineList.isEmpty()) {
+            return null;
+        } else if (lineList.size() > 1) {
+            throw new TribbleException(
+                    String.format(
+                            "More than one \"other\" header line matches the key \"%s\". Use getOtherHeaderLines() to retrieve multiple lines: %s",
+                            key,
+                            lineList.stream().map(VCFHeaderLine::toString).collect(Collectors.joining(","))));
+        } else {
+            return lineList.get(0);
+        }
     }
 
     /**
      * If true additional engine headers will be written to the VCF, otherwise only the walker headers will be output.
      * @return true if additional engine headers will be written to the VCF
      */
+    @Deprecated // starting after version 2.24.1
     public boolean isWriteEngineHeaders() {
         return writeEngineHeaders;
     }
@@ -606,6 +506,7 @@ public boolean isWriteEngineHeaders() {
      * If true additional engine headers will be written to the VCF, otherwise only the walker headers will be output.
      * @param writeEngineHeaders true if additional engine headers will be written to the VCF
      */
+    @Deprecated // starting after version 2.24.1
     public void setWriteEngineHeaders(final boolean writeEngineHeaders) {
         this.writeEngineHeaders = writeEngineHeaders;
     }
@@ -614,6 +515,7 @@ public void setWriteEngineHeaders(final boolean writeEngineHeaders) {
      * If true, and isWriteEngineHeaders also returns true, the command line will be written to the VCF.
      * @return true if the command line will be written to the VCF
      */
+    @Deprecated // starting after version 2.24.1
     public boolean isWriteCommandLine() {
         return writeCommandLine;
     }
@@ -622,6 +524,7 @@ public boolean isWriteCommandLine() {
      * If true, and isWriteEngineHeaders also returns true, the command line will be written to the VCF.
      * @param writeCommandLine true if the command line will be written to the VCF
      */
+    @Deprecated // starting after version 2.24.1
     public void setWriteCommandLine(final boolean writeCommandLine) {
         this.writeCommandLine = writeCommandLine;
     }
@@ -640,10 +543,98 @@ public HashMap<String, Integer> getSampleNameToOffset() {
 
     @Override
     public String toString() {
-        final StringBuilder b = new StringBuilder();
-        b.append("[VCFHeader:");
-        for ( final VCFHeaderLine line : mMetaData )
-            b.append("\n\t").append(line);
-        return b.append("\n]").toString();
+        return mMetaData.toString();
     }
+
+    /**
+     * Obtain a valid fileformat/version line for the requestedVersion
+     * @param requestedVersion the version for which a version line should be obtained
+     * @return the version line
+     */
+    public static VCFHeaderLine makeHeaderVersionLine(final VCFHeaderVersion requestedVersion) {
+        return new VCFHeaderLine(requestedVersion.getFormatString(), requestedVersion.getVersionString());
+    }
+
+    /**
+     * Obtain a VCFHeaderLine set containing only a fileformat/version line for the requestedVersion
+     * @param requestedVersion the version for which a version line should be obtained
+     * @return a new, mutable LinkedHashSet containing only the fileformat/version line for the requestedVersion
+     */
+    public static Set<VCFHeaderLine> makeHeaderVersionLineSet(final VCFHeaderVersion requestedVersion) {
+        return new LinkedHashSet<>(Arrays.asList(VCFHeader.makeHeaderVersionLine(requestedVersion)));
+    }
+
+    @Override
+    public boolean equals(final Object o) {
+        if (this == o) return true;
+        if (o == null || getClass() != o.getClass()) return false;
+
+        final VCFHeader vcfHeader = (VCFHeader) o;
+
+        if (samplesWereAlreadySorted != vcfHeader.samplesWereAlreadySorted) return false;
+        if (writeEngineHeaders != vcfHeader.writeEngineHeaders) return false;
+        if (writeCommandLine != vcfHeader.writeCommandLine) return false;
+        if (vcfHeaderVersion != vcfHeader.vcfHeaderVersion) return false;
+        if (!mMetaData.equals(vcfHeader.mMetaData)) return false;
+        if (mGenotypeSampleNames != null ? !mGenotypeSampleNames.equals(vcfHeader.mGenotypeSampleNames) :
+                vcfHeader.mGenotypeSampleNames != null)
+            return false;
+        if (sampleNamesInOrder != null ? !sampleNamesInOrder.equals(vcfHeader.sampleNamesInOrder) :
+                vcfHeader.sampleNamesInOrder != null)
+            return false;
+        return sampleNameToOffset != null ? sampleNameToOffset.equals(vcfHeader.sampleNameToOffset) :
+                vcfHeader.sampleNameToOffset == null;
+    }
+
+    @Override
+    public int hashCode() {
+        int result = vcfHeaderVersion.hashCode();
+        result = 31 * result + mMetaData.hashCode();
+        result = 31 * result + (mGenotypeSampleNames != null ? mGenotypeSampleNames.hashCode() : 0);
+        result = 31 * result + (samplesWereAlreadySorted ? 1 : 0);
+        result = 31 * result + (sampleNamesInOrder != null ? sampleNamesInOrder.hashCode() : 0);
+        result = 31 * result + (sampleNameToOffset != null ? sampleNameToOffset.hashCode() : 0);
+        result = 31 * result + (writeEngineHeaders ? 1 : 0);
+        result = 31 * result + (writeCommandLine ? 1 : 0);
+        return result;
+    }
+
+    /**
+     * Establish the version for this header using the (required) ##fileformat metadata line in the metadata list.
+     * @throws TribbleException if no ##fileformat line is included in the metadata lines
+     */
+    private VCFHeaderVersion initializeHeaderVersion() {
+        final VCFHeaderVersion metaDataVersion = mMetaData.getVCFVersion();
+        if (metaDataVersion == null) {
+            //we dont relax this even if VCFUtils.getStrictVCFVersionValidation() == false, since that
+            //would confound subsequent header version management
+            throw new TribbleException("The VCFHeader metadata must include a ##fileformat (version) header line");
+        }
+        return metaDataVersion;
+    }
+
+    /** Rejects a move to an older version; on a move to a newer one, revalidates all existing lines. */
+    private void validateVersionTransition(
+            final VCFHeaderVersion previousVersion, final VCFHeaderVersion newVersion) {
+        final int compareTo = newVersion.compareTo(previousVersion);
+        if (compareTo < 0) {
+            // We only allow going forward to a newer version, not backwards to an older one, since there
+            // is really no way to validate old header lines (pre vcfV4.2). The only way to obtain a
+            // header with an old version is for the header to be created with the old version from
+            // the start.
+            throw new TribbleException(String.format(
+                    "When changing a header version, the new header version %s must be >= the previous version %s",
+                    newVersion,
+                    previousVersion));
+        } else if (compareTo > 0) {
+            logger.debug(() -> String.format("Updating VCFHeader version from %s to %s",
+                    previousVersion.getVersionString(),
+                    newVersion.getVersionString()));
+
+            // the version moved forward, so validate ALL of the existing lines in the list to ensure
+            // that the transition is valid
+            mMetaData.validateMetaDataLines(newVersion);
+        }
+    }
+
 }
diff --git a/src/main/java/htsjdk/variant/vcf/VCFHeaderLine.java b/src/main/java/htsjdk/variant/vcf/VCFHeaderLine.java
index 0d07a83078..94a3a0849e 100644
--- a/src/main/java/htsjdk/variant/vcf/VCFHeaderLine.java
+++ b/src/main/java/htsjdk/variant/vcf/VCFHeaderLine.java
@@ -26,28 +26,23 @@
 package htsjdk.variant.vcf;
 
 import htsjdk.tribble.TribbleException;
+import htsjdk.utils.ValidationUtils;
 
 import java.io.Serializable;
 import java.util.Map;
-
+import java.util.Optional;
 
 /**
- * @author ebanks
- *         <p>
- *         Class VCFHeaderLine
- *         </p>
- *         <p>
- *         A class representing a key=value entry in the VCF header
- *         </p>
+ * <p> A class representing a key=value entry in the VCF header, and the base class for structured header lines.
+ * Header lines are immutable, and derived classes should maintain immutability.
+ * </p>
  */
 public class VCFHeaderLine implements Comparable, Serializable {
     public static final long serialVersionUID = 1L;
 
-    protected static final boolean ALLOW_UNBOUND_DESCRIPTIONS = true;
-    protected static final String UNBOUND_DESCRIPTION = "Not provided in original VCF header";
-
-    private String mKey = null;
-    private String mValue = null;
+    // immutable - we don't want to let the hash value change
+    private final String mKey;
+    private final String mValue;
 
     /**
      * create a VCF header line
@@ -56,14 +51,9 @@ public class VCFHeaderLine implements Comparable, Serializable {
      * @param value   the value for this header line
      */
     public VCFHeaderLine(String key, String value) {
-        if ( key == null )
-            throw new IllegalArgumentException("VCFHeaderLine: key cannot be null");
-        if ( key.contains("<") || key.contains(">") )
-            throw new IllegalArgumentException("VCFHeaderLine: key cannot contain angle brackets");
-        if ( key.contains("=") )
-            throw new IllegalArgumentException("VCFHeaderLine: key cannot contain an equals sign");
         mKey = key;
         mValue = value;
+        validate();
     }
 
     /**
@@ -76,16 +66,97 @@ public String getKey() {
     }
 
     /**
-     * Get the value
+     * Get the value. May be null.
      *
-     * @return the value
+     * @return the value. may be null (for subclass implementations that use structured values)
      */
     public String getValue() {
         return mValue;
     }
 
     /**
-     * By default the header lines won't be added to the dictionary, unless this method will be override (for example in FORMAT, INFO or FILTER header lines)
+     * @return true if this is a structured header line (has a unique ID, and key/value pairs), otherwise false
+     */
+    public boolean isIDHeaderLine() { return false; }
+
+    /**
+     * Return the unique ID for this line. Returns null iff {@link #isIDHeaderLine()} is false.
+     * @return the line's ID, or null if isIDHeaderLine() is false
+     */
+    public String getID() { return null; }
+
+    /**
+     * Validates this header line against {@code vcfTargetVersion}.
+     * Subclasses can override this to provide line type-specific version validation, and the
+     * overrides should also call super.getValidationFailure to allow each class in the class hierarchy
+     * to do class-level validation.
+     *
+     * @return Optional containing a {@link VCFValidationFailure} describing validation failure if this
+     * line fails validation, otherwise Optional.empty().
+     */
+    public Optional<VCFValidationFailure<VCFHeaderLine>> getValidationFailure(final VCFHeaderVersion vcfTargetVersion) {
+        // If this header line is itself a fileformat/version line,
+        // make sure it doesn't clash with the requested vcfTargetVersion.
+        if (VCFHeaderVersion.isFormatString(getKey())) {
+            if (!vcfTargetVersion.getFormatString().equals(getKey()) ||
+                !vcfTargetVersion.getVersionString().equals(getValue())
+            ) {
+                return Optional.of(new VCFValidationFailure<>(
+                        vcfTargetVersion,
+                        this,
+                        String.format("The target version (%s) is incompatible with the header line's content.",
+                                vcfTargetVersion)));
+            }
+        } else if (getKey().equals(VCFConstants.PEDIGREE_HEADER_KEY)) {
+            // previous to vcf4.3, PEDIGREE header lines are not modeled as VCFPedigreeHeaderLine because they
+            // were not structured header lines (had no ID), so we need to check HERE to see if an attempt is
+            // being made to use one of those old-style pedigree lines in a newer-versioned header, and reject
+            // it if so
+            if (vcfTargetVersion.isAtLeastAsRecentAs(VCFHeaderVersion.VCF4_3) && ! (this instanceof VCFPedigreeHeaderLine)) {
+                return Optional.of(new VCFValidationFailure<>(
+                        vcfTargetVersion,
+                        this,
+                        String.format("A pedigree line with no ID cannot be merged with version %s", vcfTargetVersion)));
+            }
+        }
+
+        return Optional.empty();
+    }
+
+    /**
+     * Validate that the header line conforms to {@code vcfTargetVersion}.
+     * @param vcfTargetVersion the VCF version to validate this header line against
+     * @throws TribbleException.VersionValidationFailure if this header line fails to conform
+     */
+    public void validateForVersion(final VCFHeaderVersion vcfTargetVersion) {
+        final Optional<VCFValidationFailure<VCFHeaderLine>> error = getValidationFailure(vcfTargetVersion);
+        if (error.isPresent()) {
+            throw new TribbleException.VersionValidationFailure(error.get().getSourceMessage());
+        }
+    }
+
+    /**
+     * Validate a string that is to be used as a unique id or key field.
+     */
+    protected static void validateKeyOrID(final String keyString, final String sourceName) {
+        ValidationUtils.nonNull(sourceName);
+        if (keyString == null) {
+            throw new TribbleException(
+                    String.format("VCFHeaderLine: %s cannot be null or empty", sourceName));
+        }
+        if ( keyString.contains("<") || keyString.contains(">") ) {
+            throw new TribbleException(
+                    String.format("VCFHeaderLine: %s cannot contain angle brackets", sourceName));
+        }
+        if ( keyString.contains("=") ) {
+            throw new TribbleException(
+                    String.format("VCFHeaderLine: %s cannot contain an equals sign", sourceName));
+        }
+    }
+
+    /**
+     * By default the header lines won't be added to the BCF dictionary, unless this method is overridden
+     * (for example in FORMAT, INFO or FILTER header lines).
      *
      * @return false
      */
@@ -141,10 +212,11 @@ public static boolean isHeaderLine(String line) {
     }
 
     /**
-     * create a string of a mapping pair for the target VCF version
+     * create a string of a mapping pair
      * @param keyValues a mapping of the key-&gt;value pairs to output
      * @return a string, correctly formatted
      */
+    @Deprecated // starting after version 2.24.1
     public static String toStringEncoding(Map<String, ? extends Object> keyValues) {
         StringBuilder builder = new StringBuilder();
         builder.append('<');
@@ -167,6 +239,13 @@ public static String toStringEncoding(Map<String, ? extends Object> keyValues) {
         return builder.toString();
     }
 
+    /**
+     * Validate the state of this header line. Require the key be valid as an "id".
+     */
+    private void validate() {
+        validateKeyOrID(mKey, "key");
+    }
+
     private static String escapeQuotes(final String value) {
         // java escaping in a string literal makes this harder to read than it should be
         // without string literal escaping and quoting the regex would be: replaceAll( ([^\])" , $1\" )
diff --git a/src/main/java/htsjdk/variant/vcf/VCFHeaderLineCount.java b/src/main/java/htsjdk/variant/vcf/VCFHeaderLineCount.java
index 080153a990..24195c73d3 100644
--- a/src/main/java/htsjdk/variant/vcf/VCFHeaderLineCount.java
+++ b/src/main/java/htsjdk/variant/vcf/VCFHeaderLineCount.java
@@ -25,9 +25,78 @@
 
 package htsjdk.variant.vcf;
 
+import htsjdk.utils.ValidationUtils;
+
 /**
  * the count encodings we use for fields in VCF header lines
  */
 public enum VCFHeaderLineCount {
     INTEGER, A, R, G, UNBOUNDED;
+
+    // A default int value used to represent an integral count value (not a count *type*) when the
+    // actual count is derived and not a fixed integer (i.e., when isFixedCount()==false)
+    public static final int VARIABLE_COUNT = -1;
+
+    /** Return true if this line uses a fixed (integer) count. **/
+    public boolean isFixedCount() { return this.equals(INTEGER); }
+
+    /**
+     * Decode a header line count string and return the corresponding VCFHeaderLineCount enum value.
+     * If the value is not recognized as a valid constant, assume the string represents a fixed, numeric
+     * value, and return Integer. The caller should convert and validate the actual value.
+     *
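+     * @param vcfVersion the VCF version, which determines the "unbounded" encoding that is recognized
+     * @param countTypeString the count string to decode
+     * @return the matching count type, or {@code INTEGER} if the string is not a recognized encoding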
+     */
+    protected static VCFHeaderLineCount decode(final VCFHeaderVersion vcfVersion, final String countTypeString) {
+        ValidationUtils.nonNull(vcfVersion);
+        ValidationUtils.nonNull(countTypeString);
+
+        if (countTypeString.equals(VCFConstants.PER_ALTERNATE_COUNT)) {
+            return A;
+        } else if (countTypeString.equals(VCFConstants.PER_ALLELE_COUNT)) {
+            return R;
+        } else if (countTypeString.equals(VCFConstants.PER_GENOTYPE_COUNT)) {
+            return G;
+        } else if (
+                (vcfVersion.isAtLeastAsRecentAs(VCFHeaderVersion.VCF4_0) && countTypeString.equals(VCFConstants.UNBOUNDED_ENCODING_v4)) ||
+                (!vcfVersion.isAtLeastAsRecentAs(VCFHeaderVersion.VCF4_0) && countTypeString.equals(VCFConstants.UNBOUNDED_ENCODING_v3))) {
+            return VCFHeaderLineCount.UNBOUNDED;
+        } else {
+            return VCFHeaderLineCount.INTEGER; // assume integer
+        }
+    }
+
+    /**
+     * Encode a count type as a string suitable for serialization to a VCF header. Note this is
+     * not version aware and defaults to VCFv4 format.
+     *
+     * @param actualCount Must be the special value {@code VARIABLE_COUNT} unless this object is {@code VCFHeaderLineCount.INTEGER}.
+     * @return String encoding of this enum, or the {@code actualCount} if the type of this count
+     * is VCFHeaderLineCount.INTEGER.
+     *
+     * @throws IllegalArgumentException if {@code actualCount} is not the special value {@code VARIABLE_COUNT} and this
+     * is not the {@code VCFHeaderLineCount.INTEGER} enum object.
+     */
+    public String encode(final int actualCount) {
+        if (this != INTEGER && actualCount != VARIABLE_COUNT) {
+            // Should only supply an actualCount if the count type == INTEGER
+            throw new IllegalArgumentException("Inconsistent header line number encoding request");
+        }
+        switch (this) {
+            case A:
+                return VCFConstants.PER_ALTERNATE_COUNT;
+            case R:
+                return VCFConstants.PER_ALLELE_COUNT;
+            case G:
+                return VCFConstants.PER_GENOTYPE_COUNT;
+            case UNBOUNDED:
+                return VCFConstants.UNBOUNDED_ENCODING_v4;
+            case INTEGER:
+                return Integer.toString(actualCount);
+        }
+        throw new IllegalStateException("Unexpected VCFHeaderLineCount enum value");
+    }
+
 }
diff --git a/src/main/java/htsjdk/variant/vcf/VCFHeaderLineTranslator.java b/src/main/java/htsjdk/variant/vcf/VCFHeaderLineTranslator.java
index 6c83574fee..a22ecd2102 100644
--- a/src/main/java/htsjdk/variant/vcf/VCFHeaderLineTranslator.java
+++ b/src/main/java/htsjdk/variant/vcf/VCFHeaderLineTranslator.java
@@ -34,7 +34,7 @@
 import java.util.Map;
 
 /**
- * A class for translating between vcf header versions
+ * A class for translating between vcf header versions and corresponding header line parsers.
  */
 public class VCFHeaderLineTranslator {
     private static final Map<VCFHeaderVersion,VCFLineParser> mapping;
@@ -50,57 +50,57 @@ public class VCFHeaderLineTranslator {
         mapping = Collections.unmodifiableMap(map);
     }
 
+    /**
+     * Parse a VCFHeaderLine for the given version.
+     *
+     * @param version VCFHeaderVersion of the header line
+     * @param valueLine the header line string
+     * @param expectedTagOrder List of expected tags (interpreted differently by the VCF3 and VCF4 parsers).
+     * @return a mapping of the tags parsed out. Note that the order of attributes is significant (ID must be
+     * first) and this should return a LinkedHashMap in order to preserve attribute order.
+     */
     public static Map<String,String> parseLine(VCFHeaderVersion version, String valueLine, List<String> expectedTagOrder) {
-        return parseLine(version, valueLine, expectedTagOrder, Collections.emptyList());
-    }
-    
-    public static Map<String,String> parseLine(VCFHeaderVersion version, String valueLine, List<String> expectedTagOrder, List<String> recommendedTags) {
-        return mapping.get(version).parseLine(valueLine, expectedTagOrder, recommendedTags);
+        return mapping.get(version).parseLine(valueLine, expectedTagOrder);
     }
 }
 
-
+/**
+ * Parse a VCFHeaderLine.
+ */
 interface VCFLineParser {
     /**
      * parse a VCF line
-     * 
-     * @see #parseLine(String, List, List) VCFv4.2+ recommended tags support
-     * 
-     * @param valueLine the line
-     * @param expectedTagOrder List of expected tags
-     * @return a mapping of the tags parsed out
-     */
-    default Map<String,String> parseLine(String valueLine, List<String> expectedTagOrder) {
-        return parseLine(valueLine, expectedTagOrder, Collections.emptyList());
-    }
-
-    /**
-     * parse a VCF line
-     * 
-     * The recommended tags were introduced in VCFv4.2. 
-     * Older implementations may throw an exception when the recommendedTags field is not empty.
-     * 
-     * We use a list to represent tags as we assume there will be a very small amount of them,
-     * so using a {@code Set} is overhead.
-     * 
+     *
+     * Implementations may interpret {@code expectedTagOrder} differently.
+     *
      * @param valueLine the line
      * @param expectedTagOrder List of expected tags
-     * @param recommendedTags List of tags that may or may not be present. Use an empty list instead of NULL for none.
      * @return a mapping of the tags parsed out
      */
-    Map<String,String> parseLine(String valueLine, List<String> expectedTagOrder, List<String> recommendedTags);
+    Map<String,String> parseLine(String valueLine, List<String> expectedTagOrder);
 }
 
-
 /**
  * a class that handles the to and from disk for VCF 4 lines
  */
 class VCF4Parser implements VCFLineParser {
-    
+
+    /**
+     * Parse a VCFHeaderLine. The expectedTagOrder list prescribes the order in which tags should appear, but
+     * all tags are treated as optional. Additional tags are allowed after the expected tags, and may appear in
+     * any order. It is the caller's responsibility to validate that all required tags are present and that
+     * any additional "optional" tags are valid.
+     *
+     * @param valueLine the header line string
+     * @param expectedTagOrder List of tags that are required to appear in the order they're expected. Additional
+     *                         "extra" tags are allowed after the tags in this list, and must be validated by
+     *                         the caller.
+     * @return a mapping of all tags parsed out
+     */
     @Override
-    public Map<String, String> parseLine(String valueLine, List<String> expectedTagOrder, List<String> recommendedTags) {
+    public Map<String, String> parseLine(String valueLine, List<String> expectedTagOrder) {
         // our return map
-        Map<String, String> ret = new LinkedHashMap<String, String>();
+        Map<String, String> ret = new LinkedHashMap<>();
 
         // a builder to store up characters as we go
         StringBuilder builder = new StringBuilder();
@@ -159,28 +159,23 @@ public Map<String, String> parseLine(String valueLine, List<String> expectedTagO
             throw new TribbleException.InvalidHeader("Unclosed quote in header line value " + valueLine);
         }
 
-        // validate the tags against the expected list
-        index = 0;
+        // Validate the order of all discovered tags against expectedTagOrder. All tags are treated as
+        // "optional". Succeeding does not mean that all expected tags in the list were seen. Also, all
+        // structured header lines can have "extra" tags, with no order specified, so additional tags
+        // are tolerated.
         if ( expectedTagOrder != null ) {
-            if (ret.keySet().isEmpty() && !expectedTagOrder.isEmpty()) {
-                throw new TribbleException.InvalidHeader("Header with no tags is not supported when there are expected tags in line " + valueLine);
-            }
-            for ( String str : ret.keySet() ) {
-                if (index < expectedTagOrder.size()) {
-                    if (!expectedTagOrder.get(index).equals(str)) {
-                        if (expectedTagOrder.contains(str)) {
-                            throw new TribbleException.InvalidHeader("Tag " + str + " in wrong order (was #" + (index+1) + ", expected #" + (expectedTagOrder.indexOf(str)+1) + ") in line " + valueLine);
-                        } else if (recommendedTags.contains(str)) {
-                            throw new TribbleException.InvalidHeader("Recommended tag " + str + " must be listed after all expected tags in line " + valueLine);
-                        }
-                        else {
-                            throw new TribbleException.InvalidHeader("Unexpected tag " + str + " in line " + valueLine);
-                        }
-                    }
+            index = 0;
+            for (String str : ret.keySet()) {
+                if (index >= expectedTagOrder.size()) {
+                    break; // done - end of expectedTagOrder list
+                } else if (!expectedTagOrder.get(index).equals(str)) {
+                    throw new TribbleException.InvalidHeader(
+                            String.format("Unexpected tag or tag order for tag \"%s\" in line %s", str, valueLine));
                 }
                 index++;
             }
         }
+
         return ret;
     }
 }
@@ -188,13 +183,9 @@ public Map<String, String> parseLine(String valueLine, List<String> expectedTagO
 class VCF3Parser implements VCFLineParser {
 
     @Override
-    public Map<String, String> parseLine(String valueLine, List<String> expectedTagOrder, List<String> recommendedTags) {
-        if (!recommendedTags.isEmpty()) {
-            throw new TribbleException.InternalCodecException("Recommended tags are not allowed in VCFv3.x");
-        }
-        
+    public Map<String, String> parseLine(String valueLine, List<String> expectedTagOrder) {
         // our return map
-        Map<String, String> ret = new LinkedHashMap<String, String>();
+        Map<String, String> ret = new LinkedHashMap<>();
 
         // a builder to store up characters as we go
         StringBuilder builder = new StringBuilder();
@@ -211,20 +202,34 @@ public Map<String, String> parseLine(String valueLine, List<String> expectedTagO
         for (char c: valueLine.toCharArray()) {
             switch (c) {
                 case ('\"') : inQuote = !inQuote; break; // a quote means we ignore ',' in our strings, keep track of it
-                case (',') : if (!inQuote) { ret.put(expectedTagOrder.get(tagIndex++),builder.toString()); builder = new StringBuilder(); break; } // drop the current key value to the return map
+                case (',') :
+                    if (!inQuote) {
+                        ret.put(expectedTagOrder.get(tagIndex++),builder.toString());
+                        builder = new StringBuilder();
+                        break;
+                    } // drop the current key value to the return map
                 default: builder.append(c); // otherwise simply append to the current string
             }
             index++;
         }
         ret.put(expectedTagOrder.get(tagIndex++),builder.toString());
         
-        // validate the tags against the expected list
+        // Validate that:
+        //      we have no more tags than are expected
+        //      the ones we have are in the expected list
+        //      they appear in the same order as in the expected list
+        // Values are assigned to tags by position, so the value count must equal the expected tag count
+        //
         index = 0;
-        if (tagIndex != expectedTagOrder.size()) throw new IllegalArgumentException("Unexpected tag count " + tagIndex + ", we expected " + expectedTagOrder.size());
+        if (tagIndex != expectedTagOrder.size()) {
+            throw new IllegalArgumentException("Unexpected tag count " + tagIndex + ", we expected " + expectedTagOrder.size());
+        }
         for (String str : ret.keySet()){
-            if (!expectedTagOrder.get(index).equals(str)) throw new IllegalArgumentException("Unexpected tag " + str + " in string " + valueLine);
+            if (!expectedTagOrder.get(index).equals(str)) {
+                throw new IllegalArgumentException("Unexpected tag " + str + " in string " + valueLine);
+            }
             index++;
         }
         return ret;
     }
-}
+}
\ No newline at end of file
diff --git a/src/main/java/htsjdk/variant/vcf/VCFHeaderLineType.java b/src/main/java/htsjdk/variant/vcf/VCFHeaderLineType.java
index 785449de89..88432f0b18 100644
--- a/src/main/java/htsjdk/variant/vcf/VCFHeaderLineType.java
+++ b/src/main/java/htsjdk/variant/vcf/VCFHeaderLineType.java
@@ -25,9 +25,37 @@
 
 package htsjdk.variant.vcf;
 
+import htsjdk.utils.ValidationUtils;
+
 /**
  * the type encodings we use for fields in VCF header lines
  */
 public enum VCFHeaderLineType {
-    Integer, Float, String, Character, Flag;
+    Integer,
+    Float,
+    String,
+    Character,
+    Flag;
+
+    /**
+     * Decode a header line type string and return the corresponding VCFHeaderLineType enum value.
+     * The string must exactly match the name of one of the enum constants; otherwise
+     * {@code valueOf} throws an IllegalArgumentException.
+     *
+     * @param lineTypeString the type string to decode
+     * @return VCFHeaderLineType for {@code lineTypeString}
+     */
+    protected static VCFHeaderLineType decode(final String lineTypeString) {
+        ValidationUtils.nonNull(lineTypeString);
+        return VCFHeaderLineType.valueOf(lineTypeString);
+    }
+
+    /**
+     * Encode this line type as a string suitable for serialization to a VCF header. Note this is
+     * not version specific and defaults to VCFv42.
+     *
+     * The serialized encoding is the simple name of the enum constant
+     * @return string encoding of this line type
+     */
+    String encode() { return this.toString(); }
 }
diff --git a/src/main/java/htsjdk/variant/vcf/VCFHeaderMerger.java b/src/main/java/htsjdk/variant/vcf/VCFHeaderMerger.java
new file mode 100644
index 0000000000..becbf64eb1
--- /dev/null
+++ b/src/main/java/htsjdk/variant/vcf/VCFHeaderMerger.java
@@ -0,0 +1,286 @@
+package htsjdk.variant.vcf;
+
+import htsjdk.samtools.SAMSequenceDictionary;
+import htsjdk.samtools.SAMSequenceDictionaryUtils;
+import htsjdk.samtools.SAMSequenceRecord;
+import htsjdk.tribble.TribbleException;
+import htsjdk.utils.ValidationUtils;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashSet;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+/**
+ * Class used to produce a set of header lines resulting from the merger of one or more input VCFHeaders.
+ * <p>
+ * The resulting lines have a version line matching the highest version of any of the input headers.
+ * <p>
+ * The headers to be merged must conform to certain requirements:
+ * Some header sets cannot be merged, and will result in an exception being thrown:
+ * <ul>
+ * <li> Headers must have a version that is at least VCF v4.2. Headers from older versions may not be merged (note
+ * that older headers that are read from input files are automatically "converted" to VCF v4.2 by VCFCodec. See
+ * {@link AbstractVCFCodec#setVCFHeader(VCFHeader)}.)</li>
+ * <li> any header that contains a header line that doesn't conform to the resulting (highest) version of any
+ * header in the merge list </li>
+ * <li> any header that has a sequence dictionary that is incompatible with any other merged header's
+ * sequence dictionary. All headers must either share a common sequence dictionary, or have a sequence dictionary
+ * that is a subset of the common sequence dictionary that is taken from the remaining headers. </li>
+ * </ul>
+ */
+public class VCFHeaderMerger {
+
+    /**
+     * Merge all header lines in a set of headers into a single set of header lines. The resulting set includes
+     * all unique lines that appeared in any header; duplicates of lines are excluded from the result set. Equivalent
+     * header lines are reduced to a single representative header line. The resulting set contains a ##fileformat
+     * version line for the newest version seen in any of the headers provided in the input header collection,
+     * and all lines in the merged set are compatible with that version.
+     *
+     * @param headers the headers to merge
+     * @param emitWarnings true if warnings should be emitted
+     * @return a set of merged VCFHeaderLines
+     * @throws TribbleException if any header has a version < VCFv4.2, or if any header line in any
+     * input header is not compatible with the newest version selected from amongst all headers provided, or if any
+     * header has a sequence dictionary that is incompatible with any other header's sequence dictionary
+     */
+    public static Set<VCFHeaderLine> getMergedHeaderLines(final Collection<VCFHeader> headers, final boolean emitWarnings) {
+        ValidationUtils.nonNull(headers, "headers");
+        ValidationUtils.validateArg(!headers.isEmpty(), "headers collection must be non empty");
+
+        // use a VCFMetaDataLines object to accumulate header lines
+        final VCFMetaDataLines mergedMetaData = new VCFMetaDataLines();
+        final HeaderMergeConflictWarnings conflictWarner = new HeaderMergeConflictWarnings(emitWarnings);
+
+        final VCFHeaderVersion newestVersion = getNewestHeaderVersion(headers);
+        final SAMSequenceDictionary commonSequenceDictionary = getCommonSequenceDictionaryOrThrow(headers, conflictWarner);
+
+        for (final VCFHeader sourceHeader : headers) {
+            for (final VCFHeaderLine line : sourceHeader.getMetaDataInSortedOrder()) {
+                final String key = line.getKey();
+                if (VCFHeaderVersion.isFormatString(key) || key.equals(VCFHeader.CONTIG_KEY)) {
+                    // drop all version and contig lines, and at the end we'll set the version and
+                    // commonSequenceDictionary
+                    continue;
+                }
+
+                // Structured header lines are only considered equal if they have identical key, id, and
+                // attribute/value pairs, but for merging we need to reduce lines that have the same key/id pairs
+                // but different attributes to a single line. So use the more permissive "findEquivalentHeaderLine"
+                // to detect equivalent lines, and delegate to the individual header line implementations to do the
+                // smart reconciliation.
+                final VCFHeaderLine other = mergedMetaData.findEquivalentHeaderLine(line);
+                if (other != null && !line.equals(other)) {
+                    if (key.equals(VCFConstants.FORMAT_HEADER_KEY)) {
+                        // Delegate to the FORMAT line resolver
+                        mergedMetaData.addMetaDataLine(
+                                VCFFormatHeaderLine.getMergedFormatHeaderLine(
+                                    (VCFFormatHeaderLine) line,
+                                    (VCFFormatHeaderLine) other,
+                                    conflictWarner)
+                        );
+                    } else if (key.equals(VCFConstants.INFO_HEADER_KEY)) {
+                        // Delegate to the INFO line resolver
+                        mergedMetaData.addMetaDataLine(
+                                VCFInfoHeaderLine.getMergedInfoHeaderLine(
+                                    (VCFInfoHeaderLine) line,
+                                    (VCFInfoHeaderLine) other,
+                                    conflictWarner)
+                        );
+                    } else if (line.isIDHeaderLine()) {
+                        // equivalent ID header line, but not a compound(format/info) line, and also not strictly equal
+                        // to the existing line: preserve the existing line (this *may* drop attributes/values if the
+                        // dropped line has additional attributes)
+                        conflictWarner.warn(
+                                String.format("Dropping duplicate header line %s during header merge, retaining equivalent line %s",
+                                        line,
+                                        other));
+                    } else {
+                        // a non-structured line with a duplicate key of an existing line, but a different value,
+                        // retain the new line in addition to the old one
+                        mergedMetaData.addMetaDataLine(line);
+                    }
+                } else {
+                    mergedMetaData.addMetaDataLine(line);
+                }
+            }
+        }
+        return makeMergedMetaDataSet(mergedMetaData, newestVersion, commonSequenceDictionary, conflictWarner);
+    }
+
+    // Create the final set of all of our merged header lines. Start with the version line for the new
+    // version, add in the lines from the merged set, use the resulting list to create a header, add the common
+    // sequence dictionary to that, and then extract and return the resulting set of lines in sorted order
+    private static Set<VCFHeaderLine> makeMergedMetaDataSet(
+            final VCFMetaDataLines mergedMetaData,
+            final VCFHeaderVersion newestVersion,
+            final SAMSequenceDictionary commonSequenceDictionary,
+            final HeaderMergeConflictWarnings conflictWarner) {
+
+        if (conflictWarner.emitWarnings) {
+            mergedMetaData.getValidationErrors(newestVersion)
+                    .forEach(validationError -> conflictWarner.warn(validationError.getFailureMessage()));
+        }
+
+        final Set<VCFHeaderLine> mergedLines = VCFHeader.makeHeaderVersionLineSet(newestVersion);
+        mergedLines.addAll(mergedMetaData.getMetaDataInInputOrder());
+        final VCFHeader mergedHeader = new VCFHeader(mergedLines, Collections.emptySet());
+        if (commonSequenceDictionary != null) {
+            mergedHeader.setSequenceDictionary(commonSequenceDictionary);
+        } else {
+            conflictWarner.warn(
+                    "The header lines resulting from a header merge contain no contig lines because none " +
+                            "of the input headers contains a sequence dictionary.");
+        }
+
+        return new LinkedHashSet<>(mergedHeader.getMetaDataInSortedOrder());
+    }
+
+    // Find the newest version of any header in the input set, and return that to use as the target version for
+    // the merged lines. Throws if any header is older than VCF v4.2. Versions are ranked by enum declaration order.
+    private static VCFHeaderVersion getNewestHeaderVersion(final Collection<VCFHeader> vcfHeaders) {
+        VCFHeaderVersion newestVersion = null;
+        for (final VCFHeader header : vcfHeaders) {
+            final VCFHeaderVersion vcfVersion = header.getVCFHeaderVersion();
+            if (!vcfVersion.isAtLeastAsRecentAs(VCFHeaderVersion.VCF4_2)) {
+                throw new TribbleException(String.format(
+                        "Cannot merge a VCFHeader with version (%s) that is older than version %s",
+                        vcfVersion, VCFHeaderVersion.VCF4_2));
+            }
+            if (newestVersion == null || vcfVersion.compareTo(newestVersion) > 0) {
+                newestVersion = vcfVersion;
+            }
+        }
+        return newestVersion;
+    }
+
+    // Create a common sequence dictionary from the set of dictionaries in VCFHeaders. The headers must
+    // either have identical dictionaries, or contain a common superset dictionary where individual dictionaries
+    // contain a dictionary that is subset of that common superset. Otherwise throw.
+    private static SAMSequenceDictionary getCommonSequenceDictionaryOrThrow(
+            final Collection<VCFHeader> headers,
+            final HeaderMergeConflictWarnings conflictWarner) {
+        SAMSequenceDictionary candidateDictionary = null;
+
+        // Because we're doing pairwise comparisons and always selecting the best dictionary as
+        // our running candidate, we need to visit the headers in order of dictionary size
+        // (largest first). This prevents a premature failure where an individual pairwise
+        // comparison erroneously fails because the source is pairwise incompatible with the
+        // running candidate, and the common superset exists but we just haven't seen it yet.
+        final List<VCFHeader> headersByDictionarySize = new ArrayList<>(headers);
+        headersByDictionarySize.sort(((Comparator<VCFHeader>)
+                (hdr1, hdr2) -> Integer.compare(getDictionarySize(hdr1), getDictionarySize(hdr2))).reversed());
+
+        for ( final VCFHeader sourceHeader : headersByDictionarySize ) {
+            final SAMSequenceDictionary sourceDictionary = sourceHeader.getSequenceDictionary();
+            if (sourceDictionary != null) {
+                if (candidateDictionary == null) {
+                    candidateDictionary = sourceDictionary;
+                } else {
+                    // first, compare with checkContigOrdering on
+                    final SAMSequenceDictionaryUtils.SequenceDictionaryCompatibility compatibility =
+                            SAMSequenceDictionaryUtils.compareDictionaries(
+                                    candidateDictionary,
+                                    sourceDictionary,
+                                    true);
+                    switch (compatibility) {
+                        case IDENTICAL: // existing candidateDictionary is identical to sourceDictionary, so keep it
+                        case SUPERSET:  // existing candidateDictionary is a superset of sourceDictionary, so keep it
+                            break;
+
+                        case COMMON_SUBSET: // fall through
+                        case DIFFERENT_INDICES:
+                            // There exists a common subset of contigs, but for merging purposes we have a slightly
+                            // stricter requirement, that one dictionary is a superset of the other. So try the
+                            // comparison again with checkContigOrdering off, in both directions. If one is a
+                            // superset of the other, retain the superset.
+                            if (SAMSequenceDictionaryUtils.SequenceDictionaryCompatibility.SUPERSET ==
+                                    SAMSequenceDictionaryUtils.compareDictionaries(
+                                            candidateDictionary,
+                                            sourceDictionary,
+                                            false)) {
+                                break; // keep our candidate
+                            } else if (SAMSequenceDictionaryUtils.SequenceDictionaryCompatibility.SUPERSET ==
+                                    SAMSequenceDictionaryUtils.compareDictionaries(
+                                            sourceDictionary,
+                                            candidateDictionary,
+                                            false)) {
+                                candidateDictionary = sourceDictionary; // take the sourceDictionary as the new candidate
+                            } else {
+                                // dictionaries are disjoint, and we have no basis to choose a merge order for the
+                                // non-common contigs, so give up
+                                throw new TribbleException(
+                                        createHeaderDictionaryFailureMessage(
+                                                candidateDictionary, sourceHeader, sourceDictionary, compatibility));
+                            }
+                            break;
+
+                        case NO_COMMON_CONTIGS:              // no overlap between dictionaries
+                        case UNEQUAL_COMMON_CONTIGS:         // common subset has contigs that have the same name but different lengths
+                        case NON_CANONICAL_HUMAN_ORDER:      // human reference detected but the order of the contigs is non-standard (lexicographic, for example)
+                        case OUT_OF_ORDER:                   // the two dictionaries overlap but the overlapping contigs occur in different orders
+                        default:
+                            throw new TribbleException(
+                                    createHeaderDictionaryFailureMessage(
+                                            candidateDictionary, sourceHeader, sourceDictionary, compatibility));
+                    }
+                }
+            } else {
+                conflictWarner.warn(
+                        String.format(
+                                "Merging header with no sequence dictionary: %s",
+                                getHeaderFragmentForDisplay(sourceHeader)));
+            }
+        }
+        return candidateDictionary;
+    }
+
+    private static int getDictionarySize(final VCFHeader hdr) {
+        final SAMSequenceDictionary dictionary = hdr.getSequenceDictionary();
+        return dictionary == null ? 0 : dictionary.size(); // a header with no dictionary counts as size 0
+    }
+
+    private static String createHeaderDictionaryFailureMessage(
+            final SAMSequenceDictionary commonSequenceDictionary,
+            final VCFHeader sourceHeader,
+            final SAMSequenceDictionary sourceSequenceDictionary,
+            final SAMSequenceDictionaryUtils.SequenceDictionaryCompatibility failureReason) {
+        // return a nice long message that includes as much of the offending context as is reasonable,
+        // without printing the entire context, since the headers and sequence dictionaries can have
+        // thousands of entries
+        return String.format(
+                "Can't merge VCF headers with incompatible sequence dictionaries, merge failed due to %s:" +
+                        "\n\nHeader dictionary:\n\n%1.2000s\n\nis incompatible with the common dictionary:\n\n%1.2000s\n\n merging VCF header:\n\n%1.2000s\n",
+                failureReason,
+                sourceSequenceDictionary.getSequences().stream().map(SAMSequenceRecord::toString).collect(Collectors.joining("\n")),
+                commonSequenceDictionary.getSequences().stream().map(SAMSequenceRecord::toString).collect(Collectors.joining("\n")),
+                getHeaderFragmentForDisplay(sourceHeader));
+    }
+
+    private static String getHeaderFragmentForDisplay(final VCFHeader sourceHeader) {
+        return sourceHeader.getContigLines().stream().map(VCFContigHeaderLine::toString).collect(Collectors.joining("\n"));
+    }
+
+    /** Only displays a warning if warnings are enabled and an identical warning hasn't been already issued */
+    static final class HeaderMergeConflictWarnings {
+        boolean emitWarnings;
+        final Set<String> alreadyIssued = new HashSet<>();
+
+        protected HeaderMergeConflictWarnings(final boolean emitWarnings ) {
+            this.emitWarnings = emitWarnings;
+        }
+
+        public void warn(final String msg) {
+            // Set.add returns true only the first time a message is seen, so it doubles as the duplicate check
+            if ( emitWarnings && alreadyIssued.add(msg) ) {
+                VCFHeader.logger.warn(msg);
+            }
+        }
+    }
+}
diff --git a/src/main/java/htsjdk/variant/vcf/VCFHeaderVersion.java b/src/main/java/htsjdk/variant/vcf/VCFHeaderVersion.java
index 43f43c65c3..ce5ed1920a 100644
--- a/src/main/java/htsjdk/variant/vcf/VCFHeaderVersion.java
+++ b/src/main/java/htsjdk/variant/vcf/VCFHeaderVersion.java
@@ -26,6 +26,7 @@
 package htsjdk.variant.vcf;
 
 import htsjdk.tribble.TribbleException;
+import htsjdk.utils.ValidationUtils;
 
 /**
  * information that identifies each header version
@@ -47,7 +48,7 @@ public enum VCFHeaderVersion {
      * @param vString the version string
      * @param fString the format string
      */
-    VCFHeaderVersion(String vString, String fString) {
+     VCFHeaderVersion(String vString, String fString) {
         this.versionString = vString;
         this.formatString = fString;
     }
@@ -67,7 +68,8 @@ public static VCFHeaderVersion toHeaderVersion(String version) {
 
     /**
      * are we a valid version string of some type
-     * @param version the version string
+     * @param version the version string (the part of the header line that specifies the version,
+     *               i.e., "VCFv4.3" if the line is "##fileformat=VCFv4.3")
      * @return true if we're valid of some type, false otherwise
      */
     public static boolean isVersionString(String version){
@@ -75,7 +77,8 @@ public static boolean isVersionString(String version){
     }
 
     /**
-     * are we a valid format string for some type
+     * are we a valid format string for some type (the key part of the header line that specifies a version,
+     *               i.e., "fileformat" if the line is "##fileformat=VCFv4.3")
      * @param format the format string
      * @return true if we're valid of some type, false otherwise
      */
@@ -87,8 +90,16 @@ public static boolean isFormatString(String format){
         return false;
     }
 
-    public static VCFHeaderVersion getHeaderVersion(String versionLine) {
-        String[] lineFields = versionLine.split("=");
+    /**
+     * Convert a VCF header version line to the corresponding {@link VCFHeaderVersion}.
+     * @param versionLine a VCF header version line, including the leading meta data indicator,
+     *                    for example "##fileformat=VCFv4.2"
+     * @return the VCFHeaderVersion for this string
+     * @throws TribbleException.InvalidHeader if the string is not a version string for a recognized supported version
+     */
+    public static VCFHeaderVersion fromHeaderVersionLine(final String versionLine) {
+        ValidationUtils.nonNull(versionLine, "version line");
+        final String[] lineFields = versionLine.split("=");
         if ( lineFields.length != 2 || !isFormatString(lineFields[0].substring(2)) )
             throw new TribbleException.InvalidHeader(versionLine + " is not a valid VCF version line");
 
@@ -98,6 +109,13 @@ public static VCFHeaderVersion getHeaderVersion(String versionLine) {
         return toHeaderVersion(lineFields[1]);
     }
 
+    /**
+     * @return A VCF "##fileformat=version" metadata string for the supplied version.
+     */
+    public String toHeaderVersionLine() {
+        return String.format("%s%s=%s", VCFHeader.METADATA_INDICATOR, getFormatString(), getVersionString());
+    }
+
     /**
      * Utility function to clean up a VCF header string
      * 
@@ -118,6 +136,20 @@ public boolean isAtLeastAsRecentAs(final VCFHeaderVersion target) {
         return this.ordinal() >= target.ordinal();
     }
 
+    /**
+     * Determine if two header versions are compatible (header lines from these versions are interchangeable).
+     * For now, the only incompatibility is between V4.3 and any other version. All other version combinations
+     * are compatible.
+     * @param v1 first version to compare
+     * @param v2 second version to compare
+     * @return true if the versions are compatible
+     */
+    //TODO: this method can be removed once this is rebased on the vcf4.3 writing branch
+    public static boolean versionsAreCompatible(final VCFHeaderVersion v1, final VCFHeaderVersion v2) {
+        return v1.equals(v2) ||
+                (!v1.isAtLeastAsRecentAs(VCF4_3) && !v2.isAtLeastAsRecentAs(VCF4_3));
+    }
+
     public String getVersionString() {
         return versionString;
     }
@@ -125,4 +157,5 @@ public String getVersionString() {
     public String getFormatString() {
         return formatString;
     }
+
 }
diff --git a/src/main/java/htsjdk/variant/vcf/VCFInfoHeaderLine.java b/src/main/java/htsjdk/variant/vcf/VCFInfoHeaderLine.java
index 13df34bc87..12a29a1f6c 100644
--- a/src/main/java/htsjdk/variant/vcf/VCFInfoHeaderLine.java
+++ b/src/main/java/htsjdk/variant/vcf/VCFInfoHeaderLine.java
@@ -26,44 +26,90 @@
 package htsjdk.variant.vcf;
 
 
+import htsjdk.samtools.util.Log;
+import htsjdk.utils.ValidationUtils;
+
 /**
- * @author ebanks
  *         <p>
  *         Class VCFInfoHeaderLine
  *         </p>
  *         <p>
- *         A class representing a key=value entry for INFO fields in the VCF header
+ *         A class representing an INFO field in the VCF header
  *         </p>
  */
 public class VCFInfoHeaderLine extends VCFCompoundHeaderLine {
-    public VCFInfoHeaderLine(String name, int count, VCFHeaderLineType type, String description) {
-        super(name, count, type, description, SupportedHeaderLineType.INFO);
-    }
+    private static final long serialVersionUID = 1L;
+
+    protected final static Log logger = Log.getInstance(VCFInfoHeaderLine.class);
 
     public VCFInfoHeaderLine(String name, VCFHeaderLineCount count, VCFHeaderLineType type, String description) {
-        super(name, count, type, description, SupportedHeaderLineType.INFO);
+        super(VCFConstants.INFO_HEADER_KEY, name, count, type, description);
+    }
+
+    public VCFInfoHeaderLine(String name, int count, VCFHeaderLineType type, String description) {
+        super(VCFConstants.INFO_HEADER_KEY, name, count, type, description);
     }
 
     public VCFInfoHeaderLine(String name, int count, VCFHeaderLineType type, String description, String source, String version) {
-        super(name, count, type, description, SupportedHeaderLineType.INFO, source, version);
+        super(VCFConstants.INFO_HEADER_KEY, name, count, type, description);
+        this.updateGenericField(SOURCE_ATTRIBUTE, source);
+        this.updateGenericField(VERSION_ATTRIBUTE, version);
     }
 
     public VCFInfoHeaderLine(String name, VCFHeaderLineCount count, VCFHeaderLineType type, String description, String source, String version) {
-        super(name, count, type, description, SupportedHeaderLineType.INFO, source, version);
+        super(VCFConstants.INFO_HEADER_KEY, name, count, type, description);
+        this.updateGenericField(SOURCE_ATTRIBUTE, source);
+        this.updateGenericField(VERSION_ATTRIBUTE, version);
     }
 
     public VCFInfoHeaderLine(String line, VCFHeaderVersion version) {
-        super(line, version, SupportedHeaderLineType.INFO);
+        super(VCFConstants.INFO_HEADER_KEY,
+              VCFHeaderLineTranslator.parseLine(version, line, expectedTagOrder),
+              version
+        );
+        validateForVersion(version);
     }
 
-    // info fields allow flag values
-    @Override
-    boolean allowFlagValues() {
-        return true;
+    /**
+     * Compare two VCFInfoHeaderLine objects to determine if they have compatible number types, and return a
+     * VCFInfoHeaderLine that represents the result of merging these two lines.
+     *
+     * @param infoLine1 first info line to merge
+     * @param infoLine2 second info line to merge
+     * @param conflictWarner conflict warning emitter
+     * @return a merged VCFInfoHeaderLine
+     */
+    public static VCFInfoHeaderLine getMergedInfoHeaderLine(
+            final VCFInfoHeaderLine infoLine1,
+            final VCFInfoHeaderLine infoLine2,
+            final VCFHeaderMerger.HeaderMergeConflictWarnings conflictWarner)
+    {
+        ValidationUtils. nonNull(infoLine1);
+        ValidationUtils. nonNull(infoLine2);
+        ValidationUtils. nonNull(conflictWarner);
+
+        // delegate to the generic VCFCompoundHeaderLine merger, passing a resolver lambda
+        return VCFCompoundHeaderLine.getMergedCompoundHeaderLine(
+                infoLine1,
+                infoLine2,
+                conflictWarner,
+                (l1, l2) -> new VCFInfoHeaderLine(
+                        l1.getID(),
+                        VCFHeaderLineCount.UNBOUNDED,
+                        l1.getType(),
+                        l1.getDescription())
+        );
     }
 
     @Override
     public boolean shouldBeAddedToDictionary() {
         return true;
     }
+
+    @Override
+    //TODO: integrate this with the existing validateKeyOrID method
+    protected boolean validHeaderID(final String id) {
+        return super.validHeaderID(id) || id.equals(VCFConstants.THOUSAND_GENOMES_KEY);
+    }
+
 }
diff --git a/src/main/java/htsjdk/variant/vcf/VCFMetaDataLines.java b/src/main/java/htsjdk/variant/vcf/VCFMetaDataLines.java
new file mode 100644
index 0000000000..843fdf98cc
--- /dev/null
+++ b/src/main/java/htsjdk/variant/vcf/VCFMetaDataLines.java
@@ -0,0 +1,525 @@
+package htsjdk.variant.vcf;
+
+import htsjdk.annotations.InternalAPI;
+import htsjdk.samtools.util.Log;
+import htsjdk.tribble.TribbleException;
+import htsjdk.utils.ValidationUtils;
+
+import java.io.Serializable;
+import java.util.*;
+import java.util.stream.Collectors;
+
+/**
+ * Class for managing the set of VCFHeaderLines maintained by a VCFHeader.
+ *
+ * Since this class is used to incrementally build up a set of header lines for use with a VCFHeader,
+ * it does not require that the list always contain a fileformat line (it's VCFHeader's job to enforce
+ * that condition).
+ *
+ * This class maintains several invariants:
+ *
+ *  - The list keeps track of the "current version" by tracking whether a version line (a line that
+ *    establishes the VCFHeaderVersion, such as format/fileformat line) is contained in the list. If
+ *    no version line has been added, the list will have a null current version, and contain 0 version
+ *    lines. If a version line has been added, it will have a non-null version, and contain 1 version line.
+ *    If the version line is manually removed, the "current version" is reset to null.
+ *
+ *  - Each contig line that is retained is guaranteed to have a unique contig index. This does
+ *    NOT guarantee that the contig indices are contiguous, or ordered, only that they are unique.
+ *
+ *  - Each structured (ID) line for a given key will have a unique ID. Any new line that has the same
+ *    key/ID pair as an existing line will replace the previous line. (Previous htsjdk implementations
+ *    preserve such lines in a master line list, but would silently drop them from the typed
+ *    lookup lists, so such duplicates would never be returned in queries for typed lines such as
+ *    getInfoHeaderLines(), but would still be serialized on write.)
+ *
+ *    This class does NOT validate that the lines contained are valid for the current version (that is
+ *    the caller's responsibility).
+ */
+//Visible to allow disq Kryo registration for serialization
+@InternalAPI
+final class VCFMetaDataLines implements Serializable {
+    public static final long serialVersionUID = 1L;
+    protected final static Log logger = Log.getInstance(VCFMetaDataLines.class);
+
+    // Master map of all header lines (including file format version lines and contig header lines)
+    private final Map<HeaderLineMapKey, VCFHeaderLine> mMetaData = new LinkedHashMap<>();
+
+    // Map of contig index to contig header line. Must be kept in sync with the mMetaData map
+    private final Map<Integer, VCFContigHeaderLine> contigIndexMap = new LinkedHashMap<>();
+
+    // Current version for lines included in the list. May be null. Must be kept in sync with the
+    // contents of the mMetaData map.
+    private VCFHeaderVersion vcfVersion;
+
+    /**
+     * Add all metadata lines from Set. If a duplicate line is encountered (duplicate content for
+     * unstructured lines with identical keys, or duplicate key/ID pair for structured lines), only
+     * the new line will be retained.
+     *
+     * @param newMetaData Set of lines to be added to the list.
+     * @throws IllegalArgumentException if a version is established or if any line fails validation for that version
+     */
+    public void addMetaDataLines(final Set<VCFHeaderLine> newMetaData) {
+        newMetaData.forEach(this::addMetaDataLine);
+    }
+
+    /**
+     * Add a metadata line to the list. If a duplicate line is encountered (duplicate content for
+     * unstructured lines with identical keys, or duplicate key/ID pair for structured lines), only
+     * the newest line will be retained.
+     *
+     * @param newMetaDataLine header line to attempt to add
+     * @return an existing (equivalent) header line that was replaced by newMetaDataLine, if any,
+     * otherwise null
+     */
+    public VCFHeaderLine addMetaDataLine(final VCFHeaderLine newMetaDataLine) {
+        ValidationUtils.nonNull(newMetaDataLine, "metadata line");
+
+        if (VCFHeaderVersion.isFormatString(newMetaDataLine.getKey())) {
+            // for format lines, we need to remove any existing format line (which may have a different key
+            // than the new line, since old VCF versions use a different format key than modern versions)
+            return updateVersion(newMetaDataLine);
+        } else {
+            // otherwise, see if there is an equivalent line that the new line will replace
+            final HeaderLineMapKey newMapKey = makeKeyForLine(newMetaDataLine);
+            final VCFHeaderLine equivalentMetaDataLine = mMetaData.get(newMapKey);
+            if (equivalentMetaDataLine == null) {
+                createNewMapEntry(newMapKey, newMetaDataLine);
+            } else {
+                replaceExistingMapEntry(newMapKey, equivalentMetaDataLine, newMetaDataLine);
+            }
+            return equivalentMetaDataLine;
+        }
+    }
+
+    /**
+     * Remove a metadata line from the list. This is the inverse of addMetaDataLine - for an unstructured
+     * (non-ID) line it removes the line that has an identical key and value as lineToRemove; for a structured
+     * line it removes the line that has the same key/ID pair as lineToRemove, regardless of other content.
+     *
+     * The removed value is returned, and can be used by the caller to determine if the removed line has a
+     * different value than the line presented.
+     *
+     * @param lineToRemove the header line to remove
+     * @return The actual headerline removed, or null if no equivalent headerline was found to remove
+     */
+    public VCFHeaderLine removeMetaDataLine(final VCFHeaderLine lineToRemove) {
+        final VCFHeaderLine removedLine = mMetaData.remove(makeKeyForLine(lineToRemove));
+        if (removedLine != null) {
+            // only synchronize the dependent version and contig map variables if a line was ACTUALLY removed;
+            // use removedLine (not lineToRemove), since its contig index is the one recorded in contigIndexMap
+            if (VCFHeaderVersion.isFormatString(removedLine.getKey())) {
+                vcfVersion = null;
+            } else if (removedLine.isIDHeaderLine() && removedLine.getKey().equals(VCFHeader.CONTIG_KEY)) {
+                removeFromContigIndexMap((VCFContigHeaderLine) removedLine);
+            }
+        }
+        return removedLine;
+    }
+
+    /**
+     * @return the version for any contained version line. may be null if no file format version
+     * line is in the list
+     */
+    public VCFHeaderVersion getVCFVersion() {
+        return vcfVersion;
+    }
+
+    /**
+     * Return the existing line from the list that is "equivalent" to the query line, where
+     * equivalent is defined as having the same key and value for unstructured header lines, or the
+     * same key and ID, but not necessarily the same value (for structured header lines). The
+     * "equivalent" line returned by this method is not guaranteed to be equal to the queryLine,
+     * in the case where the queryLine is an ID line.
+     *
+     * The method is a way to ask "if the queryLine were added to this object via addMetaDataLine, what
+     * line, if any, would it replace".
+     *
+     * @param queryLine the source line to use to check for equivalents
+     * @return The existing header line of the type/key provided, otherwise NULL.
+     */
+    public VCFHeaderLine findEquivalentHeaderLine(final VCFHeaderLine queryLine) {
+        return mMetaData.get(makeKeyForLine(queryLine));
+    }
+
+    /**
+     * Validate all metadata lines except the file format line against a target version.
+     * Throws {@link TribbleException.VersionValidationFailure} if any line is incompatible with the given version.
+     * @param targetVersion the target version to validate against
+     * @throws TribbleException if any existing line fails to validate against {@code targetVersion}
+     */
+    //TODO: we need to tell users how to resolve the case where this fails due to version validation
+    //i.e, use a custom upgrade tool
+    public void validateMetaDataLines(final VCFHeaderVersion targetVersion) {
+        mMetaData.values().forEach(headerLine -> {
+            if (!VCFHeaderVersion.isFormatString(headerLine.getKey())) {
+                headerLine.validateForVersion(targetVersion);
+            }
+        });
+    }
+
+    /**
+     * Get a list of validation failures for all metadata lines (except the file format line) against
+     * a target version.
+     *
+     * @param targetVersion the target version to validate against
+     * @return a {@code Collection<VCFValidationFailure>} describing the lines that failed to validate against
+     * targetVersion. The collection is empty if validation succeeded for all lines.
+     */
+    public Collection<VCFValidationFailure> getValidationErrors(final VCFHeaderVersion targetVersion) {
+        return mMetaData.values().stream()
+                .filter(line -> !VCFHeaderVersion.isFormatString(line.getKey()))
+                .map(l -> l.getValidationFailure(targetVersion))
+                .filter(o -> o.isPresent())
+                .map(o -> o.get())
+                .collect(Collectors.toList());
+    }
+
+    /**
+     * get the meta data, associated with this header, in input order
+     *
+     * @return a set of the meta data
+     */
+    public Set<VCFHeaderLine> getMetaDataInInputOrder() {
+        return Collections.unmodifiableSet(new LinkedHashSet<>(mMetaData.values()));
+    }
+
+    /**
+     * get the meta data, associated with this header, in SORTED order
+     *
+     * @return a set of the meta data
+     */
+    public Set<VCFHeaderLine> getMetaDataInSortedOrder() {
+        // Use an intermediate TreeSet to get the correct sort order (via the header line
+        // comparators), but return an (unmodifiable) LinkedHashSet because TreeSet has a
+        // `contains` implementation based on comparator equality that can lead to inconsistent
+        // results for header line types like VCFContigHeaderLine that have a compareTo
+        // implementation that is inconsistent with equals.
+        return Collections.unmodifiableSet(new LinkedHashSet<>(new TreeSet<>(mMetaData.values())));
+    }
+
+    /**
+     * @return all of the structured (ID) lines in their original file order, or an empty list if none were present
+     */
+    public List<VCFSimpleHeaderLine> getIDHeaderLines() {
+        return mMetaData.values().stream()
+                .filter(VCFHeaderLine::isIDHeaderLine)
+                .map(hl -> (VCFSimpleHeaderLine) hl)
+                .collect(Collectors.toCollection(ArrayList::new));
+    }
+
+    /**
+     * @return all of the VCF FILTER lines in their original file order, or an empty list if none were present
+     */
+    public List<VCFFilterHeaderLine> getFilterLines() {
+        return mMetaData.values().stream()
+                .filter(hl -> hl.getKey().equals(VCFConstants.FILTER_HEADER_KEY))
+                .map(hl -> (VCFFilterHeaderLine) hl)
+                .collect(Collectors.toCollection(ArrayList::new));
+    }
+
+    /**
+     * @return all of the VCF header lines of the ##contig form in SORTED order, or an empty list if none were present
+     */
+    public List<VCFContigHeaderLine> getContigLines() {
+        return Collections.unmodifiableList(new ArrayList<>(new TreeSet<>(contigIndexMap.values())));
+    }
+
+    /**
+     * Get the VCFHeaderLine(s) whose key equals key.  Returns an empty collection if no such line exists
+     * @param key the VCFHeaderLine key to use to locate the headerline
+     * @return collection of VCFHeaderLine
+     */
+    public Collection<VCFHeaderLine> getMetaDataLines(final String key) {
+        return mMetaData.values().stream()
+                .filter(hl -> hl.getKey().equals(key)).collect(Collectors.toList());
+    }
+
+    /**
+     * Returns the INFO VCFHeaderLine in their original ordering
+     */
+    public Collection<VCFInfoHeaderLine> getInfoHeaderLines() {
+        return mMetaData.values().stream()
+                .filter(hl -> hl.getKey().equals(VCFConstants.INFO_HEADER_KEY))
+                .map(hl -> (VCFInfoHeaderLine) hl)
+                .collect(Collectors.toCollection(ArrayList::new));
+    }
+
+    /**
+     * Returns the FORMAT VCFHeaderLine in their original ordering
+     */
+    public Collection<VCFFormatHeaderLine> getFormatHeaderLines() {
+        return mMetaData.values().stream()
+                .filter(hl -> hl.getKey().equals(VCFConstants.FORMAT_HEADER_KEY))
+                .map(hl -> (VCFFormatHeaderLine) hl)
+                .collect(Collectors.toCollection(ArrayList::new));
+    }
+
+    /**
+     * @param id the id of the requested header line
+     * @return the VCFHeaderLine info line, or null if there is none
+     */
+    public VCFInfoHeaderLine getInfoHeaderLine(final String id) {
+        return (VCFInfoHeaderLine) mMetaData.get(makeKey(VCFConstants.INFO_HEADER_KEY, id));
+    }
+
+    /**
+     * @param id the id of the requested header format line
+     * @return the meta data line, or null if there is none
+     */
+    public VCFFormatHeaderLine getFormatHeaderLine(final String id) {
+        return (VCFFormatHeaderLine) mMetaData.get(makeKey(VCFConstants.FORMAT_HEADER_KEY, id));
+    }
+
+    /**
+     * @param id the id of the requested header line
+     * @return the meta data line, or null if there is none
+     */
+    public VCFFilterHeaderLine getFilterHeaderLine(final String id) {
+        return (VCFFilterHeaderLine) mMetaData.get(makeKey(VCFConstants.FILTER_HEADER_KEY, id));
+    }
+
+    /**
+     * Returns the other VCFHeaderLines in their original ordering, where "other" means any
+     * VCFHeaderLine that is not a contig, info, format or filter header line.
+     */
+    public Collection<VCFHeaderLine> getOtherHeaderLines() {
+        return mMetaData.values().stream().filter(
+            hl ->
+                !hl.getKey().equals(VCFConstants.CONTIG_HEADER_KEY) &&
+                !hl.getKey().equals(VCFConstants.INFO_HEADER_KEY) &&
+                !hl.getKey().equals(VCFConstants.FILTER_HEADER_KEY) &&
+                !hl.getKey().equals(VCFConstants.FORMAT_HEADER_KEY)
+        )
+        .collect(Collectors.toCollection(ArrayList::new));
+    }
+
+    /**
+     * The version/fileformat header line if one exists, otherwise null.
+     * @return The version/fileformat header line if one exists, otherwise null.
+     */
+    public VCFHeaderLine getFileFormatLine() {
+        // find any existing version line(s). since there are multiple possible keys that
+        // represent version lines (old V3 specs used "format" instead of "fileformat")
+        final List<VCFHeaderLine> existingVersionLines = mMetaData.values()
+                .stream()
+                .filter(line -> VCFHeaderVersion.isFormatString(line.getKey()))
+                .collect(Collectors.toList());
+
+        // This class doesn't mandate that the list it maintains always contains a fileformat line
+        // (its VCFHeader's job to maintain that condition for the header).
+        if (!existingVersionLines.isEmpty()) {
+            if (existingVersionLines.size() > 1) {
+                throw new IllegalStateException(
+                        String.format("The metadata lines class contains more than one version line (%s)",
+                                existingVersionLines.stream()
+                                        .map(VCFHeaderLine::toString)
+                                        .collect(Collectors.joining(","))));
+            }
+            return existingVersionLines.get(0);
+        } else {
+            return null;
+        }
+    }
+
+    @Override
+    public String toString() {
+        final StringBuilder b = new StringBuilder();
+        b.append("[VCFMetaDataLines:");
+        for ( final VCFHeaderLine line : mMetaData.values() )
+            b.append("\n\t").append(line);
+        return b.append("\n]").toString();
+    }
+
+    @Override
+    public boolean equals(final Object o) {
+        if (this == o) return true;
+        if (!(o instanceof VCFMetaDataLines)) return false;
+
+        final VCFMetaDataLines that = (VCFMetaDataLines) o;
+
+        return mMetaData.equals(that.mMetaData);
+    }
+
+    @Override
+    public int hashCode() {
+        return mMetaData.hashCode();
+    }
+
+    /**
+     * Generate a unique key for a VCFHeaderLine. If the header line is a structured (ID) header line, the
+     * map key pairs the VCFHeaderLine's key (i.e., the type of the VCFHeaderLine) with the ID for that
+     * VCFHeaderLine. Otherwise, the map key pairs the VCFHeaderLine's key with the line's hashCode, so
+     * that unstructured lines with the same key can accumulate as long as their content differs, while
+     * identical duplicates map to the same key.
+     *
+     * @param headerLine the {@link VCFHeaderLine} for which a key should be returned
+     * @return the generated HeaderLineMapKey
+     */
+    private HeaderLineMapKey makeKeyForLine(final VCFHeaderLine headerLine) {
+        if (headerLine.isIDHeaderLine()) {
+            // these are required to have a unique ID, so use the line key as the key, and the id as the constraint
+            return makeKey(headerLine.getKey(), headerLine.getID());
+        } else {
+            // Allow duplicate unstructured "other" keys, as long as they have different values. Use
+            // the line key as the key, and the line hashcode as the constraint.
+            //
+            // The previous implementation dropped duplicate keys for unstructured lines, but the spec doesn't
+            // require these to be unique (only to have unique values). This implementation is more permissive in
+            // that it allows lines with duplicate keys to accumulate as long as they have different values, but
+            // retains only one with a unique value.
+            return makeKey(headerLine.getKey(), Integer.toString(headerLine.hashCode()));
+        }
+    }
+
+    // Create a VCFHeaderLine hashmap key given a key and an id
+    private HeaderLineMapKey makeKey(final String nameSpace, final String id) { return new HeaderLineMapKey(nameSpace, id); }
+
+    private void createNewMapEntry(final HeaderLineMapKey newMapKey, final VCFHeaderLine newMetaDataLine) {
+        // for creation of a new entry, call updateMapEntry, but validate that it ALWAYS returns null
+        final VCFHeaderLine existingLine = updateMapEntry(newMapKey, newMetaDataLine);
+        if (existingLine != null ) {
+            throw new TribbleException(String.format(
+                    "Internal header synchronization error - found unexpected previous value %s while adding %s",
+                    existingLine,
+                    newMetaDataLine));
+        }
+    }
+
+    private VCFHeaderLine updateMapEntry(final HeaderLineMapKey newMapKey, final VCFHeaderLine newMetaDataLine) {
+        final VCFHeaderLine existingLine = mMetaData.put(newMapKey, newMetaDataLine);
+        if (newMetaDataLine.isIDHeaderLine() && newMetaDataLine.getKey().equals(VCFHeader.CONTIG_KEY)) {
+            addToContigIndexMap((VCFContigHeaderLine) newMetaDataLine);
+        }
+        return existingLine;
+    }
+
+    // We can't just blindly replace a line in the map based on the key using map.put, because the contig
+    // map will get out of sync if the line being replaced is a contig line that has a different contig
+    // index than the new line. So replace the line in two atomic operations; first remove
+    // the old line and its corresponding contig index entry, then add the new contig line and its
+    // corresponding contig index entry.
+    private VCFHeaderLine replaceExistingMapEntry(
+            final HeaderLineMapKey newMapKey,
+            final VCFHeaderLine existingMetaDataLine,
+            final VCFHeaderLine newMetaDataLine) {
+        removeFromMapOrThrow(existingMetaDataLine);
+        logger.debug(() ->
+             "Replacing existing header metadata line: " +
+                existingMetaDataLine.toStringEncoding() +
+                " with header metadata line: " +
+                newMetaDataLine.toStringEncoding() +
+                ".");
+        createNewMapEntry(newMapKey, newMetaDataLine);
+        return existingMetaDataLine;
+    }
+
+    // remove a line that is expected to be  currently in the list, and throw if the line
+    // isn't found, or if the removed line is different (not equal to) the line to remove
+    private void removeFromMapOrThrow(final VCFHeaderLine lineToRemove) {
+        final VCFHeaderLine removedLine = removeMetaDataLine(lineToRemove);
+        if (removedLine == null || !removedLine.equals(lineToRemove)) {
+            // sanity check since in this case there should ALWAYS be a non-null line that was removed
+            // that is an exact duplicate of the "existingLine"
+            throw new TribbleException(String.format("Internal header synchronization error %s/%s",
+                    lineToRemove,
+                    removedLine == null ? "null line" : removedLine));
+        }
+    }
+
+    //add the new line to our contig index map
+    private void addToContigIndexMap(final VCFContigHeaderLine newContigLine) {
+        final VCFContigHeaderLine collidingContigLine = contigIndexMap.get(newContigLine.getContigIndex());
+        if (collidingContigLine != null && !collidingContigLine.equals(newContigLine)) {
+            if (collidingContigLine.getID().equals(newContigLine.getID())) {
+                // the new line has the same contig ID and index as an existing line, but differ in
+                // some other attribute, so accept it but log a warning
+                logger.warn(String.format(
+                        "Replacing an existing contig header line (%s) with a new, similar line that has different attributes (%s)",
+                        collidingContigLine,
+                        newContigLine));
+            } else {
+                // the new contig line collides with an existing contig index, but specifies a different
+                // contig name, so reject it
+                throw new TribbleException(String.format(
+                        "Attempt to replace a contig header line (%s) that has the same contig index as an existing line (%s)",
+                        newContigLine,
+                        collidingContigLine));
+            }
+        }
+        contigIndexMap.put(newContigLine.getContigIndex(), newContigLine);
+    }
+
+    // remove the contig header line from the contig index map
+    private void removeFromContigIndexMap(final VCFContigHeaderLine existingContigLine) {
+        // this remove overload only removes the specified object if its actually in the map
+        contigIndexMap.remove(existingContigLine.getContigIndex(), existingContigLine);
+    }
+
+    // First, check for existing header lines that establish a header version. Whenever a new one is
+    // added, we need to remove the previous version line, then add the new version line, and update
+    // our version state (remaining lines are NOT validated here; see validateMetaDataLines for that). We
+    // have to explicitly call isFormatString, and manually update the lines, since there is more than
+    // one header line key that can change the version. In some cases this will result in removing a
+    // fileformat/version line with one key and replacing it with a line that has a different key.
+    private final VCFHeaderLine updateVersion(final VCFHeaderLine newMetaDataLine) {
+        ValidationUtils.validateArg(
+                VCFHeaderVersion.isFormatString(newMetaDataLine.getKey()),
+                "a file format line is required");
+
+        final VCFHeaderLine currentVersionLine = getFileFormatLine();
+        final VCFHeaderVersion newVCFVersion = VCFHeaderVersion.toHeaderVersion(newMetaDataLine.getValue());
+
+        if (vcfVersion == null) {
+            logger.debug("Establishing header metadata version ", newVCFVersion);
+        } else if (!newVCFVersion.equals(vcfVersion)) {
+            logger.debug(() -> "Updating header metadata version from " + vcfVersion + " to " + newVCFVersion);
+        }
+        // a same-version line with a different key must be removed too, or two version lines would coexist
+        if (currentVersionLine != null &&
+                (!newVCFVersion.equals(vcfVersion) || !currentVersionLine.getKey().equals(newMetaDataLine.getKey()))) {
+            removeFromMapOrThrow(currentVersionLine);
+        }
+
+        mMetaData.put(makeKeyForLine(newMetaDataLine), newMetaDataLine);
+        vcfVersion = newVCFVersion;
+        return currentVersionLine;
+    }
+
+    // composite keys used by the metadata lines map
+    private static class HeaderLineMapKey implements Serializable {
+        public static final long serialVersionUID = 1L;
+
+        final String key;
+        final String constraint;
+
+        public HeaderLineMapKey(final String key, final String constraint) {
+            this.key = key;
+            this.constraint = constraint;
+        }
+
+        public final String getKey() { return key; }
+        public final String getConstraint() { return constraint; }
+
+        @Override
+        public boolean equals(final Object o) {
+            if (this == o) return true;
+            if (o == null || getClass() != o.getClass()) return false;
+
+            final HeaderLineMapKey that = (HeaderLineMapKey) o;
+
+            if (!key.equals(that.key)) return false;
+            return constraint.equals(that.constraint);
+        }
+
+        @Override
+        public int hashCode() {
+            int result = key.hashCode();
+            result = 31 * result + constraint.hashCode();
+            return result;
+        }
+    }
+
+}
+
diff --git a/src/main/java/htsjdk/variant/vcf/VCFMetaHeaderLine.java b/src/main/java/htsjdk/variant/vcf/VCFMetaHeaderLine.java
index 991faa806f..d8cd83b8bb 100644
--- a/src/main/java/htsjdk/variant/vcf/VCFMetaHeaderLine.java
+++ b/src/main/java/htsjdk/variant/vcf/VCFMetaHeaderLine.java
@@ -1,13 +1,41 @@
 package htsjdk.variant.vcf;
 
+import java.util.Map;
+import java.util.Optional;
+
 /**
- * A class representing META fields in the VCF header
+ * A class representing META fields in the VCF header.
  */
 public class VCFMetaHeaderLine extends VCFSimpleHeaderLine {
     private static final long serialVersionUID = 1L;
 
     public VCFMetaHeaderLine(final String line, final VCFHeaderVersion version) {
-        super(VCFConstants.META_HEADER_KEY, VCFHeaderLineTranslator.parseLine(version, line, null));
+        // We need to use the V4 parser directly, since the V3 parser requires ALL permissible/expected
+        // tags to be supplied, which is inconsistent with modern structured header lines that allow
+        // other tags. So let validateForVersion detect any version incompatibility, ie., if this is ever
+        // called with a V3 version.
+        super(VCFConstants.META_HEADER_KEY, new VCF4Parser().parseLine(line, expectedTagOrder));
+        validateForVersion(version);
+    }
+
+    public VCFMetaHeaderLine(final Map<String, String> mapping) {
+        super(VCFConstants.META_HEADER_KEY, mapping);
+    }
+
+    @Override
+    public Optional<VCFValidationFailure<VCFHeaderLine>> getValidationFailure(final VCFHeaderVersion vcfTargetVersion) {
+        // Targets at least as recent as VCFv4.3 are handed to the base-class checks; any older target
+        // is always reported as a validation failure for this line.
+        final boolean targetAllowsLine = vcfTargetVersion.isAtLeastAsRecentAs(VCFHeaderVersion.VCF4_3);
+        if (targetAllowsLine) {
+            return super.getValidationFailure(vcfTargetVersion);
+        }
+
+        final String message = String.format(
+                "%s header lines are not allowed in VCF version %s headers",
+                getKey(),
+                vcfTargetVersion);
+        return Optional.of(new VCFValidationFailure<>(vcfTargetVersion, this, message));
     }
 
 }
diff --git a/src/main/java/htsjdk/variant/vcf/VCFPedigreeHeaderLine.java b/src/main/java/htsjdk/variant/vcf/VCFPedigreeHeaderLine.java
index 33f163e8dc..f5bd71c474 100644
--- a/src/main/java/htsjdk/variant/vcf/VCFPedigreeHeaderLine.java
+++ b/src/main/java/htsjdk/variant/vcf/VCFPedigreeHeaderLine.java
@@ -1,13 +1,51 @@
 package htsjdk.variant.vcf;
 
+import java.util.Map;
+import java.util.Optional;
+
 /**
- * A class representing PEDIGREE fields in the VCF header
+ * A class representing PEDIGREE fields in the VCF header. Applicable starting with version VCFv4.3.
+ *
+ * ##PEDIGREE=<ID=TumourSample,Original=GermlineID>
+ * ##PEDIGREE=<ID=SomaticNonTumour,Original=GermlineID>
+ * ##PEDIGREE=<ID=ChildID,Father=FatherID,Mother=MotherID>
+ * ##PEDIGREE=<ID=SampleID,Name_1=Ancestor_1,...,Name_N=Ancestor_N>
  */
 public class VCFPedigreeHeaderLine extends VCFSimpleHeaderLine {
+
     private static final long serialVersionUID = 1L;
 
     public VCFPedigreeHeaderLine(String line, VCFHeaderVersion version) {
-        super(VCFConstants.PEDIGREE_HEADER_KEY, VCFHeaderLineTranslator.parseLine(version, line, null));
+        // We need to use the V4 parser directly, since the V3 parser requires ALL permissible/expected
+        // tags to be supplied, which is inconsistent with modern structured header lines that allow
+        // other tags. So let validateForVersion detect any version incompatibility, ie., if this is ever
+        // called with a V3 version.
+        super(VCFConstants.PEDIGREE_HEADER_KEY, new VCF4Parser().parseLine(line, expectedTagOrder));
+        validateForVersion(version);
+    }
+
+    public VCFPedigreeHeaderLine(final Map<String, String> mapping) {
+        super(VCFConstants.PEDIGREE_HEADER_KEY, mapping);
+    }
+
+    @Override
+    public Optional<VCFValidationFailure<VCFHeaderLine>> getValidationFailure(final VCFHeaderVersion vcfTargetVersion) {
+        if (!vcfTargetVersion.isAtLeastAsRecentAs(VCFHeaderVersion.VCF4_3)) {
+            // previous to VCFv4.3, the PEDIGREE line did not have an ID. Such lines are not modeled by this
+            // class (since it is derived from VCFSimpleHeaderLine). Therefore instances of this class always
+            // represent VCFv4.3 or higher. So throw if the requested version is less than 4.3.
+            final String message = String.format("%s header lines are not allowed in VCF version %s headers",
+                getKey(),
+                vcfTargetVersion
+            );
+            if (VCFUtils.isStrictVCFVersionValidation()) {
+                return Optional.of(new VCFValidationFailure<>(vcfTargetVersion, this, message));
+            } else {
+                logger.warn(message);
+            }
+        }
+
+        return super.getValidationFailure(vcfTargetVersion);
     }
 
 }
diff --git a/src/main/java/htsjdk/variant/vcf/VCFRecordCodec.java b/src/main/java/htsjdk/variant/vcf/VCFRecordCodec.java
index 8fe9b67d6d..cbefb13237 100644
--- a/src/main/java/htsjdk/variant/vcf/VCFRecordCodec.java
+++ b/src/main/java/htsjdk/variant/vcf/VCFRecordCodec.java
@@ -27,8 +27,7 @@ public VCFRecordCodec(final VCFHeader header) {
 
 	public VCFRecordCodec(final VCFHeader header, final boolean allowMissingFieldsInHeader) {
 		this.vcfEncoder = new VCFEncoder(header, allowMissingFieldsInHeader, false);
-		// Explicitly set the version because it's not available in the header itself.
-		this.vcfDecoder.setVCFHeader(header, VCFHeaderVersion.VCF4_2);
+		this.vcfDecoder.setVCFHeader(header);
 	}
 
 	@Override
diff --git a/src/main/java/htsjdk/variant/vcf/VCFSampleHeaderLine.java b/src/main/java/htsjdk/variant/vcf/VCFSampleHeaderLine.java
index 973a976baa..7c45e9a1b2 100644
--- a/src/main/java/htsjdk/variant/vcf/VCFSampleHeaderLine.java
+++ b/src/main/java/htsjdk/variant/vcf/VCFSampleHeaderLine.java
@@ -1,13 +1,43 @@
 package htsjdk.variant.vcf;
 
+import java.util.Map;
+import java.util.Optional;
+
 /**
  * A class representing SAMPLE fields in the VCF header
  */
 public class VCFSampleHeaderLine extends VCFSimpleHeaderLine {
+
     private static final long serialVersionUID = 1L;
 
     public VCFSampleHeaderLine(String line, VCFHeaderVersion version) {
-        super(VCFConstants.SAMPLE_HEADER_KEY, VCFHeaderLineTranslator.parseLine(version, line, null));
+        // We need to use the V4 parser directly, since the V3 parser requires ALL permissible/expected
+        // tags to be supplied, which is inconsistent with modern structured header lines that allow
+        // other tags. So let validateForVersion detect any version incompatibility, ie., if this is ever
+        // called with a V3 version.
+        super(VCFConstants.SAMPLE_HEADER_KEY, new VCF4Parser().parseLine(line, expectedTagOrder));
+        validateForVersion(version);
+    }
+
+    public VCFSampleHeaderLine(final Map<String, String> mapping) {
+        super(VCFConstants.SAMPLE_HEADER_KEY, mapping);
+    }
+
+    @Override
+    public Optional<VCFValidationFailure<VCFHeaderLine>> getValidationFailure(final VCFHeaderVersion vcfTargetVersion) {
+        if (!vcfTargetVersion.isAtLeastAsRecentAs(VCFHeaderVersion.VCF4_0)) {
+            final String message = String.format("%s header lines are not allowed in VCF version %s headers",
+                getKey(),
+                vcfTargetVersion
+            );
+            if (VCFUtils.isStrictVCFVersionValidation()) {
+                return Optional.of(new VCFValidationFailure<>(vcfTargetVersion, this, message));
+            } else {
+                logger.warn(message);
+            }
+        }
+
+        return super.getValidationFailure(vcfTargetVersion);
     }
 
 }
diff --git a/src/main/java/htsjdk/variant/vcf/VCFSimpleHeaderLine.java b/src/main/java/htsjdk/variant/vcf/VCFSimpleHeaderLine.java
index 12b45e5bc9..c0a3abce5c 100644
--- a/src/main/java/htsjdk/variant/vcf/VCFSimpleHeaderLine.java
+++ b/src/main/java/htsjdk/variant/vcf/VCFSimpleHeaderLine.java
@@ -1,5 +1,5 @@
 /*
-* Copyright (c) 2012 The Broad Institute
+* Copyright (c) 2017 The Broad Institute
 * 
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
@@ -25,98 +25,120 @@
 
 package htsjdk.variant.vcf;
 
+import htsjdk.samtools.util.Log;
+import htsjdk.tribble.TribbleException;
+import htsjdk.utils.ValidationUtils;
+
+import java.util.ArrayList;
 import java.util.Collections;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
-
+import java.util.stream.Collectors;
 
 /**
- * @author ebanks
- * 
- * A class representing a key=value entry for simple VCF header types
+ * A base class representing a VCF metadata line with a key and attribute=value pairs, one of
+ * which represents an ID. The key determines the "type" of the structured header line (i.e., contig, FILTER,
+ * INFO, ALT, PEDIGREE, META).
+ *
+ * The attribute/value pairs are ordered. The first entry in the map must be an ID attribute (used by the
+ * VCFHeader to ensure that no two structured header lines that share the same key in a given header have the
+ * same ID).
  */
 public class VCFSimpleHeaderLine extends VCFHeaderLine implements VCFIDHeaderLine {
-
-    private String name;
-    private Map<String, String> genericFields = new LinkedHashMap<String, String>();
+    private static final long serialVersionUID = 1L;
+    protected static final Log logger = Log.getInstance(VCFSimpleHeaderLine.class);
 
     public static final String ID_ATTRIBUTE = "ID";
     public static final String DESCRIPTION_ATTRIBUTE = "Description";
+    public static final String SOURCE_ATTRIBUTE = "Source";
+    public static final String VERSION_ATTRIBUTE = "Version";
+
+    // Expected tag order for this base class: the ID attribute only. The list is immutable; subclasses
+    // that require more tags should supply their own tag order rather than modify this one.
+    protected static final List<String> expectedTagOrder =
+            Collections.singletonList(ID_ATTRIBUTE);
+
+    // Map used to retain the attribute/value pairs, in original order. The first entry in the map must be
+    // an ID field. The map should be treated as immutable to prevent hash values from changing, since header
+    // lines are often stored in Sets. It's not ACTUALLY immutable, in order to allow for special cases where
+    // subclasses have to "repair" header lines (via a call to updateGenericField) during constructor validation.
+    //
+    // Otherwise the values here should never change during the lifetime of the header line.
+    private final Map<String, String> genericFields = new LinkedHashMap<>();
 
     /**
-     * create a VCF filter header line
-     *
-     * @param key            the key for this header line
-     * @param name           the name for this header line
-     * @param description    description for this header line
+     * Constructor that accepts a key and a string that represents the rest of the line (after the "##KEY=").
+     * @param key the key to use for this line
+     * @param line the value part of the line
+     * @param version the target version to validate the line against
      */
-    public VCFSimpleHeaderLine(String key, String name, String description) {
-        super(key, "");
-        Map<String, String> map = new LinkedHashMap<String, String>(1);
-        map.put(DESCRIPTION_ATTRIBUTE, description);
-        initialize(name, map);
+    public VCFSimpleHeaderLine(final String key, final String line, final VCFHeaderVersion version) {
+        this(key, VCFHeaderLineTranslator.parseLine(version, line, expectedTagOrder));
+        // validate() has already run inside the delegated (key, map) constructor; only the version check remains
+        validateForVersion(version);
     }
 
     /**
-     * create a VCF info header line
-     * 
-     * @see #VCFSimpleHeaderLine(String, VCFHeaderVersion, String, List, List) VCFv4.2+ recommended tags support
+     * Key cannot be null or empty.
      *
-     * @param line      the header line
-     * @param version   the vcf header version
-     * @param key            the key for this header line
-     * @param expectedTagOrdering the tag ordering expected for this header line
+     * @param key key to use for this header line. can not be null.
+     * @param id id name to use for this line
+     * @param description string that will be added as a "Description" tag to this line
      */
-    public VCFSimpleHeaderLine(final String line, final VCFHeaderVersion version, final String key, final List<String> expectedTagOrdering) {
-        this(line, version, key, expectedTagOrdering, Collections.emptyList());
+    public VCFSimpleHeaderLine(final String key, final String id, final String description) {
+        super(key, "");
+        genericFields.put(ID_ATTRIBUTE, id);
+        genericFields.put(DESCRIPTION_ATTRIBUTE, description);
+        validate();
     }
 
     /**
-     * create a VCF info header line
+     * Key cannot be null or empty.
+     *
+     * Note that for attributes where the order is significant, use a LinkedHashMap
+     * to ensure that attribute order is honored.
      *
-     * @param line      the header line
-     * @param version   the vcf header version
-     * @param key            the key for this header line
-     * @param expectedTagOrdering the tag ordering expected for this header line
-     * @param recommendedTags tags that are optional for this header line                            
+     * @param key key to use for this header line. can not be null.
+     * @param attributeMapping field mappings to use. may not be null. must contain an "ID" field to use as
+     *                         a unique id for this line
      */
-    public VCFSimpleHeaderLine(final String line, final VCFHeaderVersion version, final String key, final List<String> expectedTagOrdering, final List<String> recommendedTags) {
-        this(key, VCFHeaderLineTranslator.parseLine(version, line, expectedTagOrdering, recommendedTags));
+    public VCFSimpleHeaderLine(final String key, final Map<String, String> attributeMapping) {
+        super(key, "");
+        ValidationUtils.nonNull(attributeMapping, "An attribute map is required for structured header lines");
+        genericFields.putAll(attributeMapping);
+        validate();
     }
 
-    public VCFSimpleHeaderLine(final String key, final Map<String, String> mapping) {
-        super(key, "");
-        name = mapping.get(ID_ATTRIBUTE);
-        initialize(name, mapping);
+    /**
+     * @return true if this is a structured header line (has a unique ID and multiple key/value pairs),
+     * otherwise false
+     */
+    @Override
+    public boolean isIDHeaderLine() { return true; }
+
+    /**
+     * Return the unique ID for this line, i.e., the value of its "ID" attribute.
+     * @return the ID attribute value for this line
+     */
+    @Override
+    public String getID() {
+        return getGenericFieldValue(ID_ATTRIBUTE);
     }
 
-	/**
-	 * Returns the String value associated with the given key. Returns null if there is no value. Key
-	 * must not be null.
-	 */
-	String getGenericFieldValue(final String key) {
-		return this.genericFields.get(key);
-	}
-
-    protected void initialize(String name, Map<String, String> genericFields) {
-        if ( name == null || genericFields == null || genericFields.isEmpty() )
-            throw new IllegalArgumentException(String.format("Invalid VCFSimpleHeaderLine: key=%s name=%s", super.getKey(), name));
-        if ( name.contains("<") || name.contains(">") )
-            throw new IllegalArgumentException("VCFHeaderLine: ID cannot contain angle brackets");
-        if ( name.contains("=") )
-            throw new IllegalArgumentException("VCFHeaderLine: ID cannot contain an equals sign");
-
-        this.name = name;
-        this.genericFields.putAll(genericFields);
+    /**
+     * Returns the String value associated with the given key. Returns null if there is no value. Key
+     * must not be null.
+     */
+    public String getGenericFieldValue(final String key) {
+        return this.genericFields.get(key);
     }
 
-    @Override
-    protected String toStringEncoding() {
-        Map<String, Object> map = new LinkedHashMap<String, Object>();
-        map.put(ID_ATTRIBUTE, name);
-        map.putAll(genericFields);
-        return getKey() + "=" + VCFHeaderLine.toStringEncoding(map);
+    /**
+     * Returns an unmodifiable view of all attribute/value pairs for this header line.
+     */
+    public Map<String, String> getGenericFields() {
+        return Collections.unmodifiableMap(this.genericFields);
     }
 
     @Override
@@ -129,28 +151,78 @@ public boolean equals( final Object o ) {
         }
 
         final VCFSimpleHeaderLine that = (VCFSimpleHeaderLine) o;
-        return name.equals(that.name) &&
-               genericFields.equals(that.genericFields);
+        return genericFields.equals(that.genericFields);
     }
 
     @Override
     public int hashCode() {
         int result = super.hashCode();
-        result = 31 * result + name.hashCode();
         result = 31 * result + genericFields.hashCode();
         return result;
     }
 
+    /**
+     * Encode this line as {@code KEY=<attr=value,...>}, quoting attribute values where required.
+     * @return the encoded string for this header line
+     */
     @Override
-    public String getID() {
-        return name;
+    protected String toStringEncoding() {
+        //NOTE: this preserves/round-trips "extra" attributes such as SOURCE, VERSION, etc.
+        final StringBuilder builder = new StringBuilder();
+        builder.append(getKey());
+        builder.append("=<");
+        builder.append(genericFields.entrySet().stream()
+                .map(e -> e.getKey() + "=" + quoteAttributeValueForSerialization(e.getKey(), e.getValue()))
+                .collect(Collectors.joining(",")));
+        builder.append('>');
+        return builder.toString();
     }
 
+    // Called by VCFInfoHeaderLine to allow repairing of VCFInfoLines that have a Flag type and a non-zero count
+    // (the combination of which is forbidden by the spec, but which we tolerate for backward compatibility with
+    // previous versions of htsjdk, which silently repaired these).
+    //
+    // Updates the generic fields map in place, adding the attribute or overwriting its existing value.
+    protected void updateGenericField(final String attributeName, final String value) {
+        genericFields.put(attributeName, value);
+    }
 
     /**
-     * @return a map of all pairs of fields and values in this header line
+     * Return true if the attribute name requires quotes.
+     * @param attributeName name of the attribute being serialized
+     * @return boolean indicating whether the value should be embedded in quotes during serialization
      */
-    public Map<String, String> getGenericFields() {
-        return Collections.unmodifiableMap(genericFields);
+    protected boolean getIsQuotableAttribute(final String attributeName) {
+        // the (VCF4.3) spec says that the DESCRIPTION, SOURCE, and VERSION attributes should be quoted
+        // for INFO/FORMAT lines, but htsjdk seems to have historically quoted these for all structured
+        // header lines
+        return attributeName.equals(DESCRIPTION_ATTRIBUTE) ||
+                attributeName.equals(SOURCE_ATTRIBUTE) ||
+                attributeName.equals(VERSION_ATTRIBUTE);
     }
- }
+
+    private void validate() {
+        // constant-first equals: a null leading key is reported as a missing ID rather than as an NPE
+        if ( genericFields.isEmpty() || !ID_ATTRIBUTE.equals(genericFields.keySet().iterator().next())) {
+            throw new TribbleException(String.format("The required ID tag is missing or not the first attribute: key=%s", super.getKey()));
+        }
+        validateKeyOrID(getGenericFieldValue(ID_ATTRIBUTE), "ID");
+    }
+
+    // Wrap the value in double quotes (escaping embedded quotes) when it contains a comma or a space, or
+    // when the attribute is one that is always quoted (see getIsQuotableAttribute); otherwise return it as is.
+    private String quoteAttributeValueForSerialization(final String attribute, final String originalValue) {
+        final boolean mustQuote =
+                originalValue.contains(",") || originalValue.contains(" ") || getIsQuotableAttribute(attribute);
+        return mustQuote ? "\"" + escapeQuotes(originalValue) + "\"" : originalValue;
+    }
+
+    private static String escapeQuotes(final String value) {
+        // Prefix every double quote that is not already preceded by a backslash with a backslash.
+        // Unescaped, the regex is (?<!\)" and the replacement is \" ; the zero-width lookbehind (instead of
+        // consuming the preceding character) also covers a quote at the very start of the value and runs of
+        // consecutive quotes, both of which a consuming ([^\])" pattern would leave unescaped.
+        return value.replaceAll("(?<!\\\\)\"", "\\\\\"");
+    }
+
+}
\ No newline at end of file
diff --git a/src/main/java/htsjdk/variant/vcf/VCFStandardHeaderLines.java b/src/main/java/htsjdk/variant/vcf/VCFStandardHeaderLines.java
index 6e9e713a20..0d61cf35e4 100644
--- a/src/main/java/htsjdk/variant/vcf/VCFStandardHeaderLines.java
+++ b/src/main/java/htsjdk/variant/vcf/VCFStandardHeaderLines.java
@@ -51,15 +51,21 @@ public class VCFStandardHeaderLines {
     /**
      * Enabling this causes us to repair header lines even if only their descriptions differ.
      */
-    private final static boolean REPAIR_BAD_DESCRIPTIONS = false;
-    private static Standards<VCFFormatHeaderLine> formatStandards = new Standards<VCFFormatHeaderLine>();
-    private static Standards<VCFInfoHeaderLine> infoStandards = new Standards<VCFInfoHeaderLine>();
+    private static Standards<VCFFormatHeaderLine> formatStandards = new Standards<>();
+    private static Standards<VCFInfoHeaderLine> infoStandards = new Standards<>();
 
     /**
      * Walks over the VCF header and repairs the standard VCF header lines in it, returning a freshly
      * allocated {@link VCFHeader} with standard VCF header lines repaired as necessary.
      */
     public static VCFHeader repairStandardHeaderLines(final VCFHeader oldHeader) {
+        if (oldHeader.getVCFHeaderVersion().isAtLeastAsRecentAs(VCFHeaderVersion.VCF4_3)) {
+            // the "repair" operation effectively upgrades old header lines to v4.2 format,
+            // but we don't "back-version" headers that are already newer than v4.2, so skip
+            // repair for newer headers
+            return oldHeader;
+        }
+
         final Set<VCFHeaderLine> newLines = new LinkedHashSet<VCFHeaderLine>(oldHeader.getMetaDataInInputOrder().size());
         for ( VCFHeaderLine line : oldHeader.getMetaDataInInputOrder() ) {
             if ( line instanceof VCFFormatHeaderLine ) {
@@ -67,17 +73,17 @@ public static VCFHeader repairStandardHeaderLines(final VCFHeader oldHeader) {
             } else if ( line instanceof VCFInfoHeaderLine) {
                 line = infoStandards.repair((VCFInfoHeaderLine) line);
             }
-
             newLines.add(line);
         }
 
+        //NOTE that it's possible for this to fail in the (probably rare) case that the repaired
+        //lines (which are "version-less") fail validation against the header version
         final VCFHeader repairedHeader = new VCFHeader(newLines, oldHeader.getGenotypeSamples());
-        final VCFHeaderVersion oldHeaderVersion = oldHeader.getVCFHeaderVersion();
-        if (oldHeaderVersion != null && oldHeaderVersion.isAtLeastAsRecentAs(VCFHeaderVersion.VCF4_3)) {
-            // this needs to maintain version 4.3 (and not back-version to v4.2), so propagate
-            // the old version only for v4.3
-            repairedHeader.setVCFHeaderVersion(oldHeaderVersion);
-        }
+
+        // the "repair" operation effectively upgrades old header lines to v4.2 format, so the new header
+        // must be stamped as v4.2, since its lines may no longer conform to the header's original
+        // version
+        repairedHeader.addMetaDataLine(VCFHeader.makeHeaderVersionLine(VCFHeaderVersion.VCF4_2));
         return repairedHeader;
     }
 
@@ -159,9 +165,9 @@ private static void registerStandard(final VCFFormatHeaderLine line) {
     //
     static {
         // FORMAT lines
-        registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY,           1,                            VCFHeaderLineType.String,  "Genotype"));
-        registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY,   1,                            VCFHeaderLineType.Integer, "Genotype Quality"));
-        registerStandard(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY,              1,                            VCFHeaderLineType.Integer, "Approximate read depth (reads with MQ=255 or with bad mates are filtered)"));
+        registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY,           1,                     VCFHeaderLineType.String,  "Genotype"));
+        registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY,   1,                     VCFHeaderLineType.Integer, "Genotype Quality"));
+        registerStandard(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY,              1,                     VCFHeaderLineType.Integer, "Approximate read depth (reads with MQ=255 or with bad mates are filtered)"));
         registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_PL_KEY,        VCFHeaderLineCount.G,         VCFHeaderLineType.Integer, "Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"));
         registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_ALLELE_DEPTHS, VCFHeaderLineCount.R,         VCFHeaderLineType.Integer, "Allelic depths for the ref and alt alleles in the order listed"));
         registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_FILTER_KEY,    VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String,  "Genotype-level filter"));
@@ -169,16 +175,16 @@ private static void registerStandard(final VCFFormatHeaderLine line) {
         registerStandard(new VCFFormatHeaderLine(VCFConstants.PHASE_QUALITY_KEY,      1,                            VCFHeaderLineType.Float,   "Read-backed phasing quality"));
 
         // INFO lines
-        registerStandard(new VCFInfoHeaderLine(VCFConstants.END_KEY,                  1,                    VCFHeaderLineType.Integer, "Stop position of the interval"));
-        registerStandard(new VCFInfoHeaderLine(VCFConstants.DBSNP_KEY,                0,                    VCFHeaderLineType.Flag,    "dbSNP Membership"));
-        registerStandard(new VCFInfoHeaderLine(VCFConstants.DEPTH_KEY,                1,                    VCFHeaderLineType.Integer, "Approximate read depth; some reads may have been filtered"));
-        registerStandard(new VCFInfoHeaderLine(VCFConstants.STRAND_BIAS_KEY,          1,                    VCFHeaderLineType.Float,   "Strand Bias"));
+        registerStandard(new VCFInfoHeaderLine(VCFConstants.END_KEY,                  1,             VCFHeaderLineType.Integer, "Stop position of the interval"));
+        registerStandard(new VCFInfoHeaderLine(VCFConstants.DBSNP_KEY,                0,             VCFHeaderLineType.Flag,    "dbSNP Membership"));
+        registerStandard(new VCFInfoHeaderLine(VCFConstants.DEPTH_KEY,                1,             VCFHeaderLineType.Integer, "Approximate read depth; some reads may have been filtered"));
+        registerStandard(new VCFInfoHeaderLine(VCFConstants.STRAND_BIAS_KEY,          1,             VCFHeaderLineType.Float,   "Strand Bias"));
         registerStandard(new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY,     VCFHeaderLineCount.A, VCFHeaderLineType.Float,   "Allele Frequency, for each ALT allele, in the same order as listed"));
         registerStandard(new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY,         VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"));
-        registerStandard(new VCFInfoHeaderLine(VCFConstants.ALLELE_NUMBER_KEY,        1,                    VCFHeaderLineType.Integer, "Total number of alleles in called genotypes"));
-        registerStandard(new VCFInfoHeaderLine(VCFConstants.MAPPING_QUALITY_ZERO_KEY, 1,                    VCFHeaderLineType.Integer, "Total Mapping Quality Zero Reads"));
-        registerStandard(new VCFInfoHeaderLine(VCFConstants.RMS_MAPPING_QUALITY_KEY,  1,                    VCFHeaderLineType.Float,   "RMS Mapping Quality"));
-        registerStandard(new VCFInfoHeaderLine(VCFConstants.SOMATIC_KEY,              0,                    VCFHeaderLineType.Flag,    "Somatic event"));
+        registerStandard(new VCFInfoHeaderLine(VCFConstants.ALLELE_NUMBER_KEY,        1,              VCFHeaderLineType.Integer, "Total number of alleles in called genotypes"));
+        registerStandard(new VCFInfoHeaderLine(VCFConstants.MAPPING_QUALITY_ZERO_KEY, 1,              VCFHeaderLineType.Integer, "Total Mapping Quality Zero Reads"));
+        registerStandard(new VCFInfoHeaderLine(VCFConstants.RMS_MAPPING_QUALITY_KEY,  1,              VCFHeaderLineType.Float,   "RMS Mapping Quality"));
+        registerStandard(new VCFInfoHeaderLine(VCFConstants.SOMATIC_KEY,              0,              VCFHeaderLineType.Flag,    "Somatic event"));
     }
 
     private static class Standards<T extends VCFCompoundHeaderLine> {
@@ -191,7 +197,7 @@ public T repair(final T line) {
                 final boolean badCount     = line.isFixedCount() && ! badCountType && line.getCount() != standard.getCount();
                 final boolean badType      = line.getType() != standard.getType();
                 final boolean badDesc      = ! line.getDescription().equals(standard.getDescription());
-                final boolean needsRepair  = badCountType || badCount || badType || (REPAIR_BAD_DESCRIPTIONS && badDesc);
+                final boolean needsRepair  = badCountType || badCount || badType;
 
                 if ( needsRepair ) {
                     if ( GeneralUtils.DEBUG_MODE_ENABLED ) {
diff --git a/src/main/java/htsjdk/variant/vcf/VCFUtils.java b/src/main/java/htsjdk/variant/vcf/VCFUtils.java
index 6d0e2d7b68..3599da7edc 100644
--- a/src/main/java/htsjdk/variant/vcf/VCFUtils.java
+++ b/src/main/java/htsjdk/variant/vcf/VCFUtils.java
@@ -25,110 +25,59 @@
 
 package htsjdk.variant.vcf;
 
+import htsjdk.samtools.Defaults;
 import htsjdk.samtools.SAMSequenceDictionary;
 import htsjdk.samtools.SAMSequenceRecord;
 import htsjdk.samtools.util.FileExtensions;
-import htsjdk.variant.utils.GeneralUtils;
 import htsjdk.variant.variantcontext.VariantContext;
 import htsjdk.variant.variantcontext.writer.Options;
 import htsjdk.variant.variantcontext.writer.VariantContextWriter;
 import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder;
 
 import java.io.File;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Set;
 import java.io.IOException;
 import java.util.*;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
-import java.util.stream.Collectors;
 
 public class VCFUtils {
 
     private static final Pattern INF_OR_NAN_PATTERN = Pattern.compile("^(?<sign>[-+]?)((?<inf>(INF|INFINITY))|(?<nan>NAN))$", Pattern.CASE_INSENSITIVE);
+    private static final boolean DEFAULT_VCF_STRICT_VERSION_VALIDATION = true;
 
-    public static Set<VCFHeaderLine> smartMergeHeaders(final Collection<VCFHeader> headers, final boolean emitWarnings) throws IllegalStateException {
-        // We need to maintain the order of the VCFHeaderLines, otherwise they will be scrambled in the returned Set.
-        // This will cause problems for VCFHeader.getSequenceDictionary and anything else that implicitly relies on the line ordering.
-        final LinkedHashMap<String, VCFHeaderLine> map = new LinkedHashMap<>(); // from KEY.NAME -> line
-        final HeaderConflictWarner conflictWarner = new HeaderConflictWarner(emitWarnings);
-        final Set<VCFHeaderVersion> headerVersions = new HashSet<>(2);
+    // a global mutable static - is there an alternative ?
+    // there isn't any other reasonable place to keep this state
+    private static boolean vcfStrictVersionValidation = true;
 
-        // todo -- needs to remove all version headers from sources and add its own VCF version line
-        for (final VCFHeader source : headers) {
-            for (final VCFHeaderLine line : source.getMetaDataInSortedOrder()) {
-
-                enforceHeaderVersionMergePolicy(headerVersions, source.getVCFHeaderVersion());
-                String key = line.getKey();
-                if (line instanceof VCFIDHeaderLine)
-                    key = key + "-" + ((VCFIDHeaderLine) line).getID();
-
-                if (map.containsKey(key)) {
-                    final VCFHeaderLine other = map.get(key);
-                    if (line.equals(other)) {
-                        // continue;
-                    } else if (!line.getClass().equals(other.getClass())) {
-                        throw new IllegalStateException("Incompatible header types: " + line + " " + other);
-                    } else if (line instanceof VCFFilterHeaderLine) {
-                        final String lineName = ((VCFFilterHeaderLine) line).getID();
-                        final String otherName = ((VCFFilterHeaderLine) other).getID();
-                        if (!lineName.equals(otherName))
-                            throw new IllegalStateException("Incompatible header types: " + line + " " + other);
-                    } else if (line instanceof VCFCompoundHeaderLine) {
-                        final VCFCompoundHeaderLine compLine = (VCFCompoundHeaderLine) line;
-                        final VCFCompoundHeaderLine compOther = (VCFCompoundHeaderLine) other;
-
-                        // if the names are the same, but the values are different, we need to quit
-                        if (!(compLine).equalsExcludingDescription(compOther)) {
-                            if (compLine.getType().equals(compOther.getType())) {
-                                // The Number entry is an Integer that describes the number of values that can be
-                                // included with the INFO field. For example, if the INFO field contains a single
-                                // number, then this value should be 1. However, if the INFO field describes a pair
-                                // of numbers, then this value should be 2 and so on. If the number of possible
-                                // values varies, is unknown, or is unbounded, then this value should be '.'.
-                                conflictWarner.warn(line, "Promoting header field Number to . due to number differences in header lines: " + line + " " + other);
-                                compOther.setNumberToUnbounded();
-                            } else if (compLine.getType() == VCFHeaderLineType.Integer && compOther.getType() == VCFHeaderLineType.Float) {
-                                // promote key to Float
-                                conflictWarner.warn(line, "Promoting Integer to Float in header: " + compOther);
-                                map.put(key, compOther);
-                            } else if (compLine.getType() == VCFHeaderLineType.Float && compOther.getType() == VCFHeaderLineType.Integer) {
-                                // promote key to Float
-                                conflictWarner.warn(line, "Promoting Integer to Float in header: " + compOther);
-                            } else {
-                                throw new IllegalStateException("Incompatible header types, collision between these two types: " + line + " " + other);
-                            }
-                        }
-                        if (!compLine.getDescription().equals(compOther.getDescription()))
-                            conflictWarner.warn(line, "Allowing unequal description fields through: keeping " + compOther + " excluding " + compLine);
-                    } else {
-                        // we are not equal, but we're not anything special either
-                        conflictWarner.warn(line, "Ignoring header line already in map: this header line = " + line + " already present header = " + other);
-                    }
-                } else {
-                    map.put(key, line);
-                }
-            }
-        }
-
-        // returning a LinkedHashSet so that ordering will be preserved. Ensures the contig lines do not get scrambled.
-        return new LinkedHashSet<>(map.values());
-    }
+    /**
+     * Determine if strict VCF version validation is enabled. Defaults to true. Strict version validation
+     * ensures that all VCF contents (header and variant contexts) conform to the established header version.
+     * This should only be disabled when absolutely necessary.
+     *
+     * @return true if strict version validation is enabled
+     */
+    public static boolean isStrictVCFVersionValidation() { return Defaults.STRICT_VCF_VERSION_VALIDATION; }
 
-    // Reject attempts to merge a VCFv4.3 header with any other version
-    private static void enforceHeaderVersionMergePolicy(
-            final Set<VCFHeaderVersion> headerVersions,
-            final VCFHeaderVersion candidateVersion) {
-        if (candidateVersion != null) {
-            headerVersions.add(candidateVersion);
-            if (headerVersions.size() > 1 && headerVersions.contains(VCFHeaderVersion.VCF4_3)) {
-                throw new IllegalArgumentException(
-                        String.format("Attempt to merge version %s header with incompatible header version %s",
-                                VCFHeaderVersion.VCF4_3.getVersionString(),
-                                headerVersions.stream()
-                                        .filter(hv -> !hv.equals(VCFHeaderVersion.VCF4_3))
-                                        .map(VCFHeaderVersion::getVersionString)
-                                        .collect(Collectors.joining(" "))));
-            }
-        }
+    /**
+     * The headers passed in must be version >= 4.2 (older headers that are read in via AbstractVCFCodecs
+     * are "repaired" and stamped as VCF4.2 when they're read in).
+     *
+     * @param headers the set of headers to merge
+     * @param emitWarnings true if warnings should be emitted by the merge
+     * @return the set of header lines produced by merging the provided headers
+     * @throws htsjdk.tribble.TribbleException if any header has a version older than VCF4.2
+     * @throws htsjdk.tribble.TribbleException if any header cannot be upgraded to the newest version amongst
+     * all headers provided
+     */
+    public static Set<VCFHeaderLine> smartMergeHeaders(
+            final Collection<VCFHeader> headers,
+            final boolean emitWarnings) {
+        return VCFHeaderMerger.getMergedHeaderLines(headers, emitWarnings);
     }
 
     /**
@@ -149,8 +98,8 @@ public static Set<VCFHeaderLine> withUpdatedContigsAsLines(final Set<VCFHeaderLi
     public static Set<VCFHeaderLine> withUpdatedContigsAsLines(final Set<VCFHeaderLine> oldLines, final File referenceFile, final SAMSequenceDictionary refDict, final boolean referenceNameOnly) {
         final Set<VCFHeaderLine> lines = new LinkedHashSet<>(oldLines.size());
 
-        for (final VCFHeaderLine line : oldLines) {
-            if (line instanceof VCFContigHeaderLine)
+        for ( final VCFHeaderLine line : oldLines ) {
+            if ( line.isIDHeaderLine() && line.getKey().equals(VCFConstants.CONTIG_HEADER_KEY) )
                 continue; // skip old contig lines
             if (line.getKey().equals(VCFHeader.REFERENCE_KEY))
                 continue; // skip the old reference key
@@ -184,17 +133,14 @@ public static List<VCFContigHeaderLine> makeContigHeaderLines(final SAMSequenceD
                                                                   final File referenceFile) {
         final List<VCFContigHeaderLine> lines = new ArrayList<>();
         final String assembly = referenceFile != null ? getReferenceAssembly(referenceFile.getName()) : null;
-        for (final SAMSequenceRecord contig : refDict.getSequences())
-            lines.add(makeContigHeaderLine(contig, assembly));
+        for ( final SAMSequenceRecord contig : refDict.getSequences() )
+            lines.add(new VCFContigHeaderLine(contig, assembly));
         return lines;
     }
 
+    @Deprecated
     private static VCFContigHeaderLine makeContigHeaderLine(final SAMSequenceRecord contig, final String assembly) {
-        final Map<String, String> map = new LinkedHashMap<>(3);
-        map.put("ID", contig.getSequenceName());
-        map.put("length", String.valueOf(contig.getSequenceLength()));
-        if (assembly != null) map.put("assembly", assembly);
-        return new VCFContigHeaderLine(map, contig.getSequenceIndex());
+        return new VCFContigHeaderLine(contig, assembly);
     }
 
     /**
@@ -295,22 +241,4 @@ else if (refPath.contains("hg38"))
         return assembly;
     }
 
-    /**
-     * Only displays a warning if warnings are enabled and an identical warning hasn't been already issued
-     */
-    private static final class HeaderConflictWarner {
-        boolean emitWarnings;
-        Set<String> alreadyIssued = new HashSet<>();
-
-        private HeaderConflictWarner(final boolean emitWarnings) {
-            this.emitWarnings = emitWarnings;
-        }
-
-        public void warn(final VCFHeaderLine line, final String msg) {
-            if (GeneralUtils.DEBUG_MODE_ENABLED && emitWarnings && !alreadyIssued.contains(line.getKey())) {
-                alreadyIssued.add(line.getKey());
-                System.err.println(msg);
-            }
-        }
-    }
 }
diff --git a/src/main/java/htsjdk/variant/vcf/VCFValidationFailure.java b/src/main/java/htsjdk/variant/vcf/VCFValidationFailure.java
new file mode 100644
index 0000000000..c6f0ad8708
--- /dev/null
+++ b/src/main/java/htsjdk/variant/vcf/VCFValidationFailure.java
@@ -0,0 +1,63 @@
+package htsjdk.variant.vcf;
+
+import htsjdk.utils.ValidationUtils;
+
+/**
+ * A class representing a VCF validation failure.
+ * @param <T> a type representing the object that is being validated
+ */
+class VCFValidationFailure<T> {
+    private final VCFHeaderVersion targetVersion;
+    private final T source;
+    private final String sourceMessage;
+
+    /**
+     * A VCF validation failure.
+     *
+     * @param targetVersion the version for which validation failed.
+     * @param source the source object being validated
+     * @param sourceMessage the validation failure reason
+     */
+    public VCFValidationFailure(final VCFHeaderVersion targetVersion, final T source, final String sourceMessage) {
+        ValidationUtils.nonNull(targetVersion);
+        ValidationUtils.nonNull(source);
+        ValidationUtils.nonNull(sourceMessage);
+
+        this.targetVersion = targetVersion;
+        this.source = source;
+        this.sourceMessage = sourceMessage;
+    }
+
+    /**
+     * @return the source object being validated
+     */
+    public T getSource() {
+        return source;
+    }
+
+    /**
+     * @return The validation failure reason.
+     */
+    public String getSourceMessage() {
+        return sourceMessage;
+    }
+
+    /**
+     * @return A formatted message describing the validation failure reason and target version.
+     */
+    public String getFailureMessage() {
+        return String.format(
+                "Failure validating %s for reason %s, target version %s",
+                source.toString(),
+                sourceMessage,
+                targetVersion);
+    }
+
+    /**
+     * @return The version for which validation failed. Never null; the constructor validates it as non-null.
+     */
+    public VCFHeaderVersion getTargetVersion() {
+        return targetVersion;
+    }
+
+}
diff --git a/src/test/java/htsjdk/samtools/SAMSequenceDictionaryUtilsTest.java b/src/test/java/htsjdk/samtools/SAMSequenceDictionaryUtilsTest.java
index 37842f8a9a..7167fa8f12 100644
--- a/src/test/java/htsjdk/samtools/SAMSequenceDictionaryUtilsTest.java
+++ b/src/test/java/htsjdk/samtools/SAMSequenceDictionaryUtilsTest.java
@@ -1,11 +1,7 @@
-package org.broadinstitute.hellbender.utils;
-
-import htsjdk.samtools.SAMSequenceDictionary;
-import htsjdk.samtools.SAMSequenceRecord;
-import org.apache.logging.log4j.LogManager;
-import org.apache.logging.log4j.Logger;
-import org.broadinstitute.hellbender.exceptions.UserException;
-import org.broadinstitute.hellbender.GATKBaseTest;
+package htsjdk.samtools;
+
+import htsjdk.HtsjdkTest;
+import htsjdk.samtools.util.Interval;
 import org.testng.Assert;
 import org.testng.annotations.DataProvider;
 import org.testng.annotations.Test;
@@ -14,12 +10,10 @@
 import java.util.Arrays;
 import java.util.List;
 
-import static org.broadinstitute.hellbender.utils.SequenceDictionaryUtils.*;
-import static org.broadinstitute.hellbender.utils.SequenceDictionaryUtils.SequenceDictionaryCompatibility.*;
-
-public final class SequenceDictionaryUtilsUnitTest extends GATKBaseTest {
+import static htsjdk.samtools.SAMSequenceDictionaryUtils.*;
+import static htsjdk.samtools.SAMSequenceDictionaryUtils.SequenceDictionaryCompatibility.*;
 
-    private static Logger logger = LogManager.getLogger(SequenceDictionaryUtilsUnitTest.class);
+public final class SAMSequenceDictionaryUtilsTest extends HtsjdkTest {
 
     @DataProvider( name = "testSequenceRecordsAreEquivalentDataProvider" )
     public Object[][] testSequenceRecordsAreEquivalentDataProvider() {
@@ -43,7 +37,7 @@ public Object[][] testSequenceRecordsAreEquivalentDataProvider() {
 
     @Test(dataProvider = "testSequenceRecordsAreEquivalentDataProvider")
     public void testSequenceRecordsAreEquivalent(final SAMSequenceRecord one, final SAMSequenceRecord two, final boolean expected){
-        final boolean actual = SequenceDictionaryUtils.sequenceRecordsAreEquivalent(one, two);
+        final boolean actual = SAMSequenceDictionaryUtils.sequenceRecordsAreEquivalent(one, two);
         Assert.assertEquals(actual, expected);
     }
 
@@ -59,204 +53,157 @@ public Object[][] generateSequenceDictionaryTestData() {
         CHR1_HG19_WITH_ATTRIBUTES.setAttribute("M5", "0dec9660ec1efaaf33281c0d5ea2560f");
         CHR1_HG19_WITH_ATTRIBUTES.setAttribute("UR", "file:/foo/bar");
 
-        final Class<UserException.IncompatibleSequenceDictionaries> NO_COMMON_CONTIGS_EXCEPTION = UserException.IncompatibleSequenceDictionaries.class;
-        final Class<UserException.IncompatibleSequenceDictionaries> UNEQUAL_COMMON_CONTIGS_EXCEPTION = UserException.IncompatibleSequenceDictionaries.class;
-        final Class<UserException.LexicographicallySortedSequenceDictionary> NON_CANONICAL_HUMAN_ORDER_EXCEPTION = UserException.LexicographicallySortedSequenceDictionary.class;
-        final Class<UserException.IncompatibleSequenceDictionaries> OUT_OF_ORDER_EXCEPTION = UserException.IncompatibleSequenceDictionaries.class;
-        final Class<UserException.IncompatibleSequenceDictionaries> DIFFERENT_INDICES_EXCEPTION = UserException.IncompatibleSequenceDictionaries.class;
-
-        final List<SimpleInterval> hg19AllContigsIntervalSet = Arrays.asList(
-                new SimpleInterval("chrM", 1, 1),
-                new SimpleInterval("chr1", 1, 1),
-                new SimpleInterval("chr2", 1, 1),
-                new SimpleInterval("chr10", 1, 1));
-        final List<SimpleInterval> hg19PartialContigsIntervalSet = Arrays.asList(
-                new SimpleInterval("chrM", 1, 1),
-                new SimpleInterval("chr1", 1, 1));
+        final List<Interval> hg19AllContigsIntervalSet = Arrays.asList(
+                new Interval("chrM", 1, 1),
+                new Interval("chr1", 1, 1),
+                new Interval("chr2", 1, 1),
+                new Interval("chr10", 1, 1));
+        final List<Interval> hg19PartialContigsIntervalSet = Arrays.asList(
+                new Interval("chrM", 1, 1),
+                new Interval("chr1", 1, 1));
 
         return new Object[][]  {
                 // Identical dictionaries:
-                {Arrays.asList(CHR1_HG19),                         Arrays.asList(CHR1_HG19),                        IDENTICAL, null, false, false},
-                { Arrays.asList(CHR1_HG19),                        Arrays.asList(CHR1_HG19),                        IDENTICAL, null, false, false},
-                { Arrays.asList(CHR1_HG19),                        Arrays.asList(CHR1_HG19),                        IDENTICAL, null, true,  false},
-                { Arrays.asList(CHR1_HG19),                        Arrays.asList(CHR1_HG19),                        IDENTICAL, null, false, true},
-                { Arrays.asList(CHR1_HG19),                        Arrays.asList(CHR1_HG19),                        IDENTICAL, null, true,  true},
-                { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), IDENTICAL, null, false, false},
-                { Arrays.asList(CHR1_B37),                         Arrays.asList(CHR1_B37),                         IDENTICAL, null, false, false},
-                { Arrays.asList(CHR1_B37, CHR2_B37, CHR10_B37),    Arrays.asList(CHR1_B37, CHR2_B37, CHR10_B37),    IDENTICAL, null, false, false},
-                { Arrays.asList(CHR1_HG19),                        Arrays.asList(CHR1_HG19_WITH_UNKNOWN_LENGTH),    IDENTICAL, null, false, false},
-                { Arrays.asList(CHR1_HG19_WITH_UNKNOWN_LENGTH),    Arrays.asList(CHR1_HG19),                        IDENTICAL, null, false, false},
+                {Arrays.asList(CHR1_HG19),                         Arrays.asList(CHR1_HG19),                        IDENTICAL, false, false},
+                { Arrays.asList(CHR1_HG19),                        Arrays.asList(CHR1_HG19),                        IDENTICAL, false, false},
+                { Arrays.asList(CHR1_HG19),                        Arrays.asList(CHR1_HG19),                        IDENTICAL, true,  false},
+                { Arrays.asList(CHR1_HG19),                        Arrays.asList(CHR1_HG19),                        IDENTICAL, false, true},
+                { Arrays.asList(CHR1_HG19),                        Arrays.asList(CHR1_HG19),                        IDENTICAL, true,  true},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), IDENTICAL, false, false},
+                { Arrays.asList(CHR1_B37),                         Arrays.asList(CHR1_B37),                         IDENTICAL, false, false},
+                { Arrays.asList(CHR1_B37, CHR2_B37, CHR10_B37),    Arrays.asList(CHR1_B37, CHR2_B37, CHR10_B37),    IDENTICAL, false, false},
+                { Arrays.asList(CHR1_HG19),                        Arrays.asList(CHR1_HG19_WITH_UNKNOWN_LENGTH),    IDENTICAL, false, false},
+                { Arrays.asList(CHR1_HG19_WITH_UNKNOWN_LENGTH),    Arrays.asList(CHR1_HG19),                        IDENTICAL, false, false},
 
                 // Dictionaries with a common subset:
-                { Arrays.asList(CHR1_HG19),                                          Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1),                                   COMMON_SUBSET, null, false, false},
-                { Arrays.asList(CHR1_HG19),                                          Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1),                                   COMMON_SUBSET, null, false, true},
+                { Arrays.asList(CHR1_HG19),                                          Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1),                                   COMMON_SUBSET, false, false},
+                { Arrays.asList(CHR1_HG19),                                          Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1),                                   COMMON_SUBSET, false, true},
                 // If requireSuperset == true, we should get an exception upon COMMON_SUBSET:
-                { Arrays.asList(CHRM_HG19, CHR1_HG19, CHR2_HG19),                    Arrays.asList(CHRM_HG19, CHR1_HG19, CHR10_HG19),                              COMMON_SUBSET, UserException.IncompatibleSequenceDictionaries.class, true, false},
-                { Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1),                        Arrays.asList(CHR1_HG19, CHR_NONSTANDARD2),                                   COMMON_SUBSET, null, false, false},
-                { Arrays.asList(CHR_NONSTANDARD1, CHR1_HG19),                        Arrays.asList(CHR_NONSTANDARD2, CHR1_HG19),                                   COMMON_SUBSET, null, false, false},
-                { Arrays.asList(CHR_NONSTANDARD1, CHR1_HG19),                        Arrays.asList(CHR_NONSTANDARD2, CHR1_HG19, CHRM_HG19),                        COMMON_SUBSET, null, false, false},
-                { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19, CHR_NONSTANDARD1), Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19, CHR_NONSTANDARD2),            COMMON_SUBSET, null, false, false},
-                { Arrays.asList(CHR_NONSTANDARD1, CHR1_HG19, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR_NONSTANDARD2, CHR1_HG19, CHR2_HG19, CHR10_HG19),            COMMON_SUBSET, null, false, false},
-                { Arrays.asList(CHR_NONSTANDARD1, CHR1_HG19, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR_NONSTANDARD2, CHR1_HG19, CHR2_HG19, CHR10_HG19, CHRM_HG19), COMMON_SUBSET, null, false, false},
-                { Arrays.asList(CHR1_B37, CHR2_B37, CHR10_B37, CHR_NONSTANDARD1),    Arrays.asList(CHR1_B37, CHR2_B37, CHR10_B37, CHR_NONSTANDARD2),               COMMON_SUBSET, null, false, false},
+                { Arrays.asList(CHRM_HG19, CHR1_HG19, CHR2_HG19),                    Arrays.asList(CHRM_HG19, CHR1_HG19, CHR10_HG19),                              COMMON_SUBSET, true, false},
+                { Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1),                        Arrays.asList(CHR1_HG19, CHR_NONSTANDARD2),                                   COMMON_SUBSET, false, false},
+                { Arrays.asList(CHR_NONSTANDARD1, CHR1_HG19),                        Arrays.asList(CHR_NONSTANDARD2, CHR1_HG19),                                   COMMON_SUBSET, false, false},
+                { Arrays.asList(CHR_NONSTANDARD1, CHR1_HG19),                        Arrays.asList(CHR_NONSTANDARD2, CHR1_HG19, CHRM_HG19),                        COMMON_SUBSET, false, false},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19, CHR_NONSTANDARD1), Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19, CHR_NONSTANDARD2),            COMMON_SUBSET, false, false},
+                { Arrays.asList(CHR_NONSTANDARD1, CHR1_HG19, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR_NONSTANDARD2, CHR1_HG19, CHR2_HG19, CHR10_HG19),            COMMON_SUBSET, false, false},
+                { Arrays.asList(CHR_NONSTANDARD1, CHR1_HG19, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR_NONSTANDARD2, CHR1_HG19, CHR2_HG19, CHR10_HG19, CHRM_HG19), COMMON_SUBSET, false, false},
+                { Arrays.asList(CHR1_B37, CHR2_B37, CHR10_B37, CHR_NONSTANDARD1),    Arrays.asList(CHR1_B37, CHR2_B37, CHR10_B37, CHR_NONSTANDARD2),               COMMON_SUBSET, false, false},
                 // If requireSuperset == true, we should get an exception upon COMMON_SUBSET:
-                { Arrays.asList(CHR1_HG19, CHR2_HG19),                               Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19),                              COMMON_SUBSET, UserException.IncompatibleSequenceDictionaries.class, true, false},
-                { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19),                   Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19, CHR_NONSTANDARD1),            COMMON_SUBSET, null, false, false},
-                { Arrays.asList(CHR1_HG19, CHR2_HG19),                               Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19),                              COMMON_SUBSET, null, false, false},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19),                               Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19),                              COMMON_SUBSET, true, false},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19),                   Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19, CHR_NONSTANDARD1),            COMMON_SUBSET, false, false},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19),                               Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19),                              COMMON_SUBSET, false, false},
                 // If checkContigOrdering == false, ordering of the common contigs should not matter:
-                { Arrays.asList(CHR1_HG19, CHR2_HG19),                               Arrays.asList(CHR2_HG19, CHR1_HG19, CHR10_HG19),                              COMMON_SUBSET, null, false, false},
-                { Arrays.asList(CHR1_HG19, CHR2_HG19),                               Arrays.asList(CHR10_HG19, CHR2_HG19, CHR1_HG19),                              COMMON_SUBSET, null, false, false},
-                { Arrays.asList(CHR1_HG19, CHR2_HG19),                               Arrays.asList(CHR2_HG19, CHR10_HG19, CHR1_HG19),                              COMMON_SUBSET, null, false, false},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19),                               Arrays.asList(CHR2_HG19, CHR1_HG19, CHR10_HG19),                              COMMON_SUBSET, false, false},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19),                               Arrays.asList(CHR10_HG19, CHR2_HG19, CHR1_HG19),                              COMMON_SUBSET, false, false},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19),                               Arrays.asList(CHR2_HG19, CHR10_HG19, CHR1_HG19),                              COMMON_SUBSET, false, false},
 
                 // Dictionaries with no common contigs:
-                { Arrays.asList(CHR1_HG19),                        Arrays.asList(CHR2_HG19),                     NO_COMMON_CONTIGS, NO_COMMON_CONTIGS_EXCEPTION, false, false},
-                { Arrays.asList(CHR1_HG19),                        Arrays.asList(CHR2_HG19),                     NO_COMMON_CONTIGS, NO_COMMON_CONTIGS_EXCEPTION, false, true},
-                { Arrays.asList(CHR1_HG19),                        Arrays.asList(CHR2_HG19),                     NO_COMMON_CONTIGS, NO_COMMON_CONTIGS_EXCEPTION, true,  false},
-                { Arrays.asList(CHR1_HG19),                        Arrays.asList(CHR2_HG19),                     NO_COMMON_CONTIGS, NO_COMMON_CONTIGS_EXCEPTION, true,  true},
-                { Arrays.asList(CHR1_HG19),                        Arrays.asList(CHR1_B37),                      NO_COMMON_CONTIGS, NO_COMMON_CONTIGS_EXCEPTION, false, false},
-                { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR1_B37, CHR2_B37, CHR10_B37), NO_COMMON_CONTIGS, NO_COMMON_CONTIGS_EXCEPTION, false, false},
-                { Arrays.asList(CHR1_HG19, CHR2_HG19),             Arrays.asList(CHR1_B37, CHR2_B37, CHR10_B37), NO_COMMON_CONTIGS, NO_COMMON_CONTIGS_EXCEPTION, false, false},
+                { Arrays.asList(CHR1_HG19),                        Arrays.asList(CHR2_HG19),                     NO_COMMON_CONTIGS, false, false},
+                { Arrays.asList(CHR1_HG19),                        Arrays.asList(CHR2_HG19),                     NO_COMMON_CONTIGS, false, true},
+                { Arrays.asList(CHR1_HG19),                        Arrays.asList(CHR2_HG19),                     NO_COMMON_CONTIGS, true,  false},
+                { Arrays.asList(CHR1_HG19),                        Arrays.asList(CHR2_HG19),                     NO_COMMON_CONTIGS, true,  true},
+                { Arrays.asList(CHR1_HG19),                        Arrays.asList(CHR1_B37),                      NO_COMMON_CONTIGS, false, false},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR1_B37, CHR2_B37, CHR10_B37), NO_COMMON_CONTIGS, false, false},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19),             Arrays.asList(CHR1_B37, CHR2_B37, CHR10_B37), NO_COMMON_CONTIGS, false, false},
 
                 // Dictionaries with unequal common contigs:
-                { Arrays.asList(CHR1_HG19),                                          Arrays.asList(CHR1_HG19_WITH_DIFFERENT_LENGTH),                    UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, false},
-                { Arrays.asList(CHR1_HG19_WITH_DIFFERENT_LENGTH),                    Arrays.asList(CHR1_HG19),                                          UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, false},
-                { Arrays.asList(CHR1_HG19),                                          Arrays.asList(CHR1_HG18),                                          UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, false},
-                { Arrays.asList(CHR1_HG19),                                          Arrays.asList(CHR1_HG18),                                          UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, true,  false},
-                { Arrays.asList(CHR1_HG19),                                          Arrays.asList(CHR1_HG18),                                          UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, true},
-                { Arrays.asList(CHR1_HG19),                                          Arrays.asList(CHR1_HG18),                                          UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, true,  true},
-                { Arrays.asList(CHR1_B36),                                           Arrays.asList(CHR1_B37),                                           UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, false},
-                { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19),                   Arrays.asList(CHR1_HG18, CHR2_HG18, CHR10_HG18),                   UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, false},
-                { Arrays.asList(CHR1_B37, CHR2_B37, CHR10_B37),                      Arrays.asList(CHR1_B36, CHR2_B36, CHR10_B36),                      UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, false},
-                { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19, CHR_NONSTANDARD1), Arrays.asList(CHR1_HG18, CHR2_HG18, CHR10_HG18, CHR_NONSTANDARD2), UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, false},
-                { Arrays.asList(CHR_NONSTANDARD1, CHR1_HG19, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR_NONSTANDARD2, CHR1_HG18, CHR2_HG18, CHR10_HG18), UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, false},
-                { Arrays.asList(CHR1_HG19, CHR2_HG19),                               Arrays.asList(CHR1_HG18, CHR2_HG18, CHR10_HG18),                   UNEQUAL_COMMON_CONTIGS, UNEQUAL_COMMON_CONTIGS_EXCEPTION, false, false},
+                { Arrays.asList(CHR1_HG19),                                          Arrays.asList(CHR1_HG19_WITH_DIFFERENT_LENGTH),                    UNEQUAL_COMMON_CONTIGS, false, false},
+                { Arrays.asList(CHR1_HG19_WITH_DIFFERENT_LENGTH),                    Arrays.asList(CHR1_HG19),                                          UNEQUAL_COMMON_CONTIGS, false, false},
+                { Arrays.asList(CHR1_HG19),                                          Arrays.asList(CHR1_HG18),                                          UNEQUAL_COMMON_CONTIGS, false, false},
+                { Arrays.asList(CHR1_HG19),                                          Arrays.asList(CHR1_HG18),                                          UNEQUAL_COMMON_CONTIGS, true,  false},
+                { Arrays.asList(CHR1_HG19),                                          Arrays.asList(CHR1_HG18),                                          UNEQUAL_COMMON_CONTIGS, false, true},
+                { Arrays.asList(CHR1_HG19),                                          Arrays.asList(CHR1_HG18),                                          UNEQUAL_COMMON_CONTIGS, true,  true},
+                { Arrays.asList(CHR1_B36),                                           Arrays.asList(CHR1_B37),                                           UNEQUAL_COMMON_CONTIGS, false, false},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19),                   Arrays.asList(CHR1_HG18, CHR2_HG18, CHR10_HG18),                   UNEQUAL_COMMON_CONTIGS, false, false},
+                { Arrays.asList(CHR1_B37, CHR2_B37, CHR10_B37),                      Arrays.asList(CHR1_B36, CHR2_B36, CHR10_B36),                      UNEQUAL_COMMON_CONTIGS, false, false},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19, CHR_NONSTANDARD1), Arrays.asList(CHR1_HG18, CHR2_HG18, CHR10_HG18, CHR_NONSTANDARD2), UNEQUAL_COMMON_CONTIGS, false, false},
+                { Arrays.asList(CHR_NONSTANDARD1, CHR1_HG19, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR_NONSTANDARD2, CHR1_HG18, CHR2_HG18, CHR10_HG18), UNEQUAL_COMMON_CONTIGS, false, false},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19),                               Arrays.asList(CHR1_HG18, CHR2_HG18, CHR10_HG18),                   UNEQUAL_COMMON_CONTIGS, false, false},
 
                 // One or both dictionaries in non-canonical human order:
-                { Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), NON_CANONICAL_HUMAN_ORDER, NON_CANONICAL_HUMAN_ORDER_EXCEPTION, false, true},
-                { Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), NON_CANONICAL_HUMAN_ORDER, NON_CANONICAL_HUMAN_ORDER_EXCEPTION, true,  true},
-                { Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), NON_CANONICAL_HUMAN_ORDER, NON_CANONICAL_HUMAN_ORDER_EXCEPTION, false, true},
-                { Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), NON_CANONICAL_HUMAN_ORDER, NON_CANONICAL_HUMAN_ORDER_EXCEPTION, true,  true},
-                { Arrays.asList(CHR1_HG18, CHR10_HG18, CHR2_HG18), Arrays.asList(CHR1_HG18, CHR10_HG18, CHR2_HG18), NON_CANONICAL_HUMAN_ORDER, NON_CANONICAL_HUMAN_ORDER_EXCEPTION, false, true},
-                { Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), NON_CANONICAL_HUMAN_ORDER, NON_CANONICAL_HUMAN_ORDER_EXCEPTION, false, true},
-                { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), NON_CANONICAL_HUMAN_ORDER, NON_CANONICAL_HUMAN_ORDER_EXCEPTION, false, true},
-                { Arrays.asList(CHR1_B37, CHR10_B37, CHR2_B37),    Arrays.asList(CHR1_B37, CHR10_B37, CHR2_B37),    NON_CANONICAL_HUMAN_ORDER, NON_CANONICAL_HUMAN_ORDER_EXCEPTION, false, true},
-                { Arrays.asList(CHR1_B36, CHR10_B36, CHR2_B36),    Arrays.asList(CHR1_B36, CHR10_B36, CHR2_B36),    NON_CANONICAL_HUMAN_ORDER, NON_CANONICAL_HUMAN_ORDER_EXCEPTION, false, true},
+                { Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), NON_CANONICAL_HUMAN_ORDER, false, true},
+                { Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), NON_CANONICAL_HUMAN_ORDER, true,  true},
+                { Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), NON_CANONICAL_HUMAN_ORDER, false, true},
+                { Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), NON_CANONICAL_HUMAN_ORDER, true,  true},
+                { Arrays.asList(CHR1_HG18, CHR10_HG18, CHR2_HG18), Arrays.asList(CHR1_HG18, CHR10_HG18, CHR2_HG18), NON_CANONICAL_HUMAN_ORDER, false, true},
+                { Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), NON_CANONICAL_HUMAN_ORDER, false, true},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19), Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), NON_CANONICAL_HUMAN_ORDER, false, true},
+                { Arrays.asList(CHR1_B37, CHR10_B37, CHR2_B37),    Arrays.asList(CHR1_B37, CHR10_B37, CHR2_B37),    NON_CANONICAL_HUMAN_ORDER, false, true},
+                { Arrays.asList(CHR1_B36, CHR10_B36, CHR2_B36),    Arrays.asList(CHR1_B36, CHR10_B36, CHR2_B36),    NON_CANONICAL_HUMAN_ORDER, false, true},
                 // If checkContigOrdering == false, we should not get NON_CANONICAL_HUMAN_ORDER:
-                { Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), IDENTICAL, null, false, false},
-                { Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19, CHR_NONSTANDARD1), COMMON_SUBSET, null, false, false},
+                { Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), IDENTICAL, false, false},
+                { Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19), Arrays.asList(CHR1_HG19, CHR10_HG19, CHR2_HG19, CHR_NONSTANDARD1), COMMON_SUBSET, false, false},
 
                 // Dictionaries with a common subset, but different relative ordering within that subset
-                { Arrays.asList(CHR1_HG19, CHR2_HG19),            Arrays.asList(CHR2_HG19, CHR1_HG19),                              OUT_OF_ORDER, OUT_OF_ORDER_EXCEPTION, false, true},
-                { Arrays.asList(CHR1_HG19, CHR2_HG19),            Arrays.asList(CHR2_HG19, CHR1_HG19),                              OUT_OF_ORDER, OUT_OF_ORDER_EXCEPTION, true,  true},
-                { Arrays.asList(CHR1_HG19, CHR2_HG19),            Arrays.asList(CHR2_HG19, CHR1_HG19),                              OUT_OF_ORDER, OUT_OF_ORDER_EXCEPTION, false, true},
-                { Arrays.asList(CHR1_HG19, CHR2_HG19),            Arrays.asList(CHR2_HG19, CHR1_HG19),                              OUT_OF_ORDER, OUT_OF_ORDER_EXCEPTION, true,  true},
-                { Arrays.asList(CHRM_HG19, CHR1_HG19, CHR2_HG19), Arrays.asList(CHR2_HG19, CHR1_HG19, CHRM_HG19),                   OUT_OF_ORDER, OUT_OF_ORDER_EXCEPTION, false, true},
-                { Arrays.asList(CHRM_HG19, CHR1_HG19, CHR2_HG19), Arrays.asList(CHRM_HG19, CHR2_HG19, CHR1_HG19),                   OUT_OF_ORDER, OUT_OF_ORDER_EXCEPTION, false, true},
-                { Arrays.asList(CHRM_HG19, CHR1_HG19, CHR2_HG19), Arrays.asList(CHR2_HG19, CHRM_HG19, CHR1_HG19),                   OUT_OF_ORDER, OUT_OF_ORDER_EXCEPTION, false, true},
-                { Arrays.asList(CHR1_B37, CHR2_B37),              Arrays.asList(CHR2_B37, CHR1_B37),                                OUT_OF_ORDER, OUT_OF_ORDER_EXCEPTION, false, true},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19),            Arrays.asList(CHR2_HG19, CHR1_HG19),                              OUT_OF_ORDER, false, true},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19),            Arrays.asList(CHR2_HG19, CHR1_HG19),                              OUT_OF_ORDER, true,  true},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19),            Arrays.asList(CHR2_HG19, CHR1_HG19),                              OUT_OF_ORDER, false, true},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19),            Arrays.asList(CHR2_HG19, CHR1_HG19),                              OUT_OF_ORDER, true,  true},
+                { Arrays.asList(CHRM_HG19, CHR1_HG19, CHR2_HG19), Arrays.asList(CHR2_HG19, CHR1_HG19, CHRM_HG19),                   OUT_OF_ORDER, false, true},
+                { Arrays.asList(CHRM_HG19, CHR1_HG19, CHR2_HG19), Arrays.asList(CHRM_HG19, CHR2_HG19, CHR1_HG19),                   OUT_OF_ORDER, false, true},
+                { Arrays.asList(CHRM_HG19, CHR1_HG19, CHR2_HG19), Arrays.asList(CHR2_HG19, CHRM_HG19, CHR1_HG19),                   OUT_OF_ORDER, false, true},
+                { Arrays.asList(CHR1_B37, CHR2_B37),              Arrays.asList(CHR2_B37, CHR1_B37),                                OUT_OF_ORDER, false, true},
                 // If checkContigOrdering == false, we should not get OUT_OF_ORDER:
-                { Arrays.asList(CHR1_HG19, CHR2_HG19),            Arrays.asList(CHR2_HG19, CHR1_HG19),                              SUPERSET,      null, false, false},
-                { Arrays.asList(CHR1_HG19, CHR2_HG19),            Arrays.asList(CHR2_HG19, CHR1_HG19, CHR_NONSTANDARD1),            COMMON_SUBSET, null, false, false},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19),            Arrays.asList(CHR2_HG19, CHR1_HG19),                              SUPERSET,     false, false},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19),            Arrays.asList(CHR2_HG19, CHR1_HG19, CHR_NONSTANDARD1),            COMMON_SUBSET,false, false},
 
                 // Dictionaries with a common subset in the same relative order, but with different indices.
                 // This will only throw an exception during validation if checkContigOrdering is true
 
                 // These have checkContigOrdering == true, so we expect DIFFERENT_INDICES and an exception:
-                { Arrays.asList(CHRM_HG19, CHR1_HG19),                                                 Arrays.asList(CHR1_HG19),                                          DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, false, true},
+                { Arrays.asList(CHRM_HG19, CHR1_HG19),                                                 Arrays.asList(CHR1_HG19),                                          DIFFERENT_INDICES, false, true},
                 // Setting requireSuperset == true should make no difference here (we should still get DIFFERENT_INDICES and an exception):
-                { Arrays.asList(CHRM_HG19, CHR1_HG19),                                                 Arrays.asList(CHR1_HG19),                                          DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, true,  true},
-                { Arrays.asList(CHR1_HG19, CHR2_HG19),                                                 Arrays.asList(CHRM_HG19, CHR1_HG19, CHR2_HG19),                    DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, false, true},
-                { Arrays.asList(CHR1_HG19, CHR2_HG19),                                                 Arrays.asList(CHRM_HG19, CHR1_HG19, CHR2_HG19, CHR_NONSTANDARD1),  DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, false, true},
-                { Arrays.asList(CHR1_HG19, CHR2_HG19),                                                 Arrays.asList(CHRM_HG19, CHR_NONSTANDARD1, CHR1_HG19, CHR2_HG19),  DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, false, true},
-                { Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1, CHR2_HG19),                               Arrays.asList(CHR1_HG19, CHR2_HG19),                               DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, false, true},
-                { Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1, CHR2_HG19, CHR10_HG19),                   Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19),                   DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, false, true},
-                { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR_NONSTANDARD1, CHRM_HG19 ),                   Arrays.asList(CHR1_HG19, CHR2_HG19, CHRM_HG19),                    DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, false, true},
-                { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR_NONSTANDARD1, CHRM_HG19, CHR_NONSTANDARD2 ), Arrays.asList(CHR1_HG19, CHR2_HG19, CHRM_HG19, CHR_NONSTANDARD2 ), DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, false, true},
-                { Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1, CHR2_HG19, CHRM_HG19, CHR_NONSTANDARD2 ), Arrays.asList(CHR1_HG19, CHR2_HG19, CHRM_HG19, CHR_NONSTANDARD2 ), DIFFERENT_INDICES, DIFFERENT_INDICES_EXCEPTION, false, true},
+                { Arrays.asList(CHRM_HG19, CHR1_HG19),                                                 Arrays.asList(CHR1_HG19),                                          DIFFERENT_INDICES, true,  true},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19),                                                 Arrays.asList(CHRM_HG19, CHR1_HG19, CHR2_HG19),                    DIFFERENT_INDICES, false, true},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19),                                                 Arrays.asList(CHRM_HG19, CHR1_HG19, CHR2_HG19, CHR_NONSTANDARD1),  DIFFERENT_INDICES, false, true},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19),                                                 Arrays.asList(CHRM_HG19, CHR_NONSTANDARD1, CHR1_HG19, CHR2_HG19),  DIFFERENT_INDICES, false, true},
+                { Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1, CHR2_HG19),                               Arrays.asList(CHR1_HG19, CHR2_HG19),                               DIFFERENT_INDICES, false, true},
+                { Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1, CHR2_HG19, CHR10_HG19),                   Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19),                   DIFFERENT_INDICES, false, true},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR_NONSTANDARD1, CHRM_HG19 ),                   Arrays.asList(CHR1_HG19, CHR2_HG19, CHRM_HG19),                    DIFFERENT_INDICES, false, true},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR_NONSTANDARD1, CHRM_HG19, CHR_NONSTANDARD2 ), Arrays.asList(CHR1_HG19, CHR2_HG19, CHRM_HG19, CHR_NONSTANDARD2 ), DIFFERENT_INDICES, false, true},
+                { Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1, CHR2_HG19, CHRM_HG19, CHR_NONSTANDARD2 ), Arrays.asList(CHR1_HG19, CHR2_HG19, CHRM_HG19, CHR_NONSTANDARD2 ), DIFFERENT_INDICES, false, true},
 
                 // Same test cases as above, but these have checkContigOrdering == false, so we expect SUPERSET or COMMON_SUBSET instead of DIFFERENT_INDICES, and no exception:
-                { Arrays.asList(CHRM_HG19, CHR1_HG19),                                                 Arrays.asList(CHR1_HG19),                                          SUPERSET,      null, false, false},
-                { Arrays.asList(CHRM_HG19, CHR1_HG19),                                                 Arrays.asList(CHR1_HG19),                                          SUPERSET,      null, true,  false},
-                { Arrays.asList(CHR1_HG19, CHR2_HG19),                                                 Arrays.asList(CHRM_HG19, CHR1_HG19, CHR2_HG19),                    COMMON_SUBSET, null, false, false},
-                { Arrays.asList(CHR1_HG19, CHR2_HG19),                                                 Arrays.asList(CHRM_HG19, CHR1_HG19, CHR2_HG19, CHR_NONSTANDARD1),  COMMON_SUBSET, null, false, false},
-                { Arrays.asList(CHR1_HG19, CHR2_HG19),                                                 Arrays.asList(CHRM_HG19, CHR_NONSTANDARD1, CHR1_HG19, CHR2_HG19),  COMMON_SUBSET, null, false, false},
-                { Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1, CHR2_HG19),                               Arrays.asList(CHR1_HG19, CHR2_HG19),                               SUPERSET,      null, false, false},
-                { Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1, CHR2_HG19, CHR10_HG19),                   Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19),                   SUPERSET,      null, false, false},
-                { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR_NONSTANDARD1, CHRM_HG19 ),                   Arrays.asList(CHR1_HG19, CHR2_HG19, CHRM_HG19),                    SUPERSET,      null, false, false},
-                { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR_NONSTANDARD1, CHRM_HG19, CHR_NONSTANDARD2 ), Arrays.asList(CHR1_HG19, CHR2_HG19, CHRM_HG19, CHR_NONSTANDARD2 ), SUPERSET,      null, false, false},
-                { Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1, CHR2_HG19, CHRM_HG19, CHR_NONSTANDARD2 ), Arrays.asList(CHR1_HG19, CHR2_HG19, CHRM_HG19, CHR_NONSTANDARD2 ), SUPERSET,      null, false, false},
+                { Arrays.asList(CHRM_HG19, CHR1_HG19),                                                 Arrays.asList(CHR1_HG19),                                          SUPERSET,      false, false},
+                { Arrays.asList(CHRM_HG19, CHR1_HG19),                                                 Arrays.asList(CHR1_HG19),                                          SUPERSET,      true,  false},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19),                                                 Arrays.asList(CHRM_HG19, CHR1_HG19, CHR2_HG19),                    COMMON_SUBSET, false, false},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19),                                                 Arrays.asList(CHRM_HG19, CHR1_HG19, CHR2_HG19, CHR_NONSTANDARD1),  COMMON_SUBSET, false, false},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19),                                                 Arrays.asList(CHRM_HG19, CHR_NONSTANDARD1, CHR1_HG19, CHR2_HG19),  COMMON_SUBSET, false, false},
+                { Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1, CHR2_HG19),                               Arrays.asList(CHR1_HG19, CHR2_HG19),                               SUPERSET,      false, false},
+                { Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1, CHR2_HG19, CHR10_HG19),                   Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19),                   SUPERSET,      false, false},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR_NONSTANDARD1, CHRM_HG19 ),                   Arrays.asList(CHR1_HG19, CHR2_HG19, CHRM_HG19),                    SUPERSET,      false, false},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR_NONSTANDARD1, CHRM_HG19, CHR_NONSTANDARD2 ), Arrays.asList(CHR1_HG19, CHR2_HG19, CHRM_HG19, CHR_NONSTANDARD2 ), SUPERSET,      false, false},
+                { Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1, CHR2_HG19, CHRM_HG19, CHR_NONSTANDARD2 ), Arrays.asList(CHR1_HG19, CHR2_HG19, CHRM_HG19, CHR_NONSTANDARD2 ), SUPERSET,      false, false},
 
                 // tests for SUPERSET
-                { Arrays.asList(CHR1_HG19, CHR2_HG19),                                  Arrays.asList(CHR1_HG19),                                                       SUPERSET, null, false, false},
-                { Arrays.asList(CHR1_HG19, CHR2_HG19),                                  Arrays.asList(CHR1_HG19),                                                       SUPERSET, null, false, true},
-                { Arrays.asList(CHR1_HG19, CHR2_HG19),                                  Arrays.asList(CHR1_HG19),                                                       SUPERSET, null, true,  false},
-                { Arrays.asList(CHR1_HG19, CHR2_HG19),                                  Arrays.asList(CHR1_HG19),                                                       SUPERSET, null, true,  true},
-                { Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1),                           Arrays.asList(CHR1_HG19),                                                       SUPERSET, null, false, false},
-                { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19, CHR_NONSTANDARD1),    Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19),                                SUPERSET, null, false, false},
-                { Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1, CHR2_HG19, CHR10_HG19),    Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19),                                SUPERSET, null, false, false},
-                { Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1, CHR2_HG19, CHR10_HG19),    Arrays.asList(CHR1_HG19, CHR2_HG19),                                            SUPERSET, null, false, false},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19),                                  Arrays.asList(CHR1_HG19),                                                       SUPERSET, false, false},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19),                                  Arrays.asList(CHR1_HG19),                                                       SUPERSET, false, true},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19),                                  Arrays.asList(CHR1_HG19),                                                       SUPERSET, true,  false},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19),                                  Arrays.asList(CHR1_HG19),                                                       SUPERSET, true,  true},
+                { Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1),                           Arrays.asList(CHR1_HG19),                                                       SUPERSET, false, false},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19, CHR_NONSTANDARD1),    Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19),                                SUPERSET, false, false},
+                { Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1, CHR2_HG19, CHR10_HG19),    Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19),                                SUPERSET, false, false},
+                { Arrays.asList(CHR1_HG19, CHR_NONSTANDARD1, CHR2_HG19, CHR10_HG19),    Arrays.asList(CHR1_HG19, CHR2_HG19),                                            SUPERSET, false, false},
                 // Extended attributes should be ignored when determining whether a superset exists:
-                { Arrays.asList(CHR1_HG19, CHR2_HG19),                                  Arrays.asList(CHR1_HG19_WITH_ATTRIBUTES),                                       SUPERSET, null, false, false},
-                { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19, CHRM_HG19),           Arrays.asList(CHR1_HG19_WITH_ATTRIBUTES, CHR10_HG19),                           SUPERSET, null, false, false}
+                { Arrays.asList(CHR1_HG19, CHR2_HG19),                                  Arrays.asList(CHR1_HG19_WITH_ATTRIBUTES),                                       SUPERSET, false, false},
+                { Arrays.asList(CHR1_HG19, CHR2_HG19, CHR10_HG19, CHRM_HG19),           Arrays.asList(CHR1_HG19_WITH_ATTRIBUTES, CHR10_HG19),                           SUPERSET, false, false}
         };
     }
 
-    @Test( dataProvider = "SequenceDictionaryDataProvider" )
-    public void testSequenceDictionaryValidation( final List<SAMSequenceRecord> firstDictionaryContigs,
-                                                  final List<SAMSequenceRecord> secondDictionaryContigs,
-                                                  final SequenceDictionaryUtils.SequenceDictionaryCompatibility dictionaryCompatibility, //not needed by this test
-                                                  final Class<? extends UserException> expectedExceptionUponValidation,
-                                                  final boolean requireSuperset,
-                                                  final boolean checkContigOrdering) {
-        final SAMSequenceDictionary firstDictionary = createSequenceDictionary(firstDictionaryContigs);
-        final SAMSequenceDictionary secondDictionary = createSequenceDictionary(secondDictionaryContigs);
-        final String testDescription = String.format("First dictionary: %s  Second dictionary: %s",
-                SequenceDictionaryUtils.getDictionaryAsString(firstDictionary),
-                SequenceDictionaryUtils.getDictionaryAsString(secondDictionary));
-        Exception exceptionThrown = null;
-        try {
-            SequenceDictionaryUtils.validateDictionaries(
-                    "firstDictionary",
-                    firstDictionary,
-                    "secondDictionary",
-                    secondDictionary,
-                    requireSuperset,
-                    checkContigOrdering);
-        }
-        catch ( Exception e ) {
-            exceptionThrown = e;
-        }
-        if ( expectedExceptionUponValidation != null ) {
-            Assert.assertTrue(exceptionThrown != null && expectedExceptionUponValidation.isInstance(exceptionThrown),
-                    String.format("Expected exception %s but saw %s instead. %s",
-                            expectedExceptionUponValidation.getSimpleName(),
-                            exceptionThrown == null ? "no exception" : exceptionThrown.getClass().getSimpleName(),
-                            testDescription));
-        }
-        else {
-            Assert.assertTrue(exceptionThrown == null,
-                    String.format("Expected no exception but saw exception %s instead. %s",
-                            exceptionThrown != null ? exceptionThrown.getClass().getSimpleName() : "none",
-                            testDescription));
-        }
-    }
-
     @Test( dataProvider = "SequenceDictionaryDataProvider" )
     public void testSequenceDictionaryComparison( final List<SAMSequenceRecord> firstDictionaryContigs,
                                                   final List<SAMSequenceRecord> secondDictionaryContigs,
-                                                  final SequenceDictionaryUtils.SequenceDictionaryCompatibility dictionaryCompatibility,
-                                                  final Class<? extends UserException> expectedExceptionUponValidation,
+                                                  final SAMSequenceDictionaryUtils.SequenceDictionaryCompatibility dictionaryCompatibility,
                                                   final boolean requireSuperset,
                                                   final boolean checkContigOrdering) {
 
         final SAMSequenceDictionary firstDictionary = createSequenceDictionary(firstDictionaryContigs);
         final SAMSequenceDictionary secondDictionary = createSequenceDictionary(secondDictionaryContigs);
         final String testDescription = String.format("First dictionary: %s  Second dictionary: %s",
-                SequenceDictionaryUtils.getDictionaryAsString(firstDictionary),
-                SequenceDictionaryUtils.getDictionaryAsString(secondDictionary));
+                SAMSequenceDictionaryUtils.getDictionaryAsString(firstDictionary),
+                SAMSequenceDictionaryUtils.getDictionaryAsString(secondDictionary));
 
-        final SequenceDictionaryUtils.SequenceDictionaryCompatibility reportedCompatibility =
-                SequenceDictionaryUtils.compareDictionaries(firstDictionary, secondDictionary, checkContigOrdering);
+        final SAMSequenceDictionaryUtils.SequenceDictionaryCompatibility reportedCompatibility =
+                SAMSequenceDictionaryUtils.compareDictionaries(firstDictionary, secondDictionary, checkContigOrdering);
 
         Assert.assertTrue(reportedCompatibility == dictionaryCompatibility,
                 String.format("Dictionary comparison should have returned %s but instead returned %s. %s",
@@ -274,64 +221,8 @@ public Object[][] getStandardValidationIgnoresContigOrderData() {
         };
     }
 
-    @Test(dataProvider = "StandardValidationIgnoresContigOrderData")
-    public void testStandardValidationIgnoresContigOrder( final List<SAMSequenceRecord> firstDictionaryContigs, final List<SAMSequenceRecord> secondDictionaryContigs ) {
-        final SAMSequenceDictionary firstDictionary = createSequenceDictionary(firstDictionaryContigs);
-        final SAMSequenceDictionary secondDictionary = createSequenceDictionary(secondDictionaryContigs);
-
-        // Standard validation (the overload of validateDictionaries() that doesn't take any boolean args)
-        // should ignore differences in ordering of common contigs, so we shouldn't get an exception here
-        SequenceDictionaryUtils.validateDictionaries("first", firstDictionary, "second", secondDictionary);
-    }
-
-    @DataProvider(name = "NonSupersetData")
-    public Object[][] getNonSupersetData() {
-        return new Object[][] {
-                { Arrays.asList(CHR1_HG19, CHR2_HG19), Arrays.asList(CHR2_HG19, CHR1_HG19, CHR10_HG19) },
-                { Arrays.asList(CHR1_HG19),            Arrays.asList(CHR10_HG19, CHR2_HG19, CHR1_HG19) }
-        };
-    }
-
-    @Test(dataProvider = "NonSupersetData")
-    public void testStandardValidationDoesNotRequireSuperset( final List<SAMSequenceRecord> firstDictionaryContigs, final List<SAMSequenceRecord> secondDictionaryContigs ) {
-        final SAMSequenceDictionary firstDictionary = createSequenceDictionary(firstDictionaryContigs);
-        final SAMSequenceDictionary secondDictionary = createSequenceDictionary(secondDictionaryContigs);
-
-        // Standard validation (the overload of validateDictionaries() that doesn't take any boolean args)
-        // should not require a superset relationship, so we shouldn't get an exception here
-        SequenceDictionaryUtils.validateDictionaries("first", firstDictionary, "second", secondDictionary);
-    }
-
-    @Test(dataProvider = "NonSupersetData", expectedExceptions = UserException.IncompatibleSequenceDictionaries.class)
-    public void testCRAMValidationDoesRequireSuperset( final List<SAMSequenceRecord> refDictionaryContigs, final List<SAMSequenceRecord> cramDictionaryContigs ) {
-        final SAMSequenceDictionary refDictionary = createSequenceDictionary(refDictionaryContigs);
-        final SAMSequenceDictionary cramDictionary = createSequenceDictionary(cramDictionaryContigs);
-
-        // CRAM validation against the reference SHOULD require a superset relationship, so we should
-        // get an exception here
-        SequenceDictionaryUtils.validateCRAMDictionaryAgainstReference(refDictionary, cramDictionary);
-    }
-
-    @DataProvider(name = "SupersetData")
-    public Object[][] getSupersetData() {
-        return new Object[][] {
-                { Arrays.asList(CHR2_HG19, CHR1_HG19, CHR10_HG19), Arrays.asList(CHR2_HG19, CHR1_HG19, CHR10_HG19)}, //exactly same
-                { Arrays.asList(CHR2_HG19, CHR1_HG19, CHR10_HG19), Arrays.asList(CHR1_HG19, CHR2_HG19) },
-                { Arrays.asList(CHR10_HG19, CHR2_HG19, CHR1_HG19), Arrays.asList(CHR1_HG19) }
-        };
-    }
-
-    @Test(dataProvider = "SupersetData")
-    public void testCRAMValidationDoesAcceptSuperset( final List<SAMSequenceRecord> refDictionaryContigs, final List<SAMSequenceRecord> cramDictionaryContigs ) {
-        final SAMSequenceDictionary refDictionary = createSequenceDictionary(refDictionaryContigs);
-        final SAMSequenceDictionary cramDictionary = createSequenceDictionary(cramDictionaryContigs);
-
-        //In these inputs , cram contigs are subsets of ref contigs and so it should be accepted
-        SequenceDictionaryUtils.validateCRAMDictionaryAgainstReference(refDictionary, cramDictionary);
-    }
-
     private SAMSequenceDictionary createSequenceDictionary( final List<SAMSequenceRecord> contigs ) {
-        final List<SAMSequenceRecord> clonedContigs = new ArrayList<SAMSequenceRecord>(contigs.size());
+        final List<SAMSequenceRecord> clonedContigs = new ArrayList<>(contigs.size());
 
         // Clone the individual SAMSequenceRecords to avoid contig-index issues with shared objects
         // across multiple dictionaries in tests
diff --git a/src/test/java/htsjdk/variant/bcf2/BCF2UtilsUnitTest.java b/src/test/java/htsjdk/variant/bcf2/BCF2UtilsUnitTest.java
index 91804c48dc..95fb359446 100644
--- a/src/test/java/htsjdk/variant/bcf2/BCF2UtilsUnitTest.java
+++ b/src/test/java/htsjdk/variant/bcf2/BCF2UtilsUnitTest.java
@@ -34,9 +34,8 @@
 import htsjdk.variant.vcf.VCFHeaderLine;
 import htsjdk.variant.vcf.VCFHeaderLineCount;
 import htsjdk.variant.vcf.VCFHeaderLineType;
-import htsjdk.variant.vcf.VCFIDHeaderLine;
+import htsjdk.variant.vcf.VCFHeaderVersion;
 import htsjdk.variant.vcf.VCFInfoHeaderLine;
-import htsjdk.variant.vcf.VCFSimpleHeaderLine;
 import org.testng.Assert;
 import org.testng.annotations.DataProvider;
 import org.testng.annotations.Test;
@@ -75,22 +74,22 @@ public void testCollapseExpandTest(final List<String> in, final String expectedC
     public void testCreateDictionary() {
         final List<VCFHeaderLine> inputLines = new ArrayList<VCFHeaderLine>();
         int counter = 0;
+        inputLines.add(VCFHeader.makeHeaderVersionLine(VCFHeader.DEFAULT_VCF_VERSION));
         inputLines.add(new VCFFilterHeaderLine(String.valueOf(counter++)));
         inputLines.add(new VCFFilterHeaderLine(String.valueOf(counter++)));
         inputLines.add(new VCFContigHeaderLine(Collections.singletonMap("ID", String.valueOf(counter++)), counter));
         inputLines.add(new VCFContigHeaderLine(Collections.singletonMap("ID", String.valueOf(counter++)), counter));
-        inputLines.add(new VCFInfoHeaderLine(String.valueOf(counter++), VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x"));
-        inputLines.add(new VCFInfoHeaderLine(String.valueOf(counter++), VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x"));
+        inputLines.add(new VCFInfoHeaderLine("A" + counter++, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x"));
+        inputLines.add(new VCFInfoHeaderLine("A" + counter++, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x"));
         inputLines.add(new VCFHeaderLine("x", "misc"));
         inputLines.add(new VCFHeaderLine("y", "misc"));
-        inputLines.add(new VCFSimpleHeaderLine("GATKCommandLine","z","misc"));
-        inputLines.add(new VCFFormatHeaderLine(String.valueOf(counter++), VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x"));
-        inputLines.add(new VCFFormatHeaderLine(String.valueOf(counter++), VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x"));
-        final int inputLineCounter = counter;
-        final VCFHeader inputHeader = new VCFHeader(new LinkedHashSet<VCFHeaderLine>(inputLines));
+        inputLines.add(new VCFFilterHeaderLine("aFilter", "misc"));
+        inputLines.add(new VCFFormatHeaderLine("A" + counter++, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x"));
+        inputLines.add(new VCFFormatHeaderLine("A" + counter++, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x"));
+        final VCFHeader inputHeader = new VCFHeader(new LinkedHashSet<>(inputLines));
         final ArrayList<String> dict = BCF2Utils.makeDictionary(inputHeader);
         final int dict_size = dict.size();
-        Assert.assertEquals(7,dict_size);
+        Assert.assertEquals(dict_size, 8); // TestNG argument order is (actual, expected)
     }
 
     /**
@@ -115,6 +114,7 @@ public Object[][] makeHeaderOrderTestProvider() {
         final List<VCFHeaderLine> extraLines = new ArrayList<VCFHeaderLine>();
 
         int counter = 0;
+        inputLines.add(VCFHeader.makeHeaderVersionLine(VCFHeader.DEFAULT_VCF_VERSION));
         inputLines.add(new VCFFilterHeaderLine(String.valueOf(counter++)));
         inputLines.add(new VCFFilterHeaderLine(String.valueOf(counter++)));
         inputLines.add(new VCFContigHeaderLine(Collections.singletonMap("ID", String.valueOf(counter++)), counter));
@@ -142,7 +142,7 @@ public Object[][] makeHeaderOrderTestProvider() {
             for ( final List<VCFHeaderLine> permutation : permutations ) {
                 for ( int i = -1; i < inputLines.size(); i++ ) {
                     final List<VCFHeaderLine> allLines = new ArrayList<VCFHeaderLine>(inputLines);
-                    if ( i >= 0 )
+                    if ( i >= 0 && !VCFHeaderVersion.isFormatString(allLines.get(i).getKey()) )
                         allLines.remove(i);
                     allLines.addAll(permutation);
                     final VCFHeader testHeader = new VCFHeader(new LinkedHashSet<VCFHeaderLine>(allLines));
@@ -179,8 +179,8 @@ public Object[][] makeHeaderOrderTestProvider() {
     private static boolean expectedConsistent(final VCFHeader combinationHeader, final int minCounterForInputLines) {
         final List<Integer> ids = new ArrayList<Integer>();
         for ( final VCFHeaderLine line : combinationHeader.getMetaDataInInputOrder() ) {
-            if ( line instanceof VCFIDHeaderLine) {
-                ids.add(Integer.valueOf(((VCFIDHeaderLine) line).getID()));
+            if ( line.isIDHeaderLine()) {
+                ids.add(Integer.valueOf(line.getID()));
             }
         }
 
diff --git a/src/test/java/htsjdk/variant/bcf2/BCF2WriterUnitTest.java b/src/test/java/htsjdk/variant/bcf2/BCF2WriterUnitTest.java
index 7a99916c5b..17e2ae3257 100644
--- a/src/test/java/htsjdk/variant/bcf2/BCF2WriterUnitTest.java
+++ b/src/test/java/htsjdk/variant/bcf2/BCF2WriterUnitTest.java
@@ -75,6 +75,7 @@ private static VCFHeader createFakeHeader() {
         final SAMSequenceDictionary sequenceDict = createArtificialSequenceDictionary();
         final Set<VCFHeaderLine> metaData = new HashSet<>();
         final Set<String> additionalColumns = new HashSet<>();
+        metaData.add(VCFHeader.makeHeaderVersionLine(VCFHeader.DEFAULT_VCF_VERSION));
         metaData.add(new VCFHeaderLine("two", "2"));
         additionalColumns.add("extra1");
         additionalColumns.add("extra2");
diff --git a/src/test/java/htsjdk/variant/variantcontext/VariantContextTestProvider.java b/src/test/java/htsjdk/variant/variantcontext/VariantContextTestProvider.java
index 8cff545f78..e04910eb0e 100644
--- a/src/test/java/htsjdk/variant/variantcontext/VariantContextTestProvider.java
+++ b/src/test/java/htsjdk/variant/variantcontext/VariantContextTestProvider.java
@@ -221,6 +221,7 @@ private final static void addHeaderLine(final Set<VCFHeaderLine> metaData, final
     private static void createSyntheticHeader() {
         Set<VCFHeaderLine> metaData = new TreeSet<>();
 
+        metaData.add(VCFHeader.makeHeaderVersionLine(VCFHeader.DEFAULT_VCF_VERSION));
         addHeaderLine(metaData, "STRING1", 1, VCFHeaderLineType.String);
         addHeaderLine(metaData, "END", 1, VCFHeaderLineType.Integer);
         addHeaderLine(metaData, "STRING3", 3, VCFHeaderLineType.String);
diff --git a/src/test/java/htsjdk/variant/variantcontext/writer/AsyncVariantContextWriterUnitTest.java b/src/test/java/htsjdk/variant/variantcontext/writer/AsyncVariantContextWriterUnitTest.java
index 9e7f7e45cb..379130407c 100644
--- a/src/test/java/htsjdk/variant/variantcontext/writer/AsyncVariantContextWriterUnitTest.java
+++ b/src/test/java/htsjdk/variant/variantcontext/writer/AsyncVariantContextWriterUnitTest.java
@@ -89,7 +89,7 @@ public void testWriteAndReadAsyncVCFHeaderless() throws IOException {
             writer.add(createVC(header));
         }
         final VCFCodec codec = new VCFCodec();
-        codec.setVCFHeader(header, VCFHeaderVersion.VCF4_2);
+        codec.setVCFHeader(header);
 
         try (final FileInputStream fis = new FileInputStream(fakeVCFFile)) {
             final AsciiLineReaderIterator iterator = new AsciiLineReaderIterator(new AsciiLineReader(fis));
@@ -110,6 +110,7 @@ public void testWriteAndReadAsyncVCFHeaderless() throws IOException {
      */
     public static VCFHeader createFakeHeader(final Set<VCFHeaderLine> metaData, final Set<String> additionalColumns,
                                              final SAMSequenceDictionary sequenceDict) {
+        metaData.add(VCFHeader.makeHeaderVersionLine(VCFHeader.DEFAULT_VCF_VERSION));
         metaData.add(new VCFHeaderLine("two", "2"));
         additionalColumns.add("extra1");
         additionalColumns.add("extra2");
diff --git a/src/test/java/htsjdk/variant/variantcontext/writer/VCFWriterUnitTest.java b/src/test/java/htsjdk/variant/variantcontext/writer/VCFWriterUnitTest.java
index ca2afcbec0..ceac4f95a8 100644
--- a/src/test/java/htsjdk/variant/variantcontext/writer/VCFWriterUnitTest.java
+++ b/src/test/java/htsjdk/variant/variantcontext/writer/VCFWriterUnitTest.java
@@ -42,11 +42,7 @@
 import htsjdk.variant.variantcontext.GenotypesContext;
 import htsjdk.variant.variantcontext.VariantContext;
 import htsjdk.variant.variantcontext.VariantContextBuilder;
-import htsjdk.variant.vcf.VCFCodec;
-import htsjdk.variant.vcf.VCFFileReader;
-import htsjdk.variant.vcf.VCFHeader;
-import htsjdk.variant.vcf.VCFHeaderLine;
-import htsjdk.variant.vcf.VCFHeaderVersion;
+import htsjdk.variant.vcf.*;
 
 import java.io.File;
 import java.io.FileInputStream;
@@ -154,7 +150,7 @@ public void testWriteAndReadVCFHeaderless(final String extension) throws IOExcep
             writer.add(createVC(header));
         }
         final VCFCodec codec = new VCFCodec();
-        codec.setVCFHeader(header, VCFHeaderVersion.VCF4_2);
+        codec.setVCFHeader(header);
 
         try (BlockCompressedInputStream bcis = new BlockCompressedInputStream(fakeVCFFile);
                 FileInputStream fis = new FileInputStream(fakeVCFFile)) {
@@ -228,7 +224,7 @@ public void testChangeHeaderAfterWritingBody() {
      */
     private static VCFHeader createFakeHeader(final Set<VCFHeaderLine> metaData, final Set<String> additionalColumns,
                                              final SAMSequenceDictionary sequenceDict) {
-        metaData.add(new VCFHeaderLine(VCFHeaderVersion.VCF4_0.getFormatString(), VCFHeaderVersion.VCF4_0.getVersionString()));
+        metaData.add(new VCFHeaderLine(VCFHeaderVersion.VCF4_2.getFormatString(), VCFHeaderVersion.VCF4_2.getVersionString()));
         metaData.add(new VCFHeaderLine("two", "2"));
         additionalColumns.add("extra1");
         additionalColumns.add("extra2");
@@ -330,6 +326,7 @@ public void TestWritingLargeVCF(final String extension) throws FileNotFoundExcep
     @DataProvider(name = "vcfExtensionsDataProvider")
     public Object[][]vcfExtensionsDataProvider() {
         return new Object[][] {
+                //TODO: fix this BCF problem!
                 // TODO: BCF doesn't work because header is not properly constructed.
                 // {".bcf"},
                 {FileExtensions.VCF},
diff --git a/src/test/java/htsjdk/variant/vcf/AbstractVCFCodecTest.java b/src/test/java/htsjdk/variant/vcf/AbstractVCFCodecTest.java
index 273b0f24af..97e7493a6f 100644
--- a/src/test/java/htsjdk/variant/vcf/AbstractVCFCodecTest.java
+++ b/src/test/java/htsjdk/variant/vcf/AbstractVCFCodecTest.java
@@ -13,6 +13,7 @@
 import java.util.Iterator;
 import java.util.List;
 
+
 public class AbstractVCFCodecTest extends VariantBaseTest {
 
     @Test
@@ -31,11 +32,28 @@ public void shouldPreserveSymbolicAlleleCase() {
     public void TestSpanDelParseAlleles() {
         final List<Allele> list = VCF3Codec.parseAlleles("A", Allele.SPAN_DEL_STRING, 0);
     }
+	@DataProvider(name="AllVCFCodecs")
+	public Object[][] allVCFCodecs() {
+		return new Object[][] { // one row per codec instance handed to the tests that use this provider
+				{new VCF3Codec() },
+				{new VCFCodec() },
+		};
+	}
+
+	@Test(dataProvider = "AllVCFCodecs")
+	public void TestSpanDelParseAlleles(final AbstractVCFCodec vcfCodec){
+		// TODO: add an Assert on the result; as written, this test passes whenever parseAlleles does not throw.
+		vcfCodec.parseAlleles("A", Allele.SPAN_DEL_STRING, 0);
+	}
 
     @Test(expectedExceptions = TribbleException.class)
     public void TestSpanDelParseAllelesException() {
         final List<Allele> list1 = VCF3Codec.parseAlleles(Allele.SPAN_DEL_STRING, "A", 0);
     }
+	@Test(dataProvider = "AllVCFCodecs", expectedExceptions = TribbleException.class)
+	public void TestSpanDelParseAllelesException(final AbstractVCFCodec vcfCodec){
+		vcfCodec.parseAlleles(Allele.SPAN_DEL_STRING, "A", 0); // span-deletion string as the first argument; the call is expected to throw
+	}
 
     @DataProvider(name = "thingsToTryToDecode")
     public Object[][] getThingsToTryToDecode() {
@@ -47,16 +65,49 @@ public Object[][] getThingsToTryToDecode() {
         };
     }
 
-    @Test(dataProvider = "thingsToTryToDecode")
-    public void testCanDecodeFile(String potentialInput, boolean canDecode) {
-        Assert.assertEquals(AbstractVCFCodec.canDecodeFile(potentialInput, VCFCodec.VCF4_MAGIC_HEADER), canDecode);
-    }
+	@Test(dataProvider = "thingsToTryToDecode")
+	public void testCanDecodeFile(String potentialInput, boolean canDecode) {
+		//TODO: add VCF43Codec when available
+		//TODO: it's not sufficient to test for ANY v4 prefix, since that will also succeed on 4.3
+		Assert.assertEquals(AbstractVCFCodec.canDecodeFile(potentialInput, VCFCodec.VCF4_MAGIC_HEADER), canDecode);
+	}
 
-    @Test
-    public void testGetTabixFormat() {
-        Assert.assertEquals(new VCFCodec().getTabixFormat(), TabixFormat.VCF);
-        Assert.assertEquals(new VCF3Codec().getTabixFormat(), TabixFormat.VCF);
-    }
+	@Test(dataProvider = "AllVCFCodecs")
+	public void testGetTabixFormat(final AbstractVCFCodec vcfCodec) {
+		Assert.assertEquals(vcfCodec.getTabixFormat(), TabixFormat.VCF); // every codec supplied by the provider must report TabixFormat.VCF
+	}
+
+	@DataProvider(name="otherHeaderLines")
+	public Object[][] otherHeaderLines() {
+		return new Object[][] {
+                { "key=<", new VCFHeaderLine("key", "<") },
+                // taken from Funcotator test file as ##ID=<Description="ClinVar Variation ID">
+                // technically, this is invalid due to the lack of an "ID" attribute, but it should still parse
+                // into a VCFHeaderLine (but not a VCFSimpleHeaderLine)
+                { "ID=<Description=\"ClinVar Variation ID\">",
+                        new VCFHeaderLine("ID", "<Description=\"ClinVar Variation ID\">") },
+		};
+	}
+
+	@Test(dataProvider="otherHeaderLines")
+	public void testGetOtherHeaderLine(final String headerLineString, final VCFHeaderLine headerLine) {
+		Assert.assertEquals(new VCFCodec().getOtherHeaderLine(headerLineString, VCFHeaderVersion.VCF4_2), headerLine); // parsed with VCF4_2 as the header version
+	}
+
+	@DataProvider(name="badOtherHeaderLines")
+	public Object[][] badOtherHeaderLines() {
+		return new Object[][] { // header line strings for which testBadOtherHeaderLine expects TribbleException.InvalidHeader
+				{ "=" },
+				{ "=<" },
+                { "=<>" },
+                { "key" },
+		};
+	}
+
+	@Test(dataProvider="badOtherHeaderLines", expectedExceptions=TribbleException.InvalidHeader.class)
+	public void testBadOtherHeaderLine(final String headerLineString) {
+		new VCFCodec().getOtherHeaderLine(headerLineString, VCFHeaderVersion.VCF4_2); // must throw InvalidHeader; a normal return fails the test
+	}
 
     @Test
     public void testGLnotOverridePL() {
diff --git a/src/test/java/htsjdk/variant/vcf/VCFAltHeaderLineUnitTest.java b/src/test/java/htsjdk/variant/vcf/VCFAltHeaderLineUnitTest.java
new file mode 100644
index 0000000000..ed6a1d2b96
--- /dev/null
+++ b/src/test/java/htsjdk/variant/vcf/VCFAltHeaderLineUnitTest.java
@@ -0,0 +1,43 @@
+package htsjdk.variant.vcf;
+
+import htsjdk.HtsjdkTest;
+import htsjdk.tribble.TribbleException;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+public class VCFAltHeaderLineUnitTest extends HtsjdkTest {
+
+    @DataProvider(name = "allowedVCFVersions")
+    public Object[][] allowedVCFVersions() {
+        return new Object[][]{
+                {VCFHeaderVersion.VCF4_0},
+                {VCFHeaderVersion.VCF4_1},
+                {VCFHeaderVersion.VCF4_2},
+                {VCFHeaderVersion.VCF4_3}
+        };
+    }
+
+    @DataProvider(name = "rejectedVCFVersions")
+    public Object[][] rejectedVCFVersions() {
+        return new Object[][]{
+                {VCFHeaderVersion.VCF3_2},
+                {VCFHeaderVersion.VCF3_3},
+        };
+    }
+
+    private static final String ALT_STRING = "<ID=id,Description=\"desc\">";
+
+    @Test(dataProvider="allowedVCFVersions")
+    public void testAllowedVersions(final VCFHeaderVersion vcfAllowedVersion) {
+        final VCFAltHeaderLine vcfLine = new VCFAltHeaderLine(ALT_STRING, vcfAllowedVersion);
+        Assert.assertEquals(vcfLine.getID(), "id");
+        Assert.assertEquals(vcfLine.getGenericFieldValue(VCFSimpleHeaderLine.DESCRIPTION_ATTRIBUTE), "desc");
+    }
+
+    @Test(dataProvider="rejectedVCFVersions",expectedExceptions=TribbleException.class)
+    public void testRejectedVersions(final VCFHeaderVersion vcfRejectedVersion) {
+        new VCFAltHeaderLine(ALT_STRING, vcfRejectedVersion);
+    }
+
+}
diff --git a/src/test/java/htsjdk/variant/vcf/VCFCodec43FeaturesTest.java b/src/test/java/htsjdk/variant/vcf/VCFCodec43FeaturesTest.java
index cbc027ab5d..8dbf6dd30d 100644
--- a/src/test/java/htsjdk/variant/vcf/VCFCodec43FeaturesTest.java
+++ b/src/test/java/htsjdk/variant/vcf/VCFCodec43FeaturesTest.java
@@ -43,23 +43,31 @@ public class VCFCodec43FeaturesTest extends VariantBaseTest {
     private Object[][] allVCF43Files() {
         return new Object[][] {
                 // a .vcf, .vcf.gz, .vcf with UTF8 chars, and .vcf.gz with UTF8 chars
-                { TEST_43_FILE },
-                { TEST_43_UTF8_FILE },
-                { TEST_43_GZ_FILE },
-                { TEST_43_UTF8_GZ_FILE }
+
+                // these first two files have a duplicate INFO header line in them that differ
+                // from each other only by virtue of having different descriptions:
+                //WARNING	2021-02-23 15:37:13	VCFMetaDataLines	Attempt to add header line (INFO=<ID=DP,Number=1,
+                // Type=Integer,Description="Total Depth">) collides with existing line header line (INFO=<ID=DP,
+                // Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered">).
+                // The existing line will be retained
+                { TEST_43_FILE, 69 },
+                { TEST_43_UTF8_FILE, 69 },
+
+                { TEST_43_GZ_FILE, 70 },
+                { TEST_43_UTF8_GZ_FILE, 70 }
         };
     }
 
     @Test(dataProvider="all43Files")
-    public void testReadAllVCF43Features(final Path testFile) {
+    public void testReadAllVCF43Features(final Path testFile, int expectedHeaderLineCount) {
         final Tuple<VCFHeader, List<VariantContext>> entireVCF = readEntireVCFIntoMemory(testFile);
 
-        Assert.assertEquals(entireVCF.a.getMetaDataInInputOrder().size(), 70);
+        Assert.assertEquals(entireVCF.a.getMetaDataInInputOrder().size(), expectedHeaderLineCount);
         Assert.assertEquals(entireVCF.b.size(), 25);
     }
 
     @Test(dataProvider="all43Files")
-    public void testVCF43SampleLine(final Path testFile) {
+    public void testVCF43SampleLine(final Path testFile, int ignored) {
         // ##SAMPLE=<ID=NA19238,Assay=WholeGenome,Ethnicity=AFR,Disease=None,Description="Test NA19238 SAMPLE header line",
         // DOI=http://someurl,ExtraSampleField="extra sample">
         final VCFSampleHeaderLine sampleLine = getHeaderLineFromTestFile(
@@ -77,7 +85,7 @@ public void testVCF43SampleLine(final Path testFile) {
     }
 
     @Test(dataProvider="all43Files")
-    public void testVCF43AltLine(final Path testFile) {
+    public void testVCF43AltLine(final Path testFile, int ignored) {
         // ##ALT=<ID=DEL,Description="Deletion",ExtraAltField="extra alt">
         final VCFAltHeaderLine altLine = getHeaderLineFromTestFile(
                 testFile,
@@ -90,7 +98,7 @@ public void testVCF43AltLine(final Path testFile) {
     }
 
     @Test(dataProvider="all43Files")
-    public void testVCF43PedigreeLine(final Path testFile) {
+    public void testVCF43PedigreeLine(final Path testFile, int ignored) {
         // ##PEDIGREE=<ID=ChildID,Father=FatherID,Mother=MotherID,ExtraPedigreeField="extra pedigree">
         final VCFPedigreeHeaderLine pedigreeLine = getHeaderLineFromTestFile(
                 testFile,
@@ -116,7 +124,7 @@ public void testV43PedigreeParsing() {
     }
 
     @Test(dataProvider="all43Files")
-    public void testVCF43MetaLine(final Path testFile) {
+    public void testVCF43MetaLine(final Path testFile, int ignored) {
         // ##META=<ID=Assay,Type=String,Number=.,Values=[WholeGenome or Exome],ExtraMetaField="extra meta">
         final VCFMetaHeaderLine metaLine = getHeaderLineFromTestFile(
                 testFile,
@@ -129,7 +137,7 @@ public void testVCF43MetaLine(final Path testFile) {
     }
 
     @Test(dataProvider="all43Files")
-    public void testVCF43PercentEncoding(final Path testFile) {
+    public void testVCF43PercentEncoding(final Path testFile, int ignored) {
         final Tuple<VCFHeader, List<VariantContext>> entireVCF = readEntireVCFIntoMemory(testFile);
 
         // 1       327     .       T       <*>     666.18  GATK_STANDARD;HARD_TO_VALIDATE
@@ -142,7 +150,7 @@ public void testVCF43PercentEncoding(final Path testFile) {
     }
 
     @Test(dataProvider="all43Files")
-    public void testSymbolicAlternateAllele(final Path testFile) {
+    public void testSymbolicAlternateAllele(final Path testFile, int ignored) {
         final Tuple<VCFHeader, List<VariantContext>> entireVCF = readEntireVCFIntoMemory(testFile);
 
         // 1       327     .       T       <*>     666.18  GATK_STANDARD;HARD_TO_VALIDATE
@@ -241,7 +249,7 @@ public void testVCF43PercentEncodingWithUTF8() {
 
     // given a vcf file, extract a header line with the given key and ID, cast to the target
     // header line type (T) via the transformer function
-    private static <T extends VCFIDHeaderLine> T getHeaderLineFromTestFile(
+    private static <T extends VCFSimpleHeaderLine> T getHeaderLineFromTestFile(
             final Path testVCFFile,
             final String key,
             final String ID,
diff --git a/src/test/java/htsjdk/variant/vcf/VCFCompoundHeaderLineUnitTest.java b/src/test/java/htsjdk/variant/vcf/VCFCompoundHeaderLineUnitTest.java
index f94435a833..96924b4e3a 100644
--- a/src/test/java/htsjdk/variant/vcf/VCFCompoundHeaderLineUnitTest.java
+++ b/src/test/java/htsjdk/variant/vcf/VCFCompoundHeaderLineUnitTest.java
@@ -25,22 +25,245 @@
 
 package htsjdk.variant.vcf;
 
+import htsjdk.tribble.TribbleException;
 import htsjdk.variant.VariantBaseTest;
 import org.testng.Assert;
+import org.testng.annotations.DataProvider;
 import org.testng.annotations.Test;
 
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertNotNull;
 
 /**
- * User: ebanks
- * Date: Apr 2, 2014
+ * Tests for VCFCompoundHeaderLine.
+ *
+ * NOTE: This class uses VCFInfoHeaderLine instances to test shared VCFCompoundHeaderLine functionality since
+ * VCFCompoundHeaderLine is abstract.
  */
 public class VCFCompoundHeaderLineUnitTest extends VariantBaseTest {
 
+    @DataProvider (name = "badOrMissingAttributes")
+    public Object[][] getMissingAttributes() {
+        return new Object[][] {
+                {"<ID=FOO,Number=A,Description=\"foo\">"},                  // no Type
+                {"<ID=FOO,Number=A,Description=\"foo\">"},                  // no Type (identical to the row above)
+                {"<ID=FOO,Type=Float,Description=\"foo\",Version=3>"},      // no Number
+                {"<ID=FOO,Number=unknown,Type=Float,Description=\"foo\">"}, // bogus Number
+                {"<ID=FOO,Number=A,Type=unknown,Description=\"foo\">"},     // bogus Type
+        };
+    }
+
+    @Test(dataProvider= "badOrMissingAttributes", expectedExceptions=TribbleException.class)
+    public void testBadOrMissingAttributes(final String lineString) {
+        new VCFInfoHeaderLine(lineString, VCFHeader.DEFAULT_VCF_VERSION);
+    }
+
+    @DataProvider (name = "acceptedAttributes")
+    public Object[][] getAcceptedAttributes() {
+        return new Object[][] {
+                {"<ID=FOO,Number=A,Type=Float,Description=\"foo\">", "Description", "foo"},
+                //next two cases from https://github.com/samtools/htsjdk/issues/517
+                {"<ID=FOO,Number=A,Type=Float,Description=\"foo\",Version=3>", "Version", "3"},
+                {"<ID=FOO,Number=R,Type=Float,Description=\"foo\",Source=\"mySource\">", "Source", "mySource"},
+        };
+    }
+
+    @Test(dataProvider= "acceptedAttributes")
+    public void testAcceptedAttributes(final String lineString, final String attribute, final String expectedValue) {
+        final VCFCompoundHeaderLine headerline = new VCFInfoHeaderLine(lineString, VCFHeader.DEFAULT_VCF_VERSION);
+        Assert.assertEquals(headerline.getGenericFieldValue(attribute), expectedValue);
+    }
+
+    @DataProvider (name = "invalidIDs")
+    public Object[][] getInvalidLines() {
+        return new Object[][] {
+            // ID cannot start with a number
+            {"<ID=1A,Number=A,Type=Integer,Description=\"foo\">"},
+            // ID cannot start with '.'
+            {"<ID=.A,Number=A,Type=Integer,Description=\"foo\">"},
+            // Test that IDs with the special thousand genomes key as a prefix are rejected
+            // The thousand genomes key is only accepted for VCFInfoHeaderLine and is tested in VCFInfoHeaderLineUnitTest
+            {"<ID=1000GA,Number=A,Type=Integer,Description=\"foo\">"},
+            // Contains invalid character '&'
+            {"<ID=A&,Number=A,Type=Integer,Description=\"foo\">"},
+        };
+    }
+
+    @Test(dataProvider = "invalidIDs", expectedExceptions = TribbleException.VersionValidationFailure.class)
+    public void testGetValidationError(final String lineString) {
+        // TODO change to VCFHeader.DEFAULT_VCF_VERSION
+        new VCFInfoHeaderLine(lineString, VCFHeaderVersion.VCF4_3);
+    }
+
+    @DataProvider (name = "headerLineTypes")
+    public Object[][] getHeaderLineTypes() {
+        return new Object[][] {
+                {"<ID=FOO,Number=A,Type=Float,Description=\"foo\">", VCFHeaderLineType.Float},
+                {"<ID=FOO,Number=A,Type=Integer,Description=\"foo\">", VCFHeaderLineType.Integer},
+                {"<ID=FOO,Number=A,Type=String,Description=\"foo\">", VCFHeaderLineType.String},
+                {"<ID=FOO,Number=A,Type=Character,Description=\"foo\">", VCFHeaderLineType.Character},
+                // Number must be 0 for flag type
+                {"<ID=FOO,Number=0,Type=Flag,Description=\"foo\">", VCFHeaderLineType.Flag},
+        };
+    }
+
+    @Test(dataProvider = "headerLineTypes")
+    public void testGetType(final String lineString, final VCFHeaderLineType expectedType) {
+        final VCFCompoundHeaderLine headerline = new VCFInfoHeaderLine(lineString, VCFHeader.DEFAULT_VCF_VERSION);
+        Assert.assertEquals(headerline.getType(), expectedType);
+    }
+
+    @DataProvider (name = "headerLineCountTypes")
+    public Object[][] getLineCountTypes() {
+        return new Object[][] {
+                {"<ID=FOO,Number=A,Type=Float,Description=\"foo\">", VCFHeaderLineCount.A},
+                {"<ID=FOO,Number=R,Type=Integer,Description=\"foo\">", VCFHeaderLineCount.R},
+                {"<ID=FOO,Number=G,Type=String,Description=\"foo\">", VCFHeaderLineCount.G},
+                {"<ID=FOO,Number=127,Type=Character,Description=\"foo\">", VCFHeaderLineCount.INTEGER},
+                {"<ID=FOO,Number=.,Type=Integer,Description=\"foo\">", VCFHeaderLineCount.UNBOUNDED},
+        };
+    }
+
+    @Test(dataProvider= "headerLineCountTypes")
+    public void testGetLineCountType(final String lineString, final VCFHeaderLineCount expectedCountType) {
+        final VCFCompoundHeaderLine headerline = new VCFInfoHeaderLine(lineString, VCFHeader.DEFAULT_VCF_VERSION);
+        Assert.assertEquals(headerline.getCountType(), expectedCountType);
+        Assert.assertEquals(headerline.isFixedCount(), expectedCountType == VCFHeaderLineCount.INTEGER);
+    }
+
+    @Test(expectedExceptions=TribbleException.class)
+    public void testRejectIntegerTypeWithNegativeCount() {
+        new VCFInfoHeaderLine("<ID=FOO,Number=-1,Type=Integer,Description=\"foo\">", VCFHeader.DEFAULT_VCF_VERSION);
+    }
+
     @Test
-    public void supportsVersionFields() {
-        final String line = "<ID=FOO,Number=1,Type=Float,Description=\"foo\",Version=3>";
-        new VCFInfoHeaderLine(line, VCFHeaderVersion.VCF4_2);
-        // if we don't support version fields then we should fail before we ever get here
-        Assert.assertTrue(true);
+    public void testRepairFlagTypeWithNegativeCount() {
+        final VCFInfoHeaderLine infoLine = new VCFInfoHeaderLine("<ID=FOO,Number=-1,Type=Flag,Description=\"foo\">",
+                VCFHeader.DEFAULT_VCF_VERSION);
+        Assert.assertEquals(infoLine.getCount(), 0);
     }
+
+    @DataProvider (name = "equalsData")
+    public Object[][] getEqualsData() {
+        return new Object[][] {
+                // positive cases: identical line strings must compare equal
+                {"<ID=FOO,Number=A,Type=Float,Description=\"foo\">",
+                        "<ID=FOO,Number=A,Type=Float,Description=\"foo\">", true},
+                {"<ID=FOO,Number=R,Type=Integer,Description=\"foo\">",
+                        "<ID=FOO,Number=R,Type=Integer,Description=\"foo\">", true},
+                {"<ID=FOO,Number=G,Type=String,Description=\"foo\">",
+                        "<ID=FOO,Number=G,Type=String,Description=\"foo\">", true},
+                {"<ID=FOO,Number=127,Type=Character,Description=\"foo\">",
+                        "<ID=FOO,Number=127,Type=Character,Description=\"foo\">", true},
+                {"<ID=FOO,Number=.,Type=Integer,Description=\"foo\",Source=source>",
+                        "<ID=FOO,Number=.,Type=Integer,Description=\"foo\",Source=source>", true},
+
+                // negative cases: each pair differs in exactly one respect
+                {"<ID=FOO1,Number=A,Type=Float,Description=\"foo\">",
+                 "<ID=FOO2,Number=A,Type=Float,Description=\"foo\">", false},      // different ID
+                {"<ID=FOO,Number=R,Type=Integer,Description=\"foo\">",
+                 "<ID=FOO,Number=R,Type=Float,Description=\"foo\">", false},       // different Type
+                {"<ID=FOO,Number=A,Type=Float,Description=\"foo\">",
+                 "<ID=FOO,Number=R,Type=Float,Description=\"foo\">", false},       // different Number
+                {"<ID=FOO,Number=127,Type=Character,Description=\"foo\">",
+                 "<ID=FOO,Number=119,Type=Character,Description=\"foo\">", false}, // different integer Number
+                {"<ID=FOO,Number=G,Type=String,Description=\"foo\">",
+                 "<ID=FOO,Number=G,Type=String,Description=\"foobar\">", false},   // different description
+                {"<ID=FOO,Number=.,Type=Integer,Description=\"foo\",Source=source>",
+                 "<ID=FOO,Number=.,Type=Integer,Description=\"foo\">", false},     // different extra attributes (no Source)
+        };
+    }
+
+    @Test(dataProvider= "equalsData")
+    public void testEquals(final String line1, final String line2, final boolean expectedEquals) {
+        final VCFCompoundHeaderLine headerLine1 = new VCFInfoHeaderLine(line1, VCFHeader.DEFAULT_VCF_VERSION);
+        final VCFCompoundHeaderLine headerLine2 = new VCFInfoHeaderLine(line2, VCFHeader.DEFAULT_VCF_VERSION);
+        Assert.assertEquals(headerLine1.equals(headerLine2), expectedEquals);
+    }
+
+    @DataProvider(name = "mergeCompatibleInfoLines")
+    public Object[][] getMergeCompatibleInfoLines() {
+        return new Object[][]{
+                {
+                        new VCFInfoHeaderLine("<ID=FOO,Number=A,Type=Float,Description=\"foo\">", VCFHeader.DEFAULT_VCF_VERSION),
+                        new VCFInfoHeaderLine("<ID=FOO,Number=A,Type=Integer,Description=\"foo\">", VCFHeader.DEFAULT_VCF_VERSION),
+                        new VCFInfoHeaderLine("<ID=FOO,Number=A,Type=Float,Description=\"foo\">", VCFHeader.DEFAULT_VCF_VERSION),  // merged result, promote to float
+                },
+                {
+                        new VCFInfoHeaderLine("<ID=FOO,Number=A,Type=Integer,Description=\"foo\">", VCFHeader.DEFAULT_VCF_VERSION),
+                        new VCFInfoHeaderLine("<ID=FOO,Number=A,Type=Float,Description=\"foo\">", VCFHeader.DEFAULT_VCF_VERSION),
+                        new VCFInfoHeaderLine("<ID=FOO,Number=A,Type=Float,Description=\"foo\">", VCFHeader.DEFAULT_VCF_VERSION)  // merged result, promote to float
+                },
+                {
+                        new VCFInfoHeaderLine("<ID=FOO,Number=A,Type=Integer,Description=\"foo\">", VCFHeader.DEFAULT_VCF_VERSION),
+                        new VCFInfoHeaderLine("<ID=FOO,Number=G,Type=Integer,Description=\"foo\">", VCFHeader.DEFAULT_VCF_VERSION),
+                        new VCFInfoHeaderLine("<ID=FOO,Number=.,Type=Integer,Description=\"foo\">", VCFHeader.DEFAULT_VCF_VERSION)  // merged result, resolve as new unbounded
+                },
+        };
+    }
+
+    @Test(dataProvider = "mergeCompatibleInfoLines")
+    public void testMergeCompatibleInfoLines(final VCFInfoHeaderLine line1, final VCFInfoHeaderLine line2, final VCFInfoHeaderLine expectedLine) {
+        VCFCompoundHeaderLine mergedLine = VCFCompoundHeaderLine.getMergedCompoundHeaderLine(
+                line1,
+                line2,
+                new VCFHeaderMerger.HeaderMergeConflictWarnings(false),
+                (l1, l2) -> new VCFInfoHeaderLine(
+                        l1.getID(),
+                        VCFHeaderLineCount.UNBOUNDED,
+                        l1.getType(),
+                        l1.getDescription())
+        );
+        Assert.assertEquals(mergedLine, expectedLine);
+    }
+
+    @DataProvider(name = "mergeIncompatibleInfoLines")
+    public Object[][] getMergeIncompatibleInfoLines() {
+        return new Object[][]{
+                {
+                        new VCFInfoHeaderLine("<ID=FOO,Number=A,Type=Integer,Description=\"foo\">",VCFHeader.DEFAULT_VCF_VERSION),
+                        new VCFInfoHeaderLine("<ID=FOO,Number=0,Type=Flag,Description=\"foo\">", VCFHeader.DEFAULT_VCF_VERSION),
+                },
+                {
+                        new VCFInfoHeaderLine("<ID=FOO,Number=A,Type=String,Description=\"foo\">", VCFHeader.DEFAULT_VCF_VERSION),
+                        new VCFInfoHeaderLine("<ID=FOO,Number=37,Type=Integer,Description=\"foo\">", VCFHeader.DEFAULT_VCF_VERSION),
+                },
+        };
+    }
+
+    @Test(dataProvider = "mergeIncompatibleInfoLines", expectedExceptions=TribbleException.class)
+    public void testMergeIncompatibleInfoLines(final VCFInfoHeaderLine line1, final VCFInfoHeaderLine line2) {
+        VCFCompoundHeaderLine.getMergedCompoundHeaderLine(
+                line1,
+                line2,
+                new VCFHeaderMerger.HeaderMergeConflictWarnings(false),
+                (l1, l2) -> { throw new IllegalArgumentException("lambda should never execute - this exception should never be thrown"); }
+        );
+    }
+
+    @Test
+    public void testEncodeWithUnescapedQuotes() {
+
+        VCFFilterHeaderLine unescapedFilterLine = new VCFFilterHeaderLine(
+                "aFilter",
+                "filterName=[ANNOTATION] filterExpression=[ANNOTATION == \"NA\" || ANNOTATION <= 2.0]");
+
+        final String encodedAttributes = unescapedFilterLine.toStringEncoding();
+        assertNotNull(encodedAttributes);
+
+        final String expectedEncoding = "FILTER=<ID=aFilter,Description=\"filterName=[ANNOTATION] filterExpression=[ANNOTATION == \\\"NA\\\" || ANNOTATION <= 2.0]\">";
+        assertEquals(encodedAttributes, expectedEncoding);
+    }
+
+    @Test
+    public void testEncodeWithEscapedQuotes() {
+
+        VCFFilterHeaderLine escapedFilterLine = new VCFFilterHeaderLine("aFilter", "filterName=[ANNOTATION] filterExpression=[ANNOTATION == \\\"NA\\\" || ANNOTATION <= 2.0]");
+        final String encodedAttributes = escapedFilterLine.toStringEncoding();
+        assertNotNull(encodedAttributes);
+
+        final String expectedEncoding = "FILTER=<ID=aFilter,Description=\"filterName=[ANNOTATION] filterExpression=[ANNOTATION == \\\"NA\\\" || ANNOTATION <= 2.0]\">";
+        assertEquals(encodedAttributes, expectedEncoding);
+    }
+
 }
diff --git a/src/test/java/htsjdk/variant/vcf/VCFContigHeaderLineUnitTest.java b/src/test/java/htsjdk/variant/vcf/VCFContigHeaderLineUnitTest.java
new file mode 100644
index 0000000000..ad33575bef
--- /dev/null
+++ b/src/test/java/htsjdk/variant/vcf/VCFContigHeaderLineUnitTest.java
@@ -0,0 +1,184 @@
+package htsjdk.variant.vcf;
+
+import htsjdk.HtsjdkTest;
+import htsjdk.samtools.SAMSequenceRecord;
+import htsjdk.tribble.TribbleException;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.TreeSet;
+
+public class VCFContigHeaderLineUnitTest extends HtsjdkTest {
+
+    @DataProvider(name = "allowedIDs")
+    public Object[][] getAllowedIDs() {
+        return new Object[][]{
+                {"<ID=1>",                              "1"},
+                {"<ID=10>",                             "10"},
+                {"<ID=X>",                              "X"},
+                {"<ID=Y>",                              "Y"},
+                {"<ID=MT>",                             "MT"},
+                {"<ID=NC_007605>",                      "NC_007605"},
+                {"<ID=GL000191.1>",                     "GL000191.1"},
+                {"<ID=HLA-A*01:01:01:01,length=100>",   "HLA-A*01:01:01:01"}, //https://github.com/samtools/hts-specs/issues/124
+       };
+    }
+
+    @Test(dataProvider= "allowedIDs")
+    public void testAllowedIDs(final String lineString, final String expectedIDString) {
+        final VCFContigHeaderLine headerline = new VCFContigHeaderLine(lineString, VCFHeader.DEFAULT_VCF_VERSION, 0);
+        Assert.assertEquals(headerline.getID(), expectedIDString);
+    }
+
+    @DataProvider(name = "invalidIDs")
+    public Object[][] getInvalidIDs() {
+        return new Object[][]{
+            // IDs cannot start with '*'
+            {"<ID=*a>"},
+            // IDs cannot start with '='
+            // The parser cannot handle attributes starting with '=' so we cannot express this test case
+            // {"<ID==a>"},
+            // IDs cannot contain '{'
+            {"<ID=1{>"},
+        };
+    }
+
+    @Test(dataProvider = "invalidIDs", expectedExceptions = TribbleException.VersionValidationFailure.class)
+    public void testInvalidIDs(final String lineString) {
+        // TODO change to VCFHeader.DEFAULT_VCF_VERSION
+        new VCFContigHeaderLine(lineString, VCFHeaderVersion.VCF4_3, 1);
+    }
+
+    @Test(expectedExceptions=TribbleException.class)
+    public void testRejectNegativeIndex() {
+        new VCFContigHeaderLine("<ID=contig1,length=100>", VCFHeader.DEFAULT_VCF_VERSION, -1);
+    }
+
+    @DataProvider(name = "allowedAttributes")
+    public Object[][] getAllowedAttributes() {
+        return new Object[][] {
+                {"<ID=contig1>", "ID", "contig1"},  // https://github.com/samtools/htsjdk/issues/389 (no length)
+                {"<ID=contig1,length=100>", "length", "100"},
+                {"<ID=contig1,length=100,taxonomy=\"Homo sapiens\">", "taxonomy", "Homo sapiens"},
+                {"<ID=contig1,length=100,assembly=b37>", "assembly", "b37"},
+                {"<ID=contig1,length=100,assembly=b37,md5=1a258fe76dfc8abd926f81f0e9b82ed7>", "md5", "1a258fe76dfc8abd926f81f0e9b82ed7"},
+                {"<ID=contig1,length=100,assembly=b37,md5=1a258fe76dfc8abd926f81f0e9b82ed7,URL=http://www.refserve.org:8080/path/>",
+                        "URL", "http://www.refserve.org:8080/path/"},
+                {"<ID=contig1,length=100,assembly=b37,md5=1a258fe76dfc8abd926f81f0e9b82ed7,URL=http://www.refserve.org:8080/path/,species=\"Homo sapiens\">",
+                        "species", "Homo sapiens"},
+        };
+    }
+
+    @Test(dataProvider= "allowedAttributes")
+    public void testAllowedAttributes(final String lineString, final String attribute, final String expectedValue) {
+        final VCFContigHeaderLine headerline = new VCFContigHeaderLine(lineString, VCFHeader.DEFAULT_VCF_VERSION, 0);
+        Assert.assertEquals(headerline.getGenericFieldValue(attribute), expectedValue);
+    }
+
+    @Test
+    public void testRoundTripThroughSequenceRecord() {
+        final VCFContigHeaderLine contigLine = new VCFContigHeaderLine(
+                "<ID=contig1,length=100,assembly=b37,md5=1a258fe76dfc8abd926f81f0e9b82ed7,URL=http://www.refserve.org:8080/path/,species=\"Homo sapiens\">",
+                VCFHeader.DEFAULT_VCF_VERSION,
+                0);
+
+        final String lengthString = "100";
+        final String assemblyString = "b37";
+        final String md5String = "1a258fe76dfc8abd926f81f0e9b82ed7";
+        final String URLString = "http://www.refserve.org:8080/path/";
+        final String speciesString = "Homo sapiens";
+
+        final SAMSequenceRecord sequenceRecord = contigLine.getSAMSequenceRecord();
+
+        Assert.assertEquals(Integer.toString(sequenceRecord.getSequenceLength()), lengthString);
+        Assert.assertEquals(contigLine.getGenericFieldValue(VCFContigHeaderLine.LENGTH_ATTRIBUTE), lengthString);
+
+        Assert.assertEquals(sequenceRecord.getAssembly(), assemblyString);
+        Assert.assertEquals(contigLine.getGenericFieldValue(VCFContigHeaderLine.ASSEMBLY_ATTRIBUTE), assemblyString);
+
+        Assert.assertEquals(sequenceRecord.getMd5(), md5String);
+        Assert.assertEquals(contigLine.getGenericFieldValue(VCFContigHeaderLine.MD5_ATTRIBUTE), md5String);
+
+        Assert.assertEquals(sequenceRecord.getAttribute(SAMSequenceRecord.URI_TAG), URLString);
+        Assert.assertEquals(contigLine.getGenericFieldValue(VCFContigHeaderLine.URL_ATTRIBUTE), URLString);
+
+        Assert.assertEquals(sequenceRecord.getAttribute(SAMSequenceRecord.SPECIES_TAG), speciesString);
+        Assert.assertEquals(contigLine.getGenericFieldValue(VCFContigHeaderLine.SPECIES_ATTRIBUTE), speciesString);
+
+        // now turn the SAMSequenceRecord back into a contig line, and compare the result to the
+        // original contig line
+        Assert.assertEquals(
+                new VCFContigHeaderLine(sequenceRecord, assemblyString),
+                contigLine);
+    }
+
+    @DataProvider (name = "hashEqualsCompareData")
+    public Object[][] getHashEqualsCompareData() {
+        return new Object[][] {
+
+                // For contig lines, equals and hash depend on the id, all other attributes, and the contig index,
+                // but compareTo only cares about the index.
+
+                // line1, index1, line2, index2 -> expected hash equals, expected equals, expected compare
+                {"<ID=chr1>", 0,    "<ID=chr1>", 0,             true,           true,           0  },   // identical
+                {"<ID=chr1>", 0,    "<ID=chr1>", 1,             false,          false,          -1 },   // identical except contig index
+                {"<ID=chr1>", 1,    "<ID=chr1>", 0,             false,          false,          1  },   // identical except contig index
+
+                {"<ID=chr1, length=10>", 0,    "<ID=chr1>", 0,  false,          false,          0  },   // identical except attributes
+                {"<ID=chr1, length=10>", 0,    "<ID=chr1>", 1,  false,          false,         -1  },   // different attributes, different index
+
+                {"<ID=chr1>", 0,    "<ID=chr2>", 0,             false,          false,          0  },   // identical except ID
+                // different ID, same attributes and index, -> not equal, different hash, compare==0
+                {"<ID=chr1>", 0,    "<ID=chr2,length=10>", 0,   false,          false,          0  },   // different ID, attributes, same index
+        };
+    }
+
+    @Test(dataProvider = "hashEqualsCompareData")
+    public void testHashEqualsCompare(
+            final String line1,
+            final int index1,
+            final String line2,
+            final int index2,
+            final boolean expectedHashEquals,
+            final boolean expectedEquals,
+            final int expectedCompare)
+    {
+        final VCFContigHeaderLine headerLine1 = new VCFContigHeaderLine(line1, VCFHeader.DEFAULT_VCF_VERSION, index1);
+        final VCFContigHeaderLine headerLine2 = new VCFContigHeaderLine(line2, VCFHeader.DEFAULT_VCF_VERSION, index2);
+
+        Assert.assertEquals(headerLine1.hashCode() == headerLine2.hashCode(), expectedHashEquals);
+        Assert.assertEquals(headerLine1.equals(headerLine2), expectedEquals);
+        Assert.assertEquals(headerLine1.compareTo(headerLine2), expectedCompare);
+    }
+
+    @Test
+    public void testSortOrder() {
+
+        final List<VCFContigHeaderLine> expectedLineOrder = new ArrayList<VCFContigHeaderLine>() {{
+            add(new VCFContigHeaderLine("<ID=1>", VCFHeader.DEFAULT_VCF_VERSION, 1));
+            add(new VCFContigHeaderLine("<ID=2>", VCFHeader.DEFAULT_VCF_VERSION, 2));
+            add(new VCFContigHeaderLine("<ID=10>", VCFHeader.DEFAULT_VCF_VERSION, 10));
+            add(new VCFContigHeaderLine("<ID=20>", VCFHeader.DEFAULT_VCF_VERSION, 20));
+        }};
+
+        final TreeSet<VCFContigHeaderLine> sortedLines = new TreeSet<>(
+                new ArrayList<VCFContigHeaderLine>() {{
+                    add(new VCFContigHeaderLine("<ID=20>", VCFHeader.DEFAULT_VCF_VERSION, 20));
+                    add(new VCFContigHeaderLine("<ID=10>", VCFHeader.DEFAULT_VCF_VERSION, 10));
+                    add(new VCFContigHeaderLine("<ID=1>", VCFHeader.DEFAULT_VCF_VERSION, 1));
+                    add(new VCFContigHeaderLine("<ID=2>", VCFHeader.DEFAULT_VCF_VERSION, 2));
+                }}
+        );
+
+        final Iterator<VCFContigHeaderLine> sortedIt = sortedLines.iterator();
+        for (final VCFContigHeaderLine cl : expectedLineOrder) {
+            Assert.assertTrue(sortedIt.hasNext());
+            Assert.assertEquals(sortedIt.next(), cl); // TestNG order is (actual, expected)
+        }
+    }
+
+}
diff --git a/src/test/java/htsjdk/variant/vcf/VCFEncoderTest.java b/src/test/java/htsjdk/variant/vcf/VCFEncoderTest.java
index 547549aa81..f51589783b 100644
--- a/src/test/java/htsjdk/variant/vcf/VCFEncoderTest.java
+++ b/src/test/java/htsjdk/variant/vcf/VCFEncoderTest.java
@@ -148,6 +148,7 @@ public void testMissingFormatFields(final VCFEncoder encoder, final VariantConte
     private static Set<VCFHeaderLine> createSyntheticMetadata() {
         final Set<VCFHeaderLine> metaData = new TreeSet<>();
 
+        metaData.add(VCFHeader.makeHeaderVersionLine(VCFHeader.DEFAULT_VCF_VERSION));
         metaData.add(new VCFContigHeaderLine(Collections.singletonMap("ID", "1"), 0));
 
         metaData.add(new VCFFormatHeaderLine("GT", 1, VCFHeaderLineType.String, "x"));
diff --git a/src/test/java/htsjdk/variant/vcf/VCFFormatHeaderLineUnitTest.java b/src/test/java/htsjdk/variant/vcf/VCFFormatHeaderLineUnitTest.java
new file mode 100644
index 0000000000..1e07ff9c2d
--- /dev/null
+++ b/src/test/java/htsjdk/variant/vcf/VCFFormatHeaderLineUnitTest.java
@@ -0,0 +1,19 @@
+package htsjdk.variant.vcf;
+
+import htsjdk.HtsjdkTest;
+import htsjdk.tribble.TribbleException;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+/**
+ * Test conditions that are unique to FORMAT lines (not covered by VCFCompoundHeaderLineUnitTest).
+ */
+public class VCFFormatHeaderLineUnitTest extends HtsjdkTest {
+
+    // FORMAT lines aren't allowed to have Type=Flag, so constructing one is expected to throw
+    @Test(expectedExceptions=TribbleException.class)
+    public void testRejectFormatLineWithFlagField() {
+        new VCFFormatHeaderLine("<ID=FOO,Number=0,Type=Flag,Description=\"foo\">", VCFHeader.DEFAULT_VCF_VERSION);
+    }
+
+}
diff --git a/src/test/java/htsjdk/variant/vcf/VCFHeaderLineTranslatorUnitTest.java b/src/test/java/htsjdk/variant/vcf/VCFHeaderLineTranslatorUnitTest.java
index 73116f53f0..94859c8717 100644
--- a/src/test/java/htsjdk/variant/vcf/VCFHeaderLineTranslatorUnitTest.java
+++ b/src/test/java/htsjdk/variant/vcf/VCFHeaderLineTranslatorUnitTest.java
@@ -102,12 +102,14 @@ private Object[][] getInvalidHeaderLines() {
         List<String> sourceVersion = Arrays.asList("Source", "Version");
         return new Object[][]{
                 // to parse, expected, recommended, error message
-                {"<Description=\"Y\",ID=X>", idDesc, none, "Tag Description in wrong order (was #1, expected #2)"},
-                {"<ID=X,Desc=\"Y\">", idDesc, none, "Unexpected tag Desc"},
-                {"<>", idDesc, none, "Unexpected tag  "},
-
-                {"<Source=\"source\",ID=X,Description=\"Y\">", idDesc, sourceVersion, "Recommended tag Source must be listed after all expected tags"},
-                {"<ID=X,Source=\"E\",Description=\"Y\">", idDesc, sourceVersion, "Recommended tag Source must be listed after all expected tags"}
+                {"<Description=\"Y\",ID=X>", idDesc, none, "Unexpected tag or tag order for tag \"Description\""},
+                {"<ID=X,Desc=\"Y\">", idDesc, none, "Unexpected tag or tag order for tag \"Desc\""},
+                {"<>", idDesc, none, "Unexpected tag or tag order for tag \"\""},
+
+                {"<Source=\"source\",ID=X,Description=\"Y\">", idDesc, sourceVersion,
+                        "Unexpected tag or tag order for tag \"Source\""},
+                {"<ID=X,Source=\"E\",Description=\"Y\">", idDesc, sourceVersion,
+                        "Unexpected tag or tag order for tag \"Source\""}
         };
     }
 
@@ -119,7 +121,7 @@ private static void callTranslator(final String line,
             VCFHeaderLineTranslator.parseLine(VCFHeaderVersion.VCF4_2, line, expectedTagOrder);
         }
         else {
-            VCFHeaderLineTranslator.parseLine(VCFHeaderVersion.VCF4_2, line, expectedTagOrder, recommendedTags);
+            VCFHeaderLineTranslator.parseLine(VCFHeaderVersion.VCF4_2, line, expectedTagOrder);
         }
     }
 
@@ -153,13 +155,4 @@ private Object[][] getVcfV3Versions() {
         };
     }
 
-    @Test(dataProvider = "vcfv3", expectedExceptions = TribbleException.class)
-    public void testVcfV3FailsRecommendedTags(final VCFHeaderVersion vcfVersion) {
-        VCFHeaderLineTranslator.parseLine(
-                vcfVersion,
-                "<ID=X,Description=\"Y\">",
-                Arrays.asList("ID"),
-                Arrays.asList("Description")
-        );
-    }
 }
diff --git a/src/test/java/htsjdk/variant/vcf/VCFHeaderLineUnitTest.java b/src/test/java/htsjdk/variant/vcf/VCFHeaderLineUnitTest.java
index e04d3c69c8..d5d7e47ec9 100644
--- a/src/test/java/htsjdk/variant/vcf/VCFHeaderLineUnitTest.java
+++ b/src/test/java/htsjdk/variant/vcf/VCFHeaderLineUnitTest.java
@@ -1,6 +1,9 @@
 package htsjdk.variant.vcf;
 
+import htsjdk.tribble.TribbleException;
 import htsjdk.variant.VariantBaseTest;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
 import org.testng.annotations.Test;
 
 import java.util.LinkedHashMap;
@@ -9,46 +12,146 @@
 import static org.testng.Assert.assertEquals;
 import static org.testng.Assert.assertNotNull;
 
+
 public class VCFHeaderLineUnitTest extends VariantBaseTest {
 
     @Test
     public void testEncodeVCFHeaderLineWithUnescapedQuotes() {
-
         final Map<String, String> attributes = new LinkedHashMap<>();
         attributes.put("ID", "VariantFiltration");
         attributes.put("CommandLineOptions", "filterName=[ANNOTATION] filterExpression=[ANNOTATION == \"NA\" || ANNOTATION <= 2.0]");
 
-        final String encodedAttributes = VCFHeaderLine.toStringEncoding(attributes);
+        final VCFSimpleHeaderLine simpleHeaderLine = new VCFSimpleHeaderLine("someKey", attributes);
+        final String encodedAttributes = simpleHeaderLine.toStringEncoding();
         assertNotNull(encodedAttributes);
 
-        final String expectedEncoding = "<ID=VariantFiltration,CommandLineOptions=\"filterName=[ANNOTATION] filterExpression=[ANNOTATION == \\\"NA\\\" || ANNOTATION <= 2.0]\">";
+        final String expectedEncoding = "someKey=<ID=VariantFiltration,CommandLineOptions=\"filterName=[ANNOTATION] filterExpression=[ANNOTATION == \\\"NA\\\" || ANNOTATION <= 2.0]\">";
         assertEquals(encodedAttributes, expectedEncoding);
     }
 
     @Test
     public void testEncodeVCFHeaderLineWithEscapedQuotes() {
-
         final Map<String, String> attributes = new LinkedHashMap<>();
         attributes.put("ID", "VariantFiltration");
         attributes.put("CommandLineOptions", "filterName=[ANNOTATION] filterExpression=[ANNOTATION == \\\"NA\\\" || ANNOTATION <= 2.0]");
 
-        final String encodedAttributes = VCFHeaderLine.toStringEncoding(attributes);
+        final VCFSimpleHeaderLine simpleHeaderLine = new VCFSimpleHeaderLine("someKey", attributes);
+        final String encodedAttributes = simpleHeaderLine.toStringEncoding();
         assertNotNull(encodedAttributes);
 
-        final String expectedEncoding = "<ID=VariantFiltration,CommandLineOptions=\"filterName=[ANNOTATION] filterExpression=[ANNOTATION == \\\"NA\\\" || ANNOTATION <= 2.0]\">";
+        final String expectedEncoding = "someKey=<ID=VariantFiltration,CommandLineOptions=\"filterName=[ANNOTATION] filterExpression=[ANNOTATION == \\\"NA\\\" || ANNOTATION <= 2.0]\">";
         assertEquals(encodedAttributes, expectedEncoding);
     }
 
-    @Test(expectedExceptions = { IllegalArgumentException.class }, expectedExceptionsMessageRegExp = "Invalid count number, with fixed count the number should be 1 or higher: .*")
-    public void testFormatNumberExeptions() {
+    @Test
+    public void testIsNotStructuredHeaderLine() {
+        final VCFHeaderLine plainLine = new VCFHeaderLine("key", "value");
+        Assert.assertFalse(plainLine.isIDHeaderLine());
+        Assert.assertNull(plainLine.getID());
+    }
+
+    @Test
+    public void testStringEncoding() {
+        // a plain key/value header line encodes as key=value
+        Assert.assertEquals(new VCFHeaderLine("key", "value").toStringEncoding(), "key=value");
+    }
+
+    @DataProvider(name = "headerLineEquals")
+    public Object[][] headerLineEquals() {
+        return new Object[][]{
+                {
+                        new VCFHeaderLine("key", "value"),
+                        new VCFHeaderLine("key", "value"),
+                        true
+                },
+                {
+                        new VCFHeaderLine("key", "value1"),
+                        new VCFHeaderLine("key", "value2"),
+                        false
+                },
+                {
+                        new VCFHeaderLine("key1", "value"),
+                        new VCFHeaderLine("key2", "value"),
+                        false
+                },
+                {
+                        new VCFHeaderLine("key1", "value1"),
+                        new VCFHeaderLine("key2", "value2"),
+                        false
+                }
+        };
+    }
+
+    @Test(dataProvider = "headerLineEquals")
+    public void testEquals(final VCFHeaderLine hl1, final VCFHeaderLine hl2, final boolean expectedEquals) {
+        Assert.assertEquals(hl1.equals(hl2), expectedEquals);
+    }
+
+    @DataProvider(name = "invalidHeaderLineKeys")
+    public Object[][] invalidHeaderLineKeys() {
+        return new Object[][]{
+                {null},
+                {"embedded<"},
+                {"embedded="}};
+    }
+
+    @Test(dataProvider = "invalidHeaderLineKeys", expectedExceptions=TribbleException.class)
+    public void testInvalidKeys(final String testKey) {
+        new VCFHeaderLine(testKey, "");
+    }
+
+    @Test(dataProvider = "invalidHeaderLineKeys", expectedExceptions=TribbleException.class)
+    public void testValidateAsIdInvalid(final String testKey) {
+        VCFHeaderLine.validateKeyOrID(testKey, "test");
+    }
+
+    @DataProvider(name = "vcfVersions")
+    public Object[][] vcfVersions() {
+        return new Object[][]{
+                {VCFHeaderVersion.VCF3_2},
+                {VCFHeaderVersion.VCF3_3},
+                {VCFHeaderVersion.VCF4_0},
+                {VCFHeaderVersion.VCF4_1},
+                {VCFHeaderVersion.VCF4_2},
+                {VCFHeaderVersion.VCF4_3}
+        };
+    }
+
+    @Test(dataProvider = "vcfVersions")
+    public void testValidateForVersion(final VCFHeaderVersion vcfVersion) {
+        // a fileformat line built from a version is expected to validate against that same version
+        new VCFHeaderLine(vcfVersion.getFormatString(), vcfVersion.getVersionString()).validateForVersion(vcfVersion);
+    }
+
+    @DataProvider(name = "incompatibleVersions")
+    public Object[][] incompatibleVersionPairs() {
+        return new Object[][]{
+                // each pair just has to be different
+                {VCFHeaderVersion.VCF3_2, VCFHeaderVersion.VCF4_3},
+                {VCFHeaderVersion.VCF3_3, VCFHeaderVersion.VCF4_3},
+                {VCFHeaderVersion.VCF4_0, VCFHeaderVersion.VCF4_3},
+                {VCFHeaderVersion.VCF4_1, VCFHeaderVersion.VCF4_3},
+                {VCFHeaderVersion.VCF4_2, VCFHeaderVersion.VCF4_3},
+                {VCFHeaderVersion.VCF4_3, VCFHeaderVersion.VCF4_2}
+        };
+    }
+
+    @Test(dataProvider="incompatibleVersions", expectedExceptions= TribbleException.VersionValidationFailure.class)
+    public void testValidateForVersionFails(final VCFHeaderVersion vcfVersion, final VCFHeaderVersion incompatibleVersion) {
+        // a fileformat line built from one version is expected to be rejected when validated against a different one
+        new VCFHeaderLine(vcfVersion.getFormatString(), vcfVersion.getVersionString()).validateForVersion(incompatibleVersion);
+    }
+
+    @Test(expectedExceptions = { TribbleException.InvalidHeader.class }, expectedExceptionsMessageRegExp = ".*For fixed count, the count number must be 1 or higher.")
+    public void testFormatNumberExceptions() {
         new VCFFormatHeaderLine("test",
                 0,
                 VCFHeaderLineType.Integer,
                 "");
     }
 
-    @Test(expectedExceptions = { IllegalArgumentException.class }, expectedExceptionsMessageRegExp = "Invalid count number, with fixed count the number should be 1 or higher: .*")
-    public void testInfoNumberExeptions() {
+    @Test(expectedExceptions = { TribbleException.InvalidHeader.class }, expectedExceptionsMessageRegExp = ".*For fixed count, the count number must be 1 or higher.")
+    public void testInfoNumberExceptions() {
         new VCFInfoHeaderLine("test",
                 0,
                 VCFHeaderLineType.Integer,
diff --git a/src/test/java/htsjdk/variant/vcf/VCFHeaderMergerUnitTest.java b/src/test/java/htsjdk/variant/vcf/VCFHeaderMergerUnitTest.java
new file mode 100644
index 0000000000..1be8bdf085
--- /dev/null
+++ b/src/test/java/htsjdk/variant/vcf/VCFHeaderMergerUnitTest.java
@@ -0,0 +1,554 @@
+package htsjdk.variant.vcf;
+
+import htsjdk.samtools.SAMSequenceDictionary;
+import htsjdk.samtools.SAMSequenceRecord;
+import htsjdk.tribble.TribbleException;
+import htsjdk.variant.VariantBaseTest;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.IntStream;
+
+import static htsjdk.variant.vcf.VCFConstants.PEDIGREE_HEADER_KEY;
+
+public class VCFHeaderMergerUnitTest extends VariantBaseTest {
+
+    @DataProvider(name="mergeValidVersions")
+    public Object[][] getMergeValidVersions() {
+
+        // only v4.2+ headers can be merged, merge result version is always the highest version presented
+        return new Object[][] {
+                // headers to merge, expected result version
+                {Arrays.asList(VCFHeaderVersion.VCF4_2, VCFHeaderVersion.VCF4_2), VCFHeaderVersion.VCF4_2},
+                {Arrays.asList(VCFHeaderVersion.VCF4_2, VCFHeaderVersion.VCF4_3), VCFHeaderVersion.VCF4_3},
+                {Arrays.asList(VCFHeaderVersion.VCF4_3, VCFHeaderVersion.VCF4_2), VCFHeaderVersion.VCF4_3},
+                {Arrays.asList(VCFHeaderVersion.VCF4_3, VCFHeaderVersion.VCF4_3), VCFHeaderVersion.VCF4_3},
+                // three-way merges: every ordering of v4.2 and v4.3 (previously some two-way rows were simply repeated)
+                {Arrays.asList(VCFHeaderVersion.VCF4_2, VCFHeaderVersion.VCF4_2, VCFHeaderVersion.VCF4_2), VCFHeaderVersion.VCF4_2},
+                {Arrays.asList(VCFHeaderVersion.VCF4_2, VCFHeaderVersion.VCF4_2, VCFHeaderVersion.VCF4_3), VCFHeaderVersion.VCF4_3},
+                {Arrays.asList(VCFHeaderVersion.VCF4_2, VCFHeaderVersion.VCF4_3, VCFHeaderVersion.VCF4_2), VCFHeaderVersion.VCF4_3},
+                {Arrays.asList(VCFHeaderVersion.VCF4_2, VCFHeaderVersion.VCF4_3, VCFHeaderVersion.VCF4_3), VCFHeaderVersion.VCF4_3},
+                {Arrays.asList(VCFHeaderVersion.VCF4_3, VCFHeaderVersion.VCF4_2, VCFHeaderVersion.VCF4_2), VCFHeaderVersion.VCF4_3},
+                {Arrays.asList(VCFHeaderVersion.VCF4_3, VCFHeaderVersion.VCF4_2, VCFHeaderVersion.VCF4_3), VCFHeaderVersion.VCF4_3},
+                {Arrays.asList(VCFHeaderVersion.VCF4_3, VCFHeaderVersion.VCF4_3, VCFHeaderVersion.VCF4_2), VCFHeaderVersion.VCF4_3},
+                {Arrays.asList(VCFHeaderVersion.VCF4_3, VCFHeaderVersion.VCF4_3, VCFHeaderVersion.VCF4_3), VCFHeaderVersion.VCF4_3},
+        };
+    }
+
+    @DataProvider(name="mergeInvalidVersions")
+    public Object[][] getMergeInvalidVersions() {
+        // only v4.2+ headers can be merged, so every pair below contains at least one pre-v4.2 version
+        return new Object[][] {
+                {Arrays.asList(VCFHeaderVersion.VCF3_2, VCFHeaderVersion.VCF3_3)},
+                {Arrays.asList(VCFHeaderVersion.VCF3_2, VCFHeaderVersion.VCF4_0)},
+                {Arrays.asList(VCFHeaderVersion.VCF3_2, VCFHeaderVersion.VCF4_1)},
+                {Arrays.asList(VCFHeaderVersion.VCF3_2, VCFHeaderVersion.VCF4_2)},
+                {Arrays.asList(VCFHeaderVersion.VCF3_2, VCFHeaderVersion.VCF4_3)},
+
+                {Arrays.asList(VCFHeaderVersion.VCF3_3, VCFHeaderVersion.VCF3_2)},
+                {Arrays.asList(VCFHeaderVersion.VCF3_3, VCFHeaderVersion.VCF4_0)},
+                {Arrays.asList(VCFHeaderVersion.VCF3_3, VCFHeaderVersion.VCF4_1)},
+                {Arrays.asList(VCFHeaderVersion.VCF3_3, VCFHeaderVersion.VCF4_2)},
+                {Arrays.asList(VCFHeaderVersion.VCF3_3, VCFHeaderVersion.VCF4_3)},
+
+                {Arrays.asList(VCFHeaderVersion.VCF4_0, VCFHeaderVersion.VCF3_2)},
+                {Arrays.asList(VCFHeaderVersion.VCF4_0, VCFHeaderVersion.VCF3_3)},
+                {Arrays.asList(VCFHeaderVersion.VCF4_0, VCFHeaderVersion.VCF4_1)},
+                {Arrays.asList(VCFHeaderVersion.VCF4_0, VCFHeaderVersion.VCF4_2)},
+                {Arrays.asList(VCFHeaderVersion.VCF4_0, VCFHeaderVersion.VCF4_3)},
+
+                {Arrays.asList(VCFHeaderVersion.VCF4_1, VCFHeaderVersion.VCF3_2)},
+                {Arrays.asList(VCFHeaderVersion.VCF4_1, VCFHeaderVersion.VCF3_3)},
+                {Arrays.asList(VCFHeaderVersion.VCF4_1, VCFHeaderVersion.VCF4_0)},
+                {Arrays.asList(VCFHeaderVersion.VCF4_1, VCFHeaderVersion.VCF4_2)},
+                {Arrays.asList(VCFHeaderVersion.VCF4_1, VCFHeaderVersion.VCF4_3)},
+
+                {Arrays.asList(VCFHeaderVersion.VCF4_2, VCFHeaderVersion.VCF3_2)},
+                {Arrays.asList(VCFHeaderVersion.VCF4_2, VCFHeaderVersion.VCF3_3)},
+                {Arrays.asList(VCFHeaderVersion.VCF4_2, VCFHeaderVersion.VCF4_0)},
+                {Arrays.asList(VCFHeaderVersion.VCF4_2, VCFHeaderVersion.VCF4_1)},
+
+                {Arrays.asList(VCFHeaderVersion.VCF4_3, VCFHeaderVersion.VCF3_2)},
+                {Arrays.asList(VCFHeaderVersion.VCF4_3, VCFHeaderVersion.VCF3_3)},
+                {Arrays.asList(VCFHeaderVersion.VCF4_3, VCFHeaderVersion.VCF4_0)},
+                {Arrays.asList(VCFHeaderVersion.VCF4_3, VCFHeaderVersion.VCF4_1)},
+        };
+    }
+
+    @Test(dataProvider="mergeValidVersions")
+    public void testMergeValidVersions(final List<VCFHeaderVersion> headerVersions, final VCFHeaderVersion expectedVersion) {
+        // merge the headers, then load the merged lines into a VCFMetaDataLines instance and check that
+        // the fileformat line it reports carries the expected version
+        final Set<VCFHeaderLine> mergedLines = doHeaderMergeForVersions(headerVersions);
+        final VCFMetaDataLines mergedMetaData = new VCFMetaDataLines();
+        mergedMetaData.addMetaDataLines(mergedLines);
+        final String mergedVersionString = mergedMetaData.getFileFormatLine().getValue();
+        Assert.assertEquals(VCFHeaderVersion.toHeaderVersion(mergedVersionString), expectedVersion);
+
+        // a header constructed from the merged lines must report the expected version as well
+        final VCFHeader headerFromMergedLines = new VCFHeader(mergedLines);
+        Assert.assertEquals(headerFromMergedLines.getVCFHeaderVersion(), expectedVersion);
+
+        // and that header's metadata (in input order) must equal the merged line set
+        Assert.assertEquals(headerFromMergedLines.getMetaDataInInputOrder(), mergedLines);
+    }
+
+    @Test(dataProvider="mergeInvalidVersions", expectedExceptions = TribbleException.class)
+    public void testMergeInvalidVersions(final List<VCFHeaderVersion> headerVersions) {
+        doHeaderMergeForVersions(headerVersions);
+    }
+
+    @Test(expectedExceptions = TribbleException.class)
+    public void testMergeWithValidationFailure() {
+        // test mixing header versions where the old version header has a line that fails validation
+        // using the resulting (newer) version
+
+        // create a 4.2 header with a 4.2 style pedigree line (one that has no ID)
+        final Set<VCFHeaderLine> oldHeaderLines = VCFHeader.makeHeaderVersionLineSet(VCFHeaderVersion.VCF4_2);
+        oldHeaderLines.add(new VCFHeaderLine(PEDIGREE_HEADER_KEY, "<Name_0=G0-ID,Name_1=G1-ID>"));
+        final VCFHeader oldHeader = new VCFHeader(oldHeaderLines);
+        Assert.assertEquals(oldHeader.getVCFHeaderVersion(), VCFHeaderVersion.VCF4_2);
+
+        // now create a simple 4.3 header; the merge should fail because the old PEDIGREE line isn't valid
+        // for 4.3 (for which pedigree lines must have an ID)
+        final VCFHeader newHeader = new VCFHeader(VCFHeader.makeHeaderVersionLineSet(VCFHeaderVersion.VCF4_3));
+        Assert.assertEquals(newHeader.getVCFHeaderVersion(), VCFHeaderVersion.VCF4_3);
+
+        VCFHeaderMerger.getMergedHeaderLines(Arrays.asList(oldHeader, newHeader),true);
+    }
+
+    private Set<VCFHeaderLine> doHeaderMergeForVersions(final List<VCFHeaderVersion> headerVersions) {
+        // This is a somewhat sketchy way to write a test...for each header we create here, we're
+        // using the same fixed set of VCF42-conforming VCFHeader lines, and then we add a fileformat
+        // line with whatever VCFVersion the test calls for. It's conceivable that as time goes on
+        // and we add new versions, the VCFHeader constructor could throw if any of the lines don't
+        // conform to the requested version.
+        final List<VCFHeader> headerList = new ArrayList<>(headerVersions.size());
+        for (final VCFHeaderVersion version : headerVersions) {
+            final Set<VCFHeaderLine> metaDataSet = VCFHeaderUnitTestData.getV42HeaderLinesWITHOUTFormatString();
+            metaDataSet.add(VCFHeader.makeHeaderVersionLine(version));
+            final VCFHeader header = new VCFHeader(metaDataSet);
+            Assert.assertEquals(header.getVCFHeaderVersion(), version);
+            headerList.add(header);
+        }
+
+        return VCFUtils.smartMergeHeaders(headerList, false);
+    }
+
+    @DataProvider(name = "subsetHeaders")
+    public Iterator<Object[]> getSubsetHeaders() {
+        final List<VCFHeaderLine> headerLineList = new ArrayList<>(VCFHeaderUnitTestData.getTestMetaDataLinesSet());
+        final Collection<Object[]> mergeTestCases = new ArrayList<>();
+        // For each header line in the list of test lines, create a test case consisting of a pair of headers,
+        // one of which is a header created with all of the lines, and one of which is a subset of the full header
+        // with one line removed. Skip the case where the line to be removed is a fileformat line, since that's
+        // required to create a header.
+        for (int i = 0; i < headerLineList.size(); i++) {
+            // take the header line set and remove the ith line, unless it's a fileformat line, since if we remove
+            // that, then we won't be able to create a header using the resulting lines at all.
+            final VCFHeaderLine candidateLine = headerLineList.get(i);
+            if (!VCFHeaderVersion.isFormatString(candidateLine.getKey())) {
+                final List<VCFHeaderLine> subsetList = new ArrayList<>(headerLineList);
+                subsetList.remove(i);
+                mergeTestCases.add(
+                        new Object[] {
+                                new VCFHeader(VCFHeaderUnitTestData.getTestMetaDataLinesSet()),
+                                new VCFHeader(new LinkedHashSet<>(subsetList))
+                        });
+            }
+        }
+
+        return mergeTestCases.iterator();
+    }
+
+    @Test(dataProvider = "subsetHeaders")
+    public void testMergeSubsetHeaders(
+            final VCFHeader fullHeader,
+            final VCFHeader subsetHeader)
+    {
+        final List<VCFHeader> headerList = Arrays.asList(
+                fullHeader,
+                subsetHeader,
+                subsetHeader
+        );
+        Assert.assertEquals(
+                VCFHeaderMerger.getMergedHeaderLines(headerList, false),
+                fullHeader.getMetaDataInSortedOrder());
+
+        // now again, in the reverse order
+        final List<VCFHeader> reverseHeaderList = Arrays.asList(
+                subsetHeader,
+                subsetHeader,
+                fullHeader
+        );
+        Assert.assertEquals(
+                VCFHeaderMerger.getMergedHeaderLines(reverseHeaderList, false),
+                fullHeader.getMetaDataInSortedOrder());
+    }
+
+    @Test
+    public void testDictionaryMergeDuplicateFile() {
+        try (final VCFFileReader reader = new VCFFileReader(new File(variantTestDataRoot + "diagnosis_targets_testfile.vcf"), false)) {
+            final VCFHeader headerOne = reader.getFileHeader();
+            final VCFHeader headerTwo = new VCFHeader(headerOne); // deep copy
+            final List<String> sampleList = new ArrayList<>(headerOne.getSampleNamesInOrder());
+
+            // Check that the two dictionaries start out the same
+            headerOne.getSequenceDictionary().assertSameDictionary(headerTwo.getSequenceDictionary());
+
+            // Run the merge command
+            final VCFHeader mergedHeader = new VCFHeader(VCFHeaderMerger.getMergedHeaderLines(Arrays.asList(headerOne, headerTwo), false), sampleList);
+            // Check that the mergedHeader's sequence dictionary matches the first two
+            mergedHeader.getSequenceDictionary().assertSameDictionary(headerOne.getSequenceDictionary());
+        }
+    }
+
+    @DataProvider(name="dictionaryMergePositive")
+    private Object[][] getDictionaryMergePositive() {
+        return new Object[][] {
+                // input dictionary list, expected merged dictionary
+                {
+                    // one dictionary
+                        Arrays.asList(
+                                createTestVCFHeaderWithSAMDictionary(createTestSAMDictionary(1, 2))
+                        ),
+                        createTestSAMDictionary(1, 2)
+                },
+                {
+                    // two identical dictionaries
+                        Arrays.asList(
+                                createTestVCFHeaderWithSAMDictionary(createTestSAMDictionary(1, 2)),
+                                createTestVCFHeaderWithSAMDictionary(createTestSAMDictionary(1, 2))
+                        ),
+                        createTestSAMDictionary(1, 2)
+                },
+                {
+                    // three different subsets; superset first
+                        Arrays.asList(
+                                createTestVCFHeaderWithSAMDictionary(createTestSAMDictionary(1, 10)),
+                                createTestVCFHeaderWithSAMDictionary(createTestSAMDictionary(7, 2)),
+                                createTestVCFHeaderWithSAMDictionary(createTestSAMDictionary(3, 2))
+                        ),
+                        createTestSAMDictionary(1, 10)
+                },
+                {
+                    // three different subsets; superset second
+                        Arrays.asList(
+                                createTestVCFHeaderWithSAMDictionary(createTestSAMDictionary(7, 2)),
+                                createTestVCFHeaderWithSAMDictionary(createTestSAMDictionary(1, 10)),
+                                createTestVCFHeaderWithSAMDictionary(createTestSAMDictionary(3, 2))
+                        ),
+                        createTestSAMDictionary(1, 10)
+                },
+                {
+                    // three different subsets; superset third (requires the merge implementation to sort on dictionary size)
+                        Arrays.asList(
+                                createTestVCFHeaderWithSAMDictionary(createTestSAMDictionary(7, 2)),
+                                createTestVCFHeaderWithSAMDictionary(createTestSAMDictionary(3, 2)),
+                                createTestVCFHeaderWithSAMDictionary(createTestSAMDictionary(1, 10))
+                        ),
+                        createTestSAMDictionary(1, 10)
+                },
+                {
+                    // one non-null dictionary, one null
+                        Arrays.asList(
+                                createTestVCFHeaderWithSAMDictionary(createTestSAMDictionary(1, 2)),
+                                createTestVCFHeaderWithSAMDictionary(null)
+                        ),
+                        createTestSAMDictionary(1, 2)
+                },
+                {
+                    // one non-null dictionary, one null, in reverse direction
+                        Arrays.asList(
+                                createTestVCFHeaderWithSAMDictionary(null),
+                                createTestVCFHeaderWithSAMDictionary(createTestSAMDictionary(1, 2))
+                        ),
+                        createTestSAMDictionary(1, 2)
+                },
+                {
+                    // three dictionaries: non-null, null, null
+                        Arrays.asList(
+                                createTestVCFHeaderWithSAMDictionary(createTestSAMDictionary(1, 2)),
+                                createTestVCFHeaderWithSAMDictionary(null),
+                                createTestVCFHeaderWithSAMDictionary(null)
+                        ),
+                        createTestSAMDictionary(1, 2)
+                },
+                {
+                    // three dictionaries: null, non-null, null
+                        Arrays.asList(
+                                createTestVCFHeaderWithSAMDictionary(null),
+                                createTestVCFHeaderWithSAMDictionary(createTestSAMDictionary(1, 2)),
+                                createTestVCFHeaderWithSAMDictionary(null)
+                        ),
+                        createTestSAMDictionary(1, 2)
+                },
+                {
+                    // three dictionaries: null, null, non-null
+                        Arrays.asList(
+                                createTestVCFHeaderWithSAMDictionary(null),
+                                createTestVCFHeaderWithSAMDictionary(null),
+                                createTestVCFHeaderWithSAMDictionary(createTestSAMDictionary(1, 2))
+                        ),
+                        createTestSAMDictionary(1, 2)
+                },
+                {
+                        // three dictionaries: non-null, null, non-null
+                        Arrays.asList(
+                                createTestVCFHeaderWithSAMDictionary(createTestSAMDictionary(1, 2)),
+                                createTestVCFHeaderWithSAMDictionary(null),
+                                createTestVCFHeaderWithSAMDictionary(createTestSAMDictionary(1, 2))
+                        ),
+                        createTestSAMDictionary(1, 2)
+                },
+                {
+                    // three dictionaries: subset, null, superset
+                        Arrays.asList(
+                                createTestVCFHeaderWithSAMDictionary(createTestSAMDictionary(1, 2)),
+                                createTestVCFHeaderWithSAMDictionary(null),
+                                createTestVCFHeaderWithSAMDictionary(createTestSAMDictionary(1, 10))
+                        ),
+                        createTestSAMDictionary(1, 10)
+                },
+                {
+                    // all null dictionaries
+                        Arrays.asList(
+                                createTestVCFHeaderWithSAMDictionary(null),
+                                createTestVCFHeaderWithSAMDictionary(null)
+                        ),
+                        null
+                }
+        };
+    }
+
+    @Test(dataProvider = "dictionaryMergePositive")
+    public void testDictionaryMergePositive(
+            final List<VCFHeader> sourceHeaders, final SAMSequenceDictionary expectedDictionary) {
+        // the dictionary of a header built from the merged lines must equal the expected one (null if no input has one)
+        final VCFHeader mergedHeader = new VCFHeader(VCFHeaderMerger.getMergedHeaderLines(sourceHeaders, false));
+        Assert.assertEquals(mergedHeader.getSequenceDictionary(), expectedDictionary);
+    }
+
+    @DataProvider(name="dictionaryMergeNegative")
+    private Object[][] getDictionaryMergeNegative() {
+        final SAMSequenceDictionary forwardDictionary = createTestSAMDictionary(1, 2);
+        final SAMSequenceDictionary reverseDictionary = createReverseDictionary(forwardDictionary);
+
+        return new Object[][] {
+                {
+                    // SequenceDictionaryCompatibility.NO_COMMON_CONTIGS
+                        Arrays.asList(
+                                createTestVCFHeaderWithSAMDictionary(createTestSAMDictionary(1, 2)),
+                                createTestVCFHeaderWithSAMDictionary(createTestSAMDictionary(5, 2)))
+                },
+                {
+                    // SequenceDictionaryCompatibility.OUT_OF_ORDER
+                        Arrays.asList(
+                                createTestVCFHeaderWithSAMDictionary(forwardDictionary),
+                                createTestVCFHeaderWithSAMDictionary(reverseDictionary))
+                },
+                {
+                    // SequenceDictionaryCompatibility.UNEQUAL_COMMON_CONTIGS common subset has contigs that have the same name but different lengths
+                        Arrays.asList(
+                                createTestVCFHeaderWithSAMDictionary(createDictionaryWithLengths(100)),
+                                createTestVCFHeaderWithSAMDictionary(createDictionaryWithLengths(200)))
+                },
+                {
+                    // SequenceDictionaryCompatibility.NON_CANONICAL_HUMAN_ORDER human reference detected but the order of the contigs is non-standard (lexicographic, for example)
+                        Arrays.asList(
+                                createTestVCFHeaderWithSAMDictionary(createDictionaryInCanonicalHumanOrder()),
+                                createTestVCFHeaderWithSAMDictionary(createDictionaryInNonCanonicalHumanOrder()))
+                },
+                {
+                    // three dictionaries with no superset among them (NOTE(review): described as mutually disjoint, but (5,2), (4,2), (6,2) would overlap if the args are (start, count) - confirm)
+                        Arrays.asList(
+                                createTestVCFHeaderWithSAMDictionary(createTestSAMDictionary(5, 2)),
+                                createTestVCFHeaderWithSAMDictionary(createTestSAMDictionary(4, 2)),
+                                createTestVCFHeaderWithSAMDictionary(createTestSAMDictionary(6, 2))
+                        )
+                },
+        };
+    }
+
+    @Test(dataProvider = "dictionaryMergeNegative", expectedExceptions = TribbleException.class)
+    private void testDictionaryMergeNegative(final List<VCFHeader> sourceHeaders) {
+        VCFHeaderMerger.getMergedHeaderLines(sourceHeaders, false);
+    }
+
+    @Test
+    final void testDuplicateNonStructuredKeys() {
+        // merge 2 headers, one has "##sample=foo", one has "##sample=bar", both should survive the merge
+        final VCFHeaderLine fooLine = new VCFHeaderLine("sample", "foo");
+        final Set<VCFHeaderLine> fooLines = VCFHeader.makeHeaderVersionLineSet(VCFHeader.DEFAULT_VCF_VERSION);
+        fooLines.add(fooLine);
+        final VCFHeader fooHeader = new VCFHeader(fooLines);
+
+        final VCFHeaderLine barLine = new VCFHeaderLine("sample", "bar");
+        final Set<VCFHeaderLine> barLines = VCFHeader.makeHeaderVersionLineSet(VCFHeader.DEFAULT_VCF_VERSION);
+        barLines.add(barLine);
+        final VCFHeader barHeader = new VCFHeader(barLines);
+
+        final Set<VCFHeaderLine> mergedLines = VCFHeaderMerger.getMergedHeaderLines(Arrays.asList(fooHeader, barHeader), false);
+        Assert.assertEquals(mergedLines.size(), 3);
+        Assert.assertTrue(mergedLines.contains(fooLine));
+        Assert.assertTrue(mergedLines.contains(barLine));
+    }
+
+    @DataProvider(name = "compatibleInfoLines")
+    public Object[][] getMergerData() {
+        return new Object[][]{
+                // 2 lines to merge, expected result
+                {
+                    // mixed number, promote to "."
+                        new VCFInfoHeaderLine("INFO=<ID=AB,Number=1,Type=Float,Description=\"Allele Balance for hets (ref/(ref+alt))\">",
+                                VCFHeader.DEFAULT_VCF_VERSION),
+                        new VCFInfoHeaderLine("INFO=<ID=AB,Number=A,Type=Float,Description=\"Allele Balance for hets (ref/(ref+alt))\">",
+                                VCFHeader.DEFAULT_VCF_VERSION),
+                        new VCFInfoHeaderLine("INFO=<ID=AB,Number=.,Type=Float,Description=\"Allele Balance for hets (ref/(ref+alt))\">",
+                                VCFHeader.DEFAULT_VCF_VERSION),
+                        "AB"
+                },
+                {
+                    // mixed number type, promote to float
+                        new VCFInfoHeaderLine("INFO=<ID=AB,Number=1,Type=Float,Description=\"Allele Balance for hets (ref/(ref+alt))\">",
+                                VCFHeader.DEFAULT_VCF_VERSION),
+                        new VCFInfoHeaderLine("INFO=<ID=AB,Number=1,Type=Integer,Description=\"Allele Balance for hets (ref/(ref+alt))\">",
+                                VCFHeader.DEFAULT_VCF_VERSION),
+                        new VCFInfoHeaderLine("INFO=<ID=AB,Number=1,Type=Float,Description=\"Allele Balance for hets (ref/(ref+alt))\">",
+                                VCFHeader.DEFAULT_VCF_VERSION),
+                        "AB"
+                },
+                {
+                        // mixed number type in reverse direction, promote to float
+                        new VCFInfoHeaderLine("INFO=<ID=AB,Number=1,Type=Integer,Description=\"Allele Balance for hets (ref/(ref+alt))\">",
+                                VCFHeader.DEFAULT_VCF_VERSION),
+                        new VCFInfoHeaderLine("INFO=<ID=AB,Number=1,Type=Float,Description=\"Allele Balance for hets (ref/(ref+alt))\">",
+                                VCFHeader.DEFAULT_VCF_VERSION),
+                        new VCFInfoHeaderLine("INFO=<ID=AB,Number=1,Type=Float,Description=\"Allele Balance for hets (ref/(ref+alt))\">",
+                                VCFHeader.DEFAULT_VCF_VERSION),
+                        "AB"
+                },
+        };
+    }
+
+    @Test(dataProvider = "compatibleInfoLines")
+    public void testMergeCompatibleInfoLines(final VCFInfoHeaderLine line1, final VCFInfoHeaderLine line2, final VCFInfoHeaderLine expectedLine, final String id) {
+        final VCFHeader hdr1 = new VCFHeader(VCFHeader.makeHeaderVersionLineSet(VCFHeader.DEFAULT_VCF_VERSION), Collections.EMPTY_SET);
+        hdr1.addMetaDataLine(line1);
+
+        final VCFHeader hdr2 = new VCFHeader(VCFHeader.makeHeaderVersionLineSet(VCFHeader.DEFAULT_VCF_VERSION), Collections.EMPTY_SET);
+        hdr2.addMetaDataLine(line2);
+
+        final VCFHeader mergedHeader = new VCFHeader(VCFHeaderMerger.getMergedHeaderLines(Arrays.asList(hdr1, hdr2), true));
+        Assert.assertEquals(mergedHeader.getInfoHeaderLine(id), expectedLine);
+    }
+
+    @DataProvider(name = "mergeIncompatibleInfoLines")
+    public Object[][] getMergeIncompatibleInfoLines() {
+        return new Object[][]{
+                // 2 incompatible lines to merge (the trailing expected line and ID are unused; the merge must throw)
+                {
+                        // mixed number AND number type (multiple different attributes)
+                        new VCFInfoHeaderLine("INFO=<ID=AB,Number=A,Type=Float,Description=\"Allele Balance for hets (ref/(ref+alt))\">",
+                                VCFHeader.DEFAULT_VCF_VERSION),
+                        new VCFInfoHeaderLine("INFO=<ID=AB,Number=1,Type=Integer,Description=\"Allele Balance for hets (ref/(ref+alt))\">",
+                                VCFHeader.DEFAULT_VCF_VERSION),
+                        new VCFInfoHeaderLine("INFO=<ID=AB,Number=.,Type=Float,Description=\"Allele Balance for hets (ref/(ref+alt))\">",
+                                VCFHeader.DEFAULT_VCF_VERSION),
+                        "AB"
+                },
+                {
+                        // mixed number AND number type  (multiple different attributes), reverse direction
+                        new VCFInfoHeaderLine("INFO=<ID=AB,Number=1,Type=Integer,Description=\"Allele Balance for hets (ref/(ref+alt))\">",
+                                VCFHeader.DEFAULT_VCF_VERSION),
+                        new VCFInfoHeaderLine("INFO=<ID=AB,Number=A,Type=Float,Description=\"Allele Balance for hets (ref/(ref+alt))\">",
+                                VCFHeader.DEFAULT_VCF_VERSION),
+                        new VCFInfoHeaderLine("INFO=<ID=AB,Number=.,Type=Float,Description=\"Allele Balance for hets (ref/(ref+alt))\">",
+                                VCFHeader.DEFAULT_VCF_VERSION),
+                        "AB"
+                },
+        };
+    }
+
+    @Test(dataProvider = "mergeIncompatibleInfoLines", expectedExceptions=TribbleException.class)
+    public void testMergeIncompatibleInfoLines(final VCFInfoHeaderLine line1, final VCFInfoHeaderLine line2, final VCFInfoHeaderLine expectedLine, final String id) {
+        final VCFHeader hdr1 = new VCFHeader(VCFHeader.makeHeaderVersionLineSet(VCFHeader.DEFAULT_VCF_VERSION), Collections.EMPTY_SET);
+        hdr1.addMetaDataLine(line1);
+        final VCFHeader hdr2 = new VCFHeader(VCFHeader.makeHeaderVersionLineSet(VCFHeader.DEFAULT_VCF_VERSION), Collections.EMPTY_SET);
+        hdr2.addMetaDataLine(line2);
+        new VCFHeader(VCFHeaderMerger.getMergedHeaderLines(Arrays.asList(hdr1, hdr2), true));
+    }
+
+    private final SAMSequenceDictionary createTestSAMDictionary(final int startSequence, final int numSequences) {
+        final SAMSequenceDictionary samDictionary = new SAMSequenceDictionary();
+        IntStream.range(startSequence, startSequence + numSequences).forEachOrdered(
+                i -> samDictionary.addSequence(new SAMSequenceRecord(Integer.toString(i), i)));
+        return samDictionary;
+    }
+
+    private final VCFHeader createTestVCFHeaderWithSAMDictionary(final SAMSequenceDictionary samDictionary) {
+        final VCFHeader vcfHeader = createTestVCFHeader();
+        vcfHeader.setSequenceDictionary(samDictionary);
+        return vcfHeader;
+    }
+
+    private SAMSequenceDictionary createDictionaryInNonCanonicalHumanOrder() {
+        final List<SAMSequenceRecord> sequences = new ArrayList<>();
+        sequences.add(new SAMSequenceRecord("1", 100));
+        sequences.add(new SAMSequenceRecord("10", 100));
+        sequences.add(new SAMSequenceRecord("2", 100));
+        return new SAMSequenceDictionary(sequences);
+    }
+
+    private SAMSequenceDictionary createDictionaryInCanonicalHumanOrder() {
+        final List<SAMSequenceRecord> sequences = new ArrayList<>();
+        sequences.add(new SAMSequenceRecord("1", 100));
+        sequences.add(new SAMSequenceRecord("2", 100));
+        sequences.add(new SAMSequenceRecord("10", 100));
+        return new SAMSequenceDictionary(sequences);
+    }
+
+    private SAMSequenceDictionary createDictionaryWithLengths(final int length) {
+        final List<SAMSequenceRecord> sequences = new ArrayList<>();
+        sequences.add(new SAMSequenceRecord("1", length));
+        sequences.add(new SAMSequenceRecord("2", length));
+        sequences.add(new SAMSequenceRecord("3", length));
+        return new SAMSequenceDictionary(sequences);
+    }
+
+    private SAMSequenceDictionary createReverseDictionary(final SAMSequenceDictionary forwardDictionary){
+        // it's not sufficient to reuse the existing sequences by just reordering them, since
+        // SAMSequenceDictionary *mutates* the sequence indices to match the input order. So we need
+        // to create the new sequence dictionary using entirely new sequence records, and let
+        // SAMSequenceDictionary assign them indices that match the input order.
+        final List<SAMSequenceRecord> reverseSequences = new ArrayList<>(forwardDictionary.getSequences());
+        Collections.reverse(reverseSequences);
+        final SAMSequenceDictionary reverseDictionary = new SAMSequenceDictionary();
+
+        int count = 0;
+        for (final SAMSequenceRecord samSequenceRecord : reverseSequences) {
+            final SAMSequenceRecord newSequenceRecord = new SAMSequenceRecord(
+                    samSequenceRecord.getSequenceName(),
+                    samSequenceRecord.getSequenceLength());
+            reverseDictionary.addSequence(newSequenceRecord);
+            Assert.assertEquals(newSequenceRecord.getSequenceIndex(), count);
+            count++;
+        }
+        return reverseDictionary;
+    }
+
+    private final VCFHeader createTestVCFHeader() {
+        return new VCFHeader(VCFHeader.makeHeaderVersionLineSet(VCFHeader.DEFAULT_VCF_VERSION));
+    }
+
+}
diff --git a/src/test/java/htsjdk/variant/vcf/VCFHeaderUnitTest.java b/src/test/java/htsjdk/variant/vcf/VCFHeaderUnitTest.java
index e4d5099eda..8ee9ccab26 100644
--- a/src/test/java/htsjdk/variant/vcf/VCFHeaderUnitTest.java
+++ b/src/test/java/htsjdk/variant/vcf/VCFHeaderUnitTest.java
@@ -29,7 +29,6 @@
 import htsjdk.samtools.SAMSequenceRecord;
 import htsjdk.samtools.util.CloseableIterator;
 import htsjdk.samtools.util.FileExtensions;
-import htsjdk.samtools.util.IOUtil;
 import htsjdk.samtools.util.TestUtil;
 import htsjdk.tribble.TribbleException;
 import htsjdk.tribble.readers.AsciiLineReader;
@@ -42,66 +41,64 @@
 import htsjdk.variant.variantcontext.writer.VariantContextWriter;
 import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder;
 import org.testng.Assert;
-import org.testng.annotations.AfterClass;
-import org.testng.annotations.BeforeClass;
 import org.testng.annotations.DataProvider;
 import org.testng.annotations.Test;
 
 import java.io.*;
-import java.math.BigInteger;
 import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
-import java.security.MessageDigest;
-import java.security.NoSuchAlgorithmException;
 import java.util.*;
 import java.util.stream.Collectors;
 
-/**
- * Created by IntelliJ IDEA.
- * User: aaron
- * Date: Jun 30, 2010
- * Time: 3:32:08 PM
- * To change this template use File | Settings | File Templates.
- */
 public class VCFHeaderUnitTest extends VariantBaseTest {
 
-    private File tempDir;
-
-    private VCFHeader createHeader(String headerStr) {
-        VCFCodec codec = new VCFCodec();
-        VCFHeader header = (VCFHeader) codec.readActualHeader(new LineIteratorImpl(new SynchronousLineReader(
-                new StringReader(headerStr))));
-        Assert.assertEquals(header.getMetaDataInInputOrder().size(), VCF4headerStringCount);
-        return header;
-    }
-
-    @BeforeClass
-    private void createTemporaryDirectory() {
-        tempDir = TestUtil.getTempDirectory("VCFHeader", "VCFHeaderTest");
+    @DataProvider(name="headerRoundTrip")
+    private Object[][] getHeaderRoundTrip() {
+        return new Object[][] {
+                { VCFHeaderUnitTestData.getVCFV42TestHeaderString() },
+                { VCFHeaderUnitTestData.VCF42headerStrings_with_negativeOne }
+        };
     }
 
-    @AfterClass
-    private void deleteTemporaryDirectory() {
-        for (File f : tempDir.listFiles()) {
-            f.delete();
-        }
-        tempDir.delete();
+    @Test(dataProvider = "headerRoundTrip")
+    public void test42HeaderRoundTrip(final String headerString) throws IOException {
+        final VCFHeader header = VCFHeaderUnitTestData.createHeaderFromString(headerString);
+        Assert.assertEquals(header.getMetaDataInSortedOrder(), getRoundTripEncoded(header));
     }
 
     @Test
-    public void testVCF4ToVCF4() {
-        VCFHeader header = createHeader(VCF4headerStrings);
-        checkMD5ofHeaderFile(header, "91c33dadb92e01ea349bd4bcdd02d6be");
-    }
+    public void test42FileRoundtrip() throws Exception {
+        // this test validates that source/version fields are round-tripped properly
 
-    @Test
-    public void testVCF4ToVCF4_alternate() {
-        VCFHeader header = createHeader(VCF4headerStrings_with_negativeOne);
-        checkMD5ofHeaderFile(header, "39318d9713897d55be5ee32a2119853f");
+        // read an existing VCF
+        final File expectedFile = new File("src/test/resources/htsjdk/variant/Vcf4.2WithSourceVersionInfoFields.vcf");
+
+        // write the file out into a new copy
+        final File actualFile = File.createTempFile("testVcf4.2roundtrip.", FileExtensions.VCF);
+        actualFile.deleteOnExit();
+
+        try (final VCFFileReader originalFileReader = new VCFFileReader(expectedFile, false);
+             final VariantContextWriter copyWriter = new VariantContextWriterBuilder()
+                     .setOutputFile(actualFile)
+                     .setReferenceDictionary(createArtificialSequenceDictionary())
+                     .setOptions(EnumSet.of(Options.ALLOW_MISSING_FIELDS_IN_HEADER, Options.INDEX_ON_THE_FLY))
+                     .build()
+        ) {
+            final VCFHeader originalHeader = originalFileReader.getFileHeader();
+
+            copyWriter.writeHeader(originalHeader);
+            for (final VariantContext variantContext : originalFileReader) {
+                copyWriter.add(variantContext);
+            }
+        }
+
+        final String actualContents = new String(Files.readAllBytes(actualFile.toPath()), StandardCharsets.UTF_8);
+        final String expectedContents = new String(Files.readAllBytes(expectedFile.toPath()), StandardCharsets.UTF_8);
+        Assert.assertEquals(actualContents, expectedContents);
     }
 
     @Test
-    public void testVCFHeaderSampleRenamingSingleSampleVCF() throws Exception {
+    public void testSampleRenamingSingleSample() throws Exception {
         final VCFCodec codec = new VCFCodec();
         codec.setRemappedSampleName("FOOSAMPLE");
         final AsciiLineReaderIterator vcfIterator = new AsciiLineReaderIterator(AsciiLineReader.from(new FileInputStream(variantTestDataRoot + "HiSeq.10000.vcf")));
@@ -120,57 +117,25 @@ public void testVCFHeaderSampleRenamingSingleSampleVCF() throws Exception {
         }
     }
 
-    @DataProvider
-    public Object[][] testVCFHeaderDictionaryMergingData() {
+    @DataProvider(name="testSampleRenamingFailsTests")
+    public Object[][] testSampleRenamingFailsTests() {
         return new Object[][]{
-                {"diagnosis_targets_testfile.vcf"},  // numerically ordered contigs
-                {"dbsnp_135.b37.1000.vcf"}          // lexicographically ordered contigs
+                {variantTestDataRoot + "ex2.vcf"},                  // multi sample vcf
+                {variantTestDataRoot + "dbsnp_135.b37.1000.vcf"}    // sites only vcf
         };
     }
 
-    @Test(dataProvider = "testVCFHeaderDictionaryMergingData")
-    public void testVCFHeaderDictionaryMerging(final String vcfFileName) {
-        final VCFHeader headerOne = new VCFFileReader(new File(variantTestDataRoot + vcfFileName), false).getFileHeader();
-        final VCFHeader headerTwo = new VCFHeader(headerOne); // deep copy
-        final List<String> sampleList = new ArrayList<String>();
-        sampleList.addAll(headerOne.getSampleNamesInOrder());
-
-        // Check that the two dictionaries start out the same
-        headerOne.getSequenceDictionary().assertSameDictionary(headerTwo.getSequenceDictionary());
-
-        // Run the merge command
-        final VCFHeader mergedHeader = new VCFHeader(VCFUtils.smartMergeHeaders(Arrays.asList(headerOne, headerTwo), false), sampleList);
-
-        // Check that the mergedHeader's sequence dictionary matches the first two
-        mergedHeader.getSequenceDictionary().assertSameDictionary(headerOne.getSequenceDictionary());
-    }
-
-    @Test(expectedExceptions = TribbleException.class)
-    public void testVCFHeaderSampleRenamingMultiSampleVCF() throws Exception {
-        final VCFCodec codec = new VCFCodec();
-        codec.setRemappedSampleName("FOOSAMPLE");
-        final AsciiLineReaderIterator vcfIterator = new AsciiLineReaderIterator(AsciiLineReader.from(new FileInputStream(variantTestDataRoot + "ex2.vcf")));
-        final VCFHeader header = (VCFHeader) codec.readHeader(vcfIterator).getHeaderValue();
-    }
-
-    @Test(expectedExceptions = TribbleException.class)
-    public void testVCFHeaderSampleRenamingSitesOnlyVCF() throws Exception {
+    @Test(dataProvider = "testSampleRenamingFailsTests", expectedExceptions = TribbleException.class)
+    public void testSampleRenamingFails(final String fileName) throws IOException {
         final VCFCodec codec = new VCFCodec();
         codec.setRemappedSampleName("FOOSAMPLE");
-        final AsciiLineReaderIterator vcfIterator = new AsciiLineReaderIterator(AsciiLineReader.from(new FileInputStream(variantTestDataRoot + "dbsnp_135.b37.1000.vcf")));
-        final VCFHeader header = (VCFHeader) codec.readHeader(vcfIterator).getHeaderValue();
-    }
-
-    private VCFHeader getHiSeqVCFHeader() {
-        final File vcf = new File("src/test/resources/htsjdk/variant/HiSeq.10000.vcf");
-        final VCFFileReader reader = new VCFFileReader(vcf, false);
-        final VCFHeader header = reader.getFileHeader();
-        reader.close();
-        return header;
+        final AsciiLineReaderIterator vcfIterator = new AsciiLineReaderIterator(
+                AsciiLineReader.from(new FileInputStream(fileName)));
+        codec.readHeader(vcfIterator).getHeaderValue();
     }
 
     @Test
-    public void testVCFHeaderAddInfoLine() {
+    public void testAddInfoLine() {
         final VCFHeader header = getHiSeqVCFHeader();
         final VCFInfoHeaderLine infoLine = new VCFInfoHeaderLine("TestInfoLine", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "test info line");
         header.addMetaDataLine(infoLine);
@@ -185,13 +150,8 @@ public void testVCFHeaderAddInfoLine() {
         Assert.assertFalse(header.getOtherHeaderLines().contains(infoLine), "TestInfoLine present in other header lines");
     }
 
-    private static <T extends VCFHeaderLine> Collection<VCFHeaderLine> asCollectionOfVCFHeaderLine(Collection<T> headers) {
-        // create a collection of VCFHeaderLine so that contains tests work correctly
-        return headers.stream().map(h -> (VCFHeaderLine) h).collect(Collectors.toList());
-    }
-
     @Test
-    public void testVCFHeaderAddFormatLine() {
+    public void testAddFormatLine() {
         final VCFHeader header = getHiSeqVCFHeader();
         final VCFFormatHeaderLine formatLine = new VCFFormatHeaderLine("TestFormatLine", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "test format line");
         header.addMetaDataLine(formatLine);
@@ -207,11 +167,11 @@ public void testVCFHeaderAddFormatLine() {
     }
 
     @Test
-    public void testVCFHeaderAddFilterLine() {
+    public void testAddFilterLine() {
         final VCFHeader header = getHiSeqVCFHeader();
         final String filterDesc = "TestFilterLine Description";
-        final VCFFilterHeaderLine filterLine = new VCFFilterHeaderLine("TestFilterLine",filterDesc);
-        Assert.assertEquals(filterDesc,filterLine.getDescription());
+        final VCFFilterHeaderLine filterLine = new VCFFilterHeaderLine("TestFilterLine", filterDesc);
+        Assert.assertEquals(filterDesc, filterLine.getDescription());
         header.addMetaDataLine(filterLine);
 
         Assert.assertTrue(header.getFilterLines().contains(filterLine), "TestFilterLine not found in filter header lines");
@@ -225,10 +185,15 @@ public void testVCFHeaderAddFilterLine() {
     }
 
     @Test
-    public void testVCFHeaderAddContigLine() {
+    public void testAddContigLine() {
         final VCFHeader header = getHiSeqVCFHeader();
+        // no contig lines in this header
+        Assert.assertTrue(header.getContigLines().isEmpty());
+
         final VCFContigHeaderLine contigLine = new VCFContigHeaderLine(
-                "<ID=chr1,length=1234567890,assembly=FAKE,md5=f126cdf8a6e0c7f379d618ff66beb2da,species=\"Homo sapiens\">", VCFHeaderVersion.VCF4_0, VCFHeader.CONTIG_KEY, 0);
+                "<ID=chr1,length=1234567890,assembly=FAKE,md5=f126cdf8a6e0c7f379d618ff66beb2da,species=\"Homo sapiens\">", VCFHeaderVersion.VCF4_0, 0);
+        Assert.assertEquals(contigLine.getKey(), VCFHeader.CONTIG_KEY);
+        Assert.assertEquals(contigLine.getID(), "chr1");
         header.addMetaDataLine(contigLine);
 
         Assert.assertTrue(header.getContigLines().contains(contigLine), "Test contig line not found in contig header lines");
@@ -241,10 +206,70 @@ public void testVCFHeaderAddContigLine() {
     }
 
     @Test
-    public void testVCFHeaderContigLineMissingLength() {
+    public void testAddContigLineExactDuplicateSilentlyDropped() {
+        final File input = new File("src/test/resources/htsjdk/variant/ex2.vcf");
+
+        final VCFFileReader reader = new VCFFileReader(input, false);
+        final VCFHeader header = reader.getFileHeader();
+
+        final int numContigLinesBefore = header.getContigLines().size();
+        // try to re-add the first contig line
+        header.addMetaDataLine(header.getContigLines().get(0));
+        final int numContigLinesAfter = header.getContigLines().size();
+
+        // assert that we have the same number of contig lines before and after
+        Assert.assertEquals(numContigLinesBefore, numContigLinesAfter);
+    }
+
+    @Test
+    public void testAddContigLineWithDifferentAttributesSilentlyDropped() {
+        final VCFContigHeaderLine contigOneNoAssembly = new VCFContigHeaderLine(
+                new LinkedHashMap<String, String>() {{
+                    put("ID", "1");
+                    put("length", "123");
+                }},
+                0);
+        final VCFContigHeaderLine contigOneWithAssembly = new VCFContigHeaderLine(
+                new LinkedHashMap<String, String>() {{
+                    put("ID", "1");
+                    put("length", "123");
+                    put("assembly", "b37");
+                }},
+                1);
+        Assert.assertNotEquals(contigOneNoAssembly.hashCode(), contigOneWithAssembly.hashCode());
+
+        final Set<VCFHeaderLine> headerLineSet = VCFHeader.makeHeaderVersionLineSet(VCFHeader.DEFAULT_VCF_VERSION);
+        headerLineSet.add(contigOneNoAssembly);
+        headerLineSet.add(contigOneWithAssembly);
+        Assert.assertEquals(headerLineSet.size(), 3); // one fileformat line, plus 2 contig lines
+
+        // silently drops contigOneNoAssembly since it has the same ID as contigOneWithAssembly (the contig indices differ: 0 vs 1)
+        final VCFHeader vcfHeader = new VCFHeader(headerLineSet);
+        final Set<VCFHeaderLine> allMetaDataInput = vcfHeader.getMetaDataInInputOrder();
+        Assert.assertEquals(allMetaDataInput.size(), 2);
+        final Set<VCFHeaderLine> allMetaDataSorted = vcfHeader.getMetaDataInSortedOrder();
+        Assert.assertEquals(allMetaDataSorted.size(), 2);
+        final List<VCFContigHeaderLine> allContigLines = vcfHeader.getContigLines();
+        Assert.assertEquals(allContigLines.size(), 1);      // one contig
+        Assert.assertEquals(allContigLines.get(0).getGenericFieldValue("assembly"), "b37");
+    }
+
+    @Test(expectedExceptions = TribbleException.class)
+    public void testAddContigLineRejectDuplicateContigIndex() {
+        final VCFHeader header = new VCFHeader();
+        // add two contig lines that share an index, but have different IDs and represent different contigs
+        final VCFContigHeaderLine contigLine1 = new VCFContigHeaderLine("<ID=chr1,length=10>", VCFHeaderVersion.VCF4_2, 0);
+        final VCFContigHeaderLine contigLine2 = new VCFContigHeaderLine("<ID=chr2,length=10>", VCFHeaderVersion.VCF4_2, 0);
+
+        header.addMetaDataLine(contigLine1);
+        header.addMetaDataLine(contigLine2);
+    }
+
+    @Test
+    public void testAddContigLineMissingLength() {
         final VCFHeader header = getHiSeqVCFHeader();
         final VCFContigHeaderLine contigLine = new VCFContigHeaderLine(
-                "<ID=chr1>", VCFHeaderVersion.VCF4_0, VCFHeader.CONTIG_KEY, 0);
+                "<ID=chr1>", VCFHeaderVersion.VCF4_0, 0);
         header.addMetaDataLine(contigLine);
         Assert.assertTrue(header.getContigLines().contains(contigLine), "Test contig line not found in contig header lines");
         Assert.assertTrue(header.getMetaDataInInputOrder().contains(contigLine), "Test contig line not found in set of all header lines");
@@ -252,58 +277,66 @@ public void testVCFHeaderContigLineMissingLength() {
         final SAMSequenceDictionary sequenceDictionary = header.getSequenceDictionary();
         Assert.assertNotNull(sequenceDictionary);
         Assert.assertEquals(sequenceDictionary.getSequence("chr1").getSequenceLength(), SAMSequenceRecord.UNKNOWN_SEQUENCE_LENGTH);
-
     }
 
-        @Test
-    public void testVCFHeaderHonorContigLineOrder() throws IOException {
+    @Test
+    public void testGetContigLinesHonorsSortOrder() {
+        // NOTE: this test file has *lexicographically* ordered contigs
         try (final VCFFileReader vcfReader = new VCFFileReader(new File(variantTestDataRoot + "dbsnp_135.b37.1000.vcf"), false)) {
             // start with a header with a bunch of contig lines
             final VCFHeader header = vcfReader.getFileHeader();
-            final List<VCFContigHeaderLine> originalHeaderList = header.getContigLines();
-            Assert.assertTrue(originalHeaderList.size() > 0);
-
-            // copy the contig lines to a new list, sticking an extra contig line in the middle
-            final List<VCFContigHeaderLine> orderedList = new ArrayList<>();
-            final int splitInTheMiddle = originalHeaderList.size() / 2;
-            orderedList.addAll(originalHeaderList.subList(0, splitInTheMiddle));
-            final VCFContigHeaderLine outrageousContigLine = new VCFContigHeaderLine(
-                    "<ID=outrageousID,length=1234567890,assembly=FAKE,md5=f126cdf8a6e0c7f379d618ff66beb2da,species=\"Homo sapiens\">",
+            final List<VCFContigHeaderLine> originalContigsInSortedOrder = header.getContigLines();
+            Assert.assertTrue(originalContigsInSortedOrder.size() > 0);
+
+            // copy the contig lines to a new list
+            final List<VCFContigHeaderLine> confoundedList = new ArrayList<>();
+            final int midPoint = originalContigsInSortedOrder.size() / 2;
+            confoundedList.addAll(originalContigsInSortedOrder.subList(0, midPoint));
+
+            // deliberately stick an extra contig line in the middle of the list, but using a contig index
+            // that will cause the line to sort to the end
+            final String newContigID = "newContigID";
+            final int newContigIndex = originalContigsInSortedOrder.size();
+            final VCFContigHeaderLine newContigLine = new VCFContigHeaderLine(
+                    String.format(
+                    "<ID=%s,length=1234567890,assembly=FAKE,md5=f126cdf8a6e0c7f379d618ff66beb2da,species=\"Homo sapiens\">", newContigID),
                     VCFHeaderVersion.VCF4_2,
-                    VCFHeader.CONTIG_KEY,
-                    0);
-            orderedList.add(outrageousContigLine);
-            // make sure the extra contig line is outrageous enough to not collide with a real contig ID
-            Assert.assertTrue(orderedList.contains(outrageousContigLine));
-            orderedList.addAll(originalHeaderList.subList(splitInTheMiddle, originalHeaderList.size()));
-            Assert.assertEquals(originalHeaderList.size() + 1, orderedList.size());
-
-            // crete a new header from the ordered list, and test that getContigLines honors the input order
-            final VCFHeader orderedHeader = new VCFHeader();
-            orderedList.forEach(hl -> orderedHeader.addMetaDataLine(hl));
-            Assert.assertEquals(orderedList, orderedHeader.getContigLines());
+                    newContigIndex);
+            confoundedList.add(newContigLine);
+            confoundedList.addAll(originalContigsInSortedOrder.subList(midPoint, originalContigsInSortedOrder.size()));
+
+            // make sure the new contig line was actually added
+            Assert.assertEquals(originalContigsInSortedOrder.size() + 1, confoundedList.size());
+            Assert.assertTrue(confoundedList.contains(newContigLine));
+
+            // create a new header from the confounded list, call getContigLines() on the header, and validate
+            // that the new line is included in the resulting list, and is at the end
+            final VCFHeader newHeader = new VCFHeader();
+            confoundedList.forEach(hl -> newHeader.addMetaDataLine(hl));
+            final List<VCFContigHeaderLine> roundTrippedLines = newHeader.getContigLines();
+            Assert.assertEquals(roundTrippedLines.size(), originalContigsInSortedOrder.size() + 1);
+            Assert.assertEquals(roundTrippedLines.get(roundTrippedLines.size() - 1), newContigLine);
+
+            // make sure the sequence dictionary has the contig with the correct contig index, and in
+            // the same relative location in the dictionary (at the end of the list)
+            final SAMSequenceDictionary orderedSeqDict = newHeader.getSequenceDictionary();
+            Assert.assertEquals(
+                    orderedSeqDict.getSequence(newContigID).getSequenceIndex(),
+                    roundTrippedLines.size() - 1);
+            Assert.assertEquals(
+                    orderedSeqDict.getSequences().get(newHeader.getContigLines().size() - 1).getSequenceName(),
+                    newContigID);
         }
     }
 
     @Test
-    public void testVCFSimpleHeaderLineGenericFieldGetter() {
-        VCFHeader header = createHeader(VCF4headerStrings);
-        List<VCFFilterHeaderLine> filters = header.getFilterLines();
-        VCFFilterHeaderLine filterHeaderLine = filters.get(0);
-        Map<String,String> genericFields = filterHeaderLine.getGenericFields();
-        Assert.assertEquals(genericFields.get("ID"),"NoQCALL");
-        Assert.assertEquals(genericFields.get("Description"),"Variant called by Dindel but not confirmed by QCALL");
-    }
-
-    @Test
-    public void testVCFHeaderAddOtherLine() {
+    public void testAddOtherLine() {
         final VCFHeader header = getHiSeqVCFHeader();
         final VCFHeaderLine otherLine = new VCFHeaderLine("TestOtherLine", "val");
         header.addMetaDataLine(otherLine);
 
         Assert.assertTrue(header.getOtherHeaderLines().contains(otherLine), "TestOtherLine not found in other header lines");
         Assert.assertTrue(header.getMetaDataInInputOrder().contains(otherLine), "TestOtherLine not found in set of all header lines");
-        Assert.assertNotNull(header.getOtherHeaderLine("TestOtherLine"), "Lookup for TestOtherLine by key failed");
 
         Assert.assertFalse(asCollectionOfVCFHeaderLine(header.getInfoHeaderLines()).contains(otherLine), "TestOtherLine present in info header lines");
         Assert.assertFalse(asCollectionOfVCFHeaderLine(header.getFormatHeaderLines()).contains(otherLine), "TestOtherLine present in format header lines");
@@ -312,15 +345,16 @@ public void testVCFHeaderAddOtherLine() {
     }
 
     @Test
-    public void testVCFHeaderAddMetaDataLineDoesNotDuplicateContigs() {
-        File input = new File("src/test/resources/htsjdk/variant/ex2.vcf");
+    public void testAddMetaDataLineDoesNotDuplicateContigs() {
+        final File input = new File("src/test/resources/htsjdk/variant/ex2.vcf");
 
-        VCFFileReader reader = new VCFFileReader(input, false);
-        VCFHeader header = reader.getFileHeader();
+        final VCFFileReader reader = new VCFFileReader(input, false);
+        final VCFHeader header = reader.getFileHeader();
 
         final int numContigLinesBefore = header.getContigLines().size();
 
-        VCFInfoHeaderLine newInfoField = new VCFInfoHeaderLine("test", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "test info field");
+        final VCFInfoHeaderLine newInfoField = new VCFInfoHeaderLine(
+                "test", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "test info field");
         header.addMetaDataLine(newInfoField);
 
         // getting the sequence dictionary was failing due to duplicating contigs in issue #214,
@@ -333,109 +367,280 @@ public void testVCFHeaderAddMetaDataLineDoesNotDuplicateContigs() {
     }
 
     @Test
-    public void testVCFHeaderAddDuplicateContigLine() {
-        File input = new File("src/test/resources/htsjdk/variant/ex2.vcf");
-
-        VCFFileReader reader = new VCFFileReader(input, false);
-        VCFHeader header = reader.getFileHeader();
-
-
-        final int numContigLinesBefore = header.getContigLines().size();
-        // try to readd the first contig line
-        header.addMetaDataLine(header.getContigLines().get(0));
-        final int numContigLinesAfter = header.getContigLines().size();
-
-        // assert that we have the same number of contig lines before and after
-        Assert.assertEquals(numContigLinesBefore, numContigLinesAfter);
-    }
-
-    @Test
-    public void testVCFHeaderAddDuplicateHeaderLine() {
-        File input = new File("src/test/resources/htsjdk/variant/ex2.vcf");
+    public void testAddDuplicateKeyValueHeaderLine() {
+        final File input = new File("src/test/resources/htsjdk/variant/ex2.vcf");
 
-        VCFFileReader reader = new VCFFileReader(input, false);
-        VCFHeader header = reader.getFileHeader();
+        final VCFFileReader reader = new VCFFileReader(input, false);
+        final VCFHeader header = reader.getFileHeader();
 
-        VCFHeaderLine newHeaderLine = new VCFHeaderLine("key", "value");
+        final VCFHeaderLine newHeaderLine = new VCFHeaderLine("key", "value");
         // add this new header line
         header.addMetaDataLine(newHeaderLine);
 
         final int numHeaderLinesBefore = header.getOtherHeaderLines().size();
-        // readd the same header line
+        // add the same header line again
         header.addMetaDataLine(newHeaderLine);
         final int numHeaderLinesAfter = header.getOtherHeaderLines().size();
 
-        // assert that we have the same number of other header lines before and after
+        // Note: we don't allow duplicate unstructured lines with the same key unless they have
+        // different content
+        // assert that we have the same number of other header lines before and after
         Assert.assertEquals(numHeaderLinesBefore, numHeaderLinesAfter);
     }
 
+    @Test
+    public void testSimpleHeaderLineGenericFieldGetter() {
+        final VCFHeader header = VCFHeaderUnitTestData.createHeaderFromString(VCFHeaderUnitTestData.getVCFV42TestHeaderString());
+        final List<VCFFilterHeaderLine> filters = header.getFilterLines();
+        final VCFFilterHeaderLine filterHeaderLine = filters.get(0);
+        final Map<String,String> genericFields = filterHeaderLine.getGenericFields();
+        Assert.assertEquals(genericFields.get("ID"),"NoQCALL");
+        Assert.assertEquals(genericFields.get("Description"),"Variant called by Dindel but not confirmed by QCALL");
+    }
+
+    @Test
+    public void testSerialization() throws Exception {
+        final VCFFileReader reader = new VCFFileReader(new File("src/test/resources/htsjdk/variant/HiSeq.10000.vcf"), false);
+        final VCFHeader originalHeader = reader.getFileHeader();
+        reader.close();
+
+        final VCFHeader deserializedHeader = TestUtil.serializeAndDeserialize(originalHeader);
+
+        Assert.assertEquals(deserializedHeader.getMetaDataInInputOrder(), originalHeader.getMetaDataInInputOrder(), "Header metadata does not match before/after serialization");
+        Assert.assertEquals(deserializedHeader.getContigLines(), originalHeader.getContigLines(), "Contig header lines do not match before/after serialization");
+        Assert.assertEquals(deserializedHeader.getFilterLines(), originalHeader.getFilterLines(), "Filter header lines do not match before/after serialization");
+        Assert.assertEquals(deserializedHeader.getFormatHeaderLines(), originalHeader.getFormatHeaderLines(), "Format header lines do not match before/after serialization");
+        Assert.assertEquals(deserializedHeader.getIDHeaderLines(), originalHeader.getIDHeaderLines(), "ID header lines do not match before/after serialization");
+        Assert.assertEquals(deserializedHeader.getInfoHeaderLines(), originalHeader.getInfoHeaderLines(), "Info header lines do not match before/after serialization");
+        Assert.assertEquals(deserializedHeader.getOtherHeaderLines(), originalHeader.getOtherHeaderLines(), "Other header lines do not match before/after serialization");
+        Assert.assertEquals(deserializedHeader.getGenotypeSamples(), originalHeader.getGenotypeSamples(), "Genotype samples not the same before/after serialization");
+        Assert.assertEquals(deserializedHeader.samplesWereAlreadySorted(), originalHeader.samplesWereAlreadySorted(), "Sortedness of samples not the same before/after serialization");
+        Assert.assertEquals(deserializedHeader.getSampleNamesInOrder(), originalHeader.getSampleNamesInOrder(), "Sorted list of sample names in header not the same before/after serialization");
+        Assert.assertEquals(deserializedHeader.getSampleNameToOffset(), originalHeader.getSampleNameToOffset(), "Sample name to offset map not the same before/after serialization");
+        Assert.assertEquals(deserializedHeader.toString(), originalHeader.toString(), "String representation of header not the same before/after serialization");
+    }
+
     @DataProvider(name="validHeaderVersionTransitions")
     public Object[][] validHeaderVersionTransitions() {
-        // v4.3 can never transition, all other version transitions are allowed
+        // all (forward) version transitions are allowed
         return new Object[][] {
+                {VCFHeaderVersion.VCF3_2, VCFHeaderVersion.VCF3_3},
+                {VCFHeaderVersion.VCF3_2, VCFHeaderVersion.VCF4_0},
+                {VCFHeaderVersion.VCF3_2, VCFHeaderVersion.VCF4_1},
+                {VCFHeaderVersion.VCF3_2, VCFHeaderVersion.VCF4_2},
+                {VCFHeaderVersion.VCF3_2, VCFHeaderVersion.VCF4_3},
+
+                {VCFHeaderVersion.VCF3_3, VCFHeaderVersion.VCF4_0},
+                {VCFHeaderVersion.VCF3_3, VCFHeaderVersion.VCF4_1},
+                {VCFHeaderVersion.VCF3_3, VCFHeaderVersion.VCF4_2},
+                {VCFHeaderVersion.VCF3_3, VCFHeaderVersion.VCF4_3},
+
                 {VCFHeaderVersion.VCF4_0, VCFHeaderVersion.VCF4_0},
                 {VCFHeaderVersion.VCF4_0, VCFHeaderVersion.VCF4_1},
                 {VCFHeaderVersion.VCF4_0, VCFHeaderVersion.VCF4_2},
+                {VCFHeaderVersion.VCF4_0, VCFHeaderVersion.VCF4_3},
+
                 {VCFHeaderVersion.VCF4_1, VCFHeaderVersion.VCF4_1},
                 {VCFHeaderVersion.VCF4_1, VCFHeaderVersion.VCF4_2},
+                {VCFHeaderVersion.VCF4_1, VCFHeaderVersion.VCF4_3},
+
                 {VCFHeaderVersion.VCF4_2, VCFHeaderVersion.VCF4_2},
+                {VCFHeaderVersion.VCF4_2, VCFHeaderVersion.VCF4_3},
+
                 {VCFHeaderVersion.VCF4_3, VCFHeaderVersion.VCF4_3}
         };
     }
 
     @DataProvider(name="invalidHeaderVersionTransitions")
     public Object[][] invalidHeaderVersionTransitions() {
-        // v4.3 can never transition with, all other version transitions are allowed
         return new Object[][] {
-                {VCFHeaderVersion.VCF4_3, VCFHeaderVersion.VCF4_0},
-                {VCFHeaderVersion.VCF4_3, VCFHeaderVersion.VCF4_1},
+                //reject any attempt to go backwards in time
                 {VCFHeaderVersion.VCF4_3, VCFHeaderVersion.VCF4_2},
-                {VCFHeaderVersion.VCF4_0, VCFHeaderVersion.VCF4_3},
-                {VCFHeaderVersion.VCF4_1, VCFHeaderVersion.VCF4_3},
-                {VCFHeaderVersion.VCF4_2, VCFHeaderVersion.VCF4_3},
+                {VCFHeaderVersion.VCF4_3, VCFHeaderVersion.VCF4_1},
+                {VCFHeaderVersion.VCF4_3, VCFHeaderVersion.VCF4_0},
+                {VCFHeaderVersion.VCF4_3, VCFHeaderVersion.VCF3_3},
+                {VCFHeaderVersion.VCF4_3, VCFHeaderVersion.VCF3_2},
+
+                {VCFHeaderVersion.VCF4_2, VCFHeaderVersion.VCF4_1},
+                {VCFHeaderVersion.VCF4_2, VCFHeaderVersion.VCF4_0},
+                {VCFHeaderVersion.VCF4_2, VCFHeaderVersion.VCF3_3},
+                {VCFHeaderVersion.VCF4_2, VCFHeaderVersion.VCF3_2},
+
+                {VCFHeaderVersion.VCF4_1, VCFHeaderVersion.VCF4_0},
+                {VCFHeaderVersion.VCF4_1, VCFHeaderVersion.VCF3_3},
+                {VCFHeaderVersion.VCF4_1, VCFHeaderVersion.VCF3_2},
+
+                {VCFHeaderVersion.VCF4_0, VCFHeaderVersion.VCF3_3},
+                {VCFHeaderVersion.VCF4_0, VCFHeaderVersion.VCF3_2},
+
+                {VCFHeaderVersion.VCF3_3, VCFHeaderVersion.VCF3_2},
         };
     }
 
     @Test(dataProvider="validHeaderVersionTransitions")
-    public void testValidHeaderVersionTransition(final VCFHeaderVersion fromVersion, final VCFHeaderVersion toVersion) {
-        doHeaderTransition(fromVersion, toVersion);
+    public void testAddVersionLineValidTransition(final VCFHeaderVersion fromVersion, final VCFHeaderVersion toVersion) {
+        final VCFHeader vcfHeader = new VCFHeader(VCFHeader.makeHeaderVersionLineSet(fromVersion), Collections.emptySet());
+        vcfHeader.addMetaDataLine(VCFHeader.makeHeaderVersionLine(toVersion));
+        Assert.assertEquals(vcfHeader.getVCFHeaderVersion(), toVersion);
     }
 
     @Test(dataProvider="invalidHeaderVersionTransitions", expectedExceptions = TribbleException.class)
-    public void testInvalidHeaderVersionTransition(final VCFHeaderVersion fromVersion, final VCFHeaderVersion toVersion) {
-        doHeaderTransition(fromVersion, toVersion);
+    public void testAddVersionInvalidTransition(final VCFHeaderVersion fromVersion, final VCFHeaderVersion toVersion) {
+        new VCFHeader(VCFHeader.makeHeaderVersionLineSet(fromVersion), Collections.emptySet())
+                .addMetaDataLine(VCFHeader.makeHeaderVersionLine(toVersion));
+    }
+
+    @DataProvider(name = "vcfVersions")
+    public Object[][] vcfVersions() {
+        return new Object[][]{
+                {VCFHeaderVersion.VCF3_2},
+                {VCFHeaderVersion.VCF3_3},
+                {VCFHeaderVersion.VCF4_0},
+                {VCFHeaderVersion.VCF4_1},
+                {VCFHeaderVersion.VCF4_2},
+                {VCFHeaderVersion.VCF4_3}
+        };
+    }
+
+    @Test(expectedExceptions = TribbleException.class)
+    public void testVersionUpgradeWithValidationFailure() {
+        // test mixing header versions where the old version header has a line that fails validation
+        // using the resulting (newer) version
+
+        // create a 4.2 header with a 4.2 style pedigree line (one that has no ID)
+        final VCFHeader vcfHeader = new VCFHeader(VCFHeader.makeHeaderVersionLineSet(VCFHeaderVersion.VCF4_2));
+        vcfHeader.addMetaDataLine(new VCFHeaderLine(VCFConstants.PEDIGREE_HEADER_KEY, "<Name_0=G0-ID,Name_1=G1-ID>"));
+
+        // now try to force a version upgrade to 4.3, old style pedigree line should cause a failure
+        vcfHeader.addMetaDataLine(VCFHeader.makeHeaderVersionLine(VCFHeaderVersion.VCF4_3));
+    }
+
+    @Test(expectedExceptions = TribbleException.class)
+    public void testAddLineWithValidationFailure() {
+        // create a 4.3 header, and then try to add an old-style pedigree line (one that has no ID)
+        // which should cause a failure
+        final VCFHeader vcfHeader = new VCFHeader(VCFHeader.makeHeaderVersionLineSet(VCFHeaderVersion.VCF4_3));
+        vcfHeader.addMetaDataLine(new VCFHeaderLine(VCFConstants.PEDIGREE_HEADER_KEY, "<Name_0=G0-ID,Name_1=G1-ID>"));
+    }
+
+
+    @Test(expectedExceptions = TribbleException.class)
+    public void testConstructorRequiresFileFormatLine() {
+        final Set<VCFHeaderLine> metaDataSet = VCFHeaderUnitTestData.getV42HeaderLinesWITHOUTFormatString(); // 4.2 header is compatible with all 4.x versions
+        // create a new header from this set (containing no fileformat line), no requested version in constructor
+        new VCFHeader(metaDataSet, Collections.emptySet()); // expected to throw: no fileformat line is present
     }
 
-    private void doHeaderTransition(final VCFHeaderVersion fromVersion, final VCFHeaderVersion toVersion) {
-        final VCFHeader vcfHeader =
-                fromVersion == null ?
-                        new VCFHeader() :
-                        new VCFHeader(fromVersion, Collections.EMPTY_SET, Collections.EMPTY_SET);
-        vcfHeader.setVCFHeaderVersion(toVersion);
+    @Test(dataProvider = "vcfVersions")
+    public void testConstructorWithSingleFileFormatLine(final VCFHeaderVersion vcfVersion) {
+        final Set<VCFHeaderLine> metaDataSet = VCFHeaderUnitTestData.getV42HeaderLinesWITHOUTFormatString(); // 4.2 header is compatible with all 4.x versions
+
+        // add in the corresponding fileformat line, then create a new header from the set;
+        // the constructor takes no separate version argument, so the fileformat line is the only
+        // version in play, and the resulting header's version should match it
+        metaDataSet.add(VCFHeader.makeHeaderVersionLine(vcfVersion));
+        final VCFHeader vcfHeader = new VCFHeader(metaDataSet, Collections.emptySet());
+        Assert.assertEquals(vcfHeader.getVCFHeaderVersion(), vcfVersion);
     }
 
     @Test
-    public void testVCFHeaderSerialization() throws Exception {
-        final VCFFileReader reader = new VCFFileReader(new File("src/test/resources/htsjdk/variant/HiSeq.10000.vcf"), false);
-        final VCFHeader originalHeader = reader.getFileHeader();
-        reader.close();
+    public void testConstructorWithMultipleFileFormatLines() {
+        final Set<VCFHeaderLine> metaDataSet = VCFHeaderUnitTestData.getV42HeaderLinesWITHOUTFormatString(); // this (4.2) header is compatible with all 4.x versions
+        final int beforeSize = metaDataSet.size();
 
-        final VCFHeader deserializedHeader = TestUtil.serializeAndDeserialize(originalHeader);
+        // when multiple version lines are present, all but the last one are ignored
+        metaDataSet.add(VCFHeader.makeHeaderVersionLine(VCFHeaderVersion.VCF4_2));
+        metaDataSet.add(VCFHeader.makeHeaderVersionLine(VCFHeaderVersion.VCF4_1));
+        Assert.assertEquals(metaDataSet.size(), beforeSize + 2);
 
-        Assert.assertEquals(deserializedHeader.getMetaDataInInputOrder(), originalHeader.getMetaDataInInputOrder(), "Header metadata does not match before/after serialization");
-        Assert.assertEquals(deserializedHeader.getContigLines(), originalHeader.getContigLines(), "Contig header lines do not match before/after serialization");
-        Assert.assertEquals(deserializedHeader.getFilterLines(), originalHeader.getFilterLines(), "Filter header lines do not match before/after serialization");
-        Assert.assertEquals(deserializedHeader.getFormatHeaderLines(), originalHeader.getFormatHeaderLines(), "Format header lines do not match before/after serialization");
-        Assert.assertEquals(deserializedHeader.getIDHeaderLines(), originalHeader.getIDHeaderLines(), "ID header lines do not match before/after serialization");
-        Assert.assertEquals(deserializedHeader.getInfoHeaderLines(), originalHeader.getInfoHeaderLines(), "Info header lines do not match before/after serialization");
-        Assert.assertEquals(deserializedHeader.getOtherHeaderLines(), originalHeader.getOtherHeaderLines(), "Other header lines do not match before/after serialization");
-        Assert.assertEquals(deserializedHeader.getGenotypeSamples(), originalHeader.getGenotypeSamples(), "Genotype samples not the same before/after serialization");
-        Assert.assertEquals(deserializedHeader.samplesWereAlreadySorted(), originalHeader.samplesWereAlreadySorted(), "Sortedness of samples not the same before/after serialization");
-        Assert.assertEquals(deserializedHeader.getSampleNamesInOrder(), originalHeader.getSampleNamesInOrder(), "Sorted list of sample names in header not the same before/after serialization");
-        Assert.assertEquals(deserializedHeader.getSampleNameToOffset(), originalHeader.getSampleNameToOffset(), "Sample name to offset map not the same before/after serialization");
-        Assert.assertEquals(deserializedHeader.toString(), originalHeader.toString(), "String representation of header not the same before/after serialization");
+        // create a new versioned header from this set
+        final VCFHeader vcfHeader = new VCFHeader(metaDataSet, Collections.emptySet());
+        Assert.assertEquals(vcfHeader.getVCFHeaderVersion(), VCFHeaderVersion.VCF4_1);
+    }
+
+    @Test(expectedExceptions = TribbleException.VersionValidationFailure.class)
+    public void testConstructorWithInvalidLineForVersion() {
+        final Set<VCFHeaderLine> metaDataSet = VCFHeaderUnitTestData.getV42HeaderLinesWITHOUTFormatString(); // this (4.2) header is compatible with all 4.x versions
+        metaDataSet.add(VCFHeader.makeHeaderVersionLine(VCFHeaderVersion.VCF4_2));
+        final Map<String, String> attributes = new LinkedHashMap<>();
+        attributes.put("ID", "id");
+        metaDataSet.add(new VCFPedigreeHeaderLine(attributes));
+        new VCFHeader(metaDataSet, Collections.emptySet());
+    }
+
+    @Test(expectedExceptions = TribbleException.VersionValidationFailure.class)
+    public void testAddMetaDataLineInvalidForVersion() {
+        final Set<VCFHeaderLine> metaDataSet = VCFHeaderUnitTestData.getV42HeaderLinesWITHOUTFormatString(); // this (4.2) header is compatible with all 4.x versions
+        metaDataSet.add(VCFHeader.makeHeaderVersionLine(VCFHeaderVersion.VCF4_2));
+        final VCFHeader header = new VCFHeader(metaDataSet, Collections.emptySet());
+        Assert.assertEquals(header.getVCFHeaderVersion(), VCFHeaderVersion.VCF4_2);
+        final Map<String, String> attributes = new LinkedHashMap<>();
+        attributes.put("ID", "id");
+        header.addMetaDataLine(new VCFPedigreeHeaderLine(attributes));
+    }
+
+    @Test(expectedExceptions = TribbleException.class)
+    public void testAddMetaDataLineFileFormat() {
+        final Set<VCFHeaderLine> metaDataSet = VCFHeaderUnitTestData.getV42HeaderLinesWITHOUTFormatString(); // this (4.2) header is compatible with all 4.x versions
+        final int beforeSize = metaDataSet.size();
+
+        metaDataSet.add(VCFHeader.makeHeaderVersionLine(VCFHeaderVersion.VCF4_1));
+        Assert.assertEquals(metaDataSet.size(), beforeSize + 1);
+
+        // create a new versioned header from this set
+        final VCFHeader vcfHeader = new VCFHeader(metaDataSet, Collections.emptySet());
+        Assert.assertEquals(vcfHeader.getVCFHeaderVersion(), VCFHeaderVersion.VCF4_1);
+
+        // add a new line that uses the same header version already established
+        vcfHeader.addMetaDataLine(VCFHeader.makeHeaderVersionLine(VCFHeaderVersion.VCF4_1));
+
+        // add a new line that tries to move the version forward
+        vcfHeader.addMetaDataLine(VCFHeader.makeHeaderVersionLine(VCFHeaderVersion.VCF4_2));
+
+        // now try to go backwards (throws)
+        vcfHeader.addMetaDataLine(VCFHeader.makeHeaderVersionLine(VCFHeaderVersion.VCF4_1));
+    }
+
+    @Test
+    public void testPreserveSequenceDictionaryAttributes() {
+        // Round trip a SAMSequenceDictionary with attributes, through a VCFHeader, and back
+        // to a SAMSequenceDictionary with the same attributes.
+        // https://github.com/samtools/htsjdk/issues/730
+
+        final String assemblyString = "hg37";
+        final String md5String = "68b329da9893e34099c7d8ad5cb9c940";
+        final String speciesString = "Home Sapiens";
+        final String urlString = "http://www.refserve.org:8080/path/";
+
+        final SAMSequenceDictionary samDict = new SAMSequenceDictionary();
+
+        final SAMSequenceRecord seqRec1 = new SAMSequenceRecord("1", 1);
+        seqRec1.setAssembly(assemblyString);
+        seqRec1.setMd5(md5String);
+        seqRec1.setAttribute(SAMSequenceRecord.URI_TAG, urlString);
+        seqRec1.setSpecies(speciesString);
+        final SAMSequenceRecord seqRec2 = new SAMSequenceRecord("2", 1);
+        samDict.addSequence(seqRec1);
+        samDict.addSequence(seqRec2);
+
+        final VCFHeader vcfHeader = new VCFHeader();
+        vcfHeader.setSequenceDictionary(samDict);
+        final SAMSequenceDictionary roundTrippedDict = vcfHeader.getSequenceDictionary();
+
+        final SAMSequenceRecord rtRec1 = roundTrippedDict.getSequence("1");
+        Assert.assertEquals(assemblyString, rtRec1.getAssembly());
+        Assert.assertEquals(md5String, rtRec1.getMd5());
+        Assert.assertEquals(urlString, rtRec1.getAttribute(SAMSequenceRecord.URI_TAG));
+        Assert.assertEquals(speciesString, rtRec1.getSpecies());
+
+        Assert.assertEquals(seqRec1, roundTrippedDict.getSequence("1")); // somewhat redundant check on full record
+        Assert.assertEquals(seqRec2, roundTrippedDict.getSequence("2"));
     }
 
+    /////////////////////////////////////////////////////////////////
+    ////////////////************************* End new tests block...
+    /////////////////////////////////////////////////////////////////
+
     @Test
     public void testVCFHeaderQuoteEscaping() throws Exception {
         // this test ensures that the end-to-end process of quote escaping is stable when headers are
@@ -449,10 +654,9 @@ public void testVCFHeaderQuoteEscaping() throws Exception {
         final VCFHeader originalHeader = originalFileReader.getFileHeader();
 
         // add a header line with quotes to the header
-        final Map<String, String> attributes = new LinkedHashMap<>();
-        attributes.put("ID", "VariantFiltration");
-        attributes.put("CommandLineOptions", "filterName=[ANNOTATION] filterExpression=[ANNOTATION == \"NA\" || ANNOTATION <= 2.0]");
-        final VCFSimpleHeaderLine addedHeaderLine = new VCFSimpleHeaderLine("GATKCommandLine.Test", attributes);
+        final VCFSimpleHeaderLine addedHeaderLine = new VCFFilterHeaderLine(
+                "FakeFilter",
+                "filterName=[ANNOTATION] filterExpression=[ANNOTATION == \"NA\" || ANNOTATION <= 2.0]");
         originalHeader.addMetaDataLine(addedHeaderLine);
 
         final VCFFilterHeaderLine originalCopyAnnotationLine1 = originalHeader.getFilterHeaderLine("ANNOTATION");
@@ -487,7 +691,7 @@ public void testVCFHeaderQuoteEscaping() throws Exception {
         firstCopyWriter.writeHeader(originalHeader);
         final CloseableIterator<VariantContext> firstCopyVariantIterator = originalFileReader.iterator();
         while (firstCopyVariantIterator.hasNext()) {
-            VariantContext variantContext = firstCopyVariantIterator.next();
+            final VariantContext variantContext = firstCopyVariantIterator.next();
             firstCopyWriter.add(variantContext);
         }
         originalFileReader.close();
@@ -496,7 +700,7 @@ public void testVCFHeaderQuoteEscaping() throws Exception {
         // read the copied file back in
         final VCFFileReader firstCopyReader = new VCFFileReader(firstCopyVCFFile, false);
         final VCFHeader firstCopyHeader = firstCopyReader.getFileHeader();
-        final VCFHeaderLine firstCopyNewHeaderLine = firstCopyHeader.getOtherHeaderLine("GATKCommandLine.Test");
+        final VCFFilterHeaderLine firstCopyNewHeaderLine = firstCopyHeader.getFilterHeaderLine("FakeFilter");
         Assert.assertNotNull(firstCopyNewHeaderLine);
 
         final VCFFilterHeaderLine firstCopyAnnotationLine1 = firstCopyHeader.getFilterHeaderLine("ANNOTATION");
@@ -530,7 +734,7 @@ public void testVCFHeaderQuoteEscaping() throws Exception {
         secondCopyWriter.writeHeader(firstCopyHeader);
         final CloseableIterator<VariantContext> secondCopyVariantIterator = firstCopyReader.iterator();
         while (secondCopyVariantIterator.hasNext()) {
-            VariantContext variantContext = secondCopyVariantIterator.next();
+            final VariantContext variantContext = secondCopyVariantIterator.next();
             secondCopyWriter.add(variantContext);
         }
         secondCopyWriter.close();
@@ -539,7 +743,7 @@ public void testVCFHeaderQuoteEscaping() throws Exception {
         final VCFFileReader secondCopyReader = new VCFFileReader(secondCopyVCFFile, false);
         final VCFHeader secondCopyHeader = secondCopyReader.getFileHeader();
 
-        final VCFHeaderLine secondCopyNewHeaderLine = secondCopyHeader.getOtherHeaderLine("GATKCommandLine.Test");
+        final VCFFilterHeaderLine secondCopyNewHeaderLine = secondCopyHeader.getFilterHeaderLine("FakeFilter");
         Assert.assertNotNull(secondCopyNewHeaderLine);
 
         final VCFFilterHeaderLine secondCopyAnnotationLine1 = secondCopyHeader.getFilterHeaderLine("ANNOTATION");
@@ -549,8 +753,8 @@ public void testVCFHeaderQuoteEscaping() throws Exception {
         Assert.assertNotNull(secondCopyAnnotationLine2);
 
         Assert.assertEquals(firstCopyNewHeaderLine, secondCopyNewHeaderLine);
-        Assert.assertEquals(firstCopyNewHeaderLine.toStringEncoding(), "GATKCommandLine.Test=<ID=VariantFiltration,CommandLineOptions=\"filterName=[ANNOTATION] filterExpression=[ANNOTATION == \\\"NA\\\" || ANNOTATION <= 2.0]\">");
-        Assert.assertEquals(secondCopyNewHeaderLine.toStringEncoding(), "GATKCommandLine.Test=<ID=VariantFiltration,CommandLineOptions=\"filterName=[ANNOTATION] filterExpression=[ANNOTATION == \\\"NA\\\" || ANNOTATION <= 2.0]\">");
+        Assert.assertEquals(firstCopyNewHeaderLine.toStringEncoding(), "FILTER=<ID=FakeFilter,Description=\"filterName=[ANNOTATION] filterExpression=[ANNOTATION == \\\"NA\\\" || ANNOTATION <= 2.0]\">");
+        Assert.assertEquals(secondCopyNewHeaderLine.toStringEncoding(), "FILTER=<ID=FakeFilter,Description=\"filterName=[ANNOTATION] filterExpression=[ANNOTATION == \\\"NA\\\" || ANNOTATION <= 2.0]\">");
 
         Assert.assertEquals(firstCopyAnnotationLine1, secondCopyAnnotationLine1);
         Assert.assertEquals(secondCopyAnnotationLine1.getGenericFieldValue("Description"), "ANNOTATION != \"NA\" || ANNOTATION <= 0.01");
@@ -574,136 +778,153 @@ public void testVCFHeaderQuoteEscaping() throws Exception {
 
     }
 
-    @Test
-    public void testVcf42Roundtrip() throws Exception {
-        // this test ensures that source/version fields are round-tripped properly
+    /////////////////////////////////////////////////////////////////////
+    // Private helper methods
+    /////////////////////////////////////////////////////////////////////
 
-        // read an existing VCF
-        File expectedFile = new File("src/test/resources/htsjdk/variant/Vcf4.2WithSourceVersionInfoFields.vcf");
+    // Serialize/encode the header to a temp file, then read the metadata lines back in sorted order
+    private Set<VCFHeaderLine> getRoundTripEncoded(final VCFHeader header) throws IOException {
+        final File myTempFile = File.createTempFile("VCFHeader", "vcf");
+        myTempFile.deleteOnExit(); // do not leave the temp file behind after the test run
+        try (final VariantContextWriter vcfWriter = new VariantContextWriterBuilder()
+                .setOutputFile(myTempFile)
+                .setOutputFileType(VariantContextWriterBuilder.OutputType.VCF)
+                .setOptions(VariantContextWriterBuilder.NO_OPTIONS).build()) {
+            vcfWriter.writeHeader(header);
+        }
+        try (final FileReader fileReader = new FileReader(myTempFile.getAbsolutePath())) { // closed after parsing
+            final LineIteratorImpl lineIterator = new LineIteratorImpl(new SynchronousLineReader(fileReader));
+            return ((VCFHeader) new VCFCodec().readActualHeader(lineIterator)).getMetaDataInSortedOrder();
+        }
+    }
 
-        // write the file out into a new copy
-        final File actualFile = File.createTempFile("testVcf4.2roundtrip.", FileExtensions.VCF);
-        actualFile.deleteOnExit();
 
-        try (final VCFFileReader originalFileReader = new VCFFileReader(expectedFile, false);
-             final VariantContextWriter copyWriter = new VariantContextWriterBuilder()
-                     .setOutputFile(actualFile)
-                     .setReferenceDictionary(createArtificialSequenceDictionary())
-                     .setOptions(EnumSet.of(Options.ALLOW_MISSING_FIELDS_IN_HEADER, Options.INDEX_ON_THE_FLY))
-                     .build()
-        ) {
-            final VCFHeader originalHeader = originalFileReader.getFileHeader();
-            
-            copyWriter.writeHeader(originalHeader);
-            for (final VariantContext variantContext : originalFileReader) {
-                copyWriter.add(variantContext);
-            }
-        }
+    private VCFHeader getHiSeqVCFHeader() {
+        final File vcf = new File("src/test/resources/htsjdk/variant/HiSeq.10000.vcf");
+        // try-with-resources closes the reader even if reading the header throws
+        try (final VCFFileReader reader = new VCFFileReader(vcf, false)) {
+            return reader.getFileHeader();
+        }
+    }
 
-        final String actualContents = new String(Files.readAllBytes(actualFile.toPath()), StandardCharsets.UTF_8);
-        final String expectedContents = new String(Files.readAllBytes(expectedFile.toPath()), StandardCharsets.UTF_8);
-        Assert.assertEquals(actualContents, expectedContents);
+    private static <T extends VCFHeaderLine> Collection<VCFHeaderLine> asCollectionOfVCFHeaderLine(final Collection<T> headers) {
+        // widen every element to the base VCFHeaderLine type so that contains tests work correctly
+        return headers.stream().map(VCFHeaderLine.class::cast).collect(Collectors.toList());
     }
 
+    @DataProvider(name="duplicateHeaderLineCases")
+    private Object[][] getDuplicateHeaderLineCases() {
+        return new Object[][] {
 
-    /**
-     * a little utility function for all tests to md5sum a file
-     * Shameless taken from:
-     * <p/>
-     * http://www.javalobby.org/java/forums/t84420.html
-     *
-     * @param file the file
-     * @return a string
-     */
-    private static String md5SumFile(File file) {
-        MessageDigest digest;
-        try {
-            digest = MessageDigest.getInstance("MD5");
-        } catch (NoSuchAlgorithmException e) {
-            throw new RuntimeException("Unable to find MD5 digest");
-        }
-        InputStream is;
-        try {
-            is = new FileInputStream(file);
-        } catch (FileNotFoundException e) {
-            throw new RuntimeException("Unable to open file " + file);
-        }
-        byte[] buffer = new byte[8192];
-        int read;
-        try {
-            while ((read = is.read(buffer)) > 0) {
-                digest.update(buffer, 0, read);
-            }
-            byte[] md5sum = digest.digest();
-            BigInteger bigInt = new BigInteger(1, md5sum);
-            return bigInt.toString(16);
-
-        } catch (IOException e) {
-            throw new RuntimeException("Unable to process file for MD5", e);
-        } finally {
-            try {
-                is.close();
-            } catch (IOException e) {
-                throw new RuntimeException("Unable to close input stream for MD5 calculation", e);
-            }
-        }
+                // these tests use VCFAltHeaderLine to test structured/ID lines, but the behavior should be the same
+                // for any header ID line
+
+                // duplicate IDs, duplicate description; line is dropped due to duplicate ID
+                { new VCFAltHeaderLine("X", "description1"),
+                        new VCFAltHeaderLine("X", "description1"), false },
+                // duplicate IDs, different descriptions;  line is dropped due to duplicate ID
+                { new VCFAltHeaderLine("X", "description1"),
+                        new VCFAltHeaderLine("X", "description2"), false },
+                // different IDs, different descriptions;  line is retained
+                { new VCFAltHeaderLine("X", "description1"),
+                        new VCFAltHeaderLine("Y", "description2"), true },
+                // different IDs, duplicate descriptions;  line is retained
+                { new VCFAltHeaderLine("X", "description"),
+                        new VCFAltHeaderLine("Y", "description"), true },
+
+                // .......unstructured header lines........
+
+                // duplicate key, duplicate value, line is dropped
+                { new VCFHeaderLine("CommandLine", "command"), new VCFHeaderLine("CommandLine", "command"), false },
+                // duplicate key, different value, line is retained
+                { new VCFHeaderLine("CommandLine", "command1"), new VCFHeaderLine("CommandLine", "command2"), true },
+
+                ///////////////////////////////////////////////////////////////////////////////////////////
+                // since the VCFHeaderLine constructor is public, it can be used erroneously to model header
+                // lines that have structured syntax, but which will not obey structured header line rules,
+                // since those are enabled via VCFSimpleHeaderLine, and VCFHeaderLine is intended to be used
+                // for non-structured lines. so include some tests that simulate this
+
+                // duplicate key, duplicate value (...duplicate ID), line is dropped
+                { new VCFHeaderLine("KEY", "<ID=ID1>"), new VCFHeaderLine("KEY", "<ID=ID1>"), false },
+                // duplicate key, different value (different ID), line is retained
+                { new VCFHeaderLine("KEY", "<ID=ID1>"), new VCFHeaderLine("KEY", "<ID=ID2>"), true },
+
+                //NOTE: this case illustrates how it's possible to use the API to cause two structured lines
+                // with duplicate IDs to be retained if they are not modeled as VCFStructuredHeaderLines
+                // duplicate key, different value (but IDENTICAL ID), line is RETAINED
+                { new VCFHeaderLine("KEY", "<ID=ID1>"), new VCFHeaderLine("KEY", "<ID=ID1,ATTRIBUTE=23>"), true },
+
+                // different key, duplicate value, line is retained
+                { new VCFHeaderLine("KEY1", "<ID=ID1>"), new VCFHeaderLine("KEY2", "<ID=ID1>"), true },
+                // different key, different value, line is retained
+                { new VCFHeaderLine("KEY1", "<ID=ID1>"), new VCFHeaderLine("KEY2", "<ID=ID2>"), true },
+        };
     }
 
-    private void checkMD5ofHeaderFile(VCFHeader header, String md5sum) {
-        File myTempFile = null;
-        PrintWriter pw = null;
-        try {
-            myTempFile = File.createTempFile("VCFHeader", "vcf");
-            myTempFile.deleteOnExit();
-            pw = new PrintWriter(myTempFile);
-        } catch (IOException e) {
-            Assert.fail("Unable to make a temp file!");
-        }
-        for (VCFHeaderLine line : header.getMetaDataInSortedOrder())
-            pw.println(line);
-        pw.close();
-        Assert.assertEquals(md5SumFile(myTempFile), md5sum);
-    }
-
-    public static final int VCF4headerStringCount = 16;
-
-    public static final String VCF4headerStrings =
-            "##fileformat=VCFv4.2\n" +
-                    "##filedate=2010-06-21\n" +
-                    "##reference=NCBI36\n" +
-                    "##INFO=<ID=GC, Number=0, Type=Flag, Description=\"Overlap with Gencode CCDS coding sequence\">\n" +
-                    "##INFO=<ID=DP, Number=1, Type=Integer, Description=\"Total number of reads in haplotype window\">\n" +
-                    "##INFO=<ID=AF, Number=A, Type=Float, Description=\"Dindel estimated population allele frequency\">\n" +
-                    "##INFO=<ID=CA, Number=1, Type=String, Description=\"Pilot 1 callability mask\">\n" +
-                    "##INFO=<ID=HP, Number=1, Type=Integer, Description=\"Reference homopolymer tract length\">\n" +
-                    "##INFO=<ID=NS, Number=1, Type=Integer, Description=\"Number of samples with data\">\n" +
-                    "##INFO=<ID=DB, Number=0, Type=Flag, Description=\"dbSNP membership build 129 - type match and indel sequence length match within 25 bp\">\n" +
-                    "##INFO=<ID=NR, Number=1, Type=Integer, Description=\"Number of reads covering non-ref variant on reverse strand\">\n" +
-                    "##INFO=<ID=NF, Number=1, Type=Integer, Description=\"Number of reads covering non-ref variant on forward strand\">\n" +
-                    "##FILTER=<ID=NoQCALL, Description=\"Variant called by Dindel but not confirmed by QCALL\">\n" +
-                    "##FORMAT=<ID=GT, Number=1, Type=String, Description=\"Genotype\">\n" +
-                    "##FORMAT=<ID=HQ, Number=2, Type=Integer, Description=\"Haplotype quality\">\n" +
-                    "##FORMAT=<ID=GQ, Number=1, Type=Integer, Description=\"Genotype quality\">\n" +
-                    "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n";
-
-
-    public static final String VCF4headerStrings_with_negativeOne =
-            "##fileformat=VCFv4.2\n" +
-                    "##filedate=2010-06-21\n" +
-                    "##reference=NCBI36\n" +
-                    "##INFO=<ID=GC, Number=0, Type=Flag, Description=\"Overlap with Gencode CCDS coding sequence\">\n" +
-                    "##INFO=<ID=YY, Number=., Type=Integer, Description=\"Some weird value that has lots of parameters\">\n" +
-                    "##INFO=<ID=AF, Number=A, Type=Float, Description=\"Dindel estimated population allele frequency\">\n" +
-                    "##INFO=<ID=CA, Number=1, Type=String, Description=\"Pilot 1 callability mask\">\n" +
-                    "##INFO=<ID=HP, Number=1, Type=Integer, Description=\"Reference homopolymer tract length\">\n" +
-                    "##INFO=<ID=NS, Number=1, Type=Integer, Description=\"Number of samples with data\">\n" +
-                    "##INFO=<ID=DB, Number=0, Type=Flag, Description=\"dbSNP membership build 129 - type match and indel sequence length match within 25 bp\">\n" +
-                    "##INFO=<ID=NR, Number=1, Type=Integer, Description=\"Number of reads covering non-ref variant on reverse strand\">\n" +
-                    "##INFO=<ID=NF, Number=1, Type=Integer, Description=\"Number of reads covering non-ref variant on forward strand\">\n" +
-                    "##FILTER=<ID=NoQCALL, Description=\"Variant called by Dindel but not confirmed by QCALL\">\n" +
-                    "##FORMAT=<ID=GT, Number=1, Type=String, Description=\"Genotype\">\n" +
-                    "##FORMAT=<ID=HQ, Number=2, Type=Integer, Description=\"Haplotype quality\">\n" +
-                    "##FORMAT=<ID=TT, Number=., Type=Integer, Description=\"Lots of TTs\">\n" +
-                    "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n";
+    @Test(dataProvider = "duplicateHeaderLineCases")
+    private void testDuplicateHeaderLine(final VCFHeaderLine hl1, final VCFHeaderLine hl2, final boolean expectHL2Retained) {
+        final Set<VCFHeaderLine> headerLines = VCFHeader.makeHeaderVersionLineSet(VCFHeaderVersion.VCF4_2);
+        headerLines.add(hl1);
+        headerLines.add(hl2);
+        // expected counts: 2 when hl2 is dropped as a duplicate, 3 when it is retained
+        final int expectedLineCount = expectHL2Retained ? 3 : 2;
+        Assert.assertEquals(new VCFHeader(headerLines).getMetaDataInInputOrder().size(), expectedLineCount);
+    }
+
+    @Test
+    public void testAddOtherHeaderLineUnique() {
+        final String TEST_KEY = "testKey";
+        final VCFHeader vcfHeader = new VCFHeader(VCFHeader.makeHeaderVersionLineSet(VCFHeader.DEFAULT_VCF_VERSION));
+        final VCFHeaderLine otherLine1 = new VCFHeaderLine(TEST_KEY, "Test Value 1");
+        vcfHeader.addMetaDataLine(otherLine1);
+        final List<VCFHeaderLine> otherLines1 = vcfHeader.getOtherHeaderLines(TEST_KEY);
+        Assert.assertEquals(otherLines1.size(), 1);
+        Assert.assertTrue(otherLines1.contains(otherLine1));
+
+        // now add a second line with the same key; both lines should be returned for that key
+        final VCFHeaderLine otherLine2 = new VCFHeaderLine(TEST_KEY, "Test Value 2");
+        vcfHeader.addMetaDataLine(otherLine2);
+        final List<VCFHeaderLine> otherLines2 = vcfHeader.getOtherHeaderLines(TEST_KEY);
+        Assert.assertEquals(otherLines2.size(), 2);
+        Assert.assertTrue(otherLines2.contains(otherLine1));
+        Assert.assertTrue(otherLines2.contains(otherLine2));
+
+        // now call addOtherHeaderLineUnique with a 3rd line; it should replace the first two for that key
+        final VCFHeaderLine otherLine3= new VCFHeaderLine(TEST_KEY, "Test Value 3");
+        vcfHeader.addOtherHeaderLineUnique(otherLine3);
+        final List<VCFHeaderLine> otherLines3 = vcfHeader.getOtherHeaderLines(TEST_KEY);
+        Assert.assertEquals(otherLines3.size(), 1);
+        Assert.assertFalse(otherLines3.contains(otherLine1));
+        Assert.assertFalse(otherLines3.contains(otherLine2));
+        Assert.assertTrue(otherLines3.contains(otherLine3));
+    }
+
+    @Test(expectedExceptions = TribbleException.class)
+    public void testAddOtherHeaderLineUniqueRejectsIDLines() {
+        final VCFHeader header = new VCFHeader(VCFHeader.makeHeaderVersionLineSet(VCFHeader.DEFAULT_VCF_VERSION));
+        // a line that carries an ID is passed to addOtherHeaderLineUnique, which is expected to throw
+        header.addOtherHeaderLineUnique(new VCFSimpleHeaderLine("testKey", "testID", "test description"));
+    }
+
+    @Test
+    public void testGetOtherHeaderLineUnique() {
+        final String TEST_KEY = "testKey";
+        final VCFHeader vcfHeader = new VCFHeader(VCFHeader.makeHeaderVersionLineSet(VCFHeader.DEFAULT_VCF_VERSION));
+
+        // now add two lines with the same key
+        final VCFHeaderLine otherLine1 = new VCFHeaderLine(TEST_KEY, "Test Value 1");
+        vcfHeader.addMetaDataLine(otherLine1);
+        final VCFHeaderLine otherLine2 = new VCFHeaderLine(TEST_KEY, "Test Value 2");
+        vcfHeader.addMetaDataLine(otherLine2);
+
+        final List<VCFHeaderLine> otherLines = vcfHeader.getOtherHeaderLines(TEST_KEY);
+        Assert.assertEquals(otherLines.size(), 2);
+        Assert.assertTrue(otherLines.contains(otherLine1));
+        Assert.assertTrue(otherLines.contains(otherLine2));
+
+        // only the unique lookup itself may throw; a TribbleException during setup must fail the test
+        Assert.assertThrows(TribbleException.class, () -> vcfHeader.getOtherHeaderLineUnique(TEST_KEY));
+    }
 
 }
diff --git a/src/test/java/htsjdk/variant/vcf/VCFHeaderUnitTestData.java b/src/test/java/htsjdk/variant/vcf/VCFHeaderUnitTestData.java
new file mode 100644
index 0000000000..7b57a19b5a
--- /dev/null
+++ b/src/test/java/htsjdk/variant/vcf/VCFHeaderUnitTestData.java
@@ -0,0 +1,203 @@
+package htsjdk.variant.vcf;
+
+import htsjdk.tribble.TribbleException;
+import htsjdk.tribble.readers.LineIteratorImpl;
+import htsjdk.tribble.readers.SynchronousLineReader;
+import org.testng.Assert;
+
+import java.io.StringReader;
+import java.util.*;
+
+// Unit test data used by unit tests for VCFHeader, VCFMetaDataLines, and VCFHeaderLine hierarchy.
+public class VCFHeaderUnitTestData {
+    public final static VCFHeaderVersion TEST_VERSION = VCFHeader.DEFAULT_VCF_VERSION;
+
+    // fileformat line, returned in a new mutable list
+    public static List<VCFHeaderLine> getTestDefaultFileFormatLine() {
+        final List<VCFHeaderLine> fileFormatLines = new ArrayList<>();
+        fileFormatLines.add(VCFHeader.makeHeaderVersionLine(TEST_VERSION));
+        return fileFormatLines;
+    }
+
+    // FILTER lines, returned in a new mutable list
+    public static List<VCFHeaderLine> getTestFilterLines() {
+        final List<VCFHeaderLine> filterLines = new ArrayList<>();
+        filterLines.add(new VCFFilterHeaderLine("LowQual", "Description=\"Low quality\""));
+        filterLines.add(new VCFFilterHeaderLine("highDP", "Description=\"DP < 8\""));
+        filterLines.add(new VCFFilterHeaderLine("TruthSensitivityTranche98.50to98.80", "Truth sensitivity tranche level at VSQ Lod: -0.1106 <= x < 0.6654"));
+        return filterLines;
+    }
+
+    // FORMAT lines, returned in a new mutable list
+    public static List<VCFHeaderLine> getTestFormatLines() {
+        final List<VCFHeaderLine> formatLines = new ArrayList<>();
+        formatLines.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
+        formatLines.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Integer, "Genotype Quality"));
+        formatLines.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Approximate read depth (reads with MQ=255 or with bad mates are filtered)"));
+        formatLines.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_PL_KEY, VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"));
+        formatLines.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_ALLELE_DEPTHS, VCFHeaderLineCount.R, VCFHeaderLineType.Integer, "Allelic depths for the ref and alt alleles in the order listed"));
+        formatLines.add(new VCFFormatHeaderLine(VCFConstants.PHASE_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Read-backed phasing quality"));
+        formatLines.add(new VCFFormatHeaderLine("MLPSAF", VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Maximum likelihood expectation (MLE) for the alternate allele fraction"));
+        formatLines.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_FILTER_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Genotype-level filter"));
+        return formatLines;
+    }
+
+    // INFO lines, returned in a new mutable list
+    public static List<VCFHeaderLine> getTestInfoLines() {
+        final List<VCFHeaderLine> infoLines = new ArrayList<>();
+        infoLines.add(new VCFInfoHeaderLine(VCFConstants.END_KEY, 1, VCFHeaderLineType.Integer, "Stop position of the interval"));
+        infoLines.add(new VCFInfoHeaderLine(VCFConstants.DBSNP_KEY, 0, VCFHeaderLineType.Flag, "dbSNP Membership"));
+        infoLines.add(new VCFInfoHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Approximate read depth; some reads may have been filtered"));
+        infoLines.add(new VCFInfoHeaderLine(VCFConstants.STRAND_BIAS_KEY, 1, VCFHeaderLineType.Float, "Strand Bias"));
+        infoLines.add(new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed"));
+        infoLines.add(new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"));
+        infoLines.add(new VCFInfoHeaderLine(VCFConstants.ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of alleles in called genotypes"));
+        infoLines.add(new VCFInfoHeaderLine(VCFConstants.MAPPING_QUALITY_ZERO_KEY, 1, VCFHeaderLineType.Integer, "Total Mapping Quality Zero Reads"));
+        infoLines.add(new VCFInfoHeaderLine(VCFConstants.RMS_MAPPING_QUALITY_KEY, 1, VCFHeaderLineType.Float, "RMS Mapping Quality"));
+        infoLines.add(new VCFInfoHeaderLine(VCFConstants.SOMATIC_KEY, 0, VCFHeaderLineType.Flag, "Somatic event"));
+        return infoLines;
+    }
+
+    // CONTIG lines, returned in a new mutable list
+    public static List<VCFHeaderLine> getTestContigLines() {
+        final List<VCFHeaderLine> contigLines = new ArrayList<>();
+        contigLines.add(new VCFContigHeaderLine(Collections.singletonMap("ID", "1"), 0));
+        contigLines.add(new VCFContigHeaderLine(Collections.singletonMap("ID", "2"), 1));
+        contigLines.add(new VCFContigHeaderLine(Collections.singletonMap("ID", "3"), 2));
+        return contigLines;
+    }
+
+    // misc (unstructured key=value) lines, returned in a new mutable list
+    public static List<VCFHeaderLine> getTestMiscellaneousLines() {
+        final List<VCFHeaderLine> miscLines = new ArrayList<>();
+        miscLines.add(new VCFHeaderLine("reference", "g37"));
+        miscLines.add(new VCFHeaderLine("GATKCommandLine", "SelectVariants and such."));
+        return miscLines;
+    }
+
+    // Return a full set of metadata lines, retaining insertion order in a LinkedHashSet.
+    public static LinkedHashSet<VCFHeaderLine> getTestMetaDataLinesSet() {
+        final LinkedHashSet<VCFHeaderLine> allHeaderLines = new LinkedHashSet<>(); // preserve order
+        allHeaderLines.addAll(getTestDefaultFileFormatLine());
+        allHeaderLines.addAll(getTestFilterLines());
+        allHeaderLines.addAll(getTestFormatLines());
+        allHeaderLines.addAll(getTestInfoLines());
+        allHeaderLines.addAll(getTestContigLines());
+        allHeaderLines.addAll(getTestMiscellaneousLines());
+        // sanity check: the set must hold every line, i.e. none was collapsed as a duplicate
+        Assert.assertEquals(allHeaderLines.size(),
+                1 + // file format line
+                        getTestFilterLines().size() + getTestFormatLines().size() +
+                        getTestInfoLines().size() + getTestContigLines().size() + getTestMiscellaneousLines().size());
+        return allHeaderLines;
+    }
+
+    // Wrap the full set of test metadata lines in a new VCFMetaDataLines.
+    public static VCFMetaDataLines getTestMetaDataLines() {
+        final VCFMetaDataLines allMetaDataLines = new VCFMetaDataLines();
+        allMetaDataLines.addMetaDataLines(getTestMetaDataLinesSet());
+        return allMetaDataLines;
+    }
+
+    private static final int VCF_4_HEADER_STRING_COUNT = 16; // 17 lines in each test header string, -1 for the #CHROM... line
+
+    public static String getVCFV42TestHeaderString() {
+        return "##fileformat=VCFv4.2\n" +
+                        "##filedate=2010-06-21\n" +
+                        "##reference=NCBI36\n" +
+                        "##INFO=<ID=GC, Number=0, Type=Flag, Description=\"Overlap with Gencode CCDS coding sequence\">\n" +
+                        "##INFO=<ID=DP, Number=1, Type=Integer, Description=\"Total number of reads in haplotype window\">\n" +
+                        "##INFO=<ID=AF, Number=A, Type=Float, Description=\"Dindel estimated population allele frequency\">\n" +
+                        "##INFO=<ID=CA, Number=1, Type=String, Description=\"Pilot 1 callability mask\">\n" +
+                        "##INFO=<ID=HP, Number=1, Type=Integer, Description=\"Reference homopolymer tract length\">\n" +
+                        "##INFO=<ID=NS, Number=1, Type=Integer, Description=\"Number of samples with data\">\n" +
+                        "##INFO=<ID=DB, Number=0, Type=Flag, Description=\"dbSNP membership build 129 - type match and indel sequence length match within 25 bp\">\n" +
+                        "##INFO=<ID=NR, Number=1, Type=Integer, Description=\"Number of reads covering non-ref variant on reverse strand\">\n" +
+                        "##INFO=<ID=NF, Number=1, Type=Integer, Description=\"Number of reads covering non-ref variant on forward strand\">\n" +
+                        "##FILTER=<ID=NoQCALL, Description=\"Variant called by Dindel but not confirmed by QCALL\">\n" +
+                        "##FORMAT=<ID=GT, Number=1, Type=String, Description=\"Genotype\">\n" +
+                        "##FORMAT=<ID=HQ, Number=2, Type=Integer, Description=\"Haplotype quality\">\n" +
+                        "##FORMAT=<ID=GQ, Number=1, Type=Integer, Description=\"Genotype quality\">\n" +
+                        "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n";
+    }
+
+    public static final String VCF42headerStrings_with_negativeOne =
+            "##fileformat=VCFv4.2\n" +
+                    "##filedate=2010-06-21\n" +
+                    "##reference=NCBI36\n" +
+                    "##INFO=<ID=GC, Number=0, Type=Flag, Description=\"Overlap with Gencode CCDS coding sequence\">\n" +
+                    "##INFO=<ID=YY, Number=., Type=Integer, Description=\"Some weird value that has lots of parameters\">\n" +
+                    "##INFO=<ID=AF, Number=A, Type=Float, Description=\"Dindel estimated population allele frequency\">\n" +
+                    "##INFO=<ID=CA, Number=1, Type=String, Description=\"Pilot 1 callability mask\">\n" +
+                    "##INFO=<ID=HP, Number=1, Type=Integer, Description=\"Reference homopolymer tract length\">\n" +
+                    "##INFO=<ID=NS, Number=1, Type=Integer, Description=\"Number of samples with data\">\n" +
+                    "##INFO=<ID=DB, Number=0, Type=Flag, Description=\"dbSNP membership build 129 - type match and indel sequence length match within 25 bp\">\n" +
+                    "##INFO=<ID=NR, Number=1, Type=Integer, Description=\"Number of reads covering non-ref variant on reverse strand\">\n" +
+                    "##INFO=<ID=NF, Number=1, Type=Integer, Description=\"Number of reads covering non-ref variant on forward strand\">\n" +
+                    "##FILTER=<ID=NoQCALL, Description=\"Variant called by Dindel but not confirmed by QCALL\">\n" +
+                    "##FORMAT=<ID=GT, Number=1, Type=String, Description=\"Genotype\">\n" +
+                    "##FORMAT=<ID=HQ, Number=2, Type=Integer, Description=\"Haplotype quality\">\n" +
+                    "##FORMAT=<ID=TT, Number=., Type=Integer, Description=\"Lots of TTs\">\n" +
+                    "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n";
+
+    public static Set<VCFHeaderLine> getV42HeaderLinesWITHOUTFormatString() {
+        // precondition - create a v4.2 VCFMetaDataLines and make sure its fileformat line really is v4.2
+        final Set<VCFHeaderLine> metaDataSet = getV42HeaderLinesWITHFormatString();
+        final VCFMetaDataLines metaDataLines = new VCFMetaDataLines();
+        metaDataLines.addMetaDataLines(metaDataSet);
+        final VCFHeaderLine versionLine = metaDataLines.getFileFormatLine();
+        Assert.assertEquals(
+                VCFHeaderVersion.toHeaderVersion(versionLine.getValue()),
+                VCFHeaderVersion.VCF4_2);
+
+        // remove the 4.2 version line from the original set, verify, and return the set with no fileformat line
+        metaDataSet.remove(versionLine);
+        Assert.assertNull(getVersionLineFromHeaderLineSet(metaDataSet));
+        return metaDataSet;
+    }
+
+    public static Set<VCFHeaderLine> getV42HeaderLinesWITHFormatString() {
+        // precondition - create a header from the v4.2 test header string and make sure it reports v4.2
+        final VCFHeader header = createHeaderFromString(getVCFV42TestHeaderString());
+        Assert.assertEquals(
+                header.getVCFHeaderVersion(),
+                VCFHeaderVersion.VCF4_2);
+
+        // return a mutable, insertion-ordered copy for test use
+        return new LinkedHashSet<>(header.getMetaDataInInputOrder());
+    }
+
+    public static VCFHeader createHeaderFromString(final String headerStr) {
+        final VCFCodec vcfCodec = new VCFCodec();
+        final LineIteratorImpl headerText = new LineIteratorImpl(new SynchronousLineReader(new StringReader(headerStr)));
+        final VCFHeader parsedHeader = (VCFHeader) vcfCodec.readActualHeader(headerText); // parsed from memory
+        Assert.assertEquals(parsedHeader.getMetaDataInInputOrder().size(), VCF_4_HEADER_STRING_COUNT);
+        return parsedHeader;
+    }
+
+    /**
+     * Find and return the VCF fileformat/version line.
+     *
+     * @param metaDataLines the header lines to search
+     * @return the fileformat/version line, or null if no fileformat/version line is found
+     * @throws TribbleException if more than one fileformat/version line is found
+     */
+    private static VCFHeaderLine getVersionLineFromHeaderLineSet(final Set<VCFHeaderLine> metaDataLines) {
+        // collect every line whose key is recognized as a fileformat key
+        final List<VCFHeaderLine> formatLines = new ArrayList<>();
+        for (final VCFHeaderLine headerLine : metaDataLines) {
+            if (VCFHeaderVersion.isFormatString(headerLine.getKey())) {
+                formatLines.add(headerLine);
+            }
+        }
+
+        if (formatLines.size() > 1) {
+            //throw if there are duplicate version lines
+            throw new TribbleException("Multiple version header lines found in header line list");
+        }
+
+        // zero or one candidate remains at this point
+        return formatLines.isEmpty() ? null : formatLines.get(0);
+    }
+
+}
diff --git a/src/test/java/htsjdk/variant/vcf/VCFInfoHeaderLineUnitTest.java b/src/test/java/htsjdk/variant/vcf/VCFInfoHeaderLineUnitTest.java
new file mode 100644
index 0000000000..9e2a82f15a
--- /dev/null
+++ b/src/test/java/htsjdk/variant/vcf/VCFInfoHeaderLineUnitTest.java
@@ -0,0 +1,86 @@
+package htsjdk.variant.vcf;
+
+import htsjdk.HtsjdkTest;
+import htsjdk.tribble.TribbleException;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+/**
+ * Test conditions that are unique to INFO lines (not covered by VCFCompoundHeaderLineUnitTest).
+ */
+public class VCFInfoHeaderLineUnitTest extends HtsjdkTest {
+
+    @Test
+    public void testRepairInfoLineFlagTypeWithNonzeroCount() {
+        final VCFInfoHeaderLine infoLine = new VCFInfoHeaderLine("<ID=FOO,Number=27,Type=Flag,Description=\"foo\">", VCFHeader.DEFAULT_VCF_VERSION);
+        Assert.assertEquals(infoLine.getCount(), 0); // TestNG order is (actual, expected): Flag line declared with Number=27
+    }
+
+    @DataProvider(name = "mergeCompatibleInfoLines")
+    public Object[][] getMergeCompatibleInfoLines() {
+        return new Object[][]{ // 2 lines to merge, expected merged line
+                { // same ID and Type, Number=1 vs Number=A; the expected merged line has Number=.
+                        new VCFInfoHeaderLine("INFO=<ID=AB,Number=1,Type=Float,Description=\"Allele Balance for hets (ref/(ref+alt))\">", VCFHeader.DEFAULT_VCF_VERSION),
+                        new VCFInfoHeaderLine("INFO=<ID=AB,Number=A,Type=Float,Description=\"Allele Balance for hets (ref/(ref+alt))\">", VCFHeader.DEFAULT_VCF_VERSION),
+                        new VCFInfoHeaderLine("INFO=<ID=AB,Number=.,Type=Float,Description=\"Allele Balance for hets (ref/(ref+alt))\">", VCFHeader.DEFAULT_VCF_VERSION)
+                }
+        };
+    }
+
+    @Test(dataProvider = "mergeCompatibleInfoLines")
+    public void testMergeCompatibleInfoLines(
+            final VCFInfoHeaderLine infoHeaderLine1,
+            final VCFInfoHeaderLine infoHeaderLine2,
+            final VCFInfoHeaderLine expectedHeaderLine) {
+        // merge the two lines and compare the outcome with the expected line from the data provider
+        final VCFHeaderMerger.HeaderMergeConflictWarnings conflictWarnings =
+                new VCFHeaderMerger.HeaderMergeConflictWarnings(true);
+        Assert.assertEquals(
+                VCFInfoHeaderLine.getMergedInfoHeaderLine(infoHeaderLine1, infoHeaderLine2, conflictWarnings),
+                expectedHeaderLine);
+    }
+
+    @DataProvider(name = "mergeIncompatibleInfoLines")
+    public Object[][] getMergeIncompatibleInfoLines() {
+        return new Object[][]{
+                // 2 lines to merge (no expected result: the consuming test expects the merge to throw)
+                {
+                        // different Number AND different Type (multiple different attributes)
+                        new VCFInfoHeaderLine("INFO=<ID=AB,Number=A,Type=Float,Description=\"Allele Balance for hets (ref/(ref+alt))\">",
+                                VCFHeader.DEFAULT_VCF_VERSION),
+                        new VCFInfoHeaderLine("INFO=<ID=AB,Number=1,Type=Integer,Description=\"Allele Balance for hets (ref/(ref+alt))\">",
+                                VCFHeader.DEFAULT_VCF_VERSION)
+                },
+                {
+                        // different Number AND different Type (multiple different attributes), reverse direction
+                        new VCFInfoHeaderLine("INFO=<ID=AB,Number=1,Type=Integer,Description=\"Allele Balance for hets (ref/(ref+alt))\">",
+                                VCFHeader.DEFAULT_VCF_VERSION),
+                        new VCFInfoHeaderLine("INFO=<ID=AB,Number=A,Type=Float,Description=\"Allele Balance for hets (ref/(ref+alt))\">",
+                                VCFHeader.DEFAULT_VCF_VERSION)
+                }
+        };
+    }
+
+    @Test
+    public void testAllow1000GKey() {
+        final VCFInfoHeaderLine line = new VCFInfoHeaderLine(
+            "INFO=<ID=1000G,Number=0,Type=Flag,Description=1000G>",
+            VCFHeader.DEFAULT_VCF_VERSION
+        );
+        // the ID "1000G" must not be reported as a validation failure when checked against v4.3
+        // TODO change to VCFHeader.DEFAULT_VCF_VERSION
+        Assert.assertFalse(line.getValidationFailure(VCFHeaderVersion.VCF4_3).isPresent());
+    }
+
+    @Test(dataProvider = "mergeIncompatibleInfoLines", expectedExceptions= TribbleException.class)
+    public void testMergeIncompatibleInfoLines(
+            final VCFInfoHeaderLine infoHeaderLine1,
+            final VCFInfoHeaderLine infoHeaderLine2) {
+        // attempting the merge is expected to throw a TribbleException for these line pairs
+        final VCFHeaderMerger.HeaderMergeConflictWarnings conflictWarnings =
+                new VCFHeaderMerger.HeaderMergeConflictWarnings(true);
+        VCFInfoHeaderLine.getMergedInfoHeaderLine(infoHeaderLine1, infoHeaderLine2, conflictWarnings);
+    }
+
+}
diff --git a/src/test/java/htsjdk/variant/vcf/VCFMetaDataLinesUnitTest.java b/src/test/java/htsjdk/variant/vcf/VCFMetaDataLinesUnitTest.java
new file mode 100644
index 0000000000..2e41536abe
--- /dev/null
+++ b/src/test/java/htsjdk/variant/vcf/VCFMetaDataLinesUnitTest.java
@@ -0,0 +1,354 @@
+package htsjdk.variant.vcf;
+
+import htsjdk.HtsjdkTest;
+import htsjdk.tribble.TribbleException;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.util.*;
+
+public class VCFMetaDataLinesUnitTest extends HtsjdkTest {
+
+    @DataProvider(name="keyCollisions")
+    public Object[][] keyCollisions() {
+        return new Object[][] {
+                // line 1, line 2, expected to collide
+
+                // Unstructured key collisions
+                {       // same key, same value
+                        new VCFHeaderLine("key", "value"),
+                        new VCFHeaderLine("key", "value"), true
+                },
+                {       // same key, different value
+                        new VCFHeaderLine("key", "value"),
+                        new VCFHeaderLine("key", "value1"), false
+                },
+                {       // different key, same value
+                        new VCFHeaderLine("key1", "value"),
+                        new VCFHeaderLine("key2", "value"), false
+                },
+                {       // different key, different value
+                        new VCFHeaderLine("key1", "value1"),
+                        new VCFHeaderLine("key2", "value2"), false
+                },
+
+                // Structured key collisions
+                {       // same key, same ID, same (base VCFSimpleHeaderLine) class
+                        new VCFSimpleHeaderLine("FILTER", Collections.singletonMap("ID", "id")),
+                        new VCFSimpleHeaderLine("FILTER", Collections.singletonMap("ID", "id")), true
+                },
+                {       // same key, same ID, same (derived-VCFSimpleHeaderLine) class, same attributes
+                        new VCFFilterHeaderLine("filterName", "unused description"),
+                        new VCFFilterHeaderLine("filterName", "unused description"), true
+                },
+                {       // same key, same ID, same class, different attributes
+                        new VCFFilterHeaderLine("filterName", "unused description"),
+                        new VCFFilterHeaderLine("filterName", "different unused description"), true
+                },
+                {       // same key, different ID
+                        new VCFFilterHeaderLine("filterName", "unused description"),
+                        new VCFFilterHeaderLine("filterName2", "unused description"), false
+                },
+                {       // This is an unfortunate case that is allowed by the existing permissive VCFHeader
+                        // APIs: two header lines that have identical content, one of which is modeled by the
+                        // VCFSimpleHeaderLine base class, and one of which is modeled by the specialized,
+                        // derived VCFFilterHeaderLine class
+                        new VCFFilterHeaderLine("id", "unused description"),
+                        new VCFSimpleHeaderLine("FILTER", new LinkedHashMap<String, String>() {{
+                            put("ID", "id");
+                            put("Description", "unused description");
+                        }}), true }
+        };
+    }
+
+    @Test(dataProvider="keyCollisions")
+    public void testKeyCollisions(final VCFHeaderLine line1, final VCFHeaderLine line2, final boolean expectCollision) {
+        final VCFMetaDataLines mdLines = new VCFMetaDataLines();
+        mdLines.addMetaDataLine(line1);
+        mdLines.addMetaDataLine(line2);
+        Assert.assertEquals(mdLines.getMetaDataInInputOrder().size(), expectCollision ? 1 : 2);
+    }
+
+    @Test
+    public void testRetainFullHeaderLines() {
+        final VCFHeaderUnitTestData unitTestData = new VCFHeaderUnitTestData();
+        final VCFMetaDataLines md = unitTestData.getTestMetaDataLines();
+
+        Assert.assertEquals(md.getMetaDataInInputOrder().size(), unitTestData.getTestMetaDataLinesSet().size());
+        Assert.assertEquals(md.getMetaDataInSortedOrder().size(), unitTestData.getTestMetaDataLinesSet().size());
+
+        Assert.assertEquals(unitTestData.getTestFormatLines(), md.getFormatHeaderLines());
+        Assert.assertEquals(unitTestData.getTestFilterLines(), md.getFilterLines());
+        Assert.assertEquals(unitTestData.getTestInfoLines(), md.getInfoHeaderLines());
+        Assert.assertEquals(unitTestData.getTestContigLines(), md.getContigLines());
+        Assert.assertEquals(unitTestData.getTestFilterLines(), md.getFilterLines());
+
+        final Set<VCFHeaderLine> otherLines = new LinkedHashSet<>();
+        otherLines.addAll(unitTestData.getTestDefaultFileFormatLine());
+        otherLines.addAll(unitTestData.getTestMiscellaneousLines());
+        Assert.assertEquals(otherLines, md.getOtherHeaderLines());
+    }
+
+    @Test
+    public void testAddRemoveOtherMetaDataLine() {
+        final VCFHeaderUnitTestData unitTestData = new VCFHeaderUnitTestData();
+        final VCFMetaDataLines md = unitTestData.getTestMetaDataLines();
+
+        int beforeAllSize = md.getMetaDataInInputOrder().size();
+        int beforeStructuredSize = md.getIDHeaderLines().size();
+        int beforeOtherSize = md.getOtherHeaderLines().size();
+
+        final VCFHeaderLine newLine = new VCFHeaderLine("foo", "bar");
+
+        // add one other line
+        md.addMetaDataLine(newLine);
+        Assert.assertEquals(md.getMetaDataInInputOrder().size(), beforeAllSize + 1);
+        Assert.assertEquals(md.getIDHeaderLines().size(), beforeStructuredSize);  // remains the same
+        Assert.assertEquals(md.getOtherHeaderLines().size(), beforeOtherSize + 1);
+
+        // remove the other line and we're back to original size
+        Assert.assertEquals(md.removeMetaDataLine(newLine), newLine);
+        Assert.assertEquals(md.getMetaDataInInputOrder().size(), beforeAllSize);
+        Assert.assertEquals(md.getIDHeaderLines().size(), beforeStructuredSize);  // still remains the same
+        Assert.assertEquals(md.getOtherHeaderLines().size(), beforeOtherSize);
+    }
+
+    @Test
+    public void testAddRemoveUniqueStructuredLine() {
+        final VCFHeaderUnitTestData unitTestData = new VCFHeaderUnitTestData();
+        final VCFMetaDataLines md = unitTestData.getTestMetaDataLines();
+
+        final int beforeAllSize = md.getMetaDataInInputOrder().size();
+        final int beforeStructuredSize = md.getIDHeaderLines().size();
+        final int beforeFilterSize = md.getFilterLines().size();
+        final int beforeOtherSize = md.getOtherHeaderLines().size();
+
+        // add a new, unique, structured line
+        final VCFFilterHeaderLine newLine = new VCFFilterHeaderLine("filterID", "unused desc");
+        md.addMetaDataLine(newLine);
+
+        Assert.assertEquals(md.getMetaDataInInputOrder().size(), beforeAllSize + 1);
+        Assert.assertEquals(md.getIDHeaderLines().size(), beforeStructuredSize + 1);
+        Assert.assertEquals(md.getFilterLines().size(), beforeFilterSize + 1);
+        Assert.assertEquals(md.getOtherHeaderLines().size(), beforeOtherSize); // remains the same
+
+        // remove the new line and we're back to original size
+        Assert.assertEquals(md.removeMetaDataLine(newLine), newLine);
+        Assert.assertEquals(md.getMetaDataInInputOrder().size(), beforeAllSize);
+        Assert.assertEquals(md.getIDHeaderLines().size(), beforeStructuredSize);
+        Assert.assertEquals(md.getFilterLines().size(), beforeFilterSize);
+        Assert.assertEquals(md.getOtherHeaderLines().size(), beforeOtherSize); // still remains the same
+    }
+
+    @Test
+    public void testAddRemoveDuplicateStructuredLine() {
+        final VCFHeaderUnitTestData unitTestData = new VCFHeaderUnitTestData();
+        final VCFMetaDataLines md = unitTestData.getTestMetaDataLines();
+
+        final int beforeAllSize = md.getMetaDataInInputOrder().size();
+        final int beforeStructuredSize = md.getIDHeaderLines().size();
+        final int beforeFilterSize = md.getFilterLines().size();
+
+        // add a new, unique, structured (filter) line
+        final VCFFilterHeaderLine newLine = new VCFFilterHeaderLine("filterID", "unused desc");
+        md.addMetaDataLine(newLine);
+
+        Assert.assertEquals(md.getMetaDataInInputOrder().size(), beforeAllSize + 1);
+        Assert.assertEquals(md.getIDHeaderLines().size(), beforeStructuredSize + 1);
+        Assert.assertEquals(md.getFilterLines().size(), beforeFilterSize + 1);
+
+        // now try to re-add the same structured filter line again, this second one is rejected, count remains the same
+        md.addMetaDataLine(newLine);
+        Assert.assertEquals(md.getMetaDataInInputOrder().size(), beforeAllSize + 1);
+        Assert.assertEquals(md.getIDHeaderLines().size(), beforeStructuredSize + 1);
+        Assert.assertEquals(md.getFilterLines().size(), beforeFilterSize + 1);
+        Assert.assertEquals(md.getFilterHeaderLine("filterID"), newLine);
+
+        // remove the first structured line and we're back to the original size
+        Assert.assertEquals(md.removeMetaDataLine(newLine), newLine);
+        Assert.assertEquals(md.getMetaDataInInputOrder().size(), beforeAllSize);
+        Assert.assertEquals(md.getIDHeaderLines().size(), beforeStructuredSize);
+        Assert.assertEquals(md.getFilterLines().size(), beforeFilterSize);
+    }
+
+//    @Test
+//    public void testAddRemoveContigLine() {
+//        final VCFHeaderUnitTestData unitTestData = new VCFHeaderUnitTestData();
+//    }
+
+    @Test
+    public void testHasEquivalentHeaderLinePositive() {
+        final VCFHeaderUnitTestData unitTestData = new VCFHeaderUnitTestData();
+        final VCFMetaDataLines sourceMetaDataLines = unitTestData.getTestMetaDataLines();
+
+        // for each headerLine in the set, make sure findEquivalentHeaderLine returns it
+        for (final VCFHeaderLine headerLine : sourceMetaDataLines.getMetaDataInInputOrder()) {
+            final VCFHeaderLine equivalentLine = sourceMetaDataLines.findEquivalentHeaderLine(headerLine);
+            Assert.assertEquals(equivalentLine, headerLine); // null-safe, and reports both values on failure
+        }
+    }
+
+    @Test
+    public void testHasEquivalentHeaderLineNegative() {
+        final VCFMetaDataLines metaDataLines = new VCFMetaDataLines();
+        // add a few test lines
+        metaDataLines.addMetaDataLine(new VCFHeaderLine("testkey1", "test value"));
+        metaDataLines.addMetaDataLine(new VCFHeaderLine("testkey1", "other value"));
+        metaDataLines.addMetaDataLine(new VCFHeaderLine("reference", "assembly37"));
+
+        // for each headerLine in the starting set, make another header line with the same key but a different
+        // value, and ensure findEquivalentHeaderLine does NOT return it
+        for (final VCFHeaderLine headerLine : metaDataLines.getMetaDataInInputOrder()) {
+            final VCFHeaderLine equivalentLine = metaDataLines.findEquivalentHeaderLine(headerLine);
+            Assert.assertEquals(equivalentLine, headerLine); // null-safe, and reports both values on failure
+
+            final VCFHeaderLine modifiedHeaderLine = new VCFHeaderLine(headerLine.getKey(), headerLine.getValue() + "zzz");
+            Assert.assertNull(metaDataLines.findEquivalentHeaderLine(modifiedHeaderLine));
+        }
+    }
+
+    @Test
+    public void testGetFilterHeaderLine() {
+        final VCFHeaderUnitTestData unitTestData = new VCFHeaderUnitTestData();
+        final VCFMetaDataLines md = unitTestData.getTestMetaDataLines();
+        Assert.assertEquals(md.getFilterHeaderLine(unitTestData.getTestFilterLines().get(0).getID()), unitTestData.getTestFilterLines().get(0));
+    }
+
+    @Test
+    public void testGetInfoHeaderLine() {
+        final VCFHeaderUnitTestData unitTestData = new VCFHeaderUnitTestData();
+        final VCFMetaDataLines md = unitTestData.getTestMetaDataLines();
+        Assert.assertEquals(md.getInfoHeaderLine(unitTestData.getTestInfoLines().get(0).getID()), unitTestData.getTestInfoLines().get(0));
+    }
+
+    @Test
+    public void testGetFormatHeaderLine() {
+        final VCFHeaderUnitTestData testData = new VCFHeaderUnitTestData();
+        final VCFMetaDataLines md = testData.getTestMetaDataLines();
+        Assert.assertEquals(md.getFormatHeaderLine(testData.getTestFormatLines().get(0).getID()), testData.getTestFormatLines().get(0));
+    }
+
+    @Test
+    public void testAddRemoveVersionLine() {
+        final VCFHeaderUnitTestData unitTestData = new VCFHeaderUnitTestData();
+        final VCFMetaDataLines md = unitTestData.getTestMetaDataLines();
+        Assert.assertEquals(md.getVCFVersion(), unitTestData.TEST_VERSION);
+
+        final int originalMetaDataLineCount = md.getMetaDataInInputOrder().size();
+
+        // now, remove the version line, make sure the removed line is actually the version line, that the
+        // resulting metadataLines version is now null, and the line count drops by 1
+        final VCFHeaderLine queryVersionLine = VCFHeader.makeHeaderVersionLine(unitTestData.TEST_VERSION);
+        final VCFHeaderLine oldVersionLine = md.removeMetaDataLine(queryVersionLine);
+        Assert.assertEquals(oldVersionLine, queryVersionLine);
+        Assert.assertNull(md.getVCFVersion());
+        Assert.assertEquals(md.getMetaDataInInputOrder().size(), originalMetaDataLineCount - 1);
+
+        // now put it back...
+        md.addMetaDataLine(oldVersionLine);
+        Assert.assertEquals(md.getVCFVersion(), unitTestData.TEST_VERSION);
+        Assert.assertEquals(md.getMetaDataInInputOrder().size(), originalMetaDataLineCount);
+    }
+
+    @Test
+    public void testAddContigLineExactDuplicate() {
+        final VCFMetaDataLines md = new VCFMetaDataLines();
+        final Set<VCFHeaderLine> contigLines = new LinkedHashSet<>();
+
+        final VCFContigHeaderLine vcfContigLine1 = new VCFContigHeaderLine(
+                new LinkedHashMap<String, String>() {{
+                    put("ID", "contig1");
+                }}, 0);
+        final VCFContigHeaderLine vcfContigLine2 = new VCFContigHeaderLine(
+                new LinkedHashMap<String, String>() {{
+                    put("ID", "contig2");
+                }}, 1);
+
+        contigLines.add(vcfContigLine1);
+        contigLines.add(vcfContigLine2);
+        md.addMetaDataLines(contigLines);
+        Assert.assertEquals(md.getContigLines(), contigLines);
+
+        // add in the duplicate line
+        md.addMetaDataLine(vcfContigLine1);
+        Assert.assertEquals(md.getContigLines(), contigLines);
+    }
+
+    @Test(expectedExceptions = TribbleException.class)
+    public void testAddContigLineConflicting() {
+        final VCFMetaDataLines md = new VCFMetaDataLines();
+
+        final Set<VCFHeaderLine> contigLines = new LinkedHashSet<>();
+        contigLines.add(new VCFContigHeaderLine(
+                new LinkedHashMap<String, String>() {{
+                    put("ID", "contig1");
+                }}, 0));
+        contigLines.add(new VCFContigHeaderLine(
+                new LinkedHashMap<String, String>() {{
+                    put("ID", "contig2");
+                }}, 1));
+
+        md.addMetaDataLines(contigLines);
+        Assert.assertEquals(md.getContigLines(), contigLines);
+
+        // try to add a contig line with a duplicate index, but with a different name than the existing line with that index
+        md.addMetaDataLine(new VCFContigHeaderLine(
+                new LinkedHashMap<String, String>() {{
+                    put("ID", "contig3");
+                }}, 0));
+    }
+
+    @Test
+    public void testRemoveAndReplaceContigLines() {
+        final VCFMetaDataLines md = new VCFMetaDataLines();
+        final Set<VCFHeaderLine> contigLines = new LinkedHashSet<>();
+
+        final VCFContigHeaderLine vcfContigLine1 = new VCFContigHeaderLine(
+                new LinkedHashMap<String, String>() {{
+                    put("ID", "contig1");
+                }}, 1);
+        final VCFContigHeaderLine vcfContigLine2 = new VCFContigHeaderLine(
+                new LinkedHashMap<String, String>() {{
+                    put("ID", "contig2");
+                }}, 2);
+
+        contigLines.add(vcfContigLine1);
+        contigLines.add(vcfContigLine2);
+        md.addMetaDataLines(contigLines);
+        Assert.assertEquals(md.getContigLines(), contigLines);
+
+        // make sure the initial contig index order is honored; it happens to be the same as the input
+        // order at this point, but check anyway
+        final List<VCFContigHeaderLine> sortedLines1 = md.getContigLines();
+        Assert.assertEquals(sortedLines1.get(0), vcfContigLine1);
+        Assert.assertEquals(sortedLines1.get(1), vcfContigLine2);
+
+        // now remove the first contig line; only one should remain
+        final VCFHeaderLine removedContigLine = md.removeMetaDataLine(vcfContigLine1);
+        Assert.assertEquals(removedContigLine, vcfContigLine1);
+        final List<VCFContigHeaderLine> sortedContigHeaderLines = md.getContigLines();
+        Assert.assertEquals(sortedContigHeaderLines.size(), 1);
+
+        // now add the first line back in, so the input order is different than the sorted order,
+        // and make sure the order is honored
+        md.addMetaDataLine(vcfContigLine1);
+        final List<VCFContigHeaderLine> sortedLines2 = md.getContigLines();
+        Assert.assertEquals(sortedLines2.get(0), vcfContigLine1);
+        Assert.assertEquals(sortedLines2.get(1), vcfContigLine2);
+
+        // now add in ANOTHER contig line at the end that has an index that puts it BEFORE the existing lines
+        final VCFContigHeaderLine vcfContigLine3 = new VCFContigHeaderLine(
+                new LinkedHashMap<String, String>() {{
+                    put("ID", "contig3");
+                }}, 0);
+        md.addMetaDataLine(vcfContigLine3);
+        final List<VCFContigHeaderLine> sortedLines3 = md.getContigLines();
+        Assert.assertEquals(sortedLines3.size(), 3);
+        Assert.assertEquals(sortedLines3.get(0), vcfContigLine3);
+        Assert.assertEquals(sortedLines3.get(1), vcfContigLine1);
+        Assert.assertEquals(sortedLines3.get(2), vcfContigLine2);
+    }
+
+}
+
diff --git a/src/test/java/htsjdk/variant/vcf/VCFMetaHeaderLineUnitTest.java b/src/test/java/htsjdk/variant/vcf/VCFMetaHeaderLineUnitTest.java
new file mode 100644
index 0000000000..518f6a6928
--- /dev/null
+++ b/src/test/java/htsjdk/variant/vcf/VCFMetaHeaderLineUnitTest.java
@@ -0,0 +1,44 @@
+package htsjdk.variant.vcf;
+
+import htsjdk.HtsjdkTest;
+import htsjdk.tribble.TribbleException;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+public class VCFMetaHeaderLineUnitTest extends HtsjdkTest {
+
+    @DataProvider(name = "allowedVCFVersions")
+    public Object[][] allowedVCFVersions() {
+        return new Object[][]{
+                {VCFHeaderVersion.VCF4_3}
+        };
+    }
+
+    @DataProvider(name = "rejectedVCFVersions")
+    public Object[][] rejectedVCFVersions() {
+        return new Object[][]{
+                {VCFHeaderVersion.VCF3_2},
+                {VCFHeaderVersion.VCF3_3},
+                {VCFHeaderVersion.VCF4_0},
+                {VCFHeaderVersion.VCF4_1},
+                {VCFHeaderVersion.VCF4_2},
+        };
+    }
+
+    private static final String META_STRING = "<ID=id,Description=desc>";
+
+    @Test(dataProvider="allowedVCFVersions")
+    public void testAllowedVersions(final VCFHeaderVersion vcfAllowedVersion) {
+        final VCFMetaHeaderLine vcfLine = new VCFMetaHeaderLine(META_STRING, vcfAllowedVersion);
+        Assert.assertEquals(vcfLine.getID(), "id"); // TestNG argument order is (actual, expected)
+        Assert.assertEquals(vcfLine.getGenericFieldValue(VCFSimpleHeaderLine.DESCRIPTION_ATTRIBUTE), "desc");
+    }
+
+    @Test(dataProvider="rejectedVCFVersions",expectedExceptions=TribbleException.class)
+    public void testRejectedVersions(final VCFHeaderVersion vcfRejectedVersion) {
+        new VCFMetaHeaderLine(META_STRING, vcfRejectedVersion); // constructor is expected to throw for these versions
+    }
+
+
+}
diff --git a/src/test/java/htsjdk/variant/vcf/VCFPedigreeHeaderLineUnitTest.java b/src/test/java/htsjdk/variant/vcf/VCFPedigreeHeaderLineUnitTest.java
new file mode 100644
index 0000000000..43179c6862
--- /dev/null
+++ b/src/test/java/htsjdk/variant/vcf/VCFPedigreeHeaderLineUnitTest.java
@@ -0,0 +1,50 @@
+package htsjdk.variant.vcf;
+
+import htsjdk.HtsjdkTest;
+import htsjdk.tribble.TribbleException;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+public class VCFPedigreeHeaderLineUnitTest extends HtsjdkTest {
+
+
+    @DataProvider(name = "allowedVCFVersions")
+    public Object[][] allowedVCFVersions() {
+        return new Object[][]{
+                {VCFHeaderVersion.VCF4_3}
+        };
+    }
+
+    @DataProvider(name = "rejectedVCFVersions")
+    public Object[][] rejectedVCFVersions() {
+        return new Object[][]{
+                {VCFHeaderVersion.VCF4_0},
+                {VCFHeaderVersion.VCF4_1},
+                {VCFHeaderVersion.VCF4_2},
+                {VCFHeaderVersion.VCF3_2},
+                {VCFHeaderVersion.VCF3_3},
+        };
+    }
+
+    private static final String PEDIGREE_STRING_4_2 = "PEDIGREE=<Description=desc>";
+    private static final String PEDIGREE_STRING_4_3 = "PEDIGREE=<ID=id,Description=desc>";
+
+    @Test(dataProvider="allowedVCFVersions")
+    public void testAllowedVersions(final VCFHeaderVersion vcfAllowedVersion) {
+        final VCFPedigreeHeaderLine vcfLine = new VCFPedigreeHeaderLine(
+                vcfAllowedVersion.isAtLeastAsRecentAs(VCFHeaderVersion.VCF4_3) ?
+                        PEDIGREE_STRING_4_3 :
+                        PEDIGREE_STRING_4_2,
+                vcfAllowedVersion);
+        Assert.assertEquals(vcfLine.getID(), "id"); // TestNG argument order is (actual, expected)
+        Assert.assertEquals(vcfLine.getGenericFieldValue(VCFSimpleHeaderLine.DESCRIPTION_ATTRIBUTE), "desc");
+    }
+
+    @Test(dataProvider="rejectedVCFVersions",expectedExceptions=TribbleException.class)
+    public void testRejectedVersions(final VCFHeaderVersion vcfRejectedVersion) {
+        new VCFPedigreeHeaderLine(PEDIGREE_STRING_4_2, vcfRejectedVersion); // constructor is expected to throw for these versions
+    }
+
+
+}
diff --git a/src/test/java/htsjdk/variant/vcf/VCFSampleHeaderLineUnitTest.java b/src/test/java/htsjdk/variant/vcf/VCFSampleHeaderLineUnitTest.java
new file mode 100644
index 0000000000..355827e27b
--- /dev/null
+++ b/src/test/java/htsjdk/variant/vcf/VCFSampleHeaderLineUnitTest.java
@@ -0,0 +1,43 @@
+package htsjdk.variant.vcf;
+
+import htsjdk.HtsjdkTest;
+import htsjdk.tribble.TribbleException;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+public class VCFSampleHeaderLineUnitTest extends HtsjdkTest {
+
+    @DataProvider(name = "allowedVCFVersions")
+    public Object[][] allowedVCFVersions() {
+        return new Object[][]{
+                {VCFHeaderVersion.VCF4_0},
+                {VCFHeaderVersion.VCF4_1},
+                {VCFHeaderVersion.VCF4_2},
+                {VCFHeaderVersion.VCF4_3}
+        };
+    }
+
+    @DataProvider(name = "rejectedVCFVersions")
+    public Object[][] rejectedVCFVersions() {
+        return new Object[][]{
+                {VCFHeaderVersion.VCF3_2},
+                {VCFHeaderVersion.VCF3_3},
+        };
+    }
+
+    private static final String SAMPLE_STRING = "SAMPLE=<ID=id,Description=desc>";
+
+    @Test(dataProvider="allowedVCFVersions")
+    public void testAllowedVersions(final VCFHeaderVersion vcfAllowedVersion) {
+        final VCFSampleHeaderLine vcfLine = new VCFSampleHeaderLine(SAMPLE_STRING, vcfAllowedVersion);
+        Assert.assertEquals(vcfLine.getID(), "id"); // TestNG argument order is (actual, expected)
+        Assert.assertEquals(vcfLine.getGenericFieldValue(VCFSimpleHeaderLine.DESCRIPTION_ATTRIBUTE), "desc");
+    }
+
+    @Test(dataProvider="rejectedVCFVersions",expectedExceptions=TribbleException.class)
+    public void testRejectedVersions(final VCFHeaderVersion vcfRejectedVersion) {
+        new VCFSampleHeaderLine(SAMPLE_STRING, vcfRejectedVersion); // constructor is expected to throw for these versions
+    }
+
+}
diff --git a/src/test/java/htsjdk/variant/vcf/VCFSimpleHeaderLineUnitTest.java b/src/test/java/htsjdk/variant/vcf/VCFSimpleHeaderLineUnitTest.java
new file mode 100644
index 0000000000..c9f8841d3d
--- /dev/null
+++ b/src/test/java/htsjdk/variant/vcf/VCFSimpleHeaderLineUnitTest.java
@@ -0,0 +1,151 @@
+package htsjdk.variant.vcf;
+
+import htsjdk.HtsjdkTest;
+import htsjdk.tribble.TribbleException;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+import java.util.LinkedHashMap;
+
+public class VCFSimpleHeaderLineUnitTest extends HtsjdkTest {
+
+    private VCFSimpleHeaderLine getStructuredHeaderLine() {
+        return new VCFSimpleHeaderLine(
+                "key",
+                new LinkedHashMap<String, String>() {{
+                    put("ID", "id");
+                    put("attr1", "value1");
+                    put("attr2", "value2");
+                }}
+        );
+    }
+
+    @Test
+    public void testConstructorFromStrings() {
+        final VCFSimpleHeaderLine hl = new VCFSimpleHeaderLine("testKey", "testId", "test description");
+        Assert.assertEquals(hl.getKey(), "testKey"); // TestNG argument order is (actual, expected)
+        Assert.assertEquals(hl.getID(), "testId");
+        Assert.assertEquals(hl.getGenericFieldValue(VCFSimpleHeaderLine.DESCRIPTION_ATTRIBUTE), "test description");
+        Assert.assertEquals(hl.toStringEncoding(), "testKey=<ID=testId,Description=\"test description\">");
+    }
+
+    @Test
+    public void testConstructorFromEncodedLine() {
+        final VCFSimpleHeaderLine hLine = new VCFSimpleHeaderLine("key", "<ID=id,attr1=value1>", VCFHeader.DEFAULT_VCF_VERSION);
+        Assert.assertEquals(hLine.getKey(), "key");
+        Assert.assertEquals(hLine.getID(), "id");
+        Assert.assertEquals(hLine.getGenericFieldValue("ID"), "id");
+        Assert.assertEquals(hLine.getGenericFieldValue("attr1"), "value1");
+    }
+
+    @Test
+    public void testConstructorFromAttributeMap() {
+        final VCFSimpleHeaderLine hLine = new VCFSimpleHeaderLine(
+                "key",
+                new LinkedHashMap<String, String>() {{
+                    put("ID", "id");
+                    put("attr1", "value1");
+                    put("attr2", "value2");
+                }});
+
+        Assert.assertEquals(hLine.getKey(), "key");
+        Assert.assertEquals(hLine.getID(), "id");
+        Assert.assertEquals(hLine.getGenericFieldValue("ID"), "id");
+        Assert.assertEquals(hLine.getGenericFieldValue("attr1"), "value1");
+    }
+
+    @Test(expectedExceptions=TribbleException.class)
+    public void testRejectIdMissingFromEncodedLine() {
+        new VCFSimpleHeaderLine("key", "<attr1=value1>", VCFHeader.DEFAULT_VCF_VERSION);
+    }
+
+    @Test(expectedExceptions=TribbleException.class)
+    public void testRejectIdMissingFromAttributeMap() {
+        new VCFSimpleHeaderLine(
+                "key",
+                new LinkedHashMap<String, String>() {{
+                    put("attr1", "value1");
+                    put("attr2", "value2");
+                }});
+    }
+
+    @DataProvider(name = "violateIDRequirements")
+    public Object[][] getViolateIDRequirements() {
+        return new Object[][]{
+                {"<ID>"},
+                {"<ID="},
+                {"<ID=\"\""},
+                {"<ID>"},
+                {"<attr1=value1>"},
+                {"<attr1=value1,attr2=value2>"}
+        };
+    }
+
+    @Test(dataProvider="violateIDRequirements",expectedExceptions=TribbleException.class)
+    public void testViolateIDRequirements(final String headerLine) {
+        new VCFSimpleHeaderLine("key", headerLine, VCFHeader.DEFAULT_VCF_VERSION);
+    }
+
+    @Test
+    public void testGetID() {
+        Assert.assertEquals(getStructuredHeaderLine().getID(), "id");
+    }
+
+    @Test
+    public void testIsIDLine() {
+        Assert.assertTrue(getStructuredHeaderLine().isIDHeaderLine());
+    }
+
+    @Test
+    public void testGetGenericFieldValue() {
+        Assert.assertEquals(getStructuredHeaderLine().getGenericFieldValue("attr1"), "value1");
+    }
+
+    @Test
+    public void testStringEncoding() {
+        final VCFSimpleHeaderLine structuredHL = getStructuredHeaderLine();
+        Assert.assertEquals(structuredHL.toStringEncoding(),"key=<ID=id,attr1=value1,attr2=value2>");
+    }
+
+    @Test
+    public void testUnescapedQuotedStringEncoding() {
+        final VCFSimpleHeaderLine unescapedHeaderLine =  new VCFSimpleHeaderLine(
+                "key",
+                new LinkedHashMap<String, String>() {{
+                    put("ID", "id");
+                    put(VCFSimpleHeaderLine.DESCRIPTION_ATTRIBUTE,
+                            "filterName=[ANNOTATION] filterExpression=[ANNOTATION == \"NA\" || ANNOTATION <= 2.0]");
+                    put(VCFSimpleHeaderLine.SOURCE_ATTRIBUTE,
+                            "filterName=[ANNOTATION] filterExpression=[ANNOTATION == \"NA\" || ANNOTATION <= 2.0]");
+                }}
+        );
+
+        final String encodedAttributes = unescapedHeaderLine.toStringEncoding();
+        Assert.assertNotNull(encodedAttributes);
+
+        final String expectedEncoding = "key=<ID=id,Description=\"filterName=[ANNOTATION] filterExpression=[ANNOTATION == \\\"NA\\\" || ANNOTATION <= 2.0]\",Source=\"filterName=[ANNOTATION] filterExpression=[ANNOTATION == \\\"NA\\\" || ANNOTATION <= 2.0]\">";
+        Assert.assertEquals(encodedAttributes, expectedEncoding);
+    }
+
+    @Test
+    public void testEscapedQuotedStringEncoding() {
+        // quotes that are already escaped in the Description/Source values must not be escaped a second time
+        final VCFSimpleHeaderLine escapedHeaderLine =  new VCFSimpleHeaderLine(
+                "key",
+                new LinkedHashMap<String, String>() {{
+                    put("ID", "id");
+                    put(VCFSimpleHeaderLine.DESCRIPTION_ATTRIBUTE,
+                            "filterName=[ANNOTATION] filterExpression=[ANNOTATION == \\\"NA\\\" || ANNOTATION <= 2.0]");
+                    put(VCFSimpleHeaderLine.SOURCE_ATTRIBUTE,
+                            "filterName=[ANNOTATION] filterExpression=[ANNOTATION == \\\"NA\\\" || ANNOTATION <= 2.0]");
+                }}
+        );
+
+        final String encodedAttributes = escapedHeaderLine.toStringEncoding();
+        Assert.assertNotNull(encodedAttributes);
+
+        final String expectedEncoding = "key=<ID=id,Description=\"filterName=[ANNOTATION] filterExpression=[ANNOTATION == \\\"NA\\\" || ANNOTATION <= 2.0]\",Source=\"filterName=[ANNOTATION] filterExpression=[ANNOTATION == \\\"NA\\\" || ANNOTATION <= 2.0]\">";
+        Assert.assertEquals(encodedAttributes, expectedEncoding);
+    }
+
+}
diff --git a/src/test/java/htsjdk/variant/vcf/VCFStandardHeaderLinesUnitTest.java b/src/test/java/htsjdk/variant/vcf/VCFStandardHeaderLinesUnitTest.java
index c9efaa59ef..45009ce211 100644
--- a/src/test/java/htsjdk/variant/vcf/VCFStandardHeaderLinesUnitTest.java
+++ b/src/test/java/htsjdk/variant/vcf/VCFStandardHeaderLinesUnitTest.java
@@ -31,8 +31,9 @@
 import org.testng.annotations.Test;
 
 import java.util.ArrayList;
-import java.util.Collections;
+import java.util.LinkedHashSet;
 import java.util.List;
+import java.util.Set;
 
 /**
  * Created by IntelliJ IDEA.
@@ -188,7 +189,11 @@ public Object[][] makeRepairHeaderTest() {
 
     @Test(dataProvider = "RepairHeaderTest")
     public void testRepairHeaderTest(final RepairHeaderTest cfg) {
-        final VCFHeader toRepair = new VCFHeader(Collections.singleton((VCFHeaderLine)cfg.original));
+        final Set<VCFHeaderLine> headerLines = new LinkedHashSet<>();
+        headerLines.add(VCFHeader.makeHeaderVersionLine(VCFHeader.DEFAULT_VCF_VERSION));
+        headerLines.add(cfg.original);
+
+        final VCFHeader toRepair = new VCFHeader(headerLines);
         final VCFHeader repaired = VCFStandardHeaderLines.repairStandardHeaderLines(toRepair);
 
         VCFCompoundHeaderLine repairedLine = (VCFCompoundHeaderLine)repaired.getFormatHeaderLine(cfg.original.getID());
diff --git a/src/test/java/htsjdk/variant/vcf/VCFUtilsTest.java b/src/test/java/htsjdk/variant/vcf/VCFUtilsTest.java
index ed943feac1..5629798c61 100644
--- a/src/test/java/htsjdk/variant/vcf/VCFUtilsTest.java
+++ b/src/test/java/htsjdk/variant/vcf/VCFUtilsTest.java
@@ -1,6 +1,7 @@
 package htsjdk.variant.vcf;
 
 import htsjdk.HtsjdkTest;
+import htsjdk.tribble.TribbleException;
 import org.testng.Assert;
 import org.testng.annotations.DataProvider;
 import org.testng.annotations.Test;
@@ -11,45 +12,55 @@ public class VCFUtilsTest extends HtsjdkTest {
 
     @DataProvider(name="validHeaderVersionMerger")
     public Object[][] validHeaderMergerVersions() {
-        // v4.3 can only merge with v4.3, all other version mergers are allowed
+
+        // header version must be at least v4.2 to merge, result is always highest version
         return new Object[][] {
-                {Arrays.asList("VCFv4.0", "VCFv4.0")},
-                {Arrays.asList("VCFv4.1", "VCFv4.1")},
-                {Arrays.asList("VCFv4.2", "VCFv4.2")},
-                {Arrays.asList("VCFv4.3", "VCFv4.3")},
-                {Arrays.asList("VCFv4.2", "VCFv4.2")},
-                {Arrays.asList("VCFv4.2", "VCFv4.2", "VCFv4.2")},
+                // headers to merge, expected result version
+                {Arrays.asList("VCFv4.2", "VCFv4.2"), VCFHeaderVersion.VCF4_2},
+                {Arrays.asList("VCFv4.3", "VCFv4.3"), VCFHeaderVersion.VCF4_3},
+                {Arrays.asList("VCFv4.2", "VCFv4.3"), VCFHeaderVersion.VCF4_3},
+                {Arrays.asList("VCFv4.3", "VCFv4.2"), VCFHeaderVersion.VCF4_3},
+                {Arrays.asList("VCFv4.2", "VCFv4.2"), VCFHeaderVersion.VCF4_2 },
+                {Arrays.asList("VCFv4.2", "VCFv4.2", "VCFv4.3"), VCFHeaderVersion.VCF4_3},
+                {Arrays.asList("VCFv4.3", "VCFv4.3", "VCFv4.2"), VCFHeaderVersion.VCF4_3},
+                {Arrays.asList("VCFv4.3", "VCFv4.2", "VCFv4.3"), VCFHeaderVersion.VCF4_3},
         };
     }
 
     @DataProvider(name="invalidHeaderVersionMerger")
     public Object[][] invalidHeaderVersionMerger() {
-        // v4.3 can only merge with v4.3, all other version mergers are allowed
+        // header version must be at least v4.2 to merge
         return new Object[][] {
-                {Arrays.asList("VCFv4.0", "VCFv4.3")},
-                {Arrays.asList("VCFv4.1", "VCFv4.3")},
-                {Arrays.asList("VCFv4.2", "VCFv4.3")},
-                {Arrays.asList("VCFv4.0", "VCFv4.0", "VCFv4.2", "VCFv4.3")},
-                {Arrays.asList("VCFv4.3", "VCFv4.0", "VCFv4.1", "VCFv4.2")},
+                {Arrays.asList("VCFv4.0", "VCFv4.2")},
+                {Arrays.asList("VCFv4.1", "VCFv4.2")},
+                {Arrays.asList("VCFv4.0", "VCFv4.1", "VCFv4.2", "VCFv4.3")},
+                {Arrays.asList("VCFv4.3", "VCFv4.2", "VCFv4.1", "VCFv4.0")},
         };
     }
 
     @Test(dataProvider="validHeaderVersionMerger")
-    public void testValidHeaderVersionMerger(final List<String> headerVersions) {
-        final List<VCFHeader> headersToMerge = new ArrayList<>(headerVersions.size());
-        headerVersions.forEach(hv -> headersToMerge.add(
-                new VCFHeader(VCFHeaderVersion.toHeaderVersion(hv), Collections.emptySet(), Collections.emptySet()))
-        );
-        final Set<VCFHeaderLine> resultHeaders = VCFUtils.smartMergeHeaders(headersToMerge, true);
+    public void testValidHeaderVersionMerger(final List<String> headerVersions, final VCFHeaderVersion expectedVersion) {
+        final Set<VCFHeaderLine> mergedHeaderLines = doHeaderMerge(headerVersions);
+
+        final VCFMetaDataLines metaDataLines = new VCFMetaDataLines();
+        metaDataLines.addMetaDataLines(mergedHeaderLines);
+        final VCFHeaderLine versionLine = metaDataLines.getFileFormatLine();
+        Assert.assertEquals(VCFHeaderVersion.toHeaderVersion(versionLine.getValue()), expectedVersion);
     }
 
-    @Test(dataProvider="invalidHeaderVersionMerger", expectedExceptions = IllegalArgumentException.class)
+    @Test(dataProvider="invalidHeaderVersionMerger", expectedExceptions = TribbleException.class)
     public void testInvalidHeaderVersionMerger(final List<String> headerVersions) {
+        doHeaderMerge(headerVersions);
+    }
+
+    private Set<VCFHeaderLine> doHeaderMerge(final List<String> headerVersions) {
         final List<VCFHeader> headersToMerge = new ArrayList<>(headerVersions.size());
         headerVersions.forEach(hv -> headersToMerge.add(
-                new VCFHeader(VCFHeaderVersion.toHeaderVersion(hv), Collections.emptySet(), Collections.emptySet()))
+                new VCFHeader(
+                        VCFHeader.makeHeaderVersionLineSet(VCFHeaderVersion.toHeaderVersion(hv)),
+                        Collections.emptySet()))
         );
-        VCFUtils.smartMergeHeaders(headersToMerge, true);
+        return VCFUtils.smartMergeHeaders(headersToMerge, true);
     }
 
     @DataProvider(name = "caseIntolerantDoubles")
diff --git a/src/test/resources/htsjdk/variant/HiSeq.10000.vcf b/src/test/resources/htsjdk/variant/HiSeq.10000.vcf
index a304ba24da..75c9f9b537 100644
--- a/src/test/resources/htsjdk/variant/HiSeq.10000.vcf
+++ b/src/test/resources/htsjdk/variant/HiSeq.10000.vcf
@@ -9,7 +9,6 @@
 ##FILTER=<ID=HARD_TO_VALIDATE,Description="MQ0 = 4 && ((MQ0 / (1.0 * DP))  0.1)">
 ##FILTER=<ID=Indel,Description="Overlaps a user-input mask">
 ##FILTER=<ID=LowQual,Description="Low quality">
-##FILTER=<ID=LowQual,Description="QUAL  50.0">
 ##FILTER=<ID=SnpCluster,Description="SNPs found in clusters">
 ##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
 ##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth (only filtered reads used for calling)">
diff --git a/src/test/resources/htsjdk/variant/VCF4HeaderTest.vcf b/src/test/resources/htsjdk/variant/VCF4HeaderTest.vcf
index 9af0cb3e64..097d0b034f 100644
--- a/src/test/resources/htsjdk/variant/VCF4HeaderTest.vcf
+++ b/src/test/resources/htsjdk/variant/VCF4HeaderTest.vcf
@@ -9,7 +9,6 @@
 ##FILTER=<ID=HARD_TO_VALIDATE,Description="MQ0 = 4 && ((MQ0 / (1.0 * DP))  0.1)">
 ##FILTER=<ID=Indel,Description="Overlaps a user-input mask">
 ##FILTER=<ID=LowQual,Description="Low quality">
-##FILTER=<ID=LowQual,Description="QUAL  50.0">
 ##FILTER=<ID=ANNOTATION,Description="ANNOTATION != \"NA\" || ANNOTATION <= 0.01">
 ##FILTER=<ID=ANNOTATION2,Description="ANNOTATION with quote \" that is unmatched but escaped">
 ##FILTER=<ID=SnpCluster,Description="SNPs found in clusters">

From 3d08ef8aeeac36efe8d1efa11f144b5b27ad9844 Mon Sep 17 00:00:00 2001
From: Chris Norman <cnorman@broadinstitute.org>
Date: Mon, 15 Nov 2021 08:42:06 -0500
Subject: [PATCH 03/22] Eliminate redundant modeling of VCFHeaderVersion in
 VCFHeader.

---
 .../java/htsjdk/variant/vcf/VCFHeader.java    | 42 ++++++++-----------
 .../htsjdk/variant/vcf/VCFMetaDataLines.java  |  5 ++-
 .../htsjdk/variant/vcf/VCFHeaderUnitTest.java | 17 ++++----
 3 files changed, 29 insertions(+), 35 deletions(-)

diff --git a/src/main/java/htsjdk/variant/vcf/VCFHeader.java b/src/main/java/htsjdk/variant/vcf/VCFHeader.java
index 637c04c4fc..1dcb5e07f9 100644
--- a/src/main/java/htsjdk/variant/vcf/VCFHeader.java
+++ b/src/main/java/htsjdk/variant/vcf/VCFHeader.java
@@ -65,9 +65,6 @@ public enum HEADER_FIELDS {
         CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO
     }
 
-    // the VCF version for this header
-    private VCFHeaderVersion vcfHeaderVersion;
-
     // header meta data
     private final VCFMetaDataLines mMetaData = new VCFMetaDataLines();
 
@@ -163,7 +160,7 @@ public VCFHeader(final Set<VCFHeaderLine> metaData, final List<String> genotypeS
         // lines are presented in the set, a warning will be issued, only the last one will be retained,
         // and the header version will be established using the last version line encountered
         mMetaData.addMetaDataLines(metaData);
-        vcfHeaderVersion = initializeHeaderVersion();
+        final VCFHeaderVersion vcfHeaderVersion = initializeHeaderVersion();
         mMetaData.validateMetaDataLines(vcfHeaderVersion);
 
         checkForDeprecatedGenotypeLikelihoodsKey();
@@ -180,7 +177,7 @@ public VCFHeader(final Set<VCFHeaderLine> metaData, final List<String> genotypeS
     * @return the VCFHeaderVersion for this header. will not be null
     */
     public VCFHeaderVersion getVCFHeaderVersion() {
-        return vcfHeaderVersion;
+        return mMetaData.getVCFVersion();
     }
 
     /**
@@ -191,16 +188,12 @@ public VCFHeaderVersion getVCFHeaderVersion() {
      * @param headerLine header line to attempt to add
      */
     public void addMetaDataLine(final VCFHeaderLine headerLine) {
-        // propagate the new line to the metadata lines object
+        // propagate the new line to the metadata lines object, and if the version changed, validate
+        // the lines against the new version
+        final VCFHeaderVersion oldHeaderVersion = mMetaData.getVCFVersion();
         mMetaData.addMetaDataLine(headerLine);
-
-        // update the current version in case this line triggered a version change
         final VCFHeaderVersion newHeaderVersion = mMetaData.getVCFVersion();
-        if (!newHeaderVersion.equals(vcfHeaderVersion)) {
-            validateVersionTransition(vcfHeaderVersion, newHeaderVersion);
-        }
-        vcfHeaderVersion = newHeaderVersion;
-        headerLine.validateForVersion(vcfHeaderVersion);
+        validateVersionTransition(headerLine, oldHeaderVersion, newHeaderVersion);
 
         checkForDeprecatedGenotypeLikelihoodsKey();
     }
@@ -574,7 +567,6 @@ public boolean equals(final Object o) {
         if (samplesWereAlreadySorted != vcfHeader.samplesWereAlreadySorted) return false;
         if (writeEngineHeaders != vcfHeader.writeEngineHeaders) return false;
         if (writeCommandLine != vcfHeader.writeCommandLine) return false;
-        if (vcfHeaderVersion != vcfHeader.vcfHeaderVersion) return false;
         if (!mMetaData.equals(vcfHeader.mMetaData)) return false;
         if (mGenotypeSampleNames != null ? !mGenotypeSampleNames.equals(vcfHeader.mGenotypeSampleNames) :
                 vcfHeader.mGenotypeSampleNames != null)
@@ -588,8 +580,7 @@ public boolean equals(final Object o) {
 
     @Override
     public int hashCode() {
-        int result = vcfHeaderVersion.hashCode();
-        result = 31 * result + mMetaData.hashCode();
+        int result = mMetaData.hashCode();
         result = 31 * result + (mGenotypeSampleNames != null ? mGenotypeSampleNames.hashCode() : 0);
         result = 31 * result + (samplesWereAlreadySorted ? 1 : 0);
         result = 31 * result + (sampleNamesInOrder != null ? sampleNamesInOrder.hashCode() : 0);
@@ -614,26 +605,29 @@ private VCFHeaderVersion initializeHeaderVersion() {
     }
 
     private void validateVersionTransition(
-            final VCFHeaderVersion previousVersion,
+            final VCFHeaderLine newHeaderLine,
+            final VCFHeaderVersion currentVersion,
             final VCFHeaderVersion newVersion) {
-        final int compareTo = newVersion.compareTo(previousVersion);
+        final int compareTo = newVersion.compareTo(currentVersion);
+
+        // We only allow going forward to a newer version, not backwards to an older one, since there
+        // is really no way to validate old header lines (pre vcfV4.2). If the version moved forward,
+        // revalidate all the lines, otherwise only validate the new header line.
         if (compareTo < 0) {
-            // We only allow going forward to a newer version, not backwards to an older one, since there
-            // is really no way to validate old header lines (pre vcfV4.2). The only way to create a header with
-            // an old version is to create it that way from the start.
-            // to be created with the old version from the start.
             throw new TribbleException(String.format(
                     "When changing a header version, the new header version %s must be > the previous version %s",
                     newVersion,
-                    previousVersion));
+                    currentVersion));
         } else if (compareTo > 0) {
             logger.debug(() -> String.format("Updating VCFHeader version from %s to %s",
-                    previousVersion.getVersionString(),
+                    currentVersion.getVersionString(),
                     newVersion.getVersionString()));
 
             // the version moved forward, so validate ALL of the existing lines in the list to ensure
             // that the transition is valid
             mMetaData.validateMetaDataLines(newVersion);
+        } else {
+            newHeaderLine.validateForVersion(newVersion);
         }
     }
 
diff --git a/src/main/java/htsjdk/variant/vcf/VCFMetaDataLines.java b/src/main/java/htsjdk/variant/vcf/VCFMetaDataLines.java
index 843fdf98cc..5f68a61113 100644
--- a/src/main/java/htsjdk/variant/vcf/VCFMetaDataLines.java
+++ b/src/main/java/htsjdk/variant/vcf/VCFMetaDataLines.java
@@ -144,10 +144,11 @@ public VCFHeaderLine findEquivalentHeaderLine(final VCFHeaderLine queryLine) {
     }
 
     /**
-     * Validate all metadata lines except the file format line against a target version.
+     * Validate all metadata lines, excluding the file format line, against a target version.
      * Throws {@link TribbleException.VersionValidationFailure} if any line is incompatible with the given version.
      * @param targetVersion the target version to validate against
-     * @throws TribbleException if any existing line fails to validate against {@code targetVersion}
+     * @throws TribbleException.VersionValidationFailure if any existing line fails to validate against
+     * {@code targetVersion}
      */
     //TODO: we need to tell users how to resolve the case where this fails due to version validation
     //i.e, use a custom upgrade tool
diff --git a/src/test/java/htsjdk/variant/vcf/VCFHeaderUnitTest.java b/src/test/java/htsjdk/variant/vcf/VCFHeaderUnitTest.java
index 8ee9ccab26..b604b91899 100644
--- a/src/test/java/htsjdk/variant/vcf/VCFHeaderUnitTest.java
+++ b/src/test/java/htsjdk/variant/vcf/VCFHeaderUnitTest.java
@@ -515,15 +515,6 @@ public void testVersionUpgradeWithValidationFailure() {
         vcfHeader.addMetaDataLine(VCFHeader.makeHeaderVersionLine(VCFHeaderVersion.VCF4_3));
     }
 
-    @Test(expectedExceptions = TribbleException.class)
-    public void testAddLineWithValidationFailure() {
-        // create a 4.3 header, and then try to add an old-style pedigree line (one that has no ID)
-        // which should cause a failure
-        final VCFHeader vcfHeader = new VCFHeader(VCFHeader.makeHeaderVersionLineSet(VCFHeaderVersion.VCF4_3));
-        vcfHeader.addMetaDataLine(new VCFHeaderLine(VCFConstants.PEDIGREE_HEADER_KEY, "<Name_0=G0-ID,Name_1=G1-ID>"));
-    }
-
-
     @Test(expectedExceptions = TribbleException.class)
     public void testConstructorRequiresFileFormatLine() {
         final Set<VCFHeaderLine> metaDataSet = VCFHeaderUnitTestData.getV42HeaderLinesWITHOUTFormatString(); // 4.2 header is compatible with all 4.x versions
@@ -579,6 +570,14 @@ public void testAddMetaDataLineInvalidForVersion() {
         header.addMetaDataLine(new VCFPedigreeHeaderLine(attributes));
     }
 
+    @Test(expectedExceptions = TribbleException.class)
+    public void testAddMetaDataLineWithValidationFailure() {
+        // create a 4.3 header, and then try to add an old-style pedigree line (one that has no ID)
+        // which should cause a failure
+        final VCFHeader vcfHeader = new VCFHeader(VCFHeader.makeHeaderVersionLineSet(VCFHeaderVersion.VCF4_3));
+        vcfHeader.addMetaDataLine(new VCFHeaderLine(VCFConstants.PEDIGREE_HEADER_KEY, "<Name_0=G0-ID,Name_1=G1-ID>"));
+    }
+
     @Test(expectedExceptions = TribbleException.class)
     public void testAddMetaDataLineFileFormat() {
         final Set<VCFHeaderLine> metaDataSet = VCFHeaderUnitTestData.getV42HeaderLinesWITHOUTFormatString(); // this (4.2) header is compatible with all 4.x versions

From ca31a2b017e069ecbe3ed7b02912a0fbd8eeb8ac Mon Sep 17 00:00:00 2001
From: Chris Norman <cnorman@broadinstitute.org>
Date: Mon, 15 Nov 2021 11:16:20 -0500
Subject: [PATCH 04/22] Eliminate redundant modeling of file format lines in
 VCFMetaDataLines.

---
 .../htsjdk/variant/vcf/VCFMetaDataLines.java  | 131 ++++++++++--------
 .../variant/vcf/VCFHeaderMergerUnitTest.java  |   3 +-
 .../htsjdk/variant/vcf/VCFHeaderUnitTest.java |  20 +++
 .../variant/vcf/VCFHeaderUnitTestData.java    |   2 +-
 .../variant/vcf/VCFMetaDataLinesUnitTest.java |  26 +++-
 5 files changed, 113 insertions(+), 69 deletions(-)

diff --git a/src/main/java/htsjdk/variant/vcf/VCFMetaDataLines.java b/src/main/java/htsjdk/variant/vcf/VCFMetaDataLines.java
index 5f68a61113..97f208e7b4 100644
--- a/src/main/java/htsjdk/variant/vcf/VCFMetaDataLines.java
+++ b/src/main/java/htsjdk/variant/vcf/VCFMetaDataLines.java
@@ -13,16 +13,16 @@
  * Class for managing the set of VCFHeaderLines maintained by a VCFHeader.
  *
  * Since this class is used to incrementally build up a set of header lines for use with a VCFHeader,
- * it does not require that the list always contain a fileformat line (its VCFHeader's job to enforce
+ * it does not require that the list always contain a file format line (it's VCFHeader's job to enforce
  * that condition).
  *
  * This class maintains several invariants:
  *
- *  - The list keeps track of the "current version" by tracking whether a version line (a line that
- *    establishes the VCFHeaderVersion, such as format/fileformat line) is contained in the list. If
- *    no version line has been added, the list will have a null current version, and contain 0 version
- *    lines. If a version line has been added, it will have a non-null version, and contain 1 version line.
- *    If the version line is manually removed, the "current version" is reset to null.
+ *  - The "current version" of the lines is tracked by recording whether a version line (a line that
+ *    establishes the VCFHeaderVersion, such as format/fileformat line) has been added to the list. If
+ *    no version line has been added, the list will have a null current version; if a version line has
+ *    been added, it will have a non-null version. If the version line is manually removed, the "current
+ *    version" is reset to null.
  *
  *  - Each contig line that is retained is guaranteed to have a unique contig index. This does
  *    NOT guarantee that the contig indices are contiguous, or ordered, only that they are unique.
@@ -34,7 +34,7 @@
  *    getInfoHeaderLines(), but would still be serialized on write.)
  *
  *    This class does NOT validate that the lines contained are valid for the current version (that is
- *    the caller's responsibilty).
+ *    the caller's responsibility).
  */
 //Visible to allow disq Kryo registration for serialization
 @InternalAPI
@@ -53,9 +53,10 @@ final class VCFMetaDataLines implements Serializable {
     private VCFHeaderVersion vcfVersion;
 
     /**
-     * Add all metadata lines from Set. If a duplicate line is encountered (duplicate content for
-     * unstructured lines with identical keys, or duplicate key/ID pair for structured lines), only
-     * the new line will be retained.
+     * Add all metadata lines from Set. If an equivalent line already exists (any existing file format
+     * line if the new line is an unstructured file format line; any existing identical line if the new
+     * line is an unstructured non-file format line; or any existing line with a duplicate key/ID pair
+     * if the new line is a structured line), only the new line will be retained.
      *
      * @param newMetaData Set of lines to be added to the list.
      * @throws IllegalArgumentException if a version is established or if any line fails validation for that version
@@ -65,9 +66,10 @@ public void addMetaDataLines(final Set<VCFHeaderLine> newMetaData) {
     }
 
     /**
-     * Add a metadata line to the list. If a duplicate line is encountered (duplicate content for
-     * unstructured lines with identical keys, or duplicate key/ID pair for structured lines), only
-     * the newest line will be retained.
+     * Add a metadata line to the list. If an equivalent line already exists (any existing file format
+     * line if the new line is an unstructured file format line; any existing identical line if the new
+     * line is an unstructured non-file format line; or any existing line with a duplicate key/ID pair
+     * if the new line is a structured line), only the new line will be retained.
      *
      * @param newMetaDataLine header line to attempt to add
      * @returns an existing (equivalent) header line that was replaced by newMetaDataLine, if any,
@@ -94,25 +96,32 @@ public VCFHeaderLine addMetaDataLine(final VCFHeaderLine newMetaDataLine) {
     }
 
     /**
-     * Remove a metadata line from the list. This is the inverse of addMetaDataLine - it removes a
-     * line that has an identical key and value as lineToRemove if lineToRemove is an unstructured (non-ID)
-     * but if lineToRemove is a structured line, it will remove the line that has the same key/ID pair as
-     * lineToRemove, regardless of other content.
+     * Remove an equivalent metadata line from the list. This is the inverse of addMetaDataLine, and removes
+     * any equivalent line that already exists (any existing file format line if the line to be removed is
+     * an unstructured file format line; any existing identical line if the line to be removed is an unstructured
+     * non-file format line, or any existing line with a duplicate key/ID pair if the line to be removed is a
+     * structured line).
      *
      * The removed value is returned, and can be used by the caller to determine if the removed line has a
      * different value than the line presented.
      *
      * @param lineToRemove the header line to remove
-     * @return The actual headerline removed, or null of no equivalent headerline was found to remove
+     * @return The actual header line removed, or null if no equivalent header line was found to remove
      */
     public VCFHeaderLine removeMetaDataLine(final VCFHeaderLine lineToRemove) {
-        final VCFHeaderLine removedLine = mMetaData.remove(makeKeyForLine(lineToRemove));
-        if (removedLine != null) {
-            // only synchronize the dependent version and contig map variables if a line was ACTUALLY removed
-            if (VCFHeaderVersion.isFormatString(removedLine.getKey())) {
+        VCFHeaderLine removedLine = null;
+        if (VCFHeaderVersion.isFormatString(lineToRemove.getKey()) && vcfVersion != null) {
+            final VCFHeaderVersion versionToRemove = VCFHeaderVersion.toHeaderVersion(lineToRemove.getValue());
+            if (versionToRemove.equals(vcfVersion)) {
+                // simulate "removal" of the line by recreating the line that we're dropping as the return value
+                removedLine = VCFHeader.makeHeaderVersionLine(versionToRemove);
                 vcfVersion = null;
-            } else if (lineToRemove.isIDHeaderLine() && lineToRemove.getKey().equals(VCFHeader.CONTIG_KEY)) {
-                removeFromContigIndexMap((VCFContigHeaderLine) lineToRemove);
+            }
+        } else {
+            removedLine = mMetaData.remove(makeKeyForLine(lineToRemove));
+            // only synchronize the dependent contig map variables if a line was ACTUALLY removed
+            if (removedLine != null && lineToRemove.isIDHeaderLine() && lineToRemove.getKey().equals(VCFHeader.CONTIG_KEY)) {
+                removeFromContigIndexMap((VCFContigHeaderLine) removedLine);
             }
         }
         return removedLine;
@@ -128,19 +137,29 @@ public VCFHeaderVersion getVCFVersion() {
 
     /**
      * Return the existing line from the list that is "equivalent" to the query line, where
-     * equivalent is defined as having the same key and value for unstructured header lines, or the
-     * same key and ID, but not necessarily the same value (for structured header lines). The
-     * "equivalent" line returned by this method is not guaranteed to be equal to the queryLine,
-     * in the case where the queryLine is an ID line.
+     * equivalent is defined as having the same key and value for unstructured header lines,
+     * or the same key and ID, but not necessarily the same value, for structured header lines.
+     * The "equivalent" line returned by this method is not guaranteed to be equal to the
+     * queryLine, in the case where the queryLine is an ID line.
      *
-     * The method is a way to ask "if the queryLine were added to this object via addMetaDataLine, what
-     * line, if any, would it replace".
+     * The method is a way to ask "if the queryLine were added to this object via addMetaDataLine,
+     * what line, if any, would it replace".
+     *
+     * Note that for file format (VCF version) lines, this returns an existing file format line
+     * if there is one, even if the key is different than the query line (since that behavior
+     * mirrors the behavior of addMetaDataLine and removeMetaDataLine).
      *
      * @param queryLine the source line to use to check for equivalents
      * @return The existing header line of the type/key provided, otherwise NULL.
      */
     public VCFHeaderLine findEquivalentHeaderLine(final VCFHeaderLine queryLine) {
-        return mMetaData.get(makeKeyForLine(queryLine));
+        if (VCFHeaderVersion.isFormatString(queryLine.getKey())) {
+            return vcfVersion == null ?
+                    null :
+                    VCFHeader.makeHeaderVersionLine(vcfVersion);
+        } else {
+            return mMetaData.get(makeKeyForLine(queryLine));
+        }
     }
 
     /**
@@ -183,7 +202,7 @@ public Collection<VCFValidationFailure> getValidationErrors(final VCFHeaderVersi
      * @return a set of the meta data
      */
     public Set<VCFHeaderLine> getMetaDataInInputOrder() {
-        return Collections.unmodifiableSet(new LinkedHashSet<>(mMetaData.values()));
+        return makeMetaDataLineSet(mMetaData.values());
     }
 
     /**
@@ -197,7 +216,7 @@ public Set<VCFHeaderLine> getMetaDataInSortedOrder() {
         // `contains` implementation based on comparator equality that can lead to inconsistent
         // results for header line types like VCFContigHeaderLine that have a compareTo
         // implementation that is inconsistent with equals.
-        return Collections.unmodifiableSet(new LinkedHashSet<>(new TreeSet<>(mMetaData.values())));
+        return makeMetaDataLineSet(new TreeSet<>(mMetaData.values()));
     }
 
     /**
@@ -286,7 +305,7 @@ public VCFFilterHeaderLine getFilterHeaderLine(final String id) {
      * VCFHeaderLine that is not a contig, info, format or filter header line.
      */
     public Collection<VCFHeaderLine> getOtherHeaderLines() {
-        return mMetaData.values().stream().filter(
+        return getMetaDataInInputOrder().stream().filter(
             hl ->
                 !hl.getKey().equals(VCFConstants.CONTIG_HEADER_KEY) &&
                 !hl.getKey().equals(VCFConstants.INFO_HEADER_KEY) &&
@@ -297,31 +316,11 @@ public Collection<VCFHeaderLine> getOtherHeaderLines() {
     }
 
     /**
-     * The version/fileformat header line if one exists, otherwise null.
-     * @return The version/fileformat header line if one exists, otherwise null.
+     * A version/fileformat header line representing the version for these lines, or null if no version has been established.
+     * @return The version file format header line if a version has been established, otherwise null.
      */
     public VCFHeaderLine getFileFormatLine() {
-        // find any existing version line(s). since there are multiple possible keys that
-        // represent version lines (old V3 specs used "format" instead of "fileformat")
-        final List<VCFHeaderLine> existingVersionLines = mMetaData.values()
-                .stream()
-                .filter(line -> VCFHeaderVersion.isFormatString(line.getKey()))
-                .collect(Collectors.toList());
-
-        // This class doesn't mandate that the list it maintains always contains a fileformat line
-        // (its VCFHeader's job to maintain that condition for the header).
-        if (!existingVersionLines.isEmpty()) {
-            if (existingVersionLines.size() > 1) {
-                throw new IllegalStateException(
-                        String.format("The metadata lines class contains more than one version line (%s)",
-                                existingVersionLines.stream()
-                                        .map(VCFHeaderLine::toString)
-                                        .collect(Collectors.joining(","))));
-            }
-            return existingVersionLines.get(0);
-        } else {
-            return null;
-        }
+        return vcfVersion == null ? null : VCFHeader.makeHeaderVersionLine(vcfVersion);
     }
 
     @Override
@@ -469,7 +468,6 @@ private final VCFHeaderLine updateVersion(final VCFHeaderLine newMetaDataLine) {
                 VCFHeaderVersion.isFormatString(newMetaDataLine.getKey()),
                 "a file format line is required");
 
-        final VCFHeaderLine currentVersionLine = getFileFormatLine();
         final VCFHeaderVersion newVCFVersion = VCFHeaderVersion.toHeaderVersion(newMetaDataLine.getValue());
 
         if (vcfVersion == null) {
@@ -480,12 +478,23 @@ private final VCFHeaderLine updateVersion(final VCFHeaderLine newMetaDataLine) {
                     vcfVersion +
                     " to " +
                     newVCFVersion);
-            removeFromMapOrThrow(currentVersionLine);
         }
 
-        mMetaData.put(makeKeyForLine(newMetaDataLine), newMetaDataLine);
+        final VCFHeaderLine oldVersionLine = getFileFormatLine();
         vcfVersion = newVCFVersion;
-        return currentVersionLine;
+        return oldVersionLine;
+    }
+
+    // make a new metadata line set to hand out to callers that includes the file format (version) line first, if a version has been established
+    private Set<VCFHeaderLine> makeMetaDataLineSet(final Collection<VCFHeaderLine> orderedLines) {
+        if (vcfVersion != null) {
+            final Set<VCFHeaderLine> orderedSet = new LinkedHashSet<>(orderedLines.size() + 1);
+            orderedSet.add(VCFHeader.makeHeaderVersionLine(vcfVersion));
+            orderedSet.addAll(orderedLines);
+            return Collections.unmodifiableSet(orderedSet);
+        } else {
+            return Collections.unmodifiableSet(new LinkedHashSet<>(orderedLines));
+        }
     }
 
     // composite keys used by the metadata lines map
diff --git a/src/test/java/htsjdk/variant/vcf/VCFHeaderMergerUnitTest.java b/src/test/java/htsjdk/variant/vcf/VCFHeaderMergerUnitTest.java
index 1be8bdf085..818aae84a0 100644
--- a/src/test/java/htsjdk/variant/vcf/VCFHeaderMergerUnitTest.java
+++ b/src/test/java/htsjdk/variant/vcf/VCFHeaderMergerUnitTest.java
@@ -92,8 +92,7 @@ public void testMergeValidVersions(final List<VCFHeaderVersion> headerVersions,
         final Set<VCFHeaderLine> mergedHeaderLines = doHeaderMergeForVersions(headerVersions);
         final VCFMetaDataLines metaDataLines = new VCFMetaDataLines();
         metaDataLines.addMetaDataLines(mergedHeaderLines);
-        final VCFHeaderLine versionLine = metaDataLines.getFileFormatLine();
-        Assert.assertEquals(VCFHeaderVersion.toHeaderVersion(versionLine.getValue()), expectedVersion);
+        Assert.assertEquals(metaDataLines.getVCFVersion(), expectedVersion);
 
         // now create a new header using the merged VersionLines, and make sure *it* has the expected version
         final VCFHeader mergedHeader = new VCFHeader(mergedHeaderLines);
diff --git a/src/test/java/htsjdk/variant/vcf/VCFHeaderUnitTest.java b/src/test/java/htsjdk/variant/vcf/VCFHeaderUnitTest.java
index b604b91899..9f51901f91 100644
--- a/src/test/java/htsjdk/variant/vcf/VCFHeaderUnitTest.java
+++ b/src/test/java/htsjdk/variant/vcf/VCFHeaderUnitTest.java
@@ -600,6 +600,26 @@ public void testAddMetaDataLineFileFormat() {
         vcfHeader.addMetaDataLine(VCFHeader.makeHeaderVersionLine(VCFHeaderVersion.VCF4_1));
     }
 
+    @Test
+    public void testFileFormatLineFirstInSet() {
+        final Set<VCFHeaderLine> orderedLineSet = new LinkedHashSet<>();
+        orderedLineSet.addAll(VCFHeaderUnitTestData.getV42HeaderLinesWITHOUTFormatString());
+        orderedLineSet.stream().forEach(l -> Assert.assertFalse(VCFHeaderVersion.isFormatString(l.getKey())));
+        // add the file format line last, so it can only be reported first if the header puts it there
+        orderedLineSet.add(VCFHeader.makeHeaderVersionLine(VCFHeader.DEFAULT_VCF_VERSION));
+        final VCFHeader vcfHeader = new VCFHeader(orderedLineSet, Collections.EMPTY_SET);
+        // the first line reported in input order must be the file format line
+        final Collection<VCFHeaderLine> inputOrderLines = vcfHeader.getMetaDataInInputOrder();
+        final Optional<VCFHeaderLine> optFirstInputOrderLine = inputOrderLines.stream().findFirst();
+        Assert.assertTrue(optFirstInputOrderLine.isPresent());
+        Assert.assertTrue(VCFHeaderVersion.isFormatString(optFirstInputOrderLine.get().getKey()));
+        // the first line reported in sorted order must be the file format line as well
+        final Collection<VCFHeaderLine> sortedOrderLines = vcfHeader.getMetaDataInSortedOrder();
+        final Optional<VCFHeaderLine> optFirstSortedOrderLine = sortedOrderLines.stream().findFirst();
+        Assert.assertTrue(optFirstSortedOrderLine.isPresent());
+        Assert.assertTrue(VCFHeaderVersion.isFormatString(optFirstSortedOrderLine.get().getKey()));
+    }
+
     @Test
     public void testPreserveSequenceDictionaryAttributes() {
         // Round trip a SAMSequenceDictionary with attributes, through a VCFHeader, and back
diff --git a/src/test/java/htsjdk/variant/vcf/VCFHeaderUnitTestData.java b/src/test/java/htsjdk/variant/vcf/VCFHeaderUnitTestData.java
index 7b57a19b5a..286fcecfa6 100644
--- a/src/test/java/htsjdk/variant/vcf/VCFHeaderUnitTestData.java
+++ b/src/test/java/htsjdk/variant/vcf/VCFHeaderUnitTestData.java
@@ -147,7 +147,7 @@ public static Set<VCFHeaderLine> getV42HeaderLinesWITHOUTFormatString() {
         metaDataLines.addMetaDataLines(metaDataSet);
         final VCFHeaderLine versionLine = metaDataLines.getFileFormatLine();
         Assert.assertEquals(
-                VCFHeaderVersion.toHeaderVersion(versionLine.getValue()),
+                metaDataLines.getVCFVersion(),
                 VCFHeaderVersion.VCF4_2);
 
         // remove the 4.2 version line from the original set, verify, and return the set with no fileformat string
diff --git a/src/test/java/htsjdk/variant/vcf/VCFMetaDataLinesUnitTest.java b/src/test/java/htsjdk/variant/vcf/VCFMetaDataLinesUnitTest.java
index 2e41536abe..f79331a7eb 100644
--- a/src/test/java/htsjdk/variant/vcf/VCFMetaDataLinesUnitTest.java
+++ b/src/test/java/htsjdk/variant/vcf/VCFMetaDataLinesUnitTest.java
@@ -172,11 +172,6 @@ public void testAddRemoveDuplicateStructuredLine() {
         Assert.assertEquals(md.getFilterLines().size(), beforeFilterSize);
     }
 
-//    @Test
-//    public void testAddRemoveContigLine() {
-//        final VCFHeaderUnitTestData unitTestData = new VCFHeaderUnitTestData();
-//    }
-
     @Test
     public void testHasEquivalentHeaderLinePositive() {
         final VCFHeaderUnitTestData unitTestData = new VCFHeaderUnitTestData();
@@ -350,5 +345,26 @@ public void testRemoveAndReplaceContigLines() {
         Assert.assertEquals(sortedLines3.get(2), vcfContigLine2);
     }
 
+    @Test
+    public void testFileFormatLineFirstInSet() {
+        final Set<VCFHeaderLine> orderedLineSet = new LinkedHashSet<>();
+        orderedLineSet.addAll(VCFHeaderUnitTestData.getV42HeaderLinesWITHOUTFormatString());
+        orderedLineSet.stream().forEach(l -> Assert.assertFalse(VCFHeaderVersion.isFormatString(l.getKey())));
+        // add the file format line last, so it can only be reported first if VCFMetaDataLines puts it there
+        orderedLineSet.add(VCFHeader.makeHeaderVersionLine(VCFHeader.DEFAULT_VCF_VERSION));
+        final VCFMetaDataLines metaDataLines = new VCFMetaDataLines();
+        metaDataLines.addMetaDataLines(orderedLineSet);
+        // the first line reported in input order must be the file format line
+        final Collection<VCFHeaderLine> inputOrderLines = metaDataLines.getMetaDataInInputOrder();
+        final Optional<VCFHeaderLine> optFirstInputOrderLine = inputOrderLines.stream().findFirst();
+        Assert.assertTrue(optFirstInputOrderLine.isPresent());
+        Assert.assertTrue(VCFHeaderVersion.isFormatString(optFirstInputOrderLine.get().getKey()));
+        // the first line reported in sorted order must be the file format line as well (query the sorted view, not the input-order view again)
+        final Collection<VCFHeaderLine> sortedOrderLines = metaDataLines.getMetaDataInSortedOrder();
+        final Optional<VCFHeaderLine> optFirstSortedOrderLine = sortedOrderLines.stream().findFirst();
+        Assert.assertTrue(optFirstSortedOrderLine.isPresent());
+        Assert.assertTrue(VCFHeaderVersion.isFormatString(optFirstSortedOrderLine.get().getKey()));
+    }
+
 }
 

From d23389121ee87245c38c2737599dc3a4b37b8e0d Mon Sep 17 00:00:00 2001
From: Chris Norman <cnorman@broadinstitute.org>
Date: Mon, 15 Nov 2021 15:16:54 -0500
Subject: [PATCH 05/22] More code review comments.

---
 src/main/java/htsjdk/variant/vcf/VCFCompoundHeaderLine.java | 4 ++--
 src/test/java/htsjdk/variant/vcf/AbstractVCFCodecTest.java  | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/main/java/htsjdk/variant/vcf/VCFCompoundHeaderLine.java b/src/main/java/htsjdk/variant/vcf/VCFCompoundHeaderLine.java
index 60eb4fc90f..7f0f255883 100644
--- a/src/main/java/htsjdk/variant/vcf/VCFCompoundHeaderLine.java
+++ b/src/main/java/htsjdk/variant/vcf/VCFCompoundHeaderLine.java
@@ -323,11 +323,11 @@ private int decodeCount(final String countString, final VCFHeaderLineCount reque
                     // This check is here on behalf of INFO lines (which are the only header line type allowed to have Flag
                     // type). A Flag type with a count value other than 0 violates the spec (at least v4.2 and v4.3), but
                     // to retain backward compatibility with previous implementations, we accept (and repair) and the line here.
-                    updateGenericField(NUMBER_ATTRIBUTE, "0");
-                    lineCount = 0;
                     logger.warn(String.format("FLAG fields must have a count value of 0, but saw count %d for header line %s. A value of 0 will be used",
                             lineCount,
                             getID()));
+                    updateGenericField(NUMBER_ATTRIBUTE, "0");
+                    lineCount = 0;
                 }
             } else if (lineCount <= 0) {
                 throw new TribbleException.InvalidHeader(
diff --git a/src/test/java/htsjdk/variant/vcf/AbstractVCFCodecTest.java b/src/test/java/htsjdk/variant/vcf/AbstractVCFCodecTest.java
index 97e7493a6f..9709af8cc6 100644
--- a/src/test/java/htsjdk/variant/vcf/AbstractVCFCodecTest.java
+++ b/src/test/java/htsjdk/variant/vcf/AbstractVCFCodecTest.java
@@ -83,7 +83,7 @@ public Object[][] otherHeaderLines() {
                 { "key=<", new VCFHeaderLine("key", "<") },
                 // taken from Funcotator test file as ##ID=<Description="ClinVar Variation ID">
                 // technically, this is invalid due to the lack of an "ID" attribute, but it should still parse
-                // into a VCFHeaderLine (but noa VCFSimpleHeaderLine
+                // into a VCFHeaderLine (just not a VCFSimpleHeaderLine)
                 { "ID=<Description=\"ClinVar Variation ID\">",
                         new VCFHeaderLine("ID", "<Description=\"ClinVar Variation ID\">") },
 		};

From e9178005150be55c81baa6438a6852407c8ea85c Mon Sep 17 00:00:00 2001
From: Anders Leung <anders.leung@ga4gh.org>
Date: Thu, 15 Apr 2021 13:23:09 -0400
Subject: [PATCH 06/22] Add VCF 4.3 writing

---
 src/main/java/htsjdk/samtools/Defaults.java   |  11 +
 .../variantcontext/GenotypeBuilder.java       |  62 +++++-
 .../variantcontext/VariantContext.java        | 116 +++++++---
 .../variantcontext/VariantContextBuilder.java |  23 +-
 .../variantcontext/writer/BCF2Writer.java     |   3 +-
 .../writer/VCFVersionUpgradePolicy.java       |  40 ++++
 .../variantcontext/writer/VCFWriter.java      |  70 ++----
 .../writer/VariantContextWriterBuilder.java   |  24 +-
 .../htsjdk/variant/vcf/AbstractVCFCodec.java  | 113 ++++++----
 .../variant/vcf/VCFCompoundHeaderLine.java    |  22 +-
 .../java/htsjdk/variant/vcf/VCFEncoder.java   |  94 +++++---
 .../htsjdk/variant/vcf/VCFFileReader.java     |  63 +++++-
 .../java/htsjdk/variant/vcf/VCFHeader.java    |   6 +-
 .../htsjdk/variant/vcf/VCFHeaderLine.java     |  48 ++--
 .../htsjdk/variant/vcf/VCFInfoHeaderLine.java |  13 +-
 .../htsjdk/variant/vcf/VCFMetaDataLines.java  |  25 ++-
 .../vcf/VCFPassThruTextTransformer.java       |  10 +
 .../vcf/VCFPercentEncodedTextTransformer.java | 207 +++++++++++++++---
 .../variant/vcf/VCFSimpleHeaderLine.java      |  62 +++++-
 .../variant/vcf/VCFTextTransformer.java       |   8 +
 .../variant/vcf/VCFVersionUpgrader.java       |  30 +++
 .../java/htsjdk/variant/VariantBaseTest.java  |  18 +-
 .../variant/bcf2/BCF2UtilsUnitTest.java       |  36 +--
 .../variantcontext/GenotypeBuilderTest.java   |  69 ++++++
 .../VariantContextBuilderTest.java            |   5 +-
 .../VariantContextUnitTest.java               |   5 +-
 .../variant/vcf/VCFCodec43FeaturesTest.java   |  89 +++++++-
 .../vcf/VCFCompoundHeaderLineUnitTest.java    |   3 +-
 .../vcf/VCFContigHeaderLineUnitTest.java      |   3 +-
 .../variant/vcf/VCFHeaderLineUnitTest.java    |   5 -
 .../htsjdk/variant/vcf/VCFHeaderUnitTest.java |  98 ++++++---
 .../variant/vcf/VCFHeaderUnitTestData.java    |   2 +
 .../vcf/VCFInfoHeaderLineUnitTest.java        |   3 +-
 .../variant/vcf/VCFMetaDataLinesUnitTest.java |  19 ++
 .../vcf/VCFStandardHeaderLinesUnitTest.java   |   4 +-
 .../variant/vcf/VCFTextTransformerTest.java   |  91 +++++---
 .../variant/diagnosis_targets_testfile.vcf    |   2 +-
 .../vcf43/42AutomaticallyConvertible.vcf      |  90 ++++++++
 .../htsjdk/variant/vcf43/42Pedigree.vcf       |  91 ++++++++
 .../variant/vcf43/invalid43ContigName.vcf     |  90 ++++++++
 .../variant/vcf43/valid43ContigName.vcf       |  90 ++++++++
 41 files changed, 1485 insertions(+), 378 deletions(-)
 create mode 100644 src/main/java/htsjdk/variant/variantcontext/writer/VCFVersionUpgradePolicy.java
 create mode 100644 src/main/java/htsjdk/variant/vcf/VCFVersionUpgrader.java
 create mode 100644 src/test/resources/htsjdk/variant/vcf43/42AutomaticallyConvertible.vcf
 create mode 100644 src/test/resources/htsjdk/variant/vcf43/42Pedigree.vcf
 create mode 100644 src/test/resources/htsjdk/variant/vcf43/invalid43ContigName.vcf
 create mode 100644 src/test/resources/htsjdk/variant/vcf43/valid43ContigName.vcf

diff --git a/src/main/java/htsjdk/samtools/Defaults.java b/src/main/java/htsjdk/samtools/Defaults.java
index b3db211e20..5aa3e9052e 100644
--- a/src/main/java/htsjdk/samtools/Defaults.java
+++ b/src/main/java/htsjdk/samtools/Defaults.java
@@ -1,6 +1,7 @@
 package htsjdk.samtools;
 
 import htsjdk.samtools.util.Log;
+import htsjdk.variant.variantcontext.writer.VCFVersionUpgradePolicy;
 
 import java.io.File;
 import java.util.Collections;
@@ -115,6 +116,11 @@ public class Defaults {
      */
     public static final boolean STRICT_VCF_VERSION_VALIDATION;
 
+    /**
+     * How to treat files from VCF versions older than the current version. Default = UPGRADE_OR_FALLBACK
+     */
+    public static final VCFVersionUpgradePolicy VCF_VERSION_TRANSITION_POLICY;
+
 
     public static final String SAMJDK_PREFIX = "samjdk.";
     static {
@@ -140,6 +146,10 @@ public class Defaults {
         SRA_LIBRARIES_DOWNLOAD = getBooleanProperty("sra_libraries_download", false);
         DISABLE_SNAPPY_COMPRESSOR = getBooleanProperty(DISABLE_SNAPPY_PROPERTY_NAME, false);
         STRICT_VCF_VERSION_VALIDATION = getBooleanProperty("strict_version_validation", true);
+        VCF_VERSION_TRANSITION_POLICY = VCFVersionUpgradePolicy.valueOf(getStringProperty(
+            "vcf_version_transition_policy",
+            VCFVersionUpgradePolicy.UPGRADE_OR_FALLBACK.name()
+        ));
     }
 
     /**
@@ -163,6 +173,7 @@ public static SortedMap<String, Object> allDefaults(){
         result.put("CUSTOM_READER_FACTORY", CUSTOM_READER_FACTORY);
         result.put("SAM_FLAG_FIELD_FORMAT", SAM_FLAG_FIELD_FORMAT);
         result.put("DISABLE_SNAPPY_COMPRESSOR", DISABLE_SNAPPY_COMPRESSOR);
+        result.put("VCF_VERSION_TRANSITION_POLICY", VCF_VERSION_TRANSITION_POLICY);
         return Collections.unmodifiableSortedMap(result);
     }
 
diff --git a/src/main/java/htsjdk/variant/variantcontext/GenotypeBuilder.java b/src/main/java/htsjdk/variant/variantcontext/GenotypeBuilder.java
index 483e1c617d..fd6bdd1fe6 100644
--- a/src/main/java/htsjdk/variant/variantcontext/GenotypeBuilder.java
+++ b/src/main/java/htsjdk/variant/variantcontext/GenotypeBuilder.java
@@ -25,15 +25,18 @@
 
 package htsjdk.variant.variantcontext;
 
-import htsjdk.tribble.util.ParsingUtils;
+import htsjdk.tribble.TribbleException;
 import htsjdk.variant.vcf.VCFConstants;
 
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
 
 /**
  * A builder class for genotypes
@@ -71,7 +74,7 @@ public final class GenotypeBuilder {
     private int[] AD = null;
     private int[] PL = null;
     private Map<String, Object> extendedAttributes = null;
-    private String filters = null;
+    private Set<String> filters;
     private int initialAttributeMapSize = 5;
 
     private final static Map<String, Object> NO_ATTRIBUTES =
@@ -199,7 +202,7 @@ public final void reset(final boolean keepSampleName) {
      */
     public Genotype make() {
         final Map<String, Object> ea = (extendedAttributes == null) ? NO_ATTRIBUTES : extendedAttributes;
-        return new FastGenotype(sampleName, alleles, isPhased, GQ, DP, AD, PL, filters, ea);
+        return new FastGenotype(sampleName, alleles, isPhased, GQ, DP, AD, PL, buildFilterString(), ea);
     }
 
     /**
@@ -216,7 +219,7 @@ public Genotype makeWithShallowCopy() {
         final List<Allele> al = new ArrayList<>(alleles);
         final int[] copyAD = (AD == null) ? null : Arrays.copyOf(AD, AD.length);
         final int[] copyPL = (PL == null) ? null : Arrays.copyOf(PL, PL.length);
-        return new FastGenotype(sampleName, al, isPhased, GQ, DP, copyAD, copyPL, filters, ea);
+        return new FastGenotype(sampleName, al, isPhased, GQ, DP, copyAD, copyPL, buildFilterString(), ea);
     }
 
     /**
@@ -373,12 +376,32 @@ public GenotypeBuilder attribute(final String key, final Object value) {
      * @return this builder
      */
     public GenotypeBuilder filters(final List<String> filters) {
-        if ( filters.isEmpty() )
-            return filter(null);
-        else if ( filters.size() == 1 )
-            return filter(filters.get(0));
-        else
-            return filter(ParsingUtils.join(";", ParsingUtils.sortList(filters)));
+        for (final String filter : filters) {
+            if (!VariantContext.VALID_FILTER.matcher(filter).matches()) {
+                throw new TribbleException("Filter '" + filter +
+                    "' contains an illegal character. It must conform to the regex ;'" + VariantContext.VALID_FILTER);
+            } else if (filter.equals("0")) {
+                throw new TribbleException("Filter cannot use reserved string '0'");
+            }
+        }
+        // Filters must be unique: a repeated filter name is rejected with an exception rather than silently merged
+        final Set<String> uniqueFilters = new HashSet<>(filters.size());
+        for (final String filter : filters) {
+            if (uniqueFilters.contains(filter)) {
+                throw new TribbleException("BUG: Attempting to add duplicate filter " + filter + " at " + this);
+            } else {
+                uniqueFilters.add(filter);
+            }
+        }
+        // The unfiltered marker and the PASS marker are only accepted on their own, never mixed with other filters
+        final boolean hasUnfilteredString = uniqueFilters.contains(VCFConstants.UNFILTERED);
+        final boolean hasPassesString = uniqueFilters.contains(VCFConstants.PASSES_FILTERS_v4);
+        if ((hasUnfilteredString || hasPassesString) && uniqueFilters.size() > 1) {
+            throw new TribbleException("Filters cannot contain missing value '.' or passing value 'PASS' in addition to filters");
+        }
+
+        this.filters = hasPassesString ? null : uniqueFilters; // a lone PASS is stored as null (no filters)
+        return this;
     }
 
     /**
@@ -397,10 +420,27 @@ public GenotypeBuilder filters(final String ... filters) {
      * @return
      */
     public GenotypeBuilder filter(final String filter) {
-        this.filters = VCFConstants.PASSES_FILTERS_v4.equals(filter) ? null : filter;
+        // The string may carry several filters separated by ';': it is split below and the parts are handed to
+        //  filters(...). TODO decide whether the contract should instead allow only one filter and no semicolons
+        if (filter == null || filter.isEmpty() || VCFConstants.PASSES_FILTERS_v4.equals(filter)) {
+            this.filters = null;
+        } else {
+            // Internal adjacent separators such as a;;b produce an empty string in the split array, which is
+            // handled by the valid filter regular expression, which rejects empty filter strings
+            if (filter.startsWith(";") || filter.endsWith(";")) { // split() drops trailing empty strings, so check the ends explicitly
+                throw new TribbleException("Filter string cannot start or end with filter separator ';'");
+            }
+            filters(filter.split(";"));
+        }
         return this;
     }
 
+    private String buildFilterString() {
+        return this.filters == null || this.filters.isEmpty()
+            ? null // no filters recorded: the genotype is built with a null filter string
+            : this.filters.stream().sorted().collect(Collectors.joining(";")); // sorted, then joined with ';'
+    }
+
     /**
      * This genotype is unfiltered
      *
diff --git a/src/main/java/htsjdk/variant/variantcontext/VariantContext.java b/src/main/java/htsjdk/variant/variantcontext/VariantContext.java
index 2d50955bd1..a63d940670 100644
--- a/src/main/java/htsjdk/variant/variantcontext/VariantContext.java
+++ b/src/main/java/htsjdk/variant/variantcontext/VariantContext.java
@@ -1,6 +1,6 @@
 /*
 * Copyright (c) 2012 The Broad Institute
-* 
+*
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
@@ -9,10 +9,10 @@
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
-* 
+*
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
-* 
+*
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
@@ -26,6 +26,7 @@
 package htsjdk.variant.variantcontext;
 
 import htsjdk.beta.plugin.HtsRecord;
+import htsjdk.samtools.util.QualityUtil;
 import htsjdk.tribble.Feature;
 import htsjdk.tribble.TribbleException;
 import htsjdk.tribble.util.ParsingUtils;
@@ -47,7 +48,7 @@
 import java.util.stream.Collectors;
 
 /**
- * 
+ *
  * <h3> High-level overview </h3>
  *
  * The VariantContext object is a single general class system for representing genetic variation data composed of:
@@ -89,7 +90,7 @@
  *<p>
  * A [ref] / T at 10
  *</p>
- *<pre> 
+ *<pre>
  * GenomeLoc snpLoc = GenomeLocParser.createGenomeLoc("chr1", 10, 10);
  *</pre>
  *<p>
@@ -208,7 +209,7 @@
  *
  * <!-- comment by jdenvir: not sure what this tag is supposed to do:-->
  * <!-- <s3> -->
- *     <h3>Fully decoding.</h3>  
+ *     <h3>Fully decoding.</h3>
  *     Currently <code>VariantContext</code>s support some fields, particularly those
  *     stored as generic attributes, to be of any type.  For example, a field AB might
  *     be naturally a floating point number, 0.51, but when it's read into a VC its
@@ -266,6 +267,8 @@ public class VariantContext implements HtsRecord, Feature, Serializable {
     /* cached monomorphic value: null -> not yet computed, False, True */
     private Boolean monomorphic = null;
 
+    private final VCFHeaderVersion version;
+
     /*
      * Determine which genotype fields are in use in the genotypes in VC
      * @return an ordered list of genotype fields in use in VC.  If vc has genotypes this will always include GT first
@@ -321,7 +324,7 @@ public List<String> calcVCFGenotypeKeys(final VCFHeader header) {
     //
     // ---------------------------------------------------------------------------------------------------------
 
-    //no controls and white-spaces characters, no semicolon.
+    // No control or whitespace characters, no semicolons; the filter string cannot be empty
     public static final Pattern VALID_FILTER = Pattern.compile("^[!-:<-~]+$");
 
     public enum Validation {
@@ -396,13 +399,15 @@ private static void validateFilters(final VariantContext variantContext) {
                 return;
             }
 
-            for (String filter : filters) {
+            for (final String filter : filters) {
                 if ( filter == null) {
-                    throw new IllegalStateException("'null' is not a valid filter string.");
+                    throw new TribbleException("'null' is not a valid filter string.");
                 }
                 if (!VALID_FILTER.matcher(filter).matches()) {
-                    throw new IllegalStateException("Filter '" + filter +
+                    throw new TribbleException("Filter '" + filter +
                             "' contains an illegal character. It must conform to the regex ;'" + VALID_FILTER);
+                } else if (filter.equals("0")) {
+                    throw new TribbleException("Filter cannot use reserved string '0'");
                 }
             }
         }
@@ -421,12 +426,14 @@ private static void validateFilters(final VariantContext variantContext) {
      *
      * @param other the VariantContext to copy
      */
-    protected VariantContext(VariantContext other) {
+    protected VariantContext(final VariantContext other) {
         this(other.getSource(), other.getID(), other.getContig(), other.getStart(), other.getEnd(),
                 other.getAlleles(), other.getGenotypes(), other.getLog10PError(),
                 other.getFiltersMaybeNull(),
                 other.getAttributes(),
-                other.fullyDecoded, NO_VALIDATION);
+                other.fullyDecoded,
+                other.version,
+                NO_VALIDATION);
     }
 
     /**
@@ -454,8 +461,9 @@ protected VariantContext(final String source,
                              final Set<String> filters,
                              final Map<String, Object> attributes,
                              final boolean fullyDecoded,
+                             final VCFHeaderVersion version,
                              final EnumSet<Validation> validationToPerform ) {
-        if ( contig == null ) { throw new IllegalArgumentException("Contig cannot be null"); }
+        if ( contig == null || contig.isEmpty() ) { throw new IllegalArgumentException("Contig cannot be null or the empty string"); }
         this.contig = contig;
         this.start = start;
         this.stop = stop;
@@ -488,6 +496,7 @@ protected VariantContext(final String source,
         }
 
         this.fullyDecoded = fullyDecoded;
+        this.version = version;
 
         if ( ! validationToPerform.isEmpty() ) {
             validate(validationToPerform);
@@ -617,7 +626,7 @@ private final Set<Allele> allelesOfGenotypes(Collection<Genotype> genotypes) {
      * <li><strong>Mixed</strong></li>
      * <li>Mix of other classes</li>
      * </ul>
-     * 
+     *
      * Also supports NO_VARIATION type, used to indicate that the site isn't polymorphic in the population
      *
      *
@@ -814,6 +823,10 @@ public String getID() {
         return ID;
     }
 
+    public VCFHeaderVersion getVersion() {
+        return version;
+    }
+
 
     // ---------------------------------------------------------------------------------------------------------
     //
@@ -1654,7 +1667,7 @@ private final Map<String, Object> fullyDecodeAttributes(final Map<String, Object
                 continue; // gross, FT is part of the extended attributes
 
             final VCFCompoundHeaderLine format = VariantContextUtils.getMetaDataForField(header, field);
-            final Object decoded = decodeValue(field, attr.getValue(), format);
+            final Object decoded = decodeValue(field, attr.getValue(), format, header.getVCFHeaderVersion());
 
             if ( decoded != null &&
                     ! lenientDecoding
@@ -1675,26 +1688,36 @@ private final Map<String, Object> fullyDecodeAttributes(final Map<String, Object
         return newAttributes;
     }
 
-    private final Object decodeValue(final String field, final Object value, final VCFCompoundHeaderLine format) {
+    private Object decodeValue(
+        final String field,
+        final Object value,
+        final VCFCompoundHeaderLine format,
+        final VCFHeaderVersion version
+    ) {
+        final boolean percentDecode = version.isAtLeastAsRecentAs(VCFHeaderVersion.VCF4_3);
         if ( value instanceof String ) {
+            final String string = (String) value;
             if ( field.equals(VCFConstants.GENOTYPE_PL_KEY) )
-                return GenotypeLikelihoods.fromPLField((String)value);
+                return GenotypeLikelihoods.fromPLField(string);
+
+            if (field.equals(VCFConstants.GENOTYPE_POSTERIORS_KEY)) {
+                return decodeGPKey(string, version);
+            }
 
-            final String string = (String)value;
             if ( string.indexOf(',') != -1 ) {
                 final String[] splits = string.split(",");
                 final List<Object> values = new ArrayList<>(splits.length);
-                for ( int i = 0; i < splits.length; i++ )
-                    values.add(decodeOne(field, splits[i], format));
+                for (final String split : splits)
+                    values.add(decodeOne(field, split, format, percentDecode));
                 return values;
             } else {
-                return decodeOne(field, string, format);
+                return decodeOne(field, string, format, percentDecode);
             }
-        } else if ( value instanceof List && (((List) value).get(0)) instanceof String ) {
+        } else if ( value instanceof List && (((List<?>) value).get(0)) instanceof String ) {
             final List<String> asList = (List<String>)value;
             final List<Object> values = new ArrayList<>(asList.size());
             for ( final String s : asList )
-                values.add(decodeOne(field, s, format));
+                values.add(decodeOne(field, s, format, percentDecode));
             return values;
         } else {
             return value;
@@ -1703,7 +1726,7 @@ private final Object decodeValue(final String field, final Object value, final V
         // allowMissingValuesComparedToHeader
     }
 
-    private final Object decodeOne(final String field, final String string, final VCFCompoundHeaderLine format) {
+    private Object decodeOne(final String field, final String string, final VCFCompoundHeaderLine format, final boolean percentDecode) {
         try {
             if ( string.equals(VCFConstants.MISSING_VALUE_v4) )
                 return null;
@@ -1711,12 +1734,12 @@ private final Object decodeOne(final String field, final String string, final VC
                 switch ( format.getType() ) {
                     case Character: return string;
                     case Flag:
-                        final boolean b = Boolean.valueOf(string) || string.equals("1");
-                        if ( b == false )
+                        final boolean b = Boolean.parseBoolean(string) || string.equals("1");
+                        if (!b)
                             throw new TribbleException("VariantContext FLAG fields " + field + " cannot contain false values"
                                     + " as seen at " + getContig() + ":" + getStart());
-                        return b;
-                    case String:    return string;
+                        return true;
+                    case String:    return percentDecode ? VCFPercentEncodedTextTransformer.percentDecode(string) : string;
                     case Integer:   return Integer.valueOf(string);
                     case Float:     return VCFUtils.parseVcfDouble(string);
                     default: throw new TribbleException("Unexpected type for field" + field);
@@ -1727,7 +1750,36 @@ private final Object decodeOne(final String field, final String string, final VC
         }
     }
 
-    private final void fullyDecodeGenotypes(final VariantContextBuilder builder, final VCFHeader header) {
+    private static List<Double> decodeGPKey(final String value, final VCFHeaderVersion version) {
+        final String[] splits = value.split(",");
+        // We need to special-case GP because there is a discrepancy in the scale used to record
+        // its values between pre-4.3 and 4.3+ VCF. Pre-4.3 GP is phred scale encoded while
+        // 4.3+ GP is a linear probability, bringing it in line with other standard keys that
+        // use the P suffix (c.f. VCF 4.3 spec section 7.2).
+
+        // Some tools in the wild apparently already use linear scaled GP, so we have to
+        // be careful about converting inputs. We check whether GP values are already linear
+        // scaled by seeing if the values' sum is approximately equal to 1, like we
+        // would expect if the values were linear scale probabilities.
+        // c.f. https://sourceforge.net/p/vcftools/mailman/vcftools-spec/thread/CEBCD558.FA29%25browning%40u.washington.edu/
+        double sum = 0;
+        // Parse every comma-separated entry, accumulating the sum used by the linear-scale check below
+        final List<Double> rawGPValues = new ArrayList<>(splits.length);
+        for (final String s : splits) {
+            final double GP = VCFUtils.parseVcfDouble(s);
+            rawGPValues.add(GP);
+            sum += GP;
+        }
+        // Values are rewritten in place only when they do not already sum to ~1 and the given version is 4.3 or later
+        final boolean wasLinearScale = GeneralUtils.compareDoubles(sum, 1, VCFConstants.VCF_ENCODING_EPSILON) == 0;
+        if (!wasLinearScale && version.isAtLeastAsRecentAs(VCFHeaderVersion.VCF4_3)) {
+            rawGPValues.replaceAll(GP -> QualityUtil.getErrorProbabilityFromPhredScore((int) Math.round(GP))); // NOTE(review): rounds to an int first, so fractional values lose precision - confirm
+        }
+        return rawGPValues;
+
+    }
+
+    private void fullyDecodeGenotypes(final VariantContextBuilder builder, final VCFHeader header) {
         final GenotypesContext gc = new GenotypesContext();
         for ( final Genotype g : getGenotypes() ) {
             gc.add(fullyDecodeGenotypes(g, header));
@@ -1862,9 +1914,9 @@ public int[] getGLIndicesOfAlternateAllele(Allele targetAllele) {
         return GenotypeLikelihoods.getPLIndicesOfAlleles(0, index);
     }
 
-    /** 
-     * Search for the INFO=SVTYPE and return the type of Structural Variant 
-     * @return the StructuralVariantType of null if there is no property SVTYPE 
+    /**
+     * Search for the INFO=SVTYPE and return the type of Structural Variant
+     * @return the StructuralVariantType or null if there is no property SVTYPE
      * */
     public StructuralVariantType getStructuralVariantType() {
         final String svType = this.getAttributeAsString(VCFConstants.SVTYPE, null);
diff --git a/src/main/java/htsjdk/variant/variantcontext/VariantContextBuilder.java b/src/main/java/htsjdk/variant/variantcontext/VariantContextBuilder.java
index fae8d81514..b52ed0a936 100644
--- a/src/main/java/htsjdk/variant/variantcontext/VariantContextBuilder.java
+++ b/src/main/java/htsjdk/variant/variantcontext/VariantContextBuilder.java
@@ -25,7 +25,10 @@
 
 package htsjdk.variant.variantcontext;
 
+import htsjdk.variant.vcf.VCFCodec;
 import htsjdk.variant.vcf.VCFConstants;
+import htsjdk.variant.vcf.VCFHeader;
+import htsjdk.variant.vcf.VCFHeaderVersion;
 
 import java.io.Serializable;
 import java.util.ArrayList;
@@ -80,6 +83,7 @@ public class VariantContextBuilder {
     private Map<String, Object> attributes = null;
     private boolean attributesCanBeModified = false;
     private boolean filtersCanBeModified = false;
+    private VCFHeaderVersion version = VCFHeader.DEFAULT_VCF_VERSION;
 
     /** enum of what must be validated */
     final private EnumSet<VariantContext.Validation> toValidate = EnumSet.noneOf(VariantContext.Validation.class);
@@ -161,6 +165,10 @@ public Map<String, Object> getAttributes() {
         return attributes;
     }
 
+    public VCFHeaderVersion getVersion() {
+        return version;
+    }
+
     /**
      * Returns a new builder based on parent -- the new VC will have all fields initialized
      * to their corresponding values in parent.  This is the best way to create a derived VariantContext
@@ -181,6 +189,7 @@ public VariantContextBuilder(final VariantContext parent) {
         this.start = parent.getStart();
         this.stop = parent.getEnd();
         this.fullyDecoded = parent.isFullyDecoded();
+        this.version = parent.getVersion();
 
         this.attributes(parent.getAttributes());
         if (parent.filtersWereApplied()) {
@@ -205,6 +214,7 @@ public VariantContextBuilder(final VariantContextBuilder parent) {
         this.start = parent.start;
         this.stop = parent.stop;
         this.fullyDecoded = parent.fullyDecoded;
+        this.version = parent.version;
 
         this.attributes(parent.attributes);
         this.filters(parent.filters);
@@ -214,6 +224,17 @@ public VariantContextBuilder copy() {
         return new VariantContextBuilder(this);
     }
 
+    /**
+     * Tells this builder to create a VariantContext conforming to this version of VCF
+     *
+     * @param version the version of VCF to which the VariantContext produced by this builder conforms
+     * @return this builder
+     */
+    public VariantContextBuilder version(final VCFHeaderVersion version) {
+        this.version = version;
+        return this;
+    }
+
     /**
      * Tells this builder to use this collection of alleles for the resulting VariantContext
      *
@@ -646,6 +667,6 @@ public VariantContext make(final boolean leaveModifyableAsIs) {
 
         return new VariantContext(source, ID, contig, start, stop, alleles,
                 genotypes, log10PError, filters, attributes,
-                fullyDecoded, toValidate);
+                fullyDecoded, version, toValidate);
     }
 }
diff --git a/src/main/java/htsjdk/variant/variantcontext/writer/BCF2Writer.java b/src/main/java/htsjdk/variant/variantcontext/writer/BCF2Writer.java
index 07b2d0d41e..78990f5f3f 100644
--- a/src/main/java/htsjdk/variant/variantcontext/writer/BCF2Writer.java
+++ b/src/main/java/htsjdk/variant/variantcontext/writer/BCF2Writer.java
@@ -43,6 +43,7 @@
 import htsjdk.variant.vcf.VCFConstants;
 import htsjdk.variant.vcf.VCFContigHeaderLine;
 import htsjdk.variant.vcf.VCFHeader;
+import htsjdk.variant.vcf.VCFHeaderVersion;
 import htsjdk.variant.vcf.VCFUtils;
 
 import java.io.ByteArrayOutputStream;
@@ -169,7 +170,7 @@ public void writeHeader(VCFHeader header) {
             // write out the header into a byte stream, get its length, and write everything to the file
             final ByteArrayOutputStream capture = new ByteArrayOutputStream();
             final OutputStreamWriter writer = new OutputStreamWriter(capture);
-            this.header = VCFWriter.writeHeader(this.header, writer, VCFWriter.getVersionLine(), "BCF2 stream");
+            VCFWriter.writeHeader(this.header, writer, "BCF2 stream");
             writer.append('\0'); // the header is null terminated by a byte
             writer.close();
 
diff --git a/src/main/java/htsjdk/variant/variantcontext/writer/VCFVersionUpgradePolicy.java b/src/main/java/htsjdk/variant/variantcontext/writer/VCFVersionUpgradePolicy.java
new file mode 100644
index 0000000000..1d43b1c486
--- /dev/null
+++ b/src/main/java/htsjdk/variant/variantcontext/writer/VCFVersionUpgradePolicy.java
@@ -0,0 +1,40 @@
+package htsjdk.variant.variantcontext.writer;
+
+/**
+ * The policy {@link VCFWriter} will use to determine the version of VCF to write from a given VCF file.
+ * <p>
+ * htsjdk's behavior to this point has been to stamp the most recent version of VCF onto all VCF files
+ * written by VCFWriter regardless of the input VCF's original version. This had been possible as new versions
+ * of VCF were backwards compatible and version upgrading was infallible. VCF 4.3 is stricter than previous versions,
+ * meaning that some previously valid files are invalid 4.3 and upgrading from pre-4.3 to 4.3+ can sometimes fail.
+ * <p>
+ * This class is a temporary workaround to allow opt-in 4.3 writing support in a way that does not break
+ * workflows that may process pre-4.3 files that are invalid 4.3, but should be removed once proper versioning
+ * support for VCF is incorporated into htsjdk.
+ */
+public enum VCFVersionUpgradePolicy {
+    /**
+     * Interpret VCF files with exactly the version that they have on read. The VCF is assumed to be valid
+     * for its version and no version validation will be performed. The written VCF will have the same version
+     * as the one which was read.
+     */
+    DO_NOT_UPGRADE,
+
+    /**
+     * Write pre-4.3 files as 4.2, to which automatic upgrading should always be possible, and
+     * write 4.3+ files as 4.3.
+     */
+    ONLY_INFALLIBLE_UPGRADE,
+
+    /**
+     * Inspect the headers of pre-4.3 files to determine if they can be automatically upgraded to 4.3,
+     * and if automatic upgrade is possible write them as 4.3, or else write them as 4.2.
+     */
+    UPGRADE_OR_FALLBACK,
+
+    /**
+     * Inspect the headers of pre-4.3 files to determine if they can be automatically upgraded to 4.3,
+     * and abort with an error if automatic upgrade is not possible.
+     */
+    UPGRADE_OR_FAIL,
+}
diff --git a/src/main/java/htsjdk/variant/variantcontext/writer/VCFWriter.java b/src/main/java/htsjdk/variant/variantcontext/writer/VCFWriter.java
index 1b6edae1d8..d9977a66d8 100644
--- a/src/main/java/htsjdk/variant/variantcontext/writer/VCFWriter.java
+++ b/src/main/java/htsjdk/variant/variantcontext/writer/VCFWriter.java
@@ -29,9 +29,7 @@
 import htsjdk.samtools.util.IOUtil;
 import htsjdk.samtools.util.Log;
 import htsjdk.samtools.util.RuntimeIOException;
-import htsjdk.tribble.TribbleException;
 import htsjdk.tribble.index.IndexCreator;
-import htsjdk.utils.ValidationUtils;
 import htsjdk.variant.variantcontext.VariantContext;
 import htsjdk.variant.variantcontext.VariantContextBuilder;
 import htsjdk.variant.vcf.VCFConstants;
@@ -39,7 +37,6 @@
 import htsjdk.variant.vcf.VCFHeader;
 import htsjdk.variant.vcf.VCFHeaderLine;
 import htsjdk.variant.vcf.VCFHeaderVersion;
-import htsjdk.variant.vcf.VCFUtils;
 
 import java.io.BufferedWriter;
 import java.io.ByteArrayOutputStream;
@@ -57,8 +54,6 @@
 class VCFWriter extends IndexingVariantContextWriter {
     protected final static Log logger = Log.getInstance(VCFWriter.class);
 
-    private static final String DEFAULT_VERSION_LINE = VCFHeader.DEFAULT_VCF_VERSION.toHeaderVersionLine();
-
 	// Initialized when the header is written to the output stream
 	private VCFEncoder vcfEncoder = null;
 
@@ -155,12 +150,11 @@ private void writeAndResetBuffer() throws IOException {
 
     @Override
     public void writeHeader(final VCFHeader header) {
-
-        // note we need to update the mHeader object after this call because they header
+        // note we need to update the mHeader object after this call because the header
         // may have genotypes trimmed out of it, if doNotWriteGenotypes is true
         setHeader(header);
         try {
-            writeHeader(this.mHeader, writer, getVersionLine(), getStreamName());
+            writeHeader(this.mHeader, writer, getStreamName());
             writeAndResetBuffer();
             outputHasBeenWritten = true;
         } catch ( IOException e ) {
@@ -168,24 +162,26 @@ public void writeHeader(final VCFHeader header) {
         }
     }
 
-    public static String getVersionLine() {
-        return DEFAULT_VERSION_LINE;
-    }
-
-    public static VCFHeader writeHeader(VCFHeader header,
+    @Deprecated // starting after version 2.24.1
+    public static VCFHeader writeHeader(final VCFHeader header,
                                         final Writer writer,
                                         final String versionLine,
                                         final String streamNameForError) {
+        // Determine requested version from versionLine
+        final VCFHeaderVersion requestedVersion = VCFHeaderVersion.fromHeaderVersionLine(versionLine);
+        final VCFHeaderLine requestedVersionLine = VCFHeader.makeHeaderVersionLine(requestedVersion);
+        // Set version inside header and validate lines
+        header.addMetaDataLine(requestedVersionLine);
+        return writeHeader(header, writer, streamNameForError);
+    }
 
+    public static VCFHeader writeHeader(final VCFHeader header,
+                                        final Writer writer,
+                                        final String streamNameForError) {
         try {
-            rejectVCFV43Headers(header);
-
-            // Validate that the file version we're writing is version-compatible this header's version.
-            validateHeaderVersion(header, versionLine);
-
             // The file format field needs to be written first; below any file format lines
             // embedded in the header will be removed
-            writer.write(versionLine + "\n");
+            writer.write(header.getVCFHeaderVersion().toHeaderVersionLine() + "\n");
 
             for (final VCFHeaderLine line : header.getMetaDataInSortedOrder() ) {
                 // Remove the fileformat header lines
@@ -201,8 +197,8 @@ public static VCFHeader writeHeader(VCFHeader header,
             // write out the column line
             writer.write(VCFHeader.HEADER_INDICATOR);
             writer.write(header.getHeaderFields().stream()
-                    .map(f -> f.name())
-                    .collect(Collectors.joining(VCFConstants.FIELD_SEPARATOR)).toString());
+                .map(Enum::name)
+                .collect(Collectors.joining(VCFConstants.FIELD_SEPARATOR)));
 
             if ( header.hasGenotypingData() ) {
                 writer.write(VCFConstants.FIELD_SEPARATOR);
@@ -266,42 +262,10 @@ public void add(final VariantContext context) {
 
     @Override
     public void setHeader(final VCFHeader header) {
-        rejectVCFV43Headers(header);
-
         if (outputHasBeenWritten) {
             throw new IllegalStateException("The header cannot be modified after the header or variants have been written to the output stream.");
         }
         this.mHeader = doNotWriteGenotypes ? new VCFHeader(header.getMetaDataInSortedOrder()) : header;
         this.vcfEncoder = new VCFEncoder(this.mHeader, this.allowMissingFieldsInHeader, this.writeFullFormatField);
     }
-
-    // writing vcf v4.3 is not implemented
-    private static void rejectVCFV43Headers(final VCFHeader targetHeader) {
-        if (targetHeader.getVCFHeaderVersion() != null && targetHeader.getVCFHeaderVersion().isAtLeastAsRecentAs(VCFHeaderVersion.VCF4_3)) {
-            throw new IllegalArgumentException(String.format("Writing VCF version %s is not implemented", targetHeader.getVCFHeaderVersion()));
-        }
-    }
-
-    // Given a header and a requested target output version, see if the header's version is compatible with the
-    // requested version (where compatible means its ok to just declare that the header has the requested
-    // version).
-    private static void validateHeaderVersion(final VCFHeader header, final String requestedVersionLine) {
-        ValidationUtils.nonNull(header);
-        ValidationUtils.nonNull(requestedVersionLine);
-
-        final VCFHeaderVersion vcfCurrentVersion = header.getVCFHeaderVersion();
-        final VCFHeaderVersion vcfRequestedVersion = VCFHeaderVersion.fromHeaderVersionLine(requestedVersionLine);
-        if (!vcfCurrentVersion.equals(vcfRequestedVersion)) {
-            if (!VCFHeaderVersion.versionsAreCompatible(VCFHeaderVersion.fromHeaderVersionLine(requestedVersionLine), vcfCurrentVersion)) {
-                final String message = String.format("Attempting to write a %s VCF header to a %s VCFWriter",
-                        vcfRequestedVersion,
-                        vcfCurrentVersion.getVersionString());
-                if (VCFUtils.isStrictVCFVersionValidation()) {
-                    throw new TribbleException(message);
-                }
-                logger.warn(message);
-            }
-        }
-    }
-
 }
diff --git a/src/main/java/htsjdk/variant/variantcontext/writer/VariantContextWriterBuilder.java b/src/main/java/htsjdk/variant/variantcontext/writer/VariantContextWriterBuilder.java
index 67656fbe03..215eaf996b 100644
--- a/src/main/java/htsjdk/variant/variantcontext/writer/VariantContextWriterBuilder.java
+++ b/src/main/java/htsjdk/variant/variantcontext/writer/VariantContextWriterBuilder.java
@@ -571,17 +571,19 @@ private static boolean isCompressedVCF(final Path outPath) {
     private VariantContextWriter createVCFWriter(final Path writerPath, final OutputStream writerStream) {
         if (idxCreator == null) {
             return new VCFWriter(writerPath, writerStream, refDict,
-                    options.contains(Options.INDEX_ON_THE_FLY),
-                    options.contains(Options.DO_NOT_WRITE_GENOTYPES),
-                    options.contains(Options.ALLOW_MISSING_FIELDS_IN_HEADER),
-                    options.contains(Options.WRITE_FULL_FORMAT_FIELD));
-        }
-        else {
-            return new VCFWriter(writerPath, writerStream, refDict, idxCreator,
-                    options.contains(Options.INDEX_ON_THE_FLY),
-                    options.contains(Options.DO_NOT_WRITE_GENOTYPES),
-                    options.contains(Options.ALLOW_MISSING_FIELDS_IN_HEADER),
-                    options.contains(Options.WRITE_FULL_FORMAT_FIELD));
+                options.contains(Options.INDEX_ON_THE_FLY),
+                options.contains(Options.DO_NOT_WRITE_GENOTYPES),
+                options.contains(Options.ALLOW_MISSING_FIELDS_IN_HEADER),
+                options.contains(Options.WRITE_FULL_FORMAT_FIELD)
+            );
+        } else {
+            return new VCFWriter(
+                writerPath, writerStream, refDict, idxCreator,
+                options.contains(Options.INDEX_ON_THE_FLY),
+                options.contains(Options.DO_NOT_WRITE_GENOTYPES),
+                options.contains(Options.ALLOW_MISSING_FIELDS_IN_HEADER),
+                options.contains(Options.WRITE_FULL_FORMAT_FIELD)
+            );
         }
     }
 
diff --git a/src/main/java/htsjdk/variant/vcf/AbstractVCFCodec.java b/src/main/java/htsjdk/variant/vcf/AbstractVCFCodec.java
index 1a1267e5c8..cacff036b5 100644
--- a/src/main/java/htsjdk/variant/vcf/AbstractVCFCodec.java
+++ b/src/main/java/htsjdk/variant/vcf/AbstractVCFCodec.java
@@ -25,9 +25,11 @@
 
 package htsjdk.variant.vcf;
 
+import htsjdk.samtools.Defaults;
 import htsjdk.samtools.util.BlockCompressedInputStream;
 import htsjdk.samtools.util.Log;
 import htsjdk.samtools.util.IOUtil;
+import htsjdk.samtools.util.QualityUtil;
 import htsjdk.tribble.AsciiFeatureCodec;
 import htsjdk.tribble.Feature;
 import htsjdk.tribble.NameAwareCodec;
@@ -38,6 +40,7 @@
 import htsjdk.utils.ValidationUtils;
 import htsjdk.variant.utils.GeneralUtils;
 import htsjdk.variant.variantcontext.*;
+import htsjdk.variant.variantcontext.writer.VCFVersionUpgradePolicy;
 
 import java.io.FileNotFoundException;
 import java.io.IOException;
@@ -45,6 +48,7 @@
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.*;
+import java.util.stream.Collectors;
 import java.util.zip.GZIPInputStream;
 
 public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext> implements NameAwareCodec {
@@ -58,11 +62,6 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
     protected VCFHeader header = null;
     protected VCFHeaderVersion version = null;
 
-    private final static VCFTextTransformer percentEncodingTextTransformer = new VCFPercentEncodedTextTransformer();
-    private final static VCFTextTransformer passThruTextTransformer = new VCFPassThruTextTransformer();
-    //by default, we use the passThruTextTransformer (assume pre v4.3)
-    private VCFTextTransformer vcfTextTransformer = passThruTextTransformer;
-
     // a mapping of the allele
     protected final Map<String, List<Allele>> alleleMap = new HashMap<>(3);
 
@@ -86,7 +85,7 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
     /**
      * If true, then we'll magically fix up VCF headers on the fly when we read them in
      */
-    protected boolean doOnTheFlyModifications = true;
+    protected VCFVersionUpgradePolicy policy = Defaults.VCF_VERSION_TRANSITION_POLICY;
 
     /**
      * If non-null, we will replace the sample name read from the VCF header with this sample name. This feature works
@@ -191,8 +190,6 @@ protected VCFHeaderVersion readFormatVersionLine(final LineIterator headerLineIt
      * @return a VCFHeader object
      */
     protected VCFHeader parseHeaderFromLines( final List<String> headerStrings, final VCFHeaderVersion sourceVersion ) {
-        this.version = sourceVersion;
-
         final Set<VCFHeaderLine> metaData = new LinkedHashSet<>();
         Set<String> sampleNames = new LinkedHashSet<>();
         int contigCounter = 0;
@@ -471,22 +468,49 @@ public VCFHeader setVCFHeader(final VCFHeader newHeader, final VCFHeaderVersion
      */
     public VCFHeader setVCFHeader(final VCFHeader newHeader) {
         ValidationUtils.nonNull(newHeader);
-
-        if (this.doOnTheFlyModifications) {
-            // calling this with a header that has any pre-v4.3 version will always result in a header
-            // with version vcfV4.2, no matter what the header version originally was, since the "repair"
-            // operation is essentially a transform of the header so that it conforms with header line rules
-            // as of 4.2
-            this.header = VCFStandardHeaderLines.repairStandardHeaderLines(newHeader);
-        } else {
-            this.header = newHeader;
+        final VCFHeaderVersion originalVersion = newHeader.getVCFHeaderVersion();
+
+        switch(this.policy) {
+            case DO_NOT_UPGRADE:
+                this.header = newHeader;
+                break;
+            case ONLY_INFALLIBLE_UPGRADE:
+                // Upgrade pre-4.3 versions to 4.2, and keep 4.3 at 4.3.
+                // Calling this with a header that has any pre-v4.3 version will always result in a header
+                // with version vcfV4.2, no matter what the header version originally was, since the "repair"
+                // operation is essentially a transform of the header so that it conforms with header line rules
+                // as of 4.2.
+                this.header = VCFStandardHeaderLines.repairStandardHeaderLines(newHeader);
+                break;
+            case UPGRADE_OR_FAIL:
+            case UPGRADE_OR_FALLBACK:
+                this.header = VCFStandardHeaderLines.repairStandardHeaderLines(newHeader);
+                final Collection<VCFValidationFailure<VCFHeaderLine>> errors = this.header.getValidationErrors(VCFHeader.DEFAULT_VCF_VERSION);
+                if (!errors.isEmpty()) {
+                    final String message = String.format(
+                        "Version transition from VCF version %s to %s failed with validation error(s):\n%s%s",
+                        originalVersion.getVersionString(), VCFHeader.DEFAULT_VCF_VERSION.getVersionString(),
+                        errors.stream()
+                            .limit(5)
+                            .map(VCFValidationFailure::getSourceMessage)
+                            .collect(Collectors.joining("\n")),
+                        errors.size() > 5 ? "\n+ " + (errors.size() - 5) + " additional error(s)" : ""
+                    );
+                    if (this.policy == VCFVersionUpgradePolicy.UPGRADE_OR_FAIL) {
+                        throw new TribbleException(message);
+                    } else {
+                        logger.info(message + ", header will be kept at original version: " + originalVersion.getVersionString());
+                    }
+                } else {
+                    // Only upgrade if no errors resulting from version upgrading would occur
+                    this.header.addMetaDataLine(VCFHeader.makeHeaderVersionLine(VCFHeader.DEFAULT_VCF_VERSION));
+                }
+                break;
+            default:
+                throw new TribbleException("Unrecognized VCF Version Upgrade Policy: " + this.policy);
         }
-		this.version = this.header.getVCFHeaderVersion();
-        // Obtain a text transformer (technically, this should be based on the ORIGINAL header version, not
-        // the updated version after repairStandardHeaderLines is called), but it doesn't matter in practice
-        // since the transformer only differs starting with 4.3.
-        this.vcfTextTransformer = getTextTransformerForVCFVersion(this.version);
 
+        this.version = this.header.getVCFHeaderVersion();
 		return this.header;
 	}
 
@@ -509,18 +533,6 @@ public VariantContext decode(String line) {
         return decodeLine(line, true);
     }
 
-    /**
-     * For v4.3 up, attribute values can contain embedded percent-encoded characters which must be decoded
-     * on read. Return a version-aware text transformer that can decode encoded text.
-     * @param targetVersion the version for which a transformer is bing requested
-     * @return a {@link VCFTextTransformer} suitable for the targetVersion
-     */
-    private VCFTextTransformer getTextTransformerForVCFVersion(final VCFHeaderVersion targetVersion) {
-        return targetVersion != null && targetVersion.isAtLeastAsRecentAs(VCFHeaderVersion.VCF4_3) ?
-                percentEncodingTextTransformer :
-                passThruTextTransformer;
-    }
-
     private VariantContext decodeLine(final String line, final boolean includeGenotypes) {
         // the same line reader is not used for parsing the header and parsing lines, if we see a #, we've seen a header line
         if (line.startsWith(VCFHeader.HEADER_INDICATOR)) return null;
@@ -551,6 +563,7 @@ private VariantContext decodeLine(final String line, final boolean includeGenoty
      */
     private VariantContext parseVCFLine(final String[] parts, final boolean includeGenotypes) {
         VariantContextBuilder builder = new VariantContextBuilder();
+        builder.version(version);
         builder.source(getName());
 
         // increment the line count
@@ -728,16 +741,16 @@ protected Map<String, Object> parseInfo(String infoField) {
                     String valueString = infoFields.get(i).substring(eqI + 1);
 
                     // split on the INFO field separator
-                    List<String> infoValueSplit = ParsingUtils.split(valueString, VCFConstants.INFO_FIELD_ARRAY_SEPARATOR_CHAR);
+                    final List<String> infoValueSplit = ParsingUtils.split(valueString, VCFConstants.INFO_FIELD_ARRAY_SEPARATOR_CHAR);
                     if ( infoValueSplit.size() == 1 ) {
-                        value = vcfTextTransformer.decodeText(infoValueSplit.get(0));
+                        value = infoValueSplit.get(0);
                         final VCFInfoHeaderLine headerLine = header.getInfoHeaderLine(key);
                         if ( headerLine != null && headerLine.getType() == VCFHeaderLineType.Flag && value.equals("0") ) {
                             // deal with the case where a flag field has =0, such as DB=0, by skipping the add
                             continue;
                         }
                     } else {
-                        value = vcfTextTransformer.decodeText(infoValueSplit);
+                        value = infoValueSplit;
                     }
                 } else {
                     key = infoFields.get(i);
@@ -884,8 +897,12 @@ private static void checkAllele(String allele, boolean isRef, int lineNo) {
         if ( allele == null || allele.isEmpty() )
             generateException(generateExceptionTextForBadAlleleBases(""), lineNo);
 
-        if ( GeneralUtils.DEBUG_MODE_ENABLED && MAX_ALLELE_SIZE_BEFORE_WARNING != -1 && allele.length() > MAX_ALLELE_SIZE_BEFORE_WARNING ) {
-            System.err.println(String.format("Allele detected with length %d exceeding max size %d at approximately line %d, likely resulting in degraded VCF processing performance", allele.length(), MAX_ALLELE_SIZE_BEFORE_WARNING, lineNo));
+        if ( MAX_ALLELE_SIZE_BEFORE_WARNING != -1 && allele.length() > MAX_ALLELE_SIZE_BEFORE_WARNING ) {
+            logger.warn(String.format(
+                "Allele detected with length %d exceeding max size %d at approximately line %d, " +
+                    "likely resulting in degraded VCF processing performance",
+                allele.length(), MAX_ALLELE_SIZE_BEFORE_WARNING, lineNo
+            ));
         }
 
         if (Allele.wouldBeSymbolicAllele(allele.getBytes())) {
@@ -996,8 +1013,7 @@ public LazyGenotypesContext.LazyData createGenotypeMap(final String str,
         // cycle through the genotype strings
         boolean PlIsSet = false;
         for (int genotypeOffset = 1; genotypeOffset < nParts; genotypeOffset++) {
-            List<String> genotypeValues = ParsingUtils.split(genotypeParts[genotypeOffset], VCFConstants.GENOTYPE_FIELD_SEPARATOR_CHAR);
-            genotypeValues = vcfTextTransformer.decodeText(genotypeValues);
+            final List<String> genotypeValues = ParsingUtils.split(genotypeParts[genotypeOffset], VCFConstants.GENOTYPE_FIELD_SEPARATOR_CHAR);
 
             final String sampleName = sampleNameIterator.next();
             final GenotypeBuilder gb = new GenotypeBuilder(sampleName);
@@ -1071,8 +1087,8 @@ public LazyGenotypesContext.LazyData createGenotypeMap(final String str,
     }
 
     private static final int[] decodeInts(final String string) {
-        List<String> split = ParsingUtils.split(string, ',');
-        int [] values = new int[split.size()];
+        final List<String> split = ParsingUtils.split(string, ',');
+        final int [] values = new int[split.size()];
         try {
             for (int i = 0; i < values.length; i++) {
                 values[i] = Integer.parseInt(split.get(i));
@@ -1089,7 +1105,16 @@ private static final int[] decodeInts(final String string) {
      * raw VCF records
      */
     public final void disableOnTheFlyModifications() {
-        doOnTheFlyModifications = false;
+        setVersionUpgradePolicy(VCFVersionUpgradePolicy.DO_NOT_UPGRADE);
+    }
+
+    /**
+     * Sets the policy this codec uses to decide whether, and to which version, a VCF header
+     * is upgraded when it is set on the codec via {@link #setVCFHeader(VCFHeader)}.
+     * @param policy the {@link VCFVersionUpgradePolicy} to apply to subsequently set headers
+     */
+    public final void setVersionUpgradePolicy(final VCFVersionUpgradePolicy policy) {
+        this.policy = policy;
     }
 
     /**
diff --git a/src/main/java/htsjdk/variant/vcf/VCFCompoundHeaderLine.java b/src/main/java/htsjdk/variant/vcf/VCFCompoundHeaderLine.java
index 7f0f255883..028798757a 100644
--- a/src/main/java/htsjdk/variant/vcf/VCFCompoundHeaderLine.java
+++ b/src/main/java/htsjdk/variant/vcf/VCFCompoundHeaderLine.java
@@ -180,16 +180,16 @@ public Optional<VCFValidationFailure<VCFHeaderLine>> getValidationFailure(final
         // However, the key values correspond to INFO/FORMAT header lines defining the attribute and its type,
         // so we do the validation here
         if (vcfTargetVersion.isAtLeastAsRecentAs(VCFHeaderVersion.VCF4_3)) {
-            if (!validHeaderID(getID())) {
-                final VCFValidationFailure<VCFHeaderLine> validationFailure = new VCFValidationFailure<>(
-                        vcfTargetVersion,
-                        this,
-                        String.format("ID tag \"%s\" does not conform to tag restrictions", getID()));
+            final Optional<VCFValidationFailure<VCFHeaderLine>> validationFailure = validateKeyOrID(getID())
+                .map(e -> new VCFValidationFailure<>(vcfTargetVersion, this, e));
+            if (validationFailure.isPresent()) {
+                // TODO: consider making getValidationFailure a pure function, and letting its caller
+                //  decide whether to pass the error up or just log it when not using strict validation
                 if (VCFUtils.isStrictVCFVersionValidation()) {
-                    return Optional.of(validationFailure);
+                    return validationFailure;
                 } else {
                     // warn for older versions - this line can't be used as a v4.3 line
-                    logger.warn(validationFailure.getFailureMessage());
+                    logger.warn(validationFailure.get().getFailureMessage());
                 }
             }
         }
@@ -201,9 +201,11 @@ public Optional<VCFValidationFailure<VCFHeaderLine>> getValidationFailure(final
      * @param id the candidate ID
      * @return true if ID conforms to header line id requirements, otherwise false
      */
-    //TODO: the existing VCFHeaderLine.validateKeyOrID method should be refactored so it can be used instead of this
-    protected boolean validHeaderID(final String id) {
-        return VALID_HEADER_ID_PATTERN.matcher(id).matches();
+    @Override
+    protected Optional<String> validateKeyOrID(final String id) {
+        return VALID_HEADER_ID_PATTERN.matcher(id).matches()
+            ? Optional.empty()
+            : Optional.of(String.format("Key: %s does not match header line key regex: %s", id, VALID_HEADER_ID_PATTERN));
     }
 
     /**
diff --git a/src/main/java/htsjdk/variant/vcf/VCFEncoder.java b/src/main/java/htsjdk/variant/vcf/VCFEncoder.java
index 9cffb45837..7be6e32de6 100644
--- a/src/main/java/htsjdk/variant/vcf/VCFEncoder.java
+++ b/src/main/java/htsjdk/variant/vcf/VCFEncoder.java
@@ -15,7 +15,9 @@
 import java.nio.charset.Charset;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.HashMap;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
@@ -26,10 +28,7 @@
  */
 public class VCFEncoder {
 
-    /**
-     * The encoding used for VCF files: ISO-8859-1. When writing VCF4.3 is implemented, this should change to UTF-8.
-     */
-    public static final Charset VCF_CHARSET = StandardCharsets.ISO_8859_1;
+    public static final Charset VCF_CHARSET = StandardCharsets.UTF_8;
     private static final String QUAL_FORMAT_STRING = "%.2f";
     private static final String QUAL_FORMAT_EXTENSION_TO_TRIM = ".00";
 
@@ -41,6 +40,8 @@ public class VCFEncoder {
 
     private boolean outputTrailingFormatFields = false;
 
+    private final VCFTextTransformer vcfTextTransformer;
+
     /**
      * Prepare a VCFEncoder that will encode records appropriate to the given VCF header, optionally
      * allowing missing fields in the header.
@@ -52,6 +53,9 @@ public VCFEncoder(final VCFHeader header, final boolean allowMissingFieldsInHead
         this.header = header;
         this.allowMissingFieldsInHeader = allowMissingFieldsInHeader;
         this.outputTrailingFormatFields = outputTrailingFormatFields;
+        this.vcfTextTransformer = header.getVCFHeaderVersion().isAtLeastAsRecentAs(VCFHeaderVersion.VCF4_3)
+            ? new VCFPercentEncodedTextTransformer()
+            : new VCFPassThruTextTransformer();
     }
 
     /**
@@ -148,7 +152,7 @@ public void write(final Appendable vcfOutput, final VariantContext context) thro
                 fieldIsMissingFromHeaderError(context, field.getKey(), "INFO");
             }
 
-            final String outputValue = formatVCFField(field.getValue());
+            final String outputValue = formatVCFField(field.getValue(), context.isFullyDecoded());
             if (outputValue != null) {
                 infoFields.put(field.getKey(), outputValue);
             }
@@ -218,34 +222,71 @@ private void fieldIsMissingFromHeaderError(final VariantContext vc, final String
         }
     }
 
-    @SuppressWarnings("rawtypes")
-    String formatVCFField(final Object val) {
-        final String result;
+    String formatVCFField(final Object val, final boolean fullyDecoded) {
         if (val == null) {
-            result = VCFConstants.MISSING_VALUE_v4;
+            return VCFConstants.MISSING_VALUE_v4;
         } else if (val instanceof Double) {
-            result = formatVCFDouble((Double) val);
+            return formatVCFDouble((Double) val);
         } else if (val instanceof Boolean) {
-            result = (Boolean) val ? "" : null; // empty string for true, null for false
+            return (Boolean) val ? "" : null; // empty string for true, null for false
         } else if (val instanceof List) {
-            result = formatVCFField(((List) val).toArray());
+            return formatList((List<?>) val, fullyDecoded);
         } else if (val.getClass().isArray()) {
-            final int length = Array.getLength(val);
-            if (length == 0) {
-                return formatVCFField(null);
+            return val.getClass().getComponentType().isPrimitive()
+                ? formatPrimitiveArray(val)
+                : formatList(Arrays.asList((Object[]) val), fullyDecoded);
+        } else if (val instanceof String) {
+            final String s = val.toString();
+            // If the VariantContext from which this string was obtained was already fully decoded,
+            // its in-memory representation may contain special characters which must be re-encoded,
+            // while strings which have not been decoded yet represent the field as read directly
+            // from the source VCF, so they are written back out without encoding
+            return fullyDecoded ? vcfTextTransformer.encodeText(s) : s;
+        } else {
+            return val.toString();
+        }
+    }
+
+    /** Joins the elements of a primitive array with ','; an empty array gives the missing value. */
+    private static String formatPrimitiveArray(final Object v) {
+        final int len = Array.getLength(v);
+        if (len == 0) return VCFConstants.MISSING_VALUE_v4;
+        int i = 0;
+        final StringBuilder s = new StringBuilder();
+        if (v instanceof int[]) {
+            for (;;) {
+                s.append(((int[]) v)[i++]);
+                if (i == len) break;
+                s.append(',');
             }
-            final StringBuilder sb = new StringBuilder(
-                formatVCFField(Array.get(val, 0)));
-            for (int i = 1; i < length; i++) {
-                sb.append(',');
-                sb.append(formatVCFField(Array.get(val, i)));
+        } else if (v instanceof double[]) {
+            final double[] a = (double[]) v;
+            for (;;) {
+                s.append(formatVCFDouble(a[i++]));
+                if (i == len) break;
+                s.append(',');
+            }
+        } else {
+            // long[], float[], short[], byte[], char[], boolean[]: append each boxed element's toString()
+            for (;;) {
+                s.append(Array.get(v, i++));
+                if (i == len) break;
+                s.append(',');
             }
-            result = sb.toString();
-        } else {
-            result = val.toString();
         }
+        return s.toString();
+    }
 
-        return result;
+    // Formats every element with formatVCFField and joins the results with ','.
+    // An empty list is written as the missing value.
+    private String formatList(final List<?> list, final boolean fullyDecoded) {
+        if (list.isEmpty()) return VCFConstants.MISSING_VALUE_v4;
+        final Iterator<?> elements = list.iterator();
+        final StringBuilder joined = new StringBuilder().append(formatVCFField(elements.next(), fullyDecoded));
+        while (elements.hasNext()) {
+            joined.append(',').append(formatVCFField(elements.next(), fullyDecoded));
+        }
+        return joined.toString();
     }
 
     /**
@@ -310,7 +351,8 @@ public void addGenotypeData(final VariantContext vc, final Map<Allele, String> a
      * @param vcfoutput VCF output
      * @throws IOException
      */
-    private void appendGenotypeData(final VariantContext vc, final Map<Allele, String> alleleMap, final List<String> genotypeFormatKeys, final Appendable vcfoutput) throws IOException {final int ploidy = vc.getMaxPloidy(2);
+    private void appendGenotypeData(final VariantContext vc, final Map<Allele, String> alleleMap, final List<String> genotypeFormatKeys, final Appendable vcfoutput) throws IOException {
+        final int ploidy = vc.getMaxPloidy(2);
 
         for (final String sample : this.header.getGenotypeSamples()) {
             vcfoutput.append(VCFConstants.FIELD_SEPARATOR);
@@ -357,7 +399,7 @@ public void addGenotypeData(final VariantContext vc, final Map<Allele, String> a
                             }
                         } else {
                             Object val = g.hasExtendedAttribute(field) ? g.getExtendedAttribute(field) : VCFConstants.MISSING_VALUE_v4;
-                            outputValue = formatVCFField(val);
+                            outputValue = formatVCFField(val, vc.isFullyDecoded());
                         }
                     }
 
diff --git a/src/main/java/htsjdk/variant/vcf/VCFFileReader.java b/src/main/java/htsjdk/variant/vcf/VCFFileReader.java
index c6ff6158e0..2ab29ddcb4 100644
--- a/src/main/java/htsjdk/variant/vcf/VCFFileReader.java
+++ b/src/main/java/htsjdk/variant/vcf/VCFFileReader.java
@@ -24,6 +24,7 @@
 
 package htsjdk.variant.vcf;
 
+import htsjdk.samtools.Defaults;
 import htsjdk.samtools.SAMSequenceDictionary;
 import htsjdk.samtools.util.CloseableIterator;
 import htsjdk.samtools.util.FileExtensions;
@@ -35,6 +36,7 @@
 import htsjdk.tribble.TribbleException;
 import htsjdk.variant.bcf2.BCF2Codec;
 import htsjdk.variant.variantcontext.VariantContext;
+import htsjdk.variant.variantcontext.writer.VCFVersionUpgradePolicy;
 
 import java.io.File;
 import java.io.IOException;
@@ -115,6 +117,23 @@ public VCFFileReader(final File file, final File indexFile, final boolean requir
         return isBCF(path) ? new BCF2Codec() : new VCFCodec();
     }
 
+    /**
+     * Chooses the codec for {@code path}: a BCF2Codec when the name seems to indicate a BCF,
+     * otherwise a VCFCodec configured with the given version upgrade policy.
+     *
+     * @param path   to vcf/bcf
+     * @param policy version upgrade policy set on the VCF codec (not applied to the BCF codec)
+     * @return FeatureCodec for input Path
+     */
+    private static FeatureCodec<VariantContext, ?> getCodecForPath(Path path, final VCFVersionUpgradePolicy policy) {
+        if (!isBCF(path)) {
+            final VCFCodec vcfCodec = new VCFCodec();
+            vcfCodec.setVersionUpgradePolicy(policy);
+            return vcfCodec;
+        }
+        return new BCF2Codec();
+    }
+
     /**
      * Returns the SAMSequenceDictionary from the provided VCF file.
      */
@@ -142,21 +161,49 @@ public VCFFileReader(final Path path, final Path indexPath) {
      * Allows construction of a VCFFileReader that will or will not assert the presence of an index as desired.
      */
     public VCFFileReader(final Path path, final boolean requireIndex) {
-        this.reader = AbstractFeatureReader.getFeatureReader(
-                path.toUri().toString(),
-                getCodecForPath(path),
-                requireIndex);
+        this(path, requireIndex, Defaults.VCF_VERSION_TRANSITION_POLICY);
     }
 
     /**
      * Allows construction of a VCFFileReader with a specified index path.
      */
     public VCFFileReader(final Path path, final Path indexPath, final boolean requireIndex) {
+        this(path, indexPath, requireIndex, Defaults.VCF_VERSION_TRANSITION_POLICY);
+    }
+
+    /**
+     * Constructs a VCFFileReader for {@code path} that requires the index to be present, using {@code policy} for the VCF codec.
+     */
+    public VCFFileReader(final Path path, final VCFVersionUpgradePolicy policy) {
+        this(path, true, policy); // true = requireIndex
+    }
+
+    /**
+     * Constructs a VCFFileReader with a specified index that must be present, using {@code policy} for the VCF codec.
+     */
+    public VCFFileReader(final Path path, final Path indexPath, final VCFVersionUpgradePolicy policy) {
+        this(path, indexPath, true, policy); // true = requireIndex
+    }
+
+    /**
+     * Allows construction of a VCFFileReader that will or will not assert the presence of an index as desired,
+     * with {@code policy} handed to the codec chosen for {@code path}.
+     */
+    public VCFFileReader(final Path path, final boolean requireIndex, final VCFVersionUpgradePolicy policy) {
+        this.reader = AbstractFeatureReader.getFeatureReader(
+            path.toUri().toString(),
+            getCodecForPath(path, policy),
+            requireIndex);
+    }
+
+    /**
+     * Allows construction of a VCFFileReader with a specified index path.
+     */
+    public VCFFileReader(final Path path, final Path indexPath, final boolean requireIndex, final VCFVersionUpgradePolicy policy) {
         this.reader = AbstractFeatureReader.getFeatureReader(
-                path.toUri().toString(),
-                indexPath.toUri().toString(),
-                getCodecForPath(path),
-                requireIndex);
+            path.toUri().toString(),
+            indexPath.toUri().toString(),
+            getCodecForPath(path, policy),
+            requireIndex);
     }
 
     /**
diff --git a/src/main/java/htsjdk/variant/vcf/VCFHeader.java b/src/main/java/htsjdk/variant/vcf/VCFHeader.java
index 1dcb5e07f9..e1a0cf7a4e 100644
--- a/src/main/java/htsjdk/variant/vcf/VCFHeader.java
+++ b/src/main/java/htsjdk/variant/vcf/VCFHeader.java
@@ -58,7 +58,7 @@
 public class VCFHeader implements HtsHeader, Serializable {
     public static final long serialVersionUID = 1L;
     protected static final Log logger = Log.getInstance(VCFHeader.class);
-    public static final VCFHeaderVersion DEFAULT_VCF_VERSION = VCFHeaderVersion.VCF4_2;
+    public static final VCFHeaderVersion DEFAULT_VCF_VERSION = VCFHeaderVersion.VCF4_3;
 
     // the mandatory header fields
     public enum HEADER_FIELDS {
@@ -604,6 +604,10 @@ private VCFHeaderVersion initializeHeaderVersion() {
         return metaDataVersion;
     }
 
+    public Collection<VCFValidationFailure<VCFHeaderLine>> getValidationErrors(final VCFHeaderVersion targetVersion) {
+        return mMetaData.getValidationErrors(targetVersion); // delegates to this header's metadata lines
+    }
+
     private void validateVersionTransition(
             final VCFHeaderLine newHeaderLine,
             final VCFHeaderVersion currentVersion,
diff --git a/src/main/java/htsjdk/variant/vcf/VCFHeaderLine.java b/src/main/java/htsjdk/variant/vcf/VCFHeaderLine.java
index 94a3a0849e..9214f7095f 100644
--- a/src/main/java/htsjdk/variant/vcf/VCFHeaderLine.java
+++ b/src/main/java/htsjdk/variant/vcf/VCFHeaderLine.java
@@ -85,6 +85,16 @@ public String getValue() {
      */
     public String getID() { return null; }
 
+    /**
+     * Checks this line's key with {@code validateKeyOrID} and throws a TribbleException carrying any failure message.
+     */
+    private void validate() {
+        // the Optional is empty when the key is acceptable, otherwise it holds the failure message
+        validateKeyOrID(mKey).ifPresent(message -> {
+            throw new TribbleException(message);
+        });
+    }
+
     /**
      * Validates this header line against {@code vcfTargetVersion}.
      * Subclasses can override this to provide line type-specific version validation, and the
@@ -138,19 +148,15 @@ public void validateForVersion(final VCFHeaderVersion vcfTargetVersion) {
     /**
      * Validate a string that is to be used as a unique id or key field.
      */
-    protected static void validateKeyOrID(final String keyString, final String sourceName) {
-        ValidationUtils.nonNull(sourceName);
+    protected Optional<String> validateKeyOrID(final String keyString) {
         if (keyString == null) {
-            throw new TribbleException(
-                    String.format("VCFHeaderLine: %s cannot be null or empty", sourceName));
-        }
-        if ( keyString.contains("<") || keyString.contains(">") ) {
-            throw new TribbleException(
-                    String.format("VCFHeaderLine: %s cannot contain angle brackets", sourceName));
-        }
-        if ( keyString.contains("=") ) {
-            throw new TribbleException(
-                    String.format("VCFHeaderLine: %s cannot contain an equals sign", sourceName));
+            return Optional.of("VCFHeaderLine: key cannot be null or empty");
+        } else if ( keyString.contains("<") || keyString.contains(">") ) {
+            return Optional.of(String.format("VCFHeaderLine: key %s contains illegal character: angle brackets", keyString));
+        } else if ( keyString.contains("=") ) {
+            return Optional.of(String.format("VCFHeaderLine: key %s contains illegal character: equals sign", keyString));
+        } else {
+            return Optional.empty();
         }
     }
 
@@ -207,7 +213,8 @@ public int compareTo(Object other) {
      * @param line    the line
      * @return true if the line is a VCF meta data line, or false if it is not
      */
-    public static boolean isHeaderLine(String line) {
+    @Deprecated // starting after version 2.24.1
+    static boolean isHeaderLine(String line) {
         return line != null && !line.isEmpty() && VCFHeader.HEADER_INDICATOR.equals(line.substring(0,1));
     }
 
@@ -230,22 +237,15 @@ public static String toStringEncoding(Map<String, ? extends Object> keyValues) {
             builder.append(entry.getKey());
             builder.append('=');
             builder.append(entry.getValue().toString().contains(",") ||
-                           entry.getValue().toString().contains(" ") ||
-                           entry.getKey().equals("Description") ||
-                           entry.getKey().equals("Source") || // As per VCFv4.2, Source and Version should be surrounded by double quotes
-                           entry.getKey().equals("Version") ? "\""+ escapeQuotes(entry.getValue().toString()) + "\"" : entry.getValue());
+                entry.getValue().toString().contains(" ") ||
+                entry.getKey().equals("Description") ||
+                entry.getKey().equals("Source") || // As per VCFv4.2, Source and Version should be surrounded by double quotes
+                entry.getKey().equals("Version") ? "\""+ escapeQuotes(entry.getValue().toString()) + "\"" : entry.getValue());
         }
         builder.append('>');
         return builder.toString();
     }
 
-    /**
-     * Validate the state of this header line. Require the key be valid as an "id".
-     */
-    private void validate() {
-        validateKeyOrID(mKey, "key");
-    }
-
     private static String escapeQuotes(final String value) {
         // java escaping in a string literal makes this harder to read than it should be
         // without string literal escaping and quoting the regex would be: replaceAll( ([^\])" , $1\" )
diff --git a/src/main/java/htsjdk/variant/vcf/VCFInfoHeaderLine.java b/src/main/java/htsjdk/variant/vcf/VCFInfoHeaderLine.java
index 12a29a1f6c..4a116e1381 100644
--- a/src/main/java/htsjdk/variant/vcf/VCFInfoHeaderLine.java
+++ b/src/main/java/htsjdk/variant/vcf/VCFInfoHeaderLine.java
@@ -29,6 +29,8 @@
 import htsjdk.samtools.util.Log;
 import htsjdk.utils.ValidationUtils;
 
+import java.util.Optional;
+
 /**
  *         <p>
  *         Class VCFInfoHeaderLine
@@ -102,14 +104,15 @@ public static VCFInfoHeaderLine getMergedInfoHeaderLine(
     }
 
     @Override
-    public boolean shouldBeAddedToDictionary() {
-        return true;
+    protected Optional<String> validateKeyOrID(final String id) {
+        // Constant-first equals: a null id reaches the base class null check instead of throwing an NPE here
+        return VCFConstants.THOUSAND_GENOMES_KEY.equals(id)
+            ? Optional.empty() : super.validateKeyOrID(id);
     }
 
     @Override
-    //TODO: integrate this with the existing validateKeyOrID method
-    protected boolean validHeaderID(final String id) {
-        return super.validHeaderID(id) || id.equals(VCFConstants.THOUSAND_GENOMES_KEY);
+    public boolean shouldBeAddedToDictionary() {
+        return true;
     }
 
 }
diff --git a/src/main/java/htsjdk/variant/vcf/VCFMetaDataLines.java b/src/main/java/htsjdk/variant/vcf/VCFMetaDataLines.java
index 97f208e7b4..3055c93889 100644
--- a/src/main/java/htsjdk/variant/vcf/VCFMetaDataLines.java
+++ b/src/main/java/htsjdk/variant/vcf/VCFMetaDataLines.java
@@ -83,6 +83,18 @@ public VCFHeaderLine addMetaDataLine(final VCFHeaderLine newMetaDataLine) {
             // than the new line, since old VCF versions use a different format key than modern versions)
             return updateVersion(newMetaDataLine);
         } else {
+            // Enforce restriction that contig and ALT line IDs cannot share IDs (c.f. VCF 4.3 spec section 1.4.7)
+            // We do not store them in the same namespace so that we can distinguish cases of two lines
+            // of the same type clashing vs an ALT line clashing with an existing contig line or vice versa
+            switch (newMetaDataLine.getKey()) {
+                case VCFConstants.CONTIG_HEADER_KEY:
+                    validateContigAndALTLinesDisjoint(VCFConstants.ALT_HEADER_KEY, newMetaDataLine.getID());
+                    break;
+                case VCFConstants.ALT_HEADER_KEY:
+                    validateContigAndALTLinesDisjoint(VCFConstants.CONTIG_HEADER_KEY, newMetaDataLine.getID());
+                    break;
+            }
+
             // otherwise, see if there is an equivalent line that the new line will replace
             final HeaderLineMapKey newMapKey = makeKeyForLine(newMetaDataLine);
             final VCFHeaderLine equivalentMetaDataLine = mMetaData.get(newMapKey);
@@ -95,6 +107,13 @@ public VCFHeaderLine addMetaDataLine(final VCFHeaderLine newMetaDataLine) {
         }
     }
 
+    // Throws if a line with this id is already stored under the given namespace (callers pass the other line type's key)
+    private void validateContigAndALTLinesDisjoint(final String namespace, final String id) {
+        if (!mMetaData.containsKey(makeKey(namespace, id))) return;
+        throw new IllegalStateException(
+            String.format("ALT and contig line IDs must be disjoint, but both were found for ID: %s", id));
+    }
+
     /**
      * Remove an equivalent metadata line from the list. This is the inverse of addMetaDataLine, and removes
      * any equivalent line that already exists (any existing file format line if the line to be removed is
@@ -187,12 +206,12 @@ public void validateMetaDataLines(final VCFHeaderVersion targetVersion) {
      * @return an Collection<VCFValidationFailure> describing the lines that failed to validate
      * incompatible with targetVersion. The collections is empty if validation succeeded for all lines.
      */
-    public Collection<VCFValidationFailure> getValidationErrors(final VCFHeaderVersion targetVersion) {
+    public Collection<VCFValidationFailure<VCFHeaderLine>> getValidationErrors(final VCFHeaderVersion targetVersion) {
         return mMetaData.values().stream()
                 .filter(line -> !VCFHeaderVersion.isFormatString(line.getKey()))
                 .map(l -> l.getValidationFailure(targetVersion))
-                .filter(o -> o.isPresent())
-                .map(o -> o.get())
+                .filter(Optional::isPresent)
+                .map(Optional::get)
                 .collect(Collectors.toList());
     }
 
diff --git a/src/main/java/htsjdk/variant/vcf/VCFPassThruTextTransformer.java b/src/main/java/htsjdk/variant/vcf/VCFPassThruTextTransformer.java
index 24abed8eb0..55c172391c 100644
--- a/src/main/java/htsjdk/variant/vcf/VCFPassThruTextTransformer.java
+++ b/src/main/java/htsjdk/variant/vcf/VCFPassThruTextTransformer.java
@@ -27,4 +27,14 @@ public String decodeText(final String rawPart) {
     public List<String> decodeText(final List<String> rawParts) {
         return rawParts;
     }
+
+    /**
+     * No-op encoder for a single string
+     * @param rawPart the raw string to be encoded
+     * @return the raw string with no transformation done
+     */
+    @Override
+    public String encodeText(final String rawPart) {
+        return rawPart;
+    }
 }
diff --git a/src/main/java/htsjdk/variant/vcf/VCFPercentEncodedTextTransformer.java b/src/main/java/htsjdk/variant/vcf/VCFPercentEncodedTextTransformer.java
index 4c8015eaa5..b98b36e3f3 100644
--- a/src/main/java/htsjdk/variant/vcf/VCFPercentEncodedTextTransformer.java
+++ b/src/main/java/htsjdk/variant/vcf/VCFPercentEncodedTextTransformer.java
@@ -1,8 +1,8 @@
 package htsjdk.variant.vcf;
 
-import htsjdk.tribble.TribbleException;
-
+import java.util.Arrays;
 import java.util.List;
+import java.util.function.IntPredicate;
 import java.util.stream.Collectors;
 
 /**
@@ -10,65 +10,204 @@
  * of characters that have special meaning in VCF.
  */
 public class VCFPercentEncodedTextTransformer implements VCFTextTransformer {
-    final static private String ENCODING_SENTINEL_STRING = "%";
-    final static private char ENCODING_SENTNEL_CHAR = '%';
-    final static private int ENCODING_BASE_RADIX = 16;
+    private static final char ENCODING_SENTINEL_CHAR = '%';
+    // Marker stored in hexToBytes for characters that are not hex digits (~0, i.e. -1 as a byte)
+    private static final byte invalidHexEncoding = ~0;
+    private static final byte maxPossibleHexDigit = 'f' + 1;
+    private static final byte[] hexToBytes = new byte[maxPossibleHexDigit];
+    private static final char[] bytesToHex = {
+        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F',
+    };
+    // Fill the table with the marker, then overwrite the slots for 0-9, A-F and a-f with their numeric values
+    static {
+        Arrays.fill(hexToBytes, invalidHexEncoding);
+        for (byte i = '0'; i <= '9'; i++) hexToBytes[i] = (byte) (i - '0');
+        for (byte i = 'A'; i <= 'F'; i++) hexToBytes[i] = (byte) (10 + i - 'A');
+        for (byte i = 'a'; i <= 'f'; i++) hexToBytes[i] = (byte) (10 + i - 'a');
+    }
 
     /**
-     * Transform a single string, replacing % encoded values with their corresponding text.
+     * Transform a single string, replacing percent encoded values with their corresponding text.
      *
      * @param rawPart the raw string to be decoded
      * @return the decoded string
-     * @throws TribbleException if the the encoding is uninterpretable
      */
     @Override
     public String decodeText(final String rawPart) {
-        return decodePercentEncodedChars(rawPart);
+        return percentDecode(rawPart);
     }
 
     /**
-     * Transform a list of strings, replacing % encoded values with their corresponding text in each string.
+     * Transform a list of strings, replacing percent encoded values with their corresponding text in each string.
      *
-     * @param rawParts  a list of raw strings
+     * @param rawParts a list of raw strings
      * @return a list of decoded strings
-     * @throws TribbleException if the the encoding is uninterpretable
      */
     @Override
     public List<String> decodeText(final List<String> rawParts) {
-        return rawParts.stream().map(this::decodeText).collect(Collectors.toList());
+        return rawParts.stream().map(VCFPercentEncodedTextTransformer::percentDecode).collect(Collectors.toList());
     }
 
     /**
-     * Transform input strings containing embedded percent=encoded characters. For example, when given the
+     * Transform input strings containing embedded percent encoded characters. For example, when given the
      * string '%3D%41' will return the string '=A'.
+     * <p>
+     * This method is permissive in the input it accepts. Capitalized and lower case percent encoding are both
+     * accepted, although the VCF spec only allows capitalized encoding. Uninterpretable escape sequences
+     * (the % character followed by fewer than 2 characters before the end of the string, or the % sentinel
+     * followed by 2 characters either of which does not match the regular expression [0-9A-Fa-f]) are passed through
+     * uninterpreted.
+     * <p>
+     * If the input text does not contain any valid percent encoded sequences, a new string is not allocated,
+     * and the original string is returned.
      *
-     * @param rawText a string containing zero or more embedded encodings
+     * @param rawString a string containing zero or more embedded encodings
      * @return a string with all encoded characters replaced with the corresponding character
-     * @throws TribbleException if the the encoding is uninterpretable
      */
-    protected static String decodePercentEncodedChars(final String rawText) {
-        if (rawText.contains(ENCODING_SENTINEL_STRING)) {
-            StringBuilder builder = new StringBuilder(rawText.length());
-            for (int i = 0; i < rawText.length(); i++) {
-                final char c = rawText.charAt(i);
-                if (c == ENCODING_SENTNEL_CHAR && ((i + 2) < rawText.length())) {
-                    try {
-                        final char[] trans = Character.toChars(Integer.parseInt(rawText.substring(i + 1, i + 3), ENCODING_BASE_RADIX));
-                        if (trans.length != 1) {
-                            throw new TribbleException(String.format("escape sequence '%c' corresponds to an invalid encoding in '%s'", c, rawText));
-                        }
-                        builder.append(trans[0]);
-                        i += 2;
-                    } catch (IllegalArgumentException e) {
-                        builder.append(c);
+    public static String percentDecode(final String rawString) {
+        int matches = 0;
+        final int length = rawString.length();
+        // A valid percent encoding requires at least 3 characters (the % character and 2 hex digits)
+        // so we do not scan for % characters in the last 2 characters of the string
+        // The spec does not specify how "truncated" encodings (% followed by fewer than 2 hex digits
+        // before the string ends) should be interpreted, but we treat them as literal characters
+        // and append them uninterpreted
+        for (int i = 0, l = length - 2; i < l; i++) {
+            if (rawString.charAt(i) == ENCODING_SENTINEL_CHAR) matches++;
+        }
+
+        if (matches == 0) {
+            return rawString;
+        } else {
+            final StringBuilder s = new StringBuilder(length); // decoded text is never longer than the input
+            int lastMatchEnd = 0;
+            int matched = 0;
+            for (int i = 0; ; i++) {
+                if (rawString.charAt(i) == ENCODING_SENTINEL_CHAR) {
+                    final int hiDecoded = hexDigitToInt(rawString.charAt(i + 1));
+                    final int loDecoded = hexDigitToInt(rawString.charAt(i + 2));
+                    // Only decode and append the character if both characters after the % were interpretable
+                    // as hex digits; otherwise i stays on this % so that a % inside the rejected pair is still examined
+                    if ((hiDecoded | loDecoded) != invalidHexEncoding) {
+                        // Append on the portion of the original string that came before this matching character
+                        s.append(rawString, lastMatchEnd, i);
+                        s.append((char) ((hiDecoded << 4) | (loDecoded & 0x0F)));
+                        i += 2; // step over the two hex digits that were just consumed
+                        lastMatchEnd = i + 1;
+                    }
+                    matched++;
+                    // Found all sequences to decode in the string, so append the rest of the original string
+                    if (matched == matches) {
+                        s.append(rawString, lastMatchEnd, length);
+                        return s.toString();
                     }
-                } else {
-                    builder.append(c);
                 }
             }
-            return builder.toString();
         }
-        return rawText;
     }
 
+    private static int hexDigitToInt(final char c) {
+        return c < maxPossibleHexDigit ? hexToBytes[c] : invalidHexEncoding; // chars past 'f' lie outside the table
+    }
+
+    /**
+     * Transform a single string, percent encoding characters that have special meanings in VCF.
+     *
+     * @param rawPart the raw string to be encoded
+     * @return the encoded string, as produced by {@link #percentEncode(String)}
+     */
+    @Override
+    public String encodeText(final String rawPart) {
+        return percentEncode(rawPart);
+    }
+
+    /**
+     * Transform a single string, percent encoding the characters accepted by {@code isVCFSpecialChar}.
+     *
+     * @param rawPart the raw string to be encoded
+     * @return the encoded string, or {@code rawPart} itself when it contains no such character
+     */
+    public static String percentEncode(final String rawPart) {
+        return percentEncode(rawPart, VCFPercentEncodedTextTransformer::isVCFSpecialChar);
+    }
+
+    /**
+     * Transform a single string, percent encoding the characters accepted by {@code isHeaderSpecialChar}.
+     * <p>
+     * This method is suitable for encoding a header value in a key=value pair that is of type String (e.g. Description),
+     * which has fewer restrictions than fields in the body of the VCF such as INFO and FORMAT.
+     *
+     * @param rawString String to encode
+     * @return the encoded string
+     */
+    public static String percentEncodeHeaderText(final String rawString) {
+        return percentEncode(rawString, VCFPercentEncodedTextTransformer::isHeaderSpecialChar);
+    }
+
+    // Shared implementation: every character accepted by charPredicate is replaced by '%' followed by
+    // its two upper-case hex digits; a string without any such character is returned as-is.
+    private static String percentEncode(final String rawString, final IntPredicate charPredicate) {
+        final int length = rawString.length();
+        int specialCount = 0;
+        for (int pos = 0; pos < length; pos++) {
+            if (charPredicate.test(rawString.charAt(pos))) {
+                specialCount++;
+            }
+        }
+        if (specialCount == 0) {
+            // nothing to encode: hand back the caller's string without copying
+            return rawString;
+        }
+
+        // each encoded character grows from one char to three
+        final StringBuilder encoded = new StringBuilder(length + 2 * specialCount);
+        int copiedUpTo = 0;
+        for (int pos = 0; pos < length; pos++) {
+            final char c = rawString.charAt(pos);
+            if (!charPredicate.test(c)) {
+                continue;
+            }
+            // flush the untouched run preceding this character, then emit its escape
+            encoded.append(rawString, copiedUpTo, pos)
+                .append(ENCODING_SENTINEL_CHAR)
+                .append(bytesToHex[c >>> 4])
+                .append(bytesToHex[c & 0x0F]);
+            copiedUpTo = pos + 1;
+        }
+        // copy whatever follows the last encoded character
+        encoded.append(rawString, copiedUpTo, length);
+        return encoded.toString();
+    }
+
+    // Characters that have special meaning in the value part of a structured header line key=value pair.
+    // This set is smaller than the full set of characters with special meaning in VCF: space and comma
+    // are allowed due to the double-quoting introduced in VCF 4.2, and '=' is allowed because
+    // key=value pairs are comma-delimited, so an internal '=' is unambiguously part of the value
+    // as long as ',' is quoted.
+    private static boolean isHeaderSpecialChar(final int c) {
+        if (c == '%') {
+            // the percent sign introduces an escape sequence, so a literal one is itself special
+            return true;
+        }
+        // line feed, carriage return and tab
+        final boolean lineBreak = c == '\n' || c == '\r';
+        final boolean tab = c == '\t';
+        return lineBreak || tab;
+    }
+
+    // Characters that percentEncode(String) replaces with a percent escape: line feed, tab,
+    // carriage return and the percent sign itself (the same four accepted by isHeaderSpecialChar),
+    // followed by the four punctuation characters listed under "separators" below.
+    // Any other character is reported as not special.
+    private static boolean isVCFSpecialChar(final int c) {
+        return c == '\n'
+            || c == '\t'
+            || c == '\r'
+            || c == '%'
+            // separators
+            || c == ','
+            || c == ':'
+            || c == ';'
+            || c == '=';
+    }
 }
diff --git a/src/main/java/htsjdk/variant/vcf/VCFSimpleHeaderLine.java b/src/main/java/htsjdk/variant/vcf/VCFSimpleHeaderLine.java
index c0a3abce5c..2c53899f1d 100644
--- a/src/main/java/htsjdk/variant/vcf/VCFSimpleHeaderLine.java
+++ b/src/main/java/htsjdk/variant/vcf/VCFSimpleHeaderLine.java
@@ -34,7 +34,7 @@
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
-import java.util.stream.Collectors;
+import java.util.Optional;
 
 /**
  * An abstract class representing a VCF metadata line with a key and attribute=value pairs, one of
@@ -168,14 +168,27 @@ public int hashCode() {
     @Override
     protected String toStringEncoding() {
         //NOTE: this preserves/round-trips "extra" attributes such as SOURCE, VERSION, etc.
-        final StringBuilder builder = new StringBuilder();
-        builder.append(getKey());
-        builder.append("=<");
-        builder.append(genericFields.entrySet().stream()
-                .map(e -> e.getKey() + "=" + quoteAttributeValueForSerialization(e.getKey(), e.getValue()))
-                .collect(Collectors.joining(",")));
-        builder.append('>');
-        return builder.toString();
+        // Build "KEY=<attr1=value1,attr2=value2,...>", emitting the attributes in the
+        // iteration order of genericFields.
+        final StringBuilder encoded = new StringBuilder();
+        encoded.append(getKey()).append("=<");
+
+        // Empty before the first attribute, a comma before every subsequent one
+        String separator = "";
+        for (final Map.Entry<String, String> field : genericFields.entrySet()) {
+            final String attributeName = field.getKey();
+            final String attributeValue = field.getValue();
+
+            encoded.append(separator);
+            encoded.append(attributeName);
+            encoded.append('=');
+            encoded.append(encodeAttributeValueForSerialization(attributeName, attributeValue));
+
+            separator = ",";
+        }
+
+        encoded.append('>');
+        return encoded.toString();
     }
 
     // Called by VCFInfoHeaderLine to allow repairing of VCFInfoLines that have a Flag type and a non-zero count
@@ -190,10 +203,10 @@ protected void updateGenericField(final String attributeName, final String value
     /**
      * Return true if the attribute name requires quotes.
      * @param attributeName name of the attribute being serialized
-     * @return boolean indicating whether the value should be embedded n quotes during serialization
+     * @return boolean indicating whether the value should be embedded in quotes during serialization
      */
     protected boolean getIsQuotableAttribute(final String attributeName) {
-        // the (VF4.3) spec says that the DESCRIPTION, SOURCE, and VERSION attributes should be quoted
+        // the (VCF4.3) spec says that the DESCRIPTION, SOURCE, and VERSION attributes should be quoted
         // for INFO/FORMAT lines, but htsjdk seems to have historically quoted these for all structured
         // header lines
         return attributeName.equals(DESCRIPTION_ATTRIBUTE) ||
@@ -201,12 +214,37 @@ protected boolean getIsQuotableAttribute(final String attributeName) {
                 attributeName.equals(VERSION_ATTRIBUTE);
     }
 
+    /**
+     * Return true if values of the named attribute may be percent encoded during serialization.
+     * @param attributeName name of the attribute being serialized
+     * @return false for the ID, NUMBER and TYPE attributes, true for every other attribute name
+     */
+    protected boolean isPercentEncodableAttribute(final String attributeName) {
+        // VCF4.3 allows attribute values containing characters with special meaning to be percent
+        // encoded. ID, NUMBER and TYPE do not permit values that would need such encoding, so those
+        // three attributes are excluded; any other attribute is a candidate for percent encoding.
+        return !attributeName.equals(VCFSimpleHeaderLine.ID_ATTRIBUTE)
+            && !attributeName.equals(VCFCompoundHeaderLine.NUMBER_ATTRIBUTE)
+            && !attributeName.equals(VCFCompoundHeaderLine.TYPE_ATTRIBUTE);
+    }
+
     private void validate() {
         if ( genericFields.isEmpty() || !genericFields.keySet().stream().findFirst().get().equals(ID_ATTRIBUTE)) {
             throw new TribbleException(
                     String.format("The required ID tag is missing or not the first attribute: key=%s", super.getKey()));
         }
-        validateKeyOrID(getGenericFieldValue(ID_ATTRIBUTE), "ID");
+        final Optional<String> idFailure = validateKeyOrID(getGenericFieldValue(ID_ATTRIBUTE));
+        idFailure.ifPresent(failureText -> { // an empty Optional means nothing is thrown here
+            throw new TribbleException.VersionValidationFailure(failureText);
+        });
+    }
+
+    // Quote the value where required, then percent encode the quoted text if the attribute permits it
+    private String encodeAttributeValueForSerialization(final String attribute, final String originalValue) {
+        final String quoted = quoteAttributeValueForSerialization(attribute, originalValue);
+        final boolean percentEncode = isPercentEncodableAttribute(attribute);
+        // Attributes that do not permit percent encoding are returned with quoting applied only
+        return percentEncode ? VCFPercentEncodedTextTransformer.percentEncodeHeaderText(quoted) : quoted;
     }
 
     // Add quotes around any attribute value that contains a space or comma, or is supposed to be quoted by
diff --git a/src/main/java/htsjdk/variant/vcf/VCFTextTransformer.java b/src/main/java/htsjdk/variant/vcf/VCFTextTransformer.java
index 36f842b20a..f928507b01 100644
--- a/src/main/java/htsjdk/variant/vcf/VCFTextTransformer.java
+++ b/src/main/java/htsjdk/variant/vcf/VCFTextTransformer.java
@@ -28,4 +28,12 @@ public interface VCFTextTransformer {
      */
     List<String> decodeText(final List<String> rawParts);
 
+    /**
+     * Encode a single string.
+     *
+     * @param rawPart the raw string to be encoded
+     * @return the encoded string
+     * @throws TribbleException if the string cannot be encoded
+     */
+    String encodeText(final String rawPart);
 }
diff --git a/src/main/java/htsjdk/variant/vcf/VCFVersionUpgrader.java b/src/main/java/htsjdk/variant/vcf/VCFVersionUpgrader.java
new file mode 100644
index 0000000000..8db6a1883c
--- /dev/null
+++ b/src/main/java/htsjdk/variant/vcf/VCFVersionUpgrader.java
@@ -0,0 +1,47 @@
+package htsjdk.variant.vcf;
+
+import htsjdk.variant.variantcontext.writer.VCFVersionUpgradePolicy;
+
+import java.util.Collection;
+
+/**
+ * Applies a {@link VCFVersionUpgradePolicy} to a {@link VCFHeader} by adding a version header line to it.
+ */
+final class VCFVersionUpgrader {
+    /**
+     * Add a version line to {@code header} as directed by {@code policy}. Despite the name, nothing is
+     * returned; the outcome is only visible through the lines added to the header.
+     *
+     * @param header the header to update in place
+     * @param policy the upgrade policy to apply; policies without a case below leave the header untouched
+     */
+    public static void getOutputVersion(final VCFHeader header, final VCFVersionUpgradePolicy policy) {
+        // Guaranteed to not be null
+        final VCFHeaderVersion currentVersion = header.getVCFHeaderVersion();
+        switch (policy) {
+            case ONLY_INFALLIBLE_UPGRADE:
+                // 4.3+ lines are output as the latest version, pre-4.3 lines are output as 4.2
+                final VCFHeaderVersion newVersion = currentVersion.isAtLeastAsRecentAs(VCFHeader.DEFAULT_VCF_VERSION)
+                    ? VCFHeader.DEFAULT_VCF_VERSION
+                    : VCFHeaderVersion.VCF4_2;
+                header.addMetaDataLine(VCFHeader.makeHeaderVersionLine(newVersion));
+                // Must not fall through: UPGRADE_OR_FALLBACK would go on to add the latest version line
+                // whenever validation passes, undoing the 4.2 choice made just above.
+                break;
+            case UPGRADE_OR_FALLBACK:
+                // Only move to the latest version when the header reports no validation errors against it
+                final Collection<VCFValidationFailure<VCFHeaderLine>> failures = header.getValidationErrors(VCFHeader.DEFAULT_VCF_VERSION);
+                if (failures.isEmpty()) {
+                    header.addMetaDataLine(VCFHeader.makeHeaderVersionLine(VCFHeader.DEFAULT_VCF_VERSION));
+                }
+                break;
+            case UPGRADE_OR_FAIL:
+                // If validation fails, simply pass the exception through
+                header.addMetaDataLine(VCFHeader.makeHeaderVersionLine(VCFHeader.DEFAULT_VCF_VERSION));
+                break;
+            default:
+                // Remaining policies (e.g. DO_NOT_UPGRADE) add no version line
+                break;
+        }
+    }
+}
diff --git a/src/test/java/htsjdk/variant/VariantBaseTest.java b/src/test/java/htsjdk/variant/VariantBaseTest.java
index dc59309e7b..749ffe69e9 100644
--- a/src/test/java/htsjdk/variant/VariantBaseTest.java
+++ b/src/test/java/htsjdk/variant/VariantBaseTest.java
@@ -29,11 +29,15 @@
 import htsjdk.samtools.SAMSequenceDictionary;
 import htsjdk.samtools.SAMSequenceRecord;
 import htsjdk.samtools.util.Tuple;
+import htsjdk.tribble.AbstractFeatureReader;
+import htsjdk.tribble.FeatureReader;
+import htsjdk.tribble.TribbleException;
 import htsjdk.utils.ValidationUtils;
 import htsjdk.variant.variantcontext.Genotype;
 import htsjdk.variant.variantcontext.VariantContext;
+import htsjdk.variant.variantcontext.writer.VCFVersionUpgradePolicy;
+import htsjdk.variant.vcf.VCFCodec;
 import htsjdk.variant.vcf.VCFConstants;
-import htsjdk.variant.vcf.VCFFileReader;
 import htsjdk.variant.vcf.VCFHeader;
 import org.testng.Assert;
 
@@ -141,8 +145,16 @@ public static SAMSequenceDictionary createArtificialSequenceDictionary() {
      */
     public static Tuple<VCFHeader, List<VariantContext>> readEntireVCFIntoMemory(final Path vcfPath) {
         ValidationUtils.nonNull(vcfPath);
-        try ( final VCFFileReader vcfReader = new VCFFileReader(vcfPath, false) ){
-            return new Tuple<>(vcfReader.getFileHeader(), vcfReader.iterator().toList());
+        final VCFCodec codec = new VCFCodec();
+        codec.setVersionUpgradePolicy(VCFVersionUpgradePolicy.UPGRADE_OR_FALLBACK);
+        try (final FeatureReader<VariantContext> reader = AbstractFeatureReader.getFeatureReader(
+                vcfPath.toUri().toString(),
+                codec,
+                false
+        )) {
+            return new Tuple<>((VCFHeader) reader.getHeader(), reader.iterator().toList());
+        } catch (final IOException e) {
+            throw new TribbleException("Could not create an iterator from a feature reader.", e);
         }
     }
 
diff --git a/src/test/java/htsjdk/variant/bcf2/BCF2UtilsUnitTest.java b/src/test/java/htsjdk/variant/bcf2/BCF2UtilsUnitTest.java
index 95fb359446..e18c0d9309 100644
--- a/src/test/java/htsjdk/variant/bcf2/BCF2UtilsUnitTest.java
+++ b/src/test/java/htsjdk/variant/bcf2/BCF2UtilsUnitTest.java
@@ -75,10 +75,10 @@ public void testCreateDictionary() {
         final List<VCFHeaderLine> inputLines = new ArrayList<VCFHeaderLine>();
         int counter = 0;
         inputLines.add(VCFHeader.makeHeaderVersionLine(VCFHeader.DEFAULT_VCF_VERSION));
-        inputLines.add(new VCFFilterHeaderLine(String.valueOf(counter++)));
-        inputLines.add(new VCFFilterHeaderLine(String.valueOf(counter++)));
-        inputLines.add(new VCFContigHeaderLine(Collections.singletonMap("ID", String.valueOf(counter++)), counter));
-        inputLines.add(new VCFContigHeaderLine(Collections.singletonMap("ID", String.valueOf(counter++)), counter));
+        inputLines.add(new VCFFilterHeaderLine("l" + counter++));
+        inputLines.add(new VCFFilterHeaderLine("l" + counter++));
+        inputLines.add(new VCFContigHeaderLine(Collections.singletonMap("ID", "l" + counter++), counter));
+        inputLines.add(new VCFContigHeaderLine(Collections.singletonMap("ID", "l" + counter++), counter));
         inputLines.add(new VCFInfoHeaderLine(String.valueOf("A"+counter++), VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x"));
         inputLines.add(new VCFInfoHeaderLine(String.valueOf("A"+counter++), VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x"));
         inputLines.add(new VCFHeaderLine("x", "misc"));
@@ -115,21 +115,22 @@ public Object[][] makeHeaderOrderTestProvider() {
 
         int counter = 0;
         inputLines.add(VCFHeader.makeHeaderVersionLine(VCFHeader.DEFAULT_VCF_VERSION));
-        inputLines.add(new VCFFilterHeaderLine(String.valueOf(counter++)));
-        inputLines.add(new VCFFilterHeaderLine(String.valueOf(counter++)));
-        inputLines.add(new VCFContigHeaderLine(Collections.singletonMap("ID", String.valueOf(counter++)), counter));
-        inputLines.add(new VCFContigHeaderLine(Collections.singletonMap("ID", String.valueOf(counter++)), counter));
-        inputLines.add(new VCFInfoHeaderLine(String.valueOf(counter++), VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x"));
-        inputLines.add(new VCFInfoHeaderLine(String.valueOf(counter++), VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x"));
-        inputLines.add(new VCFFormatHeaderLine(String.valueOf(counter++), VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x"));
-        inputLines.add(new VCFFormatHeaderLine(String.valueOf(counter++), VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x"));
+        // We prefix all the line IDs with "l" because as of VCF 4.3, IDs cannot start with a number
+        inputLines.add(new VCFFilterHeaderLine("l" + counter++));
+        inputLines.add(new VCFFilterHeaderLine("l" + counter++));
+        inputLines.add(new VCFContigHeaderLine(Collections.singletonMap("ID", "l" + counter++), counter));
+        inputLines.add(new VCFContigHeaderLine(Collections.singletonMap("ID", "l" + counter++), counter));
+        inputLines.add(new VCFInfoHeaderLine("l" + counter++, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x"));
+        inputLines.add(new VCFInfoHeaderLine("l" + counter++, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x"));
+        inputLines.add(new VCFFormatHeaderLine("l" + counter++, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x"));
+        inputLines.add(new VCFFormatHeaderLine("l" + counter++, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x"));
         final int inputLineCounter = counter;
         final VCFHeader inputHeader = new VCFHeader(new LinkedHashSet<VCFHeaderLine>(inputLines));
 
-        extraLines.add(new VCFFilterHeaderLine(String.valueOf(counter++)));
-        extraLines.add(new VCFContigHeaderLine(Collections.singletonMap("ID", String.valueOf(counter++)), counter));
-        extraLines.add(new VCFInfoHeaderLine(String.valueOf(counter++), VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x"));
-        extraLines.add(new VCFFormatHeaderLine(String.valueOf(counter++), VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x"));
+        extraLines.add(new VCFFilterHeaderLine("l" + counter++));
+        extraLines.add(new VCFContigHeaderLine(Collections.singletonMap("ID", "l" + counter++), counter));
+        extraLines.add(new VCFInfoHeaderLine("l" + counter++, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x"));
+        extraLines.add(new VCFFormatHeaderLine("l" + counter++, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x"));
         extraLines.add(new VCFHeaderLine("x", "misc"));
         extraLines.add(new VCFHeaderLine("y", "misc"));
 
@@ -180,7 +181,8 @@ private static boolean expectedConsistent(final VCFHeader combinationHeader, fin
         final List<Integer> ids = new ArrayList<Integer>();
         for ( final VCFHeaderLine line : combinationHeader.getMetaDataInInputOrder() ) {
             if ( line.isIDHeaderLine()) {
-                ids.add(Integer.valueOf(line.getID()));
+                // Strip off "l" prefix
+                ids.add(Integer.valueOf(line.getID().substring(1)));
             }
         }
 
diff --git a/src/test/java/htsjdk/variant/variantcontext/GenotypeBuilderTest.java b/src/test/java/htsjdk/variant/variantcontext/GenotypeBuilderTest.java
index 5e3f0b9eb8..caed6dbdf8 100644
--- a/src/test/java/htsjdk/variant/variantcontext/GenotypeBuilderTest.java
+++ b/src/test/java/htsjdk/variant/variantcontext/GenotypeBuilderTest.java
@@ -25,16 +25,85 @@
 
 package htsjdk.variant.variantcontext;
 
+import htsjdk.tribble.TribbleException;
 import htsjdk.variant.VariantBaseTest;
 import org.testng.Assert;
+import org.testng.annotations.DataProvider;
 import org.testng.annotations.Test;
 
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collections;
 import java.util.List;
 
 public class GenotypeBuilderTest extends VariantBaseTest {
 
+    @Test(expectedExceptions = TribbleException.class)
+    public void testRejectDuplicateFilters() {
+        // "x" occurs twice in the semicolon-delimited filter string
+        new GenotypeBuilder("test").filter("x;y;x");
+    }
+
+    @Test(expectedExceptions = TribbleException.class)
+    public void testRejectDuplicateFiltersCollection() {
+        // The repeated "x" is supplied as elements of a List
+        new GenotypeBuilder("test").filters(Arrays.asList("x", "y", "x"));
+    }
+
+    @Test(expectedExceptions = TribbleException.class)
+    public void testRejectDuplicateFiltersArray() {
+        // The repeated "x" is supplied as individual String arguments
+        new GenotypeBuilder("test").filters("x", "y", "x");
+    }
+
+    @DataProvider
+    public Object[][] illegalFilterNameProvider() {
+        final String reservedZero = "0";       // the reserved string 0
+        final String withWhitespace = "a b";   // contains whitespace
+        final String withSeparator = "a;b";    // contains the separator
+        return new Object[][]{
+            {reservedZero},
+            {withWhitespace},
+            {withSeparator}
+        };
+    }
+
+    @Test(dataProvider = "illegalFilterNameProvider", expectedExceptions = TribbleException.class)
+    public void testRejectIllegalFilterName(final String filter) {
+        // The name is wrapped in a one-element list before being handed to the builder
+        new GenotypeBuilder("test").filters(Collections.singletonList(filter));
+    }
+
+    @DataProvider
+    public Object[][] illegalFilterSeparatorPlacementProvider() {
+        final String leadingSeparator = ";a";    // begins with ;
+        final String trailingSeparator = "a;";   // ends with ;
+        final String doubledSeparator = "a;;b";  // adjacent internal ;
+        return new Object[][]{
+            {leadingSeparator},
+            {trailingSeparator},
+            {doubledSeparator}
+        };
+    }
+
+    @Test(dataProvider = "illegalFilterSeparatorPlacementProvider", expectedExceptions = TribbleException.class)
+    public void testRejectIllegalFilterSeparatorPlacement(final String filter) {
+        // The whole string, separators included, is handed over as a single String argument
+        new GenotypeBuilder("test").filters(filter);
+    }
+
+    @Test(expectedExceptions = TribbleException.class)
+    public void testRejectMissingWithValueFilterString() {
+        // "." appears alongside a named filter in the same string
+        new GenotypeBuilder("test").filters("a;.");
+    }
+
+    @Test
+    public void testAcceptMissingFilterString() {
+        // Passes as long as no exception is thrown for "." on its own
+        new GenotypeBuilder("test").filters(".");
+    }
+
     @Test
     public void testMakeWithShallowCopy() {
         final GenotypeBuilder gb = new GenotypeBuilder("test");
diff --git a/src/test/java/htsjdk/variant/variantcontext/VariantContextBuilderTest.java b/src/test/java/htsjdk/variant/variantcontext/VariantContextBuilderTest.java
index c8871bd2be..fdd95e1e14 100644
--- a/src/test/java/htsjdk/variant/variantcontext/VariantContextBuilderTest.java
+++ b/src/test/java/htsjdk/variant/variantcontext/VariantContextBuilderTest.java
@@ -1,5 +1,6 @@
 package htsjdk.variant.variantcontext;
 
+import htsjdk.tribble.TribbleException;
 import htsjdk.variant.VariantBaseTest;
 import org.testng.Assert;
 import org.testng.annotations.DataProvider;
@@ -258,7 +259,7 @@ public static Object[][] illegalFilterStrings() {
         };
     }
 
-    @Test(dataProvider = "illegalFilterStrings", expectedExceptions = IllegalStateException.class)
+    @Test(dataProvider = "illegalFilterStrings", expectedExceptions = TribbleException.class)
     public void testFilterCannotUseBadFilters(final String filter) {
         final Set<String> filters = new HashSet<>();
         filters.add(filter);
@@ -322,7 +323,7 @@ public void testCanResetFilters() {
         builder.filter("mayIPlease?");
     }
 
-    @Test(expectedExceptions = IllegalStateException.class)
+    @Test(expectedExceptions = TribbleException.class)
     public void testCantCreateNullFilter(){
         final VariantContextBuilder builder = new VariantContextBuilder("source", "contig", 1, 1, Arrays.asList(Tref, C, G)).filter("TEST");
         builder.filters((String)null);
diff --git a/src/test/java/htsjdk/variant/variantcontext/VariantContextUnitTest.java b/src/test/java/htsjdk/variant/variantcontext/VariantContextUnitTest.java
index 8613be1e01..085bf6d10e 100644
--- a/src/test/java/htsjdk/variant/variantcontext/VariantContextUnitTest.java
+++ b/src/test/java/htsjdk/variant/variantcontext/VariantContextUnitTest.java
@@ -41,6 +41,7 @@
 import htsjdk.variant.vcf.VCFConstants;
 import htsjdk.variant.vcf.VCFFileReader;
 
+import htsjdk.variant.vcf.VCFHeader;
 import org.testng.Assert;
 import org.testng.annotations.BeforeMethod;
 import org.testng.annotations.BeforeSuite;
@@ -1200,7 +1201,7 @@ private VariantContext createTestVariantContext(final List<Allele> alleles, fina
         // most of the fields are not important to the tests, we just need alleles and gc set properly
         return new VariantContext("genotypes", VCFConstants.EMPTY_ID_FIELD, snpLoc, snpLocStart, snpLocStop, alleles,
                 gc, VariantContext.NO_LOG10_PERROR, filters, attributes,
-                fullyDecoded, toValidate);
+                fullyDecoded, VCFHeader.DEFAULT_VCF_VERSION, toValidate);
     }
 
     // validateReferenceBases: PASS conditions
@@ -1296,7 +1297,7 @@ private VariantContext createTestVariantContextRsIds(final String rsId) {
 
         return new VariantContext("genotypes", rsId, snpLoc, snpLocStart, snpLocStop, Arrays.asList(Aref, T),
                 GenotypesContext.NO_GENOTYPES, VariantContext.NO_LOG10_PERROR, filters, attributes,
-                fullyDecoded, toValidate);
+                fullyDecoded, VCFHeader.DEFAULT_VCF_VERSION, toValidate);
     }
     private Set<String> makeRsIDsSet(final String... rsIds) {
         return new HashSet<>(Arrays.asList(rsIds));
diff --git a/src/test/java/htsjdk/variant/vcf/VCFCodec43FeaturesTest.java b/src/test/java/htsjdk/variant/vcf/VCFCodec43FeaturesTest.java
index 8dbf6dd30d..d11af08105 100644
--- a/src/test/java/htsjdk/variant/vcf/VCFCodec43FeaturesTest.java
+++ b/src/test/java/htsjdk/variant/vcf/VCFCodec43FeaturesTest.java
@@ -10,6 +10,10 @@
 import htsjdk.variant.VariantBaseTest;
 import htsjdk.variant.variantcontext.Allele;
 import htsjdk.variant.variantcontext.VariantContext;
+import htsjdk.variant.variantcontext.writer.Options;
+import htsjdk.variant.variantcontext.writer.VCFVersionUpgradePolicy;
+import htsjdk.variant.variantcontext.writer.VariantContextWriter;
+import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder;
 import org.testng.Assert;
 import org.testng.annotations.DataProvider;
 import org.testng.annotations.Test;
@@ -39,6 +43,11 @@ public class VCFCodec43FeaturesTest extends VariantBaseTest {
     private static final Path TEST_43_UTF8_FILE = TEST_PATH.resolve("all43Features.utf8.vcf");
     private static final Path TEST_43_UTF8_GZ_FILE = TEST_PATH.resolve("all43FeaturesCompressed.utf8.vcf.gz");
 
+    private static final Path TEST_42_PEDIGREE_FILE = TEST_PATH.resolve("42Pedigree.vcf");
+    private static final Path TEST_INVALID_43_CONTIG_NAME_FILE = TEST_PATH.resolve("invalid43ContigName.vcf");
+    private static final Path TEST_VALID_43_CONTIG_NAME_FILE = TEST_PATH.resolve("valid43ContigName.vcf");
+    private static final Path TEST_42_AUTOMATICALLY_CONVERTIBLE_FILE = TEST_PATH.resolve("42AutomaticallyConvertible.vcf");
+
     @DataProvider(name="all43Files")
     private Object[][] allVCF43Files() {
         return new Object[][] {
@@ -59,7 +68,7 @@ private Object[][] allVCF43Files() {
     }
 
     @Test(dataProvider="all43Files")
-    public void testReadAllVCF43Features(final Path testFile, int expectedHeaderLineCount) {
+    public void testReadAllVCF43Features(final Path testFile, final int expectedHeaderLineCount) {
         final Tuple<VCFHeader, List<VariantContext>> entireVCF = readEntireVCFIntoMemory(testFile);
 
         Assert.assertEquals(entireVCF.a.getMetaDataInInputOrder().size(), expectedHeaderLineCount);
@@ -142,7 +151,7 @@ public void testVCF43PercentEncoding(final Path testFile, int ignored) {
 
         // 1       327     .       T       <*>     666.18  GATK_STANDARD;HARD_TO_VALIDATE
         // AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=fil%3AteredInBoth
-        final VariantContext vc = entireVCF.b.get(0);
+        final VariantContext vc = entireVCF.b.get(0).fullyDecode(entireVCF.a, false);
         Assert.assertEquals(vc.getContig(), "1");
         Assert.assertEquals(vc.getStart(), 327);
         // set=fil%3AteredInBoth
@@ -166,6 +175,81 @@ public void testSymbolicAlternateAllele(final Path testFile, int ignored) {
         Assert.assertEquals(symbolicAlternateAllele, Allele.create(Allele.UNSPECIFIED_ALTERNATE_ALLELE_STRING));
     }
 
+    @Test(dataProvider = "all43Files")
+    public void testReadWriteRoundTrip(final Path testFile, final int ignored) throws IOException {
+        // Make sure 4.3 files round trip through reading into memory, writing, then reading back in
+        final Tuple<VCFHeader, List<VariantContext>> readVCF = readEntireVCFIntoMemory(testFile);
+        final VCFHeader readHeader = readVCF.a;
+
+        final File out = File.createTempFile("testReadWriteRoundTrip", testFile.getFileName().toString());
+        out.deleteOnExit();
+
+        // try-with-resources closes the writer even if writing the header or a record throws
+        try (final VariantContextWriter writer = new VariantContextWriterBuilder()
+            .setOutputFile(out)
+            .unsetOption(Options.INDEX_ON_THE_FLY)
+            .unsetOption(Options.DO_NOT_WRITE_GENOTYPES)
+            .build()) {
+            writer.writeHeader(readHeader);
+            for (final VariantContext vc : readVCF.b) {
+                writer.add(vc.fullyDecode(readHeader, false));
+            }
+        }
+
+        final Tuple<VCFHeader, List<VariantContext>> writeVCF = readEntireVCFIntoMemory(out.toPath());
+        final VCFHeader writeHeader = writeVCF.a;
+
+        Assert.assertNotNull(readHeader.getVCFHeaderVersion());
+        Assert.assertNotNull(writeHeader.getVCFHeaderVersion());
+        Assert.assertEquals(readHeader.getMetaDataInSortedOrder(), writeHeader.getMetaDataInSortedOrder());
+        Assert.assertEquals(readHeader.getInfoHeaderLines(), writeHeader.getInfoHeaderLines());
+        Assert.assertEquals(readHeader.getFormatHeaderLines(), writeHeader.getFormatHeaderLines());
+
+        Assert.assertEqualsNoOrder(readHeader.getFilterLines().toArray(), writeHeader.getFilterLines().toArray());
+        Assert.assertEqualsNoOrder(readHeader.getContigLines().toArray(), writeHeader.getContigLines().toArray());
+
+        // The loop below only visits records that were read back, so check none were dropped first
+        Assert.assertEquals(writeVCF.b.size(), readVCF.b.size());
+        for (int i = 0; i < writeVCF.b.size(); i++) {
+            VariantBaseTest.assertVariantContextsAreEqual(
+                writeVCF.b.get(i).fullyDecode(writeHeader, false),
+                readVCF.b.get(i).fullyDecode(readHeader, false)
+            );
+        }
+    }
+
+    @DataProvider(name = "automaticUpConversionTestFiles")
+    private Object[][] automaticUpConversionTestFiles() {
+        final VCFHeaderVersion v42 = VCFHeaderVersion.VCF4_2;
+        final VCFHeaderVersion v43 = VCFHeaderVersion.VCF4_3;
+        return new Object[][]{ // rows are {input file, version paired with it by the test}
+            {TEST_42_PEDIGREE_FILE, v42}, {TEST_INVALID_43_CONTIG_NAME_FILE, v42},
+            {TEST_VALID_43_CONTIG_NAME_FILE, v43}, {TEST_42_AUTOMATICALLY_CONVERTIBLE_FILE, v43}
+        };
+    }
+
+    @Test(dataProvider = "automaticUpConversionTestFiles")
+    public void testAutomaticUpConversion(final Path testFile, final VCFHeaderVersion expectedVersion) throws IOException {
+        // Files that can be automatically converted to 4.3 should be written as 4.3,
+        // and files that cannot should be left as 4.2
+        final Tuple<VCFHeader, List<VariantContext>> readVCF = readEntireVCFIntoMemory(testFile);
+
+        final File out = File.createTempFile("test", testFile.getFileName().toString());
+        out.deleteOnExit();
+
+        try (final VariantContextWriter writer = new VariantContextWriterBuilder()
+            .setOutputFile(out)
+            .unsetOption(Options.INDEX_ON_THE_FLY)
+            .unsetOption(Options.DO_NOT_WRITE_GENOTYPES)
+            .build()) {
+            // Only the header is written; try-with-resources closes the writer even if this throws
+            writer.writeHeader(readVCF.a);
+        }
+
+        final Tuple<VCFHeader, List<VariantContext>> writeVCF = readEntireVCFIntoMemory(out.toPath());
+        Assert.assertEquals(writeVCF.a.getVCFHeaderVersion(), expectedVersion);
+    }
+
     @DataProvider(name="all43IndexableFiles")
     private Object[][] allVCF43IndexableFiles() {
         return new Object[][] {
@@ -273,5 +357,4 @@ private static List<VCFHeaderLine> getIDHeaderLinesWithKey(final VCFHeader heade
                         .collect(Collectors.toList());
         return headerLines;
     }
-
 }
diff --git a/src/test/java/htsjdk/variant/vcf/VCFCompoundHeaderLineUnitTest.java b/src/test/java/htsjdk/variant/vcf/VCFCompoundHeaderLineUnitTest.java
index 96924b4e3a..2ba980fbb9 100644
--- a/src/test/java/htsjdk/variant/vcf/VCFCompoundHeaderLineUnitTest.java
+++ b/src/test/java/htsjdk/variant/vcf/VCFCompoundHeaderLineUnitTest.java
@@ -91,8 +91,7 @@ public Object[][] getInvalidLines() {
 
     @Test(dataProvider = "invalidIDs", expectedExceptions = TribbleException.VersionValidationFailure.class)
     public void testGetValidationError(final String lineString) {
-        // TODO change to VCFHeader.DEFAULT_VCF_VERSION
-        new VCFInfoHeaderLine(lineString, VCFHeaderVersion.VCF4_3);
+        new VCFInfoHeaderLine(lineString, VCFHeader.DEFAULT_VCF_VERSION);
     }
 
     @DataProvider (name = "headerLineTypes")
diff --git a/src/test/java/htsjdk/variant/vcf/VCFContigHeaderLineUnitTest.java b/src/test/java/htsjdk/variant/vcf/VCFContigHeaderLineUnitTest.java
index ad33575bef..8c4ef944f5 100644
--- a/src/test/java/htsjdk/variant/vcf/VCFContigHeaderLineUnitTest.java
+++ b/src/test/java/htsjdk/variant/vcf/VCFContigHeaderLineUnitTest.java
@@ -49,8 +49,7 @@ public Object[][] getInvalidIDs() {
 
     @Test(dataProvider = "invalidIDs", expectedExceptions = TribbleException.VersionValidationFailure.class)
     public void testInvalidIDs(final String lineString) {
-        // TODO change to VCFHeader.DEFAULT_VCF_VERSION
-        new VCFContigHeaderLine(lineString, VCFHeaderVersion.VCF4_3, 1);
+        new VCFContigHeaderLine(lineString, VCFHeader.DEFAULT_VCF_VERSION, 1);
     }
 
     @Test(expectedExceptions=TribbleException.class)
diff --git a/src/test/java/htsjdk/variant/vcf/VCFHeaderLineUnitTest.java b/src/test/java/htsjdk/variant/vcf/VCFHeaderLineUnitTest.java
index d5d7e47ec9..2cd81e7ef9 100644
--- a/src/test/java/htsjdk/variant/vcf/VCFHeaderLineUnitTest.java
+++ b/src/test/java/htsjdk/variant/vcf/VCFHeaderLineUnitTest.java
@@ -100,11 +100,6 @@ public void testInvalidKeys(final String testKey) {
         new VCFHeaderLine(testKey, "");
     }
 
-    @Test(dataProvider = "invalidHeaderLineKeys", expectedExceptions=TribbleException.class)
-    public void testValidateAsIdInvalid(final String testKey) {
-        VCFHeaderLine.validateKeyOrID(testKey, "test");
-    }
-
     @DataProvider(name = "vcfVersions")
     public Object[][] vcfVersions() {
         return new Object[][]{
diff --git a/src/test/java/htsjdk/variant/vcf/VCFHeaderUnitTest.java b/src/test/java/htsjdk/variant/vcf/VCFHeaderUnitTest.java
index 9f51901f91..188375ba58 100644
--- a/src/test/java/htsjdk/variant/vcf/VCFHeaderUnitTest.java
+++ b/src/test/java/htsjdk/variant/vcf/VCFHeaderUnitTest.java
@@ -27,7 +27,6 @@
 
 import htsjdk.samtools.SAMSequenceDictionary;
 import htsjdk.samtools.SAMSequenceRecord;
-import htsjdk.samtools.util.CloseableIterator;
 import htsjdk.samtools.util.FileExtensions;
 import htsjdk.samtools.util.TestUtil;
 import htsjdk.tribble.TribbleException;
@@ -38,16 +37,29 @@
 import htsjdk.variant.VariantBaseTest;
 import htsjdk.variant.variantcontext.VariantContext;
 import htsjdk.variant.variantcontext.writer.Options;
+import htsjdk.variant.variantcontext.writer.VCFVersionUpgradePolicy;
 import htsjdk.variant.variantcontext.writer.VariantContextWriter;
 import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder;
 import org.testng.Assert;
 import org.testng.annotations.DataProvider;
 import org.testng.annotations.Test;
 
-import java.io.*;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileReader;
+import java.io.IOException;
 import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.EnumSet;
+import java.util.LinkedHashMap;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
 import java.util.stream.Collectors;
 
 public class VCFHeaderUnitTest extends VariantBaseTest {
@@ -77,7 +89,7 @@ public void test42FileRoundtrip() throws Exception {
         final File actualFile = File.createTempFile("testVcf4.2roundtrip.", FileExtensions.VCF);
         actualFile.deleteOnExit();
 
-        try (final VCFFileReader originalFileReader = new VCFFileReader(expectedFile, false);
+        try (final VCFFileReader originalFileReader = new VCFFileReader(expectedFile.toPath(), false, VCFVersionUpgradePolicy.DO_NOT_UPGRADE);
              final VariantContextWriter copyWriter = new VariantContextWriterBuilder()
                      .setOutputFile(actualFile)
                      .setReferenceDictionary(createArtificialSequenceDictionary())
@@ -289,9 +301,11 @@ public void testGetContigLinesHonorsSortOrder() {
             Assert.assertTrue(originalContigsInSortedOrder.size() > 0);
 
             // copy the contig lines to a new list
-            final List<VCFContigHeaderLine> confoundedList = new ArrayList<>();
             final int midPoint = originalContigsInSortedOrder.size() / 2;
-            confoundedList.addAll(originalContigsInSortedOrder.subList(0, midPoint));
+            final List<VCFContigHeaderLine> confoundedList = new ArrayList<>(originalContigsInSortedOrder.subList(
+                0,
+                midPoint
+            ));
 
             // deliberately stick an extra contig line in the middle of the list, but using a contig index
             // that will cause the line to sort to the end
@@ -312,7 +326,7 @@ public void testGetContigLinesHonorsSortOrder() {
             // create a new header from the confounded list, call getContigLines() on the header, and validate
             // that the new line is included in the resulting list, and is at the end
             final VCFHeader newHeader = new VCFHeader();
-            confoundedList.forEach(hl -> newHeader.addMetaDataLine(hl));
+            confoundedList.forEach(newHeader::addMetaDataLine);
             final List<VCFContigHeaderLine> roundTrippedLines = newHeader.getContigLines();
             Assert.assertEquals(roundTrippedLines.size(), originalContigsInSortedOrder.size() + 1);
             Assert.assertEquals(roundTrippedLines.get(roundTrippedLines.size() - 1), newContigLine);
@@ -453,6 +467,8 @@ public Object[][] validHeaderVersionTransitions() {
 
     @DataProvider(name="invalidHeaderVersionTransitions")
     public Object[][] invalidHeaderVersionTransitions() {
+        // v4.3 can never be transitioned down to pre v4.3
+        // Pre v4.3 might be able to be transitioned to 4.3, and this is tested in VCFCodec43FeaturesTest
         return new Object[][] {
                 //reject any attempt to go backwards in time
                 {VCFHeaderVersion.VCF4_3, VCFHeaderVersion.VCF4_2},
@@ -602,12 +618,11 @@ public void testAddMetaDataLineFileFormat() {
 
     @Test
     public void testFileFormatLineFirstInSet() {
-        final Set<VCFHeaderLine> orderedLineSet = new LinkedHashSet<>();
-        orderedLineSet.addAll(VCFHeaderUnitTestData.getV42HeaderLinesWITHOUTFormatString());
-        orderedLineSet.stream().forEach(l -> Assert.assertFalse(VCFHeaderVersion.isFormatString(l.getKey())));
+        final Set<VCFHeaderLine> orderedLineSet = new LinkedHashSet<>(VCFHeaderUnitTestData.getV42HeaderLinesWITHOUTFormatString());
+        orderedLineSet.forEach(l -> Assert.assertFalse(VCFHeaderVersion.isFormatString(l.getKey())));
         // add the file format line last
         orderedLineSet.add(VCFHeader.makeHeaderVersionLine(VCFHeader.DEFAULT_VCF_VERSION));
-        final VCFHeader vcfHeader = new VCFHeader(orderedLineSet, Collections.EMPTY_SET);
+        final VCFHeader vcfHeader = new VCFHeader(orderedLineSet, Collections.emptySet());
 
         final Collection<VCFHeaderLine> inputOrderLines = vcfHeader.getMetaDataInInputOrder();
         final Optional<VCFHeaderLine> optFirstInputOrderLine = inputOrderLines.stream().findFirst();
@@ -708,9 +723,7 @@ public void testVCFHeaderQuoteEscaping() throws Exception {
                 .setOptions(EnumSet.of(Options.ALLOW_MISSING_FIELDS_IN_HEADER, Options.INDEX_ON_THE_FLY))
                 .build();
         firstCopyWriter.writeHeader(originalHeader);
-        final CloseableIterator<VariantContext> firstCopyVariantIterator = originalFileReader.iterator();
-        while (firstCopyVariantIterator.hasNext()) {
-            final VariantContext variantContext = firstCopyVariantIterator.next();
+        for (final VariantContext variantContext : originalFileReader) {
             firstCopyWriter.add(variantContext);
         }
         originalFileReader.close();
@@ -751,9 +764,7 @@ public void testVCFHeaderQuoteEscaping() throws Exception {
                 .setOptions(EnumSet.of(Options.ALLOW_MISSING_FIELDS_IN_HEADER, Options.INDEX_ON_THE_FLY))
                 .build();
         secondCopyWriter.writeHeader(firstCopyHeader);
-        final CloseableIterator<VariantContext> secondCopyVariantIterator = firstCopyReader.iterator();
-        while (secondCopyVariantIterator.hasNext()) {
-            final VariantContext variantContext = secondCopyVariantIterator.next();
+        for (final VariantContext variantContext : firstCopyReader) {
             secondCopyWriter.add(variantContext);
         }
         secondCopyWriter.close();
@@ -802,23 +813,58 @@ public void testVCFHeaderQuoteEscaping() throws Exception {
     /////////////////////////////////////////////////////////////////////
 
     // Serialize/encode the header to a file, read metaData back in
-    private Set<VCFHeaderLine> getRoundTripEncoded(final VCFHeader header) throws IOException {
+    private static Set<VCFHeaderLine> getRoundTripEncoded(final VCFHeader header) throws IOException {
         final File myTempFile = File.createTempFile("VCFHeader", "vcf");
-        try (final VariantContextWriter vcfWriter =
-                     new VariantContextWriterBuilder()
-                             .setOutputFile(myTempFile)
-                             .setOutputFileType(VariantContextWriterBuilder.OutputType.VCF)
-                             .setOptions(VariantContextWriterBuilder.NO_OPTIONS)
-                             .build()) {
+        try (final VariantContextWriter vcfWriter = new VariantContextWriterBuilder()
+                .setOutputFile(myTempFile)
+                .setOutputFileType(VariantContextWriterBuilder.OutputType.VCF)
+                .setOptions(VariantContextWriterBuilder.NO_OPTIONS)
+                .build()
+        ) {
             vcfWriter.writeHeader(header);
         }
-        final VCFHeader vcfHeader = (VCFHeader) new VCFCodec().readActualHeader(new LineIteratorImpl(
+        final VCFCodec codec = new VCFCodec();
+        codec.setVersionUpgradePolicy(VCFVersionUpgradePolicy.DO_NOT_UPGRADE);
+        final VCFHeader vcfHeader = (VCFHeader) codec.readActualHeader(new LineIteratorImpl(
                 new SynchronousLineReader(new FileReader(myTempFile.getAbsolutePath()))));
         return vcfHeader.getMetaDataInSortedOrder();
     }
 
+    @Test
+    public void testVcf42Roundtrip() throws Exception {
+        // this test ensures that source/version fields are round-tripped properly
+
+        // read an existing VCF
+        final File expectedFile = new File("src/test/resources/htsjdk/variant/Vcf4.2WithSourceVersionInfoFields.vcf");
+
+        // write the file out into a new copy
+        final File actualFile = File.createTempFile("testVcf4.2roundtrip.", FileExtensions.VCF);
+        actualFile.deleteOnExit();
+
+        try (final VCFFileReader originalFileReader = new VCFFileReader(expectedFile, false);
+             final VariantContextWriter copyWriter = new VariantContextWriterBuilder()
+                     .setOutputFile(actualFile)
+                     .setReferenceDictionary(createArtificialSequenceDictionary())
+                     .setOptions(EnumSet.of(Options.ALLOW_MISSING_FIELDS_IN_HEADER, Options.INDEX_ON_THE_FLY))
+                     .build()
+        ) {
+            final VCFHeader originalHeader = originalFileReader.getFileHeader();
+
+            copyWriter.writeHeader(originalHeader);
+            for (final VariantContext variantContext : originalFileReader) {
+                copyWriter.add(variantContext);
+            }
+        }
+        // compare everything after the first line; each file is sliced at its OWN first newline
+        final String actualContents = new String(Files.readAllBytes(actualFile.toPath()), StandardCharsets.UTF_8);
+        final String expectedContents = new String(Files.readAllBytes(expectedFile.toPath()), StandardCharsets.UTF_8);
+        Assert.assertEquals(actualContents.substring(actualContents.indexOf('\n')), expectedContents.substring(expectedContents.indexOf('\n')));
+    }
+
+    private static final int VCF4headerStringCount = 16; // 17 -1 for the #CHROM... line
+
 
-    private VCFHeader getHiSeqVCFHeader() {
+    private static VCFHeader getHiSeqVCFHeader() {
         final File vcf = new File("src/test/resources/htsjdk/variant/HiSeq.10000.vcf");
         final VCFFileReader reader = new VCFFileReader(vcf, false);
         final VCFHeader header = reader.getFileHeader();
diff --git a/src/test/java/htsjdk/variant/vcf/VCFHeaderUnitTestData.java b/src/test/java/htsjdk/variant/vcf/VCFHeaderUnitTestData.java
index 286fcecfa6..6c197f1c30 100644
--- a/src/test/java/htsjdk/variant/vcf/VCFHeaderUnitTestData.java
+++ b/src/test/java/htsjdk/variant/vcf/VCFHeaderUnitTestData.java
@@ -3,6 +3,7 @@
 import htsjdk.tribble.TribbleException;
 import htsjdk.tribble.readers.LineIteratorImpl;
 import htsjdk.tribble.readers.SynchronousLineReader;
+import htsjdk.variant.variantcontext.writer.VCFVersionUpgradePolicy;
 import org.testng.Assert;
 
 import java.io.StringReader;
@@ -169,6 +170,7 @@ public static Set<VCFHeaderLine> getV42HeaderLinesWITHFormatString() {
 
     public static VCFHeader createHeaderFromString(final String headerStr) {
         final VCFCodec codec = new VCFCodec();
+        codec.setVersionUpgradePolicy(VCFVersionUpgradePolicy.DO_NOT_UPGRADE);
         final VCFHeader header = (VCFHeader) codec.readActualHeader(
                 new LineIteratorImpl(new SynchronousLineReader(new StringReader(headerStr))));
         Assert.assertEquals(header.getMetaDataInInputOrder().size(), VCF_4_HEADER_STRING_COUNT);
diff --git a/src/test/java/htsjdk/variant/vcf/VCFInfoHeaderLineUnitTest.java b/src/test/java/htsjdk/variant/vcf/VCFInfoHeaderLineUnitTest.java
index 9e2a82f15a..0ea2c8f1e8 100644
--- a/src/test/java/htsjdk/variant/vcf/VCFInfoHeaderLineUnitTest.java
+++ b/src/test/java/htsjdk/variant/vcf/VCFInfoHeaderLineUnitTest.java
@@ -69,8 +69,7 @@ public void testAllow1000GKey() {
             VCFHeader.DEFAULT_VCF_VERSION
         );
 
-        // TODO change to VCFHeader.DEFAULT_VCF_VERSION
-        Assert.assertFalse(line.getValidationFailure(VCFHeaderVersion.VCF4_3).isPresent());
+        Assert.assertFalse(line.getValidationFailure(VCFHeader.DEFAULT_VCF_VERSION).isPresent());
     }
 
     @Test(dataProvider = "mergeIncompatibleInfoLines", expectedExceptions= TribbleException.class)
diff --git a/src/test/java/htsjdk/variant/vcf/VCFMetaDataLinesUnitTest.java b/src/test/java/htsjdk/variant/vcf/VCFMetaDataLinesUnitTest.java
index f79331a7eb..f88f0fd0ba 100644
--- a/src/test/java/htsjdk/variant/vcf/VCFMetaDataLinesUnitTest.java
+++ b/src/test/java/htsjdk/variant/vcf/VCFMetaDataLinesUnitTest.java
@@ -70,6 +70,25 @@ public void testKeyCollisions(final VCFHeaderLine line1, final VCFHeaderLine lin
         Assert.assertEquals(mdLines.getMetaDataInInputOrder().size(), expectCollision ? 1 : 2);
     }
 
+    @DataProvider(name = "contigALTCollisions") // each row: one contig line and one ALT line that share ID "X"
+    public Object[][] contigALTCollisions() {
+        return new Object[][] {
+            { // contig line first, then the ALT line with the same ID
+                new VCFContigHeaderLine("<ID=X>", VCFHeader.DEFAULT_VCF_VERSION, 0), new VCFAltHeaderLine("<ID=X>", VCFHeader.DEFAULT_VCF_VERSION)
+            },
+            { // same pair in the opposite order: ALT line first, then the contig line
+                new VCFAltHeaderLine("<ID=X>", VCFHeader.DEFAULT_VCF_VERSION), new VCFContigHeaderLine("<ID=X>", VCFHeader.DEFAULT_VCF_VERSION, 0)
+            },
+        };
+    }
+
+    @Test(dataProvider = "contigALTCollisions", expectedExceptions = IllegalStateException.class)
+    public void testContigALTCollision(final VCFHeaderLine line1, final VCFHeaderLine line2) { // passes only if IllegalStateException is thrown
+        final VCFMetaDataLines mdLines = new VCFMetaDataLines(); // fresh, empty container for each data row
+        mdLines.addMetaDataLine(line1);
+        mdLines.addMetaDataLine(line2); // NOTE(review): presumably this second add is the call that throws -- confirm
+    }
+
     @Test
     public void testRetainFullHeaderLines() {
         final VCFHeaderUnitTestData unitTestData = new VCFHeaderUnitTestData();
diff --git a/src/test/java/htsjdk/variant/vcf/VCFStandardHeaderLinesUnitTest.java b/src/test/java/htsjdk/variant/vcf/VCFStandardHeaderLinesUnitTest.java
index 45009ce211..38a8c983f7 100644
--- a/src/test/java/htsjdk/variant/vcf/VCFStandardHeaderLinesUnitTest.java
+++ b/src/test/java/htsjdk/variant/vcf/VCFStandardHeaderLinesUnitTest.java
@@ -190,7 +190,9 @@ public Object[][] makeRepairHeaderTest() {
     @Test(dataProvider = "RepairHeaderTest")
     public void testRepairHeaderTest(final RepairHeaderTest cfg) {
         final Set<VCFHeaderLine> headerLines = new LinkedHashSet<>();
-        headerLines.add(VCFHeader.makeHeaderVersionLine(VCFHeader.DEFAULT_VCF_VERSION));
+        // The standard header line repair facility is not sufficiently powerful to fix broken lines
+        // starting from version 4.3, so it is only used for versions <= 4.2, and we use version 4.2 for this test
+        headerLines.add(new VCFHeaderLine(VCFHeaderVersion.VCF4_2.getFormatString(), VCFHeaderVersion.VCF4_2.getVersionString()));
         headerLines.add(cfg.original);
 
         final VCFHeader toRepair = new VCFHeader(headerLines);
diff --git a/src/test/java/htsjdk/variant/vcf/VCFTextTransformerTest.java b/src/test/java/htsjdk/variant/vcf/VCFTextTransformerTest.java
index 8bb9927de0..0f7d9f5963 100644
--- a/src/test/java/htsjdk/variant/vcf/VCFTextTransformerTest.java
+++ b/src/test/java/htsjdk/variant/vcf/VCFTextTransformerTest.java
@@ -1,56 +1,75 @@
 package htsjdk.variant.vcf;
 
 import htsjdk.HtsjdkTest;
-import htsjdk.tribble.TribbleException;
 import org.testng.Assert;
 import org.testng.annotations.DataProvider;
 import org.testng.annotations.Test;
 
+import java.util.Arrays;
+import java.util.stream.Stream;
+
 public class VCFTextTransformerTest extends HtsjdkTest {
 
-    @DataProvider(name="validPercentEncodings")
+    @DataProvider(name = "validPercentEncodings")
     public Object[][] validPercentEncodings() {
-        return new Object[][] {
-                { "", ""},
-                { "%3A", ":"},
-                { "%3B", ";"},
-                { "%3D", "="},
-                { "%25", "%"},
-                { "%2C", ","},
-                { "%0D", "\r"},
-                { "%0A", "\n"},
-                { "%09", "\t"},
-                { "%3AA", ":A"},
-                { "abc%3A", "abc:"},
-                { "%3Aabc", ":abc"},
-                { "%3Aabc%3A", ":abc:"},
-
-                // valid text containing % encodings that are not valid, and are passed through in raw form (no decoding)
-                { "%3", "%3"},
-                { "%d", "%d"},
-                { "%a", "%a"},
-                { "abcdefg%", "abcdefg%"},
-                { "%3Aabcdefg%", ":abcdefg%"},
-                { "abcdefg%0", "abcdefg%0"},
-                { "abcdefg%1", "abcdefg%1"},
-                { "abcdefg%a", "abcdefg%a"},
-                { "abcdefg%d", "abcdefg%d"},
-                { "abcdefg%g", "abcdefg%g"},
-                { "abcdefg%gg", "abcdefg%gg"},
-                { "abcdefg%-1", "abcdefg%-1"},
+        return new Object[][]{
+            {"", ""},
+            {"%3A", ":"},
+            {"%3B", ";"},
+            {"%3D", "="},
+            {"%25", "%"},
+            {"%2C", ","},
+            {"%0D", "\r"},
+            {"%0A", "\n"},
+            {"%09", "\t"},
+            {"%3AA", ":A"},
+            {"abc%3A", "abc:"},
+            {"%3Aabc", ":abc"},
+            {"%3Aabc%3A", ":abc:"},
         };
     }
 
-    @Test(dataProvider="validPercentEncodings")
-    public void testDecodeValidEncodings(final String rawText, final String decodedText) {
+    @DataProvider(name = "truncatedPercentEncodings") // rows: { input text, expected decodeText result }
+    public Object[][] truncatedPercentEncodings() {
+        return new Object[][]{
+            // valid text containing % encodings that are not valid, and are passed through in raw form (no decoding)
+            {"%3", "%3"},
+            {"%d", "%d"},
+            {"%a", "%a"},
+            {"abcdefg%", "abcdefg%"},
+            {"%3Aabcdefg%", ":abcdefg%"}, // exception to the note above: the leading %3A is expected to decode to ':'; only the trailing lone % stays raw
+            {"abcdefg%0", "abcdefg%0"},
+            {"abcdefg%1", "abcdefg%1"},
+            {"abcdefg%a", "abcdefg%a"},
+            {"abcdefg%d", "abcdefg%d"},
+            {"abcdefg%g", "abcdefg%g"},
+            {"abcdefg%gg", "abcdefg%gg"},
+            {"abcdefg%-1", "abcdefg%-1"},
+        };
+    }
+
+    @DataProvider(name = "allPercentEncodings")
+    public Object[][] allPercentEncodings() { // rows of validPercentEncodings() followed by rows of truncatedPercentEncodings()
+        return Stream.concat(Arrays.stream(validPercentEncodings()), Arrays.stream(truncatedPercentEncodings()))
+            .toArray(Object[][]::new); // collect the concatenated rows back into a single Object[][]
+    }
+
+    @Test(dataProvider = "allPercentEncodings")
+    public void testDecodeValidEncodings(final String encodedText, final String decodedText) {
         final VCFTextTransformer vcfTextTransformer = new VCFPercentEncodedTextTransformer();
-        Assert.assertEquals(vcfTextTransformer.decodeText(rawText), decodedText);
+        Assert.assertEquals(vcfTextTransformer.decodeText(encodedText), decodedText);
     }
 
-    @Test(dataProvider = "validPercentEncodings")
-    public void testPassThruValidEncodings(final String rawText, final String unused) {
+    @Test(dataProvider = "allPercentEncodings")
+    public void testPassThruValidEncodings(final String encodedText, final String unused) {
         final VCFPassThruTextTransformer vcfPassThruTransformer = new VCFPassThruTextTransformer();
-        Assert.assertEquals(vcfPassThruTransformer.decodeText(rawText), rawText);
+        Assert.assertEquals(vcfPassThruTransformer.decodeText(encodedText), encodedText);
     }
 
+    @Test(dataProvider = "validPercentEncodings") // fed only the validPercentEncodings rows, not the truncated ones
+    public void testInverseComposition(final String encodedText, final String decodedText) {
+        final VCFTextTransformer vcfTextTransformer = new VCFPercentEncodedTextTransformer();
+        Assert.assertEquals(vcfTextTransformer.encodeText(vcfTextTransformer.decodeText(encodedText)), encodedText); // encode(decode(x)) == x
+        Assert.assertEquals(vcfTextTransformer.decodeText(vcfTextTransformer.encodeText(decodedText)), decodedText); // decode(encode(y)) == y
+    }
 }
diff --git a/src/test/resources/htsjdk/variant/diagnosis_targets_testfile.vcf b/src/test/resources/htsjdk/variant/diagnosis_targets_testfile.vcf
index fbe8d1e405..9f96ce09ed 100644
--- a/src/test/resources/htsjdk/variant/diagnosis_targets_testfile.vcf
+++ b/src/test/resources/htsjdk/variant/diagnosis_targets_testfile.vcf
@@ -14,7 +14,7 @@
 ##FORMAT=<ID=MED,Number=1,Type=Float,Description="Median of depth distribution.">
 ##FORMAT=<ID=Q1,Number=1,Type=Float,Description="Lower Quartile of depth distribution.">
 ##FORMAT=<ID=Q3,Number=1,Type=Float,Description="Upper Quartile of depth Distribution.">
-##INFO=<ID="Diagnose Targets",Number=0,Type=Flag,Description="DiagnoseTargets mode">
+##INFO=<ID="DiagnoseTargets",Number=0,Type=Flag,Description="DiagnoseTargets mode">
 ##INFO=<ID=AVG_INTERVAL_DP,Number=1,Type=Float,Description="Average depth across the interval. Sum of the depth in a loci divided by interval size.">
 ##INFO=<ID=END,Number=1,Type=Integer,Description="Stop position of the interval">
 ##contig=<ID=1,length=249250621,assembly=b37>
diff --git a/src/test/resources/htsjdk/variant/vcf43/42AutomaticallyConvertible.vcf b/src/test/resources/htsjdk/variant/vcf43/42AutomaticallyConvertible.vcf
new file mode 100644
index 0000000000..1d248d2ae9
--- /dev/null
+++ b/src/test/resources/htsjdk/variant/vcf43/42AutomaticallyConvertible.vcf
@@ -0,0 +1,90 @@
+##fileformat=VCFv4.2
+##COMMENT=This file has 0 embedded UTF8 characters, but we need this fake comment line to keep the file aligned with it's utf8 companion test file.
+##ALT=<ID=DEL,Description="Deletion",ExtraAltField="extra alt">
+##ALT=<ID=DUP,Description="Duplication">
+##ALT=<ID=INS,Description="Insertion">
+##ALT=<ID=INV,Description="Inversion">
+##ALT=<ID=INVDUP,Description="InvertedDUP with unknown boundaries">
+##ALT=<ID=TRA,Description="Translocation">
+##FILTER=<ID=GATK_STANDARD,Description="Standard GATK filter",ExtraFilterField="extra filter field">
+##FILTER=<ID=HARD_TO_VALIDATE,Description="Hard to validate">
+##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed",ExtraFormatField="extra format">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth (only filtered reads used for calling)">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">
+##INFO=<ID=AB,Number=1,Type=Float,Description="Allele Balance for hets (ref/(ref+alt))",ExtraInfoField="extra info">
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
+##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=BaseQRankSum,Number=1,Type=Float,Description="Phred-scaled p-value From Wilcoxon Rank Sum Test of Alt Vs. Ref base qualities">
+##INFO=<ID=BaseQRankSumZ,Number=1,Type=Float,Description="Z-score From Wilcoxon Rank Sum Test of Alt Vs. Ref base qualities">
+##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP Membership">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=DS,Number=0,Type=Flag,Description="Were any of the samples downsampled?">
+##INFO=<ID=Dels,Number=1,Type=Float,Description="Fraction of Reads Containing Spanning Deletions">
+##INFO=<ID=HRun,Number=1,Type=Integer,Description="Largest Contiguous Homopolymer Run of Variant Allele In Either Direction">
+##INFO=<ID=HaplotypeScore,Number=1,Type=Float,Description="Consistency of the site with at most two segregating haplotypes">
+##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality">
+##INFO=<ID=MQ0,Number=1,Type=Integer,Description="Total Mapping Quality Zero Reads">
+##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Phred-scaled p-value From Wilcoxon Rank Sum Test of Alt Vs. Ref read mapping qualities">
+##INFO=<ID=MQRankSumZ,Number=1,Type=Float,Description="Z-score From Wilcoxon Rank Sum Test of Alt Vs. Ref read mapping qualities">
+##INFO=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth">
+##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Phred-scaled p-value From Wilcoxon Rank Sum Test of Alt Vs. Ref read position bias">
+##INFO=<ID=ReadPosRankSumZ,Number=1,Type=Float,Description="Z-score From Wilcoxon Rank Sum Test of Alt Vs. Ref read position bias">
+##INFO=<ID=SB,Number=1,Type=Float,Description="Strand Bias">
+##INFO=<ID=set,Number=1,Type=String,Description="Source VCF for the merged record in CombineVariants">
+##SAMPLE=<ID=NA19238,Assay=WholeGenome,Ethnicity=AFR,Disease=None,Description="Test NA19238 SAMPLE header line",DOI=http://someurl,ExtraSampleField="extra sample">
+##SAMPLE=<ID=NA19239,Assay=WholeGenome,Ethnicity=AFR,Disease=None,Description="Test NA19239 Test SAMPLE header line",DOI=http://someurl>
+##SAMPLE=<ID=NA19240,Assay=WholeGenome,Ethnicity=AFR,Disease=None,Description="Test NA19240 SAMPLE header line",DOI=http://someurl>
+##contig=<ID=1,length=249250621,assembly=b37,extraContigField="extra contig field">
+##contig=<ID=10,length=135534747,assembly=b37>
+##contig=<ID=11,length=135006516,assembly=b37>
+##contig=<ID=12,length=133851895,assembly=b37>
+##contig=<ID=13,length=115169878,assembly=b37>
+##contig=<ID=14,length=107349540,assembly=b37>
+##contig=<ID=15,length=102531392,assembly=b37>
+##contig=<ID=16,length=90354753,assembly=b37>
+##contig=<ID=17,length=81195210,assembly=b37>
+##contig=<ID=18,length=78077248,assembly=b37>
+##contig=<ID=19,length=59128983,assembly=b37>
+##contig=<ID=2,length=243199373,assembly=b37>
+##contig=<ID=20,length=63025520,assembly=b37>
+##contig=<ID=21,length=48129895,assembly=b37>
+##contig=<ID=22,length=51304566,assembly=b37>
+##contig=<ID=3,length=198022430,assembly=b37>
+##contig=<ID=4,length=191154276,assembly=b37>
+##contig=<ID=5,length=180915260,assembly=b37>
+##contig=<ID=6,length=171115067,assembly=b37>
+##contig=<ID=7,length=159138663,assembly=b37>
+##contig=<ID=8,length=146364022,assembly=b37>
+##contig=<ID=9,length=141213431,assembly=b37>
+##contig=<ID=X,length=155270560,assembly=b37>
+##contig=<ID=Y,length=59373566,assembly=b37>
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA19238	NA19239	NA19240
+1	327	.	T	<*>	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=fil%3AteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+2	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+3	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+4	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+4	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+5	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+6	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+7	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+8	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+9	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+10	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+11	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+12	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+13	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+14	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+15	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+16	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+17	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+18	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+19	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+20	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+21	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+22	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+X	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+Y	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
diff --git a/src/test/resources/htsjdk/variant/vcf43/42Pedigree.vcf b/src/test/resources/htsjdk/variant/vcf43/42Pedigree.vcf
new file mode 100644
index 0000000000..c9689a922c
--- /dev/null
+++ b/src/test/resources/htsjdk/variant/vcf43/42Pedigree.vcf
@@ -0,0 +1,91 @@
+##fileformat=VCFv4.2
+##COMMENT=This file has 0 embedded UTF8 characters, but we need this fake comment line to keep the file aligned with it's utf8 companion test file.
+##ALT=<ID=DEL,Description="Deletion",ExtraAltField="extra alt">
+##ALT=<ID=DUP,Description="Duplication">
+##ALT=<ID=INS,Description="Insertion">
+##ALT=<ID=INV,Description="Inversion">
+##ALT=<ID=INVDUP,Description="InvertedDUP with unknown boundaries">
+##ALT=<ID=TRA,Description="Translocation">
+##FILTER=<ID=GATK_STANDARD,Description="Standard GATK filter",ExtraFilterField="extra filter field">
+##FILTER=<ID=HARD_TO_VALIDATE,Description="Hard to validate">
+##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed",ExtraFormatField="extra format">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth (only filtered reads used for calling)">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">
+##PEDIGREE=<Derived=NA12891, Original=NA12878>
+##INFO=<ID=AB,Number=1,Type=Float,Description="Allele Balance for hets (ref/(ref+alt))",ExtraInfoField="extra info">
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
+##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=BaseQRankSum,Number=1,Type=Float,Description="Phred-scaled p-value From Wilcoxon Rank Sum Test of Alt Vs. Ref base qualities">
+##INFO=<ID=BaseQRankSumZ,Number=1,Type=Float,Description="Z-score From Wilcoxon Rank Sum Test of Alt Vs. Ref base qualities">
+##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP Membership">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=DS,Number=0,Type=Flag,Description="Were any of the samples downsampled?">
+##INFO=<ID=Dels,Number=1,Type=Float,Description="Fraction of Reads Containing Spanning Deletions">
+##INFO=<ID=HRun,Number=1,Type=Integer,Description="Largest Contiguous Homopolymer Run of Variant Allele In Either Direction">
+##INFO=<ID=HaplotypeScore,Number=1,Type=Float,Description="Consistency of the site with at most two segregating haplotypes">
+##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality">
+##INFO=<ID=MQ0,Number=1,Type=Integer,Description="Total Mapping Quality Zero Reads">
+##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Phred-scaled p-value From Wilcoxon Rank Sum Test of Alt Vs. Ref read mapping qualities">
+##INFO=<ID=MQRankSumZ,Number=1,Type=Float,Description="Z-score From Wilcoxon Rank Sum Test of Alt Vs. Ref read mapping qualities">
+##INFO=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth">
+##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Phred-scaled p-value From Wilcoxon Rank Sum Test of Alt Vs. Ref read position bias">
+##INFO=<ID=ReadPosRankSumZ,Number=1,Type=Float,Description="Z-score From Wilcoxon Rank Sum Test of Alt Vs. Ref read position bias">
+##INFO=<ID=SB,Number=1,Type=Float,Description="Strand Bias">
+##INFO=<ID=set,Number=1,Type=String,Description="Source VCF for the merged record in CombineVariants">
+##SAMPLE=<ID=NA19238,Assay=WholeGenome,Ethnicity=AFR,Disease=None,Description="Test NA19238 SAMPLE header line",DOI=http://someurl,ExtraSampleField="extra sample">
+##SAMPLE=<ID=NA19239,Assay=WholeGenome,Ethnicity=AFR,Disease=None,Description="Test NA19239 Test SAMPLE header line",DOI=http://someurl>
+##SAMPLE=<ID=NA19240,Assay=WholeGenome,Ethnicity=AFR,Disease=None,Description="Test NA19240 SAMPLE header line",DOI=http://someurl>
+##contig=<ID=1,length=249250621,assembly=b37,extraContigField="extra contig field">
+##contig=<ID=10,length=135534747,assembly=b37>
+##contig=<ID=11,length=135006516,assembly=b37>
+##contig=<ID=12,length=133851895,assembly=b37>
+##contig=<ID=13,length=115169878,assembly=b37>
+##contig=<ID=14,length=107349540,assembly=b37>
+##contig=<ID=15,length=102531392,assembly=b37>
+##contig=<ID=16,length=90354753,assembly=b37>
+##contig=<ID=17,length=81195210,assembly=b37>
+##contig=<ID=18,length=78077248,assembly=b37>
+##contig=<ID=19,length=59128983,assembly=b37>
+##contig=<ID=2,length=243199373,assembly=b37>
+##contig=<ID=20,length=63025520,assembly=b37>
+##contig=<ID=21,length=48129895,assembly=b37>
+##contig=<ID=22,length=51304566,assembly=b37>
+##contig=<ID=3,length=198022430,assembly=b37>
+##contig=<ID=4,length=191154276,assembly=b37>
+##contig=<ID=5,length=180915260,assembly=b37>
+##contig=<ID=6,length=171115067,assembly=b37>
+##contig=<ID=7,length=159138663,assembly=b37>
+##contig=<ID=8,length=146364022,assembly=b37>
+##contig=<ID=9,length=141213431,assembly=b37>
+##contig=<ID=X,length=155270560,assembly=b37>
+##contig=<ID=Y,length=59373566,assembly=b37>
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA19238	NA19239	NA19240
+1	327	.	T	<*>	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=fil%3AteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+2	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+3	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+4	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+4	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+5	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+6	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+7	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+8	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+9	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+10	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+11	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+12	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+13	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+14	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+15	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+16	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+17	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+18	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+19	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+20	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+21	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+22	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+X	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+Y	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
diff --git a/src/test/resources/htsjdk/variant/vcf43/invalid43ContigName.vcf b/src/test/resources/htsjdk/variant/vcf43/invalid43ContigName.vcf
new file mode 100644
index 0000000000..a9aac29ed0
--- /dev/null
+++ b/src/test/resources/htsjdk/variant/vcf43/invalid43ContigName.vcf
@@ -0,0 +1,90 @@
+##fileformat=VCFv4.2
+##COMMENT=This file has 0 embedded UTF8 characters, but we need this fake comment line to keep the file aligned with it's utf8 companion test file.
+##ALT=<ID=DEL,Description="Deletion",ExtraAltField="extra alt">
+##ALT=<ID=DUP,Description="Duplication">
+##ALT=<ID=INS,Description="Insertion">
+##ALT=<ID=INV,Description="Inversion">
+##ALT=<ID=INVDUP,Description="InvertedDUP with unknown boundaries">
+##ALT=<ID=TRA,Description="Translocation">
+##FILTER=<ID=GATK_STANDARD,Description="Standard GATK filter",ExtraFilterField="extra filter field">
+##FILTER=<ID=HARD_TO_VALIDATE,Description="Hard to validate">
+##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed",ExtraFormatField="extra format">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth (only filtered reads used for calling)">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">
+##INFO=<ID=AB,Number=1,Type=Float,Description="Allele Balance for hets (ref/(ref+alt))",ExtraInfoField="extra info">
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
+##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=BaseQRankSum,Number=1,Type=Float,Description="Phred-scaled p-value From Wilcoxon Rank Sum Test of Alt Vs. Ref base qualities">
+##INFO=<ID=BaseQRankSumZ,Number=1,Type=Float,Description="Z-score From Wilcoxon Rank Sum Test of Alt Vs. Ref base qualities">
+##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP Membership">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=DS,Number=0,Type=Flag,Description="Were any of the samples downsampled?">
+##INFO=<ID=Dels,Number=1,Type=Float,Description="Fraction of Reads Containing Spanning Deletions">
+##INFO=<ID=HRun,Number=1,Type=Integer,Description="Largest Contiguous Homopolymer Run of Variant Allele In Either Direction">
+##INFO=<ID=HaplotypeScore,Number=1,Type=Float,Description="Consistency of the site with at most two segregating haplotypes">
+##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality">
+##INFO=<ID=MQ0,Number=1,Type=Integer,Description="Total Mapping Quality Zero Reads">
+##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Phred-scaled p-value From Wilcoxon Rank Sum Test of Alt Vs. Ref read mapping qualities">
+##INFO=<ID=MQRankSumZ,Number=1,Type=Float,Description="Z-score From Wilcoxon Rank Sum Test of Alt Vs. Ref read mapping qualities">
+##INFO=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth">
+##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Phred-scaled p-value From Wilcoxon Rank Sum Test of Alt Vs. Ref read position bias">
+##INFO=<ID=ReadPosRankSumZ,Number=1,Type=Float,Description="Z-score From Wilcoxon Rank Sum Test of Alt Vs. Ref read position bias">
+##INFO=<ID=SB,Number=1,Type=Float,Description="Strand Bias">
+##INFO=<ID=set,Number=1,Type=String,Description="Source VCF for the merged record in CombineVariants">
+##SAMPLE=<ID=NA19238,Assay=WholeGenome,Ethnicity=AFR,Disease=None,Description="Test NA19238 SAMPLE header line",DOI=http://someurl,ExtraSampleField="extra sample">
+##SAMPLE=<ID=NA19239,Assay=WholeGenome,Ethnicity=AFR,Disease=None,Description="Test NA19239 Test SAMPLE header line",DOI=http://someurl>
+##SAMPLE=<ID=NA19240,Assay=WholeGenome,Ethnicity=AFR,Disease=None,Description="Test NA19240 SAMPLE header line",DOI=http://someurl>
+##contig=<ID=1(,length=249250621,assembly=b37,extraContigField="extra contig field">
+##contig=<ID=10,length=135534747,assembly=b37>
+##contig=<ID=11,length=135006516,assembly=b37>
+##contig=<ID=12,length=133851895,assembly=b37>
+##contig=<ID=13,length=115169878,assembly=b37>
+##contig=<ID=14,length=107349540,assembly=b37>
+##contig=<ID=15,length=102531392,assembly=b37>
+##contig=<ID=16,length=90354753,assembly=b37>
+##contig=<ID=17,length=81195210,assembly=b37>
+##contig=<ID=18,length=78077248,assembly=b37>
+##contig=<ID=19,length=59128983,assembly=b37>
+##contig=<ID=2,length=243199373,assembly=b37>
+##contig=<ID=20,length=63025520,assembly=b37>
+##contig=<ID=21,length=48129895,assembly=b37>
+##contig=<ID=22,length=51304566,assembly=b37>
+##contig=<ID=3,length=198022430,assembly=b37>
+##contig=<ID=4,length=191154276,assembly=b37>
+##contig=<ID=5,length=180915260,assembly=b37>
+##contig=<ID=6,length=171115067,assembly=b37>
+##contig=<ID=7,length=159138663,assembly=b37>
+##contig=<ID=8,length=146364022,assembly=b37>
+##contig=<ID=9,length=141213431,assembly=b37>
+##contig=<ID=X,length=155270560,assembly=b37>
+##contig=<ID=Y,length=59373566,assembly=b37>
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA19238	NA19239	NA19240
+1	327	.	T	<*>	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=fil%3AteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+2	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+3	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+4	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+4	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+5	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+6	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+7	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+8	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+9	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+10	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+11	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+12	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+13	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+14	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+15	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+16	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+17	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+18	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+19	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+20	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+21	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+22	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+X	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+Y	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
diff --git a/src/test/resources/htsjdk/variant/vcf43/valid43ContigName.vcf b/src/test/resources/htsjdk/variant/vcf43/valid43ContigName.vcf
new file mode 100644
index 0000000000..e2c2945beb
--- /dev/null
+++ b/src/test/resources/htsjdk/variant/vcf43/valid43ContigName.vcf
@@ -0,0 +1,90 @@
+##fileformat=VCFv4.2
+##COMMENT=This file has 0 embedded UTF8 characters, but we need this fake comment line to keep the file aligned with it's utf8 companion test file.
+##ALT=<ID=DEL,Description="Deletion",ExtraAltField="extra alt">
+##ALT=<ID=DUP,Description="Duplication">
+##ALT=<ID=INS,Description="Insertion">
+##ALT=<ID=INV,Description="Inversion">
+##ALT=<ID=INVDUP,Description="InvertedDUP with unknown boundaries">
+##ALT=<ID=TRA,Description="Translocation">
+##FILTER=<ID=GATK_STANDARD,Description="Standard GATK filter",ExtraFilterField="extra filter field">
+##FILTER=<ID=HARD_TO_VALIDATE,Description="Hard to validate">
+##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed",ExtraFormatField="extra format">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth (only filtered reads used for calling)">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">
+##INFO=<ID=AB,Number=1,Type=Float,Description="Allele Balance for hets (ref/(ref+alt))",ExtraInfoField="extra info">
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
+##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=BaseQRankSum,Number=1,Type=Float,Description="Phred-scaled p-value From Wilcoxon Rank Sum Test of Alt Vs. Ref base qualities">
+##INFO=<ID=BaseQRankSumZ,Number=1,Type=Float,Description="Z-score From Wilcoxon Rank Sum Test of Alt Vs. Ref base qualities">
+##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP Membership">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=DS,Number=0,Type=Flag,Description="Were any of the samples downsampled?">
+##INFO=<ID=Dels,Number=1,Type=Float,Description="Fraction of Reads Containing Spanning Deletions">
+##INFO=<ID=HRun,Number=1,Type=Integer,Description="Largest Contiguous Homopolymer Run of Variant Allele In Either Direction">
+##INFO=<ID=HaplotypeScore,Number=1,Type=Float,Description="Consistency of the site with at most two segregating haplotypes">
+##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality">
+##INFO=<ID=MQ0,Number=1,Type=Integer,Description="Total Mapping Quality Zero Reads">
+##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Phred-scaled p-value From Wilcoxon Rank Sum Test of Alt Vs. Ref read mapping qualities">
+##INFO=<ID=MQRankSumZ,Number=1,Type=Float,Description="Z-score From Wilcoxon Rank Sum Test of Alt Vs. Ref read mapping qualities">
+##INFO=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth">
+##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Phred-scaled p-value From Wilcoxon Rank Sum Test of Alt Vs. Ref read position bias">
+##INFO=<ID=ReadPosRankSumZ,Number=1,Type=Float,Description="Z-score From Wilcoxon Rank Sum Test of Alt Vs. Ref read position bias">
+##INFO=<ID=SB,Number=1,Type=Float,Description="Strand Bias">
+##INFO=<ID=set,Number=1,Type=String,Description="Source VCF for the merged record in CombineVariants">
+##SAMPLE=<ID=NA19238,Assay=WholeGenome,Ethnicity=AFR,Disease=None,Description="Test NA19238 SAMPLE header line",DOI=http://someurl,ExtraSampleField="extra sample">
+##SAMPLE=<ID=NA19239,Assay=WholeGenome,Ethnicity=AFR,Disease=None,Description="Test NA19239 Test SAMPLE header line",DOI=http://someurl>
+##SAMPLE=<ID=NA19240,Assay=WholeGenome,Ethnicity=AFR,Disease=None,Description="Test NA19240 SAMPLE header line",DOI=http://someurl>
+##contig=<ID=_.AZaz09,length=249250621,assembly=b37,extraContigField="extra contig field">
+##contig=<ID=10,length=135534747,assembly=b37>
+##contig=<ID=11,length=135006516,assembly=b37>
+##contig=<ID=12,length=133851895,assembly=b37>
+##contig=<ID=13,length=115169878,assembly=b37>
+##contig=<ID=14,length=107349540,assembly=b37>
+##contig=<ID=15,length=102531392,assembly=b37>
+##contig=<ID=16,length=90354753,assembly=b37>
+##contig=<ID=17,length=81195210,assembly=b37>
+##contig=<ID=18,length=78077248,assembly=b37>
+##contig=<ID=19,length=59128983,assembly=b37>
+##contig=<ID=2,length=243199373,assembly=b37>
+##contig=<ID=20,length=63025520,assembly=b37>
+##contig=<ID=21,length=48129895,assembly=b37>
+##contig=<ID=22,length=51304566,assembly=b37>
+##contig=<ID=3,length=198022430,assembly=b37>
+##contig=<ID=4,length=191154276,assembly=b37>
+##contig=<ID=5,length=180915260,assembly=b37>
+##contig=<ID=6,length=171115067,assembly=b37>
+##contig=<ID=7,length=159138663,assembly=b37>
+##contig=<ID=8,length=146364022,assembly=b37>
+##contig=<ID=9,length=141213431,assembly=b37>
+##contig=<ID=X,length=155270560,assembly=b37>
+##contig=<ID=Y,length=59373566,assembly=b37>
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA19238	NA19239	NA19240
+1	327	.	T	<*>	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=fil%3AteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+2	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+3	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+4	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+4	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+5	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+6	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+7	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+8	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+9	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+10	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+11	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+12	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+13	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+14	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+15	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+16	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+17	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+18	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+19	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+20	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+21	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+22	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+X	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99
+Y	327	.	T	C	666.18	GATK_STANDARD;HARD_TO_VALIDATE	AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth	GT:DP:GQ	1/0:10:62	1/0:37:99	1/0:53:99

From 6a2c193955c9710cfacb678af8735573d800168f Mon Sep 17 00:00:00 2001
From: Anders Leung <anders.leung@ga4gh.org>
Date: Thu, 15 Apr 2021 15:56:19 -0400
Subject: [PATCH 07/22] BCF 2.2 writing WIP

---
 .travis.yml                                   |   2 +
 scripts/install-bcftools.sh                   |   5 +
 .../htsjdk/samtools/util/FileExtensions.java  |   3 +
 .../util/ListByteBufferOutputStream.java      | 138 +++++
 .../tribble/TribbleIndexedFeatureReader.java  |  10 +-
 .../htsjdk/tribble/util/ParsingUtils.java     |   4 +-
 .../java/htsjdk/variant/bcf2/BCF2Codec.java   | 314 ++++++-----
 .../java/htsjdk/variant/bcf2/BCF2Decoder.java | 288 +++++++---
 .../htsjdk/variant/bcf2/BCF2Dictionary.java   | 283 ++++++++++
 .../java/htsjdk/variant/bcf2/BCF2Encoder.java | 386 +++++++++++++
 .../BCF2FieldWriter/BCF2FieldEncoder.java     | 314 +++++++++++
 .../bcf2/BCF2FieldWriter/BCF2FieldWriter.java | 515 ++++++++++++++++++
 .../BCF2FieldWriterManager.java               | 106 ++++
 .../bcf2/BCF2GenotypeFieldDecoders.java       | 137 ++---
 .../bcf2/BCF2LazyGenotypesDecoder.java        |   3 +-
 .../java/htsjdk/variant/bcf2/BCF2Type.java    | 168 ++++--
 .../java/htsjdk/variant/bcf2/BCF2Utils.java   | 216 ++------
 .../java/htsjdk/variant/bcf2/BCFVersion.java  |   8 +
 .../variantcontext/VariantContext.java        |  25 +-
 .../variantcontext/writer/BCF2Encoder.java    | 261 ---------
 .../writer/BCF2FieldEncoder.java              | 455 ----------------
 .../writer/BCF2FieldWriter.java               | 324 -----------
 .../writer/BCF2FieldWriterManager.java        | 180 ------
 .../variantcontext/writer/BCF2Writer.java     | 301 +++++-----
 .../writer/VariantContextWriterBuilder.java   |   5 +-
 .../htsjdk/variant/vcf/AbstractVCFCodec.java  |   7 +-
 .../htsjdk/variant/vcf/VCFFileReader.java     |  11 +-
 .../variant/vcf/VCFFilterHeaderLine.java      |   5 -
 .../variant/vcf/VCFFormatHeaderLine.java      |   6 +-
 .../htsjdk/variant/vcf/VCFHeaderLine.java     |  12 +-
 .../variant/vcf/VCFHeaderLineTranslator.java  |  11 +-
 .../htsjdk/variant/vcf/VCFInfoHeaderLine.java |   5 -
 .../variant/vcf/VCFSimpleHeaderLine.java      |   4 -
 .../variant/vcf/VCFStandardHeaderLines.java   |  32 +-
 .../java/htsjdk/samtools/SamStreamsTest.java  |   3 +-
 .../java/htsjdk/utils/BCFToolsTestUtils.java  | 136 +++++
 .../htsjdk/utils/BCFToolsTestUtilsTest.java   |  35 ++
 .../variant/bcf2/BCF2DictionaryTest.java      | 101 ++++
 .../bcf2/BCF2EncoderDecoderUnitTest.java      | 446 +++++++--------
 .../BCF2FieldWriter/BCF2FieldEncoderTest.java | 464 ++++++++++++++++
 .../variant/bcf2/BCF2UtilsUnitTest.java       | 116 +---
 .../variant/bcf2/BCF2WriterUnitTest.java      | 309 +++++++----
 .../htsjdk/variant/bcf2/BCFCodecTest.java     |  15 +-
 .../VariantContextTestProvider.java           |  35 +-
 .../writer/VCFWriterUnitTest.java             |  64 ++-
 .../variant/vcf/AbstractVCFCodecTest.java     |   2 +-
 .../vcf/VCFCompoundHeaderLineUnitTest.java    |  43 ++
 .../htsjdk/variant/vcf/VCFFileReaderTest.java |   5 +-
 .../vcf/VCFHeaderLineTranslatorUnitTest.java  |   2 -
 .../htsjdk/variant/vcf/VCFIteratorTest.java   |  24 +-
 src/test/resources/htsjdk/variant/bcfV22.bcf  | Bin 0 -> 613 bytes
 .../resources/htsjdk/variant/bcfV22.bcf.gz    | Bin 0 -> 613 bytes
 .../htsjdk/variant/structuralvariants.vcf     |   2 +-
 src/test/resources/htsjdk/variant/test1.vcf   |   2 +-
 54 files changed, 3913 insertions(+), 2435 deletions(-)
 create mode 100644 scripts/install-bcftools.sh
 create mode 100644 src/main/java/htsjdk/samtools/util/ListByteBufferOutputStream.java
 create mode 100644 src/main/java/htsjdk/variant/bcf2/BCF2Dictionary.java
 create mode 100644 src/main/java/htsjdk/variant/bcf2/BCF2Encoder.java
 create mode 100644 src/main/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldEncoder.java
 create mode 100644 src/main/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldWriter.java
 create mode 100644 src/main/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldWriterManager.java
 delete mode 100644 src/main/java/htsjdk/variant/variantcontext/writer/BCF2Encoder.java
 delete mode 100644 src/main/java/htsjdk/variant/variantcontext/writer/BCF2FieldEncoder.java
 delete mode 100644 src/main/java/htsjdk/variant/variantcontext/writer/BCF2FieldWriter.java
 delete mode 100644 src/main/java/htsjdk/variant/variantcontext/writer/BCF2FieldWriterManager.java
 create mode 100644 src/test/java/htsjdk/utils/BCFToolsTestUtils.java
 create mode 100644 src/test/java/htsjdk/utils/BCFToolsTestUtilsTest.java
 create mode 100644 src/test/java/htsjdk/variant/bcf2/BCF2DictionaryTest.java
 create mode 100644 src/test/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldEncoderTest.java
 create mode 100644 src/test/resources/htsjdk/variant/bcfV22.bcf
 create mode 100644 src/test/resources/htsjdk/variant/bcfV22.bcf.gz

diff --git a/.travis.yml b/.travis.yml
index f00fe8b27e..dab05066b8 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -13,6 +13,7 @@ cache:
 env:
   global:
     - HTSJDK_SAMTOOLS_BIN=/usr/bin/samtools
+    - HTSJDK_BCFTOOLS_BIN=/usr/bin/bcftools
 jdk:
   - oraclejdk8
   - openjdk8
@@ -32,6 +33,7 @@ matrix:
 
 before_install:
   - scripts/install-samtools.sh
+  - scripts/install-bcftools.sh
   - scripts/htsget-scripts/start-htsget-test-server.sh
 
 script:
diff --git a/scripts/install-bcftools.sh b/scripts/install-bcftools.sh
new file mode 100644
index 0000000000..fca5a62134
--- /dev/null
+++ b/scripts/install-bcftools.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+set -ex
+wget https://github.com/samtools/bcftools/releases/download/1.13/bcftools-1.13.tar.bz2
+tar -xjvf bcftools-1.13.tar.bz2
+cd bcftools-1.13 && ./configure --prefix=/usr && make && sudo make install
diff --git a/src/main/java/htsjdk/samtools/util/FileExtensions.java b/src/main/java/htsjdk/samtools/util/FileExtensions.java
index fc2e37d6c6..dcb8c889f9 100755
--- a/src/main/java/htsjdk/samtools/util/FileExtensions.java
+++ b/src/main/java/htsjdk/samtools/util/FileExtensions.java
@@ -65,6 +65,9 @@ public final class FileExtensions {
     public static final String VCF = ".vcf";
     public static final String VCF_INDEX = TRIBBLE_INDEX;
     public static final String BCF = ".bcf";
+    // Note that .bcf on its own may be gzip compressed and usually is,
+    // but files with the extension .bcf.gz do seem to exist in the wild and should be supported
+    public static final String COMPRESSED_BCF = ".bcf.gz";
     public static final String COMPRESSED_VCF = ".vcf.gz";
     public static final String COMPRESSED_VCF_INDEX = ".tbi";
     public static final List<String> VCF_LIST = Collections.unmodifiableList(Arrays.asList(VCF, COMPRESSED_VCF, BCF));
diff --git a/src/main/java/htsjdk/samtools/util/ListByteBufferOutputStream.java b/src/main/java/htsjdk/samtools/util/ListByteBufferOutputStream.java
new file mode 100644
index 0000000000..d183a3b90f
--- /dev/null
+++ b/src/main/java/htsjdk/samtools/util/ListByteBufferOutputStream.java
@@ -0,0 +1,138 @@
+package htsjdk.samtools.util;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Arrays;
+
+/**
+ * Growable byte buffer backed by a list of byte arrays, which can
+ * be used to buffer data without reallocating an underlying array.
+ * Once data is accumulated, it can either be retrieved by converting
+ * into a byte[] for interfaces that require a contiguous block of bytes,
+ * or written directly to an OutputStream to avoid array copies.
+ */
+public class ListByteBufferOutputStream extends OutputStream {
+
+    private final int blockSize;
+    private final ArrayList<byte[]> blocks;
+    private byte[] currentBlock;
+    private int nextBlockIndex;
+    private int nextBytePosition;
+    private int size;
+
+    public ListByteBufferOutputStream(final int blockSize) {
+        this.blockSize = blockSize;
+        blocks = new ArrayList<>();
+        nextBlockIndex = 0;
+        advanceBlock();
+        size = 0;
+    }
+
+    @Override
+    public void write(final int b) {
+        if (nextBytePosition == blockSize) {
+            advanceBlock();
+        }
+        currentBlock[nextBytePosition++] = (byte) b;
+        size++;
+    }
+
+    public void write(final byte b, final int nCopies) {
+        assert nCopies >= 0;
+
+        int bytesRemaining = nCopies;
+        while (bytesRemaining > 0) {
+            if (nextBytePosition == blockSize) {
+                advanceBlock();
+            }
+            final int toIndex = Math.min(nextBytePosition + bytesRemaining, blockSize);
+            Arrays.fill(currentBlock, nextBytePosition, toIndex, b);
+            bytesRemaining -= toIndex - nextBytePosition;
+            nextBytePosition = toIndex;
+        }
+        size += nCopies;
+    }
+
+    @Override
+    public void write(final byte[] b) {
+        write(b, 0, b.length);
+    }
+
+    @Override
+    public void write(final byte[] b, int off, final int len) {
+        assert b != null;
+        assert off >= 0;
+        assert len >= 0;
+        assert off + len <= b.length;
+
+        int bytesRemaining = len;
+        while (bytesRemaining > 0) {
+            if (nextBytePosition == blockSize) {
+                advanceBlock();
+            }
+            final int lengthToWrite = Math.min(bytesRemaining, blockSize - nextBytePosition);
+            System.arraycopy(b, off, currentBlock, nextBytePosition, lengthToWrite);
+            nextBytePosition += lengthToWrite;
+            off += lengthToWrite;
+            bytesRemaining -= lengthToWrite;
+        }
+        size += len;
+    }
+
+    public int size() {
+        return size;
+    }
+
+    public void writeTo(final OutputStream out) throws IOException {
+        for (final byte[] b : blocks) {
+            if (b == currentBlock) {
+                out.write(b, 0, nextBytePosition);
+                break;
+            } else {
+                out.write(b);
+            }
+        }
+    }
+
+    public byte[] toByteArray() {
+        final byte[] bytes = new byte[size];
+        final ByteBuffer buff = ByteBuffer.wrap(bytes);
+        for (final byte[] b : blocks) {
+            if (b == currentBlock) {
+                buff.put(b, 0, nextBytePosition);
+                break;
+            } else {
+                buff.put(b);
+            }
+        }
+        return bytes;
+    }
+
+    public void reset() {
+        currentBlock = blocks.get(0);
+        nextBytePosition = 0;
+        nextBlockIndex = 1;
+        size = 0;
+    }
+
+    public void clear() {
+        reset();
+        // blocks always has at least 1 element
+        blocks.subList(1, blocks.size()).clear();
+    }
+
+    private void advanceBlock() {
+        if (nextBlockIndex == blocks.size()) {
+            // Need to add a new block
+            currentBlock = new byte[blockSize];
+            blocks.add(currentBlock);
+        } else {
+            // Reuse old block
+            currentBlock = blocks.get(nextBlockIndex);
+        }
+        nextBytePosition = 0;
+        nextBlockIndex++;
+    }
+}
diff --git a/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java b/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java
index 768c797ac0..7e2c10ebc0 100644
--- a/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java
+++ b/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java
@@ -33,6 +33,7 @@
 import htsjdk.tribble.index.IndexFactory;
 import htsjdk.tribble.readers.PositionalBufferedStream;
 import htsjdk.tribble.util.ParsingUtils;
+import htsjdk.variant.vcf.VCFFileReader;
 
 import java.io.BufferedInputStream;
 import java.io.IOException;
@@ -252,7 +253,11 @@ private void readHeader() throws IOException {
         PositionalBufferedStream pbs = null;
         try {
             is = ParsingUtils.openInputStream(path, wrapper);
-            if (IOUtil.hasBlockCompressedExtension(new URI(URLEncoder.encode(path, "UTF-8")))) {
+            // BCFs are usually gzipped but do not have the .gz extension,
+            // so we explicitly check for the presence of a gzip header
+            if (IOUtil.hasBlockCompressedExtension(new URI(URLEncoder.encode(path, "UTF-8")))
+                || (VCFFileReader.isBCF(path) && IOUtil.isGZIPInputStream(is))
+            ) {
                 // TODO: TEST/FIX THIS! https://github.com/samtools/htsjdk/issues/944
                 // TODO -- warning I don't think this can work, the buffered input stream screws up position
                 is = new GZIPInputStream(new BufferedInputStream(is));
@@ -326,7 +331,8 @@ public WFIterator() throws IOException {
             final InputStream inputStream = ParsingUtils.openInputStream(path, wrapper);
 
             final PositionalBufferedStream pbs;
-            if (IOUtil.hasBlockCompressedExtension(path)) {
+            // BCFs can be gzipped but usually do not have a compressed extension, so an extra check is needed
+            if (IOUtil.hasBlockCompressedExtension(path) || (VCFFileReader.isBCF(path) && IOUtil.isGZIPInputStream(inputStream))) {
                 // Gzipped -- we need to buffer the GZIPInputStream methods as this class makes read() calls,
                 // and seekableStream does not support single byte reads
                 final InputStream is = new GZIPInputStream(new BufferedInputStream(inputStream, 512000));
diff --git a/src/main/java/htsjdk/tribble/util/ParsingUtils.java b/src/main/java/htsjdk/tribble/util/ParsingUtils.java
index 6b4470a72a..bca147e7ca 100644
--- a/src/main/java/htsjdk/tribble/util/ParsingUtils.java
+++ b/src/main/java/htsjdk/tribble/util/ParsingUtils.java
@@ -101,8 +101,8 @@ public static InputStream openInputStream(final String uri, final Function<Seeka
         if (URL_SCHEMES.stream().anyMatch(uri::startsWith)) {
             inputStream = getURLHelper(new URL(uri)).openInputStream();
         } else if (!IOUtil.hasScheme(uri)) {
-            File file = new File(uri);
-            inputStream = Files.newInputStream(file.toPath());
+            final File file = new File(uri);
+            inputStream = new SeekablePathStream(file.toPath(), wrapper);
         } else {
             inputStream = new SeekablePathStream(IOUtil.getPath(uri), wrapper);
         }
diff --git a/src/main/java/htsjdk/variant/bcf2/BCF2Codec.java b/src/main/java/htsjdk/variant/bcf2/BCF2Codec.java
index 97e8ce959d..ce999574aa 100644
--- a/src/main/java/htsjdk/variant/bcf2/BCF2Codec.java
+++ b/src/main/java/htsjdk/variant/bcf2/BCF2Codec.java
@@ -1,36 +1,41 @@
 /*
-* Copyright (c) 2012 The Broad Institute
-* 
-* Permission is hereby granted, free of charge, to any person
-* obtaining a copy of this software and associated documentation
-* files (the "Software"), to deal in the Software without
-* restriction, including without limitation the rights to use,
-* copy, modify, merge, publish, distribute, sublicense, and/or sell
-* copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following
-* conditions:
-* 
-* The above copyright notice and this permission notice shall be
-* included in all copies or substantial portions of the Software.
-* 
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
-* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-*/
+ * Copyright (c) 2012 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+ * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
 
 package htsjdk.variant.bcf2;
 
+import htsjdk.samtools.BAMIndexer;
 import htsjdk.samtools.util.IOUtil;
+import htsjdk.samtools.util.Log;
 import htsjdk.tribble.BinaryFeatureCodec;
 import htsjdk.tribble.Feature;
 import htsjdk.tribble.FeatureCodecHeader;
 import htsjdk.tribble.TribbleException;
-import htsjdk.tribble.readers.*;
+import htsjdk.tribble.readers.LineIterator;
+import htsjdk.tribble.readers.LineIteratorImpl;
+import htsjdk.tribble.readers.PositionalBufferedStream;
+import htsjdk.tribble.readers.SynchronousLineReader;
 import htsjdk.variant.utils.GeneralUtils;
 import htsjdk.variant.variantcontext.Allele;
 import htsjdk.variant.variantcontext.GenotypeBuilder;
@@ -41,28 +46,39 @@
 import htsjdk.variant.vcf.VCFCodec;
 import htsjdk.variant.vcf.VCFCompoundHeaderLine;
 import htsjdk.variant.vcf.VCFConstants;
-import htsjdk.variant.vcf.VCFContigHeaderLine;
 import htsjdk.variant.vcf.VCFHeader;
 import htsjdk.variant.vcf.VCFHeaderLineType;
 
-import java.io.*;
+import java.io.BufferedInputStream;
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.zip.GZIPInputStream;
 
 /**
  * Decode BCF2 files
  */
 public class BCF2Codec extends BinaryFeatureCodec<VariantContext> {
+    private static final Log log = Log.getInstance(BCF2Codec.class);
+
+    public static String IDXField = "IDX"; // BCF2.2 IDX field name
+
     protected final static int ALLOWED_MAJOR_VERSION = 2;
-    protected final static int ALLOWED_MINOR_VERSION = 1;
+    protected final static int ALLOWED_MINOR_VERSION = 2;
     public static final BCFVersion ALLOWED_BCF_VERSION = new BCFVersion(ALLOWED_MAJOR_VERSION, ALLOWED_MINOR_VERSION);
 
-    /** sizeof a BCF header (+ min/max version). Used when trying to detect when a streams starts with a bcf header */
-    public static final int SIZEOF_BCF_HEADER =  BCFVersion.MAGIC_HEADER_START.length + 2*Byte.BYTES;
-    
+    /**
+     * sizeof a BCF header (+ major/minor version). Used when trying to detect when a stream starts with a bcf header
+     */
+    public static final int SIZEOF_BCF_HEADER = BCFVersion.MAGIC_HEADER_START.length + 2 * Byte.BYTES;
+
     private BCFVersion bcfVersion = null;
 
     private VCFHeader header = null;
@@ -70,19 +86,19 @@ public class BCF2Codec extends BinaryFeatureCodec<VariantContext> {
     /**
      * Maps offsets (encoded in BCF) into contig names (from header) for the CHROM field
      */
-    private final ArrayList<String> contigNames = new ArrayList<String>();
+    private BCF2Dictionary contigDictionary;
 
     /**
      * Maps header string names (encoded in VCF) into strings found in the BCF header
-     *
+     * <p>
      * Initialized when processing the header
      */
-    private ArrayList<String> dictionary;
+    private BCF2Dictionary stringDictionary;
 
     /**
      * Our decoder that reads low-level objects from the BCF2 records
      */
-    private final BCF2Decoder decoder = new BCF2Decoder();
+    private BCF2Decoder decoder;
 
     /**
      * Provides some sanity checking on the header
@@ -96,7 +112,7 @@ public class BCF2Codec extends BinaryFeatureCodec<VariantContext> {
 
     /**
      * A cached array of GenotypeBuilders for efficient genotype decoding.
-     *
+     * <p>
      * Caching it allows us to avoid recreating this intermediate data
      * structure each time we decode genotypes
      */
@@ -114,12 +130,12 @@ public class BCF2Codec extends BinaryFeatureCodec<VariantContext> {
     // ----------------------------------------------------------------------
 
     @Override
-    public Feature decodeLoc( final PositionalBufferedStream inputStream ) {
+    public Feature decodeLoc(final PositionalBufferedStream inputStream) {
         return decode(inputStream);
     }
 
     @Override
-    public VariantContext decode( final PositionalBufferedStream inputStream ) {
+    public VariantContext decode(final PositionalBufferedStream inputStream) {
         try {
             recordNo++;
             final VariantContextBuilder builder = new VariantContextBuilder();
@@ -134,7 +150,7 @@ public VariantContext decode( final PositionalBufferedStream inputStream ) {
             decoder.readNextBlock(genotypeBlockSize, inputStream);
             createLazyGenotypesDecoder(info, builder);
             return builder.fullyDecoded(true).make();
-        } catch ( IOException e ) {
+        } catch (final IOException e) {
             throw new TribbleException("Failed to read BCF file", e);
         }
     }
@@ -153,10 +169,13 @@ public Class<VariantContext> getFeatureType() {
      * The default policy is to require an exact version match.
      * @param supportedVersion the current BCF implementation version
      * @param actualVersion the actual version
-     * @thows TribbleException if the version policy determines that {@code actualVersion} is not compatible
+     * @throws TribbleException if the version policy determines that {@code actualVersion} is not compatible
      * with {@code supportedVersion}
      */
-    protected void validateVersionCompatibility(final BCFVersion supportedVersion, final BCFVersion actualVersion) {
+    protected void validateVersionCompatibility(
+        final BCFVersion supportedVersion,
+        final BCFVersion actualVersion
+    ) throws TribbleException {
         if ( actualVersion.getMajorVersion() != ALLOWED_MAJOR_VERSION ) {
             error("BCF2Codec can only process BCF2 files, this file has major version " + bcfVersion.getMajorVersion());
         }
@@ -168,26 +187,24 @@ protected void validateVersionCompatibility(final BCFVersion supportedVersion, f
     }
 
     @Override
-    public FeatureCodecHeader readHeader( final PositionalBufferedStream inputStream ) {
+    public FeatureCodecHeader readHeader(final PositionalBufferedStream inputStream) {
         try {
             // note that this reads the magic as well, and so does double duty
             bcfVersion = BCFVersion.readBCFVersion(inputStream);
-            if ( bcfVersion == null ) {
+            if (bcfVersion == null) {
                 error("Input stream does not contain a BCF encoded file; BCF magic header info not found");
             }
 
-            validateVersionCompatibility(BCF2Codec.ALLOWED_BCF_VERSION, bcfVersion);
-            if ( GeneralUtils.DEBUG_MODE_ENABLED ) {
-                System.err.println("Parsing data stream with BCF version " + bcfVersion);
-            }
+            decoder = BCF2Decoder.getDecoder(bcfVersion);
+            log.debug("Parsing data stream with BCF version " + bcfVersion);
 
             final int headerSizeInBytes = BCF2Type.INT32.read(inputStream);
 
-            if ( headerSizeInBytes <= 0 || headerSizeInBytes > MAX_HEADER_SIZE) // no bigger than 8 MB
-                error("BCF2 header has invalid length: " + headerSizeInBytes + " must be >= 0 and < "+ MAX_HEADER_SIZE);
+            if (headerSizeInBytes <= 0 || headerSizeInBytes > MAX_HEADER_SIZE) // no bigger than 8 MB
+                error("BCF2 header has invalid length: " + headerSizeInBytes + " must be >= 0 and < " + MAX_HEADER_SIZE);
 
             final byte[] headerBytes = new byte[headerSizeInBytes];
-            if ( inputStream.read(headerBytes) != headerSizeInBytes )
+            if (inputStream.read(headerBytes) != headerSizeInBytes)
                 error("Couldn't read all of the bytes specified in the header length = " + headerSizeInBytes);
 
             final PositionalBufferedStream bps = new PositionalBufferedStream(new ByteArrayInputStream(headerBytes));
@@ -195,24 +212,21 @@ public FeatureCodecHeader readHeader( final PositionalBufferedStream inputStream
             final VCFCodec headerParser = new VCFCodec();
             this.header = (VCFHeader) headerParser.readActualHeader(lineIterator);
             bps.close();
-        } catch ( IOException e ) {
+        } catch (final IOException e) {
             throw new TribbleException("I/O error while reading BCF2 header");
         }
 
-        // create the config offsets
-        if ( ! header.getContigLines().isEmpty() ) {
-            contigNames.clear();
-            for ( final VCFContigHeaderLine contig : header.getContigLines()) {
-                if ( contig.getID() == null || contig.getID().equals("") )
-                    error("found a contig with an invalid ID " + contig);
-                contigNames.add(contig.getID());
-            }
-        } else {
-            error("Didn't find any contig lines in BCF2 file header");
+        // TODO should follow up on hts-specs and clarify the relationship between ##dictionary and IDX fields
+        // Error on ##dictionary lines, we don't know what to do with them
+        if (this.header.getMetaDataInInputOrder().stream().anyMatch(line -> line.getKey().equals("dictionary"))) {
+            throw new TribbleException("Use of the ##dictionary line is not supported");
         }
 
+        // create the contig dictionary
+        contigDictionary = makeContigDictionary(bcfVersion);
+
         // create the string dictionary
-        dictionary = parseDictionary(header);
+        stringDictionary = makeStringDictionary(bcfVersion);
 
         // prepare the genotype field decoders
         gtFieldDecoders = new BCF2GenotypeFieldDecoders(header);
@@ -220,7 +234,7 @@ public FeatureCodecHeader readHeader( final PositionalBufferedStream inputStream
         // create and initialize the genotype builder array
         final int nSamples = header.getNGenotypeSamples();
         builders = new GenotypeBuilder[nSamples];
-        for ( int i = 0; i < nSamples; i++ ) {
+        for (int i = 0; i < nSamples; i++) {
             builders[i] = new GenotypeBuilder(header.getGenotypeSamples().get(i));
         }
 
@@ -229,11 +243,20 @@ public FeatureCodecHeader readHeader( final PositionalBufferedStream inputStream
     }
 
     @Override
-    public boolean canDecode( final String path ) {
-        try (InputStream fis = Files.newInputStream(IOUtil.getPath(path)) ){
-            final BCFVersion version = BCFVersion.readBCFVersion(fis);
-            return version != null && version.getMajorVersion() == ALLOWED_MAJOR_VERSION;
-        } catch ( final IOException e ) {
+    public boolean canDecode(final String path) {
+        try (final InputStream fis = Files.newInputStream(IOUtil.getPath(path))) {
+            final InputStream is = IOUtil.isGZIPInputStream(fis) ? new GZIPInputStream(fis) : fis;
+            final BCFVersion version = BCFVersion.readBCFVersion(is);
+            if (version == null) {
+                return false;
+            } else {
+                // Validation will throw a TribbleException for incompatible versions
+                // The default policy is to require an exact major and minor version match
+                // but subclasses can implement more permissive policies
+                validateVersionCompatibility(ALLOWED_BCF_VERSION, version);
+                return true;
+            }
+        } catch (final IOException | TribbleException e) {
             return false;
         }
     }
@@ -264,8 +287,8 @@ private final void decodeSiteLoc(final VariantContextBuilder builder) throws IOE
 
         this.pos = decoder.decodeInt(BCF2Type.INT32) + 1; // GATK is one based, BCF2 is zero-based
         final int refLength = decoder.decodeInt(BCF2Type.INT32);
-        builder.start((long)pos);
-        builder.stop((long)(pos + refLength - 1)); // minus one because GATK has closed intervals but BCF2 is open
+        builder.start(pos);
+        builder.stop(pos + refLength - 1); // minus one because GATK has closed intervals but BCF2 is open
     }
 
     /**
@@ -276,21 +299,22 @@ private final void decodeSiteLoc(final VariantContextBuilder builder) throws IOE
      */
     private final SitesInfoForDecoding decodeSitesExtendedInfo(final VariantContextBuilder builder) throws IOException {
         final Object qual = decoder.decodeSingleValue(BCF2Type.FLOAT);
-        if ( qual != null ) {
-            builder.log10PError(((Double)qual) / -10.0);
+        if (qual != null) {
+            builder.log10PError(((Double) qual) / -10.0);
         }
 
         final int nAlleleInfo = decoder.decodeInt(BCF2Type.INT32);
         final int nFormatSamples = decoder.decodeInt(BCF2Type.INT32);
-        final int nAlleles = nAlleleInfo >> 16;
+        // Use logical shift to not introduce leading 1s
+        final int nAlleles = nAlleleInfo >>> 16;
         final int nInfo = nAlleleInfo & 0x0000FFFF;
-        final int nFormatFields = nFormatSamples >> 24;
+        final int nFormatFields = nFormatSamples >>> 24;
         final int nSamples = nFormatSamples & 0x00FFFFF;
 
-        if ( header.getNGenotypeSamples() != nSamples )
+        if (header.getNGenotypeSamples() != nSamples)
             error("Reading BCF2 files with different numbers of samples per record " +
-                    "is not currently supported.  Saw " + header.getNGenotypeSamples() +
-                    " samples in header but have a record with " + nSamples + " samples");
+                "is not currently supported.  Saw " + header.getNGenotypeSamples() +
+                " samples in header but have a record with " + nSamples + " samples");
 
         decodeID(builder);
         final List<Allele> alleles = decodeAlleles(builder, pos, nAlleles);
@@ -298,7 +322,7 @@ private final SitesInfoForDecoding decodeSitesExtendedInfo(final VariantContextB
         decodeInfo(builder, nInfo);
 
         final SitesInfoForDecoding info = new SitesInfoForDecoding(nFormatFields, nSamples, alleles);
-        if ( ! info.isValid() )
+        if (!info.isValid())
             error("Sites info is malformed: " + info);
         return info;
     }
@@ -316,8 +340,8 @@ private SitesInfoForDecoding(final int nFormatFields, final int nSamples, final
 
         public boolean isValid() {
             return nFormatFields >= 0 &&
-                    nSamples >= 0 &&
-                    alleles != null && ! alleles.isEmpty() && alleles.get(0).isReference();
+                nSamples >= 0 &&
+                alleles != null && !alleles.isEmpty() && alleles.get(0).isReference();
         }
 
         @Override
@@ -328,12 +352,13 @@ public String toString() {
 
     /**
      * Decode the id field in this BCF2 file and store it in the builder
+     *
      * @param builder
      */
-    private void decodeID( final VariantContextBuilder builder ) throws IOException {
-        final String id = (String)decoder.decodeTypedValue();
+    private void decodeID(final VariantContextBuilder builder) throws IOException {
+        final String id = decoder.decodeUnexplodedString();
 
-        if ( id == null )
+        if (id == null || id.isEmpty())
             builder.noID();
         else
             builder.id(id);
@@ -341,54 +366,67 @@ private void decodeID( final VariantContextBuilder builder ) throws IOException
 
     /**
      * Decode the alleles from this BCF2 file and put the results in builder
+     *
      * @param builder
      * @param pos
      * @param nAlleles
      * @return the alleles
      */
-    private List<Allele> decodeAlleles( final VariantContextBuilder builder, final int pos, final int nAlleles ) throws IOException {
-        // TODO -- probably need inline decoder for efficiency here (no sense in going bytes -> string -> vector -> bytes
-        List<Allele> alleles = new ArrayList<Allele>(nAlleles);
-        String ref = null;
-
-        for ( int i = 0; i < nAlleles; i++ ) {
-            final String alleleBases = (String)decoder.decodeTypedValue();
+    private List<Allele> decodeAlleles(final VariantContextBuilder builder, final int pos, final int nAlleles) throws IOException {
+        final List<Allele> alleles = new ArrayList<>(nAlleles);
+        byte[] ref = null;
+
+        for (int i = 0; i < nAlleles; i++) {
+            // Some decoder functionality is inlined here to avoid conversion from bytes -> string -> bytes
+            final byte typeDescriptor = decoder.readTypeDescriptor();
+            final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
+            if (type != BCF2Type.CHAR) {
+                error("Expected to find vector of type CHAR while decoding Allele bases, found type " + type);
+            }
+            final int size = decoder.decodeNumberOfElements(typeDescriptor);
+            final byte[] alleleBases = decoder.decodeRawBytes(size);
 
             final boolean isRef = i == 0;
+            if (isRef) {
+                ref = alleleBases;
+            }
+
             final Allele allele = Allele.create(alleleBases, isRef);
-            if ( isRef ) ref = alleleBases;
 
             alleles.add(allele);
         }
+
         assert ref != null;
+        assert ref.length > 0;
 
         builder.alleles(alleles);
-
-        assert !ref.isEmpty();
-
         return alleles;
     }
 
     /**
      * Decode the filter field of this BCF2 file and store the result in the builder
+     *
      * @param builder
      */
-    private void decodeFilter( final VariantContextBuilder builder ) throws IOException {
-        final Object value = decoder.decodeTypedValue();
+    private void decodeFilter(final VariantContextBuilder builder) throws IOException {
+        final byte typeDescriptor = decoder.readTypeDescriptor();
+        final int size = decoder.decodeNumberOfElements(typeDescriptor);
+        final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
 
-        if ( value == null )
+        if (size == 0) {
+            // No filters
             builder.unfiltered();
-        else {
-            if ( value instanceof Integer ) {
-                // fast path for single integer result
-                final String filterString = getDictionaryString((Integer)value);
-                if ( VCFConstants.PASSES_FILTERS_v4.equals(filterString))
-                    builder.passFilters();
-                else
-                    builder.filter(filterString);
+        } else if (size == 1) {
+            final int i = decoder.decodeInt(type);
+            if (i == 0) {
+                // PASS is always implicitly encoded as 0
+                builder.passFilters();
             } else {
-                for ( final int offset : (List<Integer>)value )
-                    builder.filter(getDictionaryString(offset));
+                builder.filter(getDictionaryString(i));
+            }
+        } else {
+            for (final int offset : decoder.decodeIntArray(size, type, null)) {
+                builder.filter(getDictionaryString(offset));
             }
         }
     }
@@ -399,17 +437,23 @@ private void decodeFilter( final VariantContextBuilder builder ) throws IOExcept
      * @param builder
      * @param numInfoFields
      */
-    private void decodeInfo( final VariantContextBuilder builder, final int numInfoFields ) throws IOException {
-        if ( numInfoFields == 0 )
+    private void decodeInfo(final VariantContextBuilder builder, final int numInfoFields) throws IOException {
+        if (numInfoFields == 0)
             // fast path, don't bother doing any work if there are no fields
             return;
 
-        final Map<String, Object> infoFieldEntries = new HashMap<String, Object>(numInfoFields);
-        for ( int i = 0; i < numInfoFields; i++ ) {
+        final Map<String, Object> infoFieldEntries = new HashMap<>(numInfoFields);
+        for (int i = 0; i < numInfoFields; i++) {
             final String key = getDictionaryString();
             Object value = decoder.decodeTypedValue();
             final VCFCompoundHeaderLine metaData = VariantContextUtils.getMetaDataForField(header, key);
-            if ( metaData.getType() == VCFHeaderLineType.Flag ) value = true; // special case for flags
+            if (metaData.getType() == VCFHeaderLineType.Flag) {
+                // Despite contradictory language in the spec, bcftools/htslib encode the "payload" of
+                // FLAG as 0x00 (MISSING type) which we would normally decode as MISSING/null,
+                // so we consider this value to be Boolean TRUE simply based on the presence of the key
+                // See https://github.com/samtools/hts-specs/issues/384
+                value = Boolean.TRUE; // special case for flags
+            }
             infoFieldEntries.put(key, value);
         }
 
@@ -429,17 +473,17 @@ private void decodeInfo( final VariantContextBuilder builder, final int numInfoF
      * @param siteInfo
      * @param builder
      */
-    private void createLazyGenotypesDecoder( final SitesInfoForDecoding siteInfo,
-                                             final VariantContextBuilder builder ) {
+    private void createLazyGenotypesDecoder(final SitesInfoForDecoding siteInfo,
+                                            final VariantContextBuilder builder) {
         if (siteInfo.nSamples > 0) {
             final LazyGenotypesContext.LazyParser lazyParser =
-                    new BCF2LazyGenotypesDecoder(this, siteInfo.alleles, siteInfo.nSamples, siteInfo.nFormatFields, builders);
+                new BCF2LazyGenotypesDecoder(this, siteInfo.alleles, siteInfo.nSamples, siteInfo.nFormatFields, builders);
 
             final LazyData lazyData = new LazyData(header, siteInfo.nFormatFields, decoder.getRecordBytes());
             final LazyGenotypesContext lazy = new LazyGenotypesContext(lazyParser, lazyData, header.getNGenotypeSamples());
 
             // did we resort the sample names?  If so, we need to load the genotype data
-            if ( !header.samplesWereAlreadySorted() )
+            if (!header.samplesWereAlreadySorted())
                 lazy.decode();
 
             builder.genotypesNoValidation(lazy);
@@ -458,12 +502,22 @@ public LazyData(final VCFHeader header, final int nGenotypeFields, final byte[]
         }
     }
 
-    private final String getDictionaryString() throws IOException {
+    private String getDictionaryString() throws IOException {
         return getDictionaryString((Integer) decoder.decodeTypedValue());
     }
 
     protected final String getDictionaryString(final int offset) {
-        return dictionary.get(offset);
+        return stringDictionary.get(offset);
+    }
+
+    private BCF2Dictionary makeStringDictionary(final BCFVersion bcfVersion) {
+        final BCF2Dictionary dict = BCF2Dictionary.makeBCF2StringDictionary(header, bcfVersion);
+
+        // if we got here we never found a dictionary, or there are no elements in the dictionary
+        if (dict.isEmpty())
+            error("Dictionary header element was absent or empty");
+
+        return dict;
     }
 
     /**
@@ -473,18 +527,16 @@ protected final String getDictionaryString(final int offset) {
      * @param contigOffset
      * @return
      */
-    private final String lookupContigName( final int contigOffset ) {
-        return contigNames.get(contigOffset);
+    private String lookupContigName(final int contigOffset) {
+        return contigDictionary.get(contigOffset);
     }
 
-    private final ArrayList<String> parseDictionary(final VCFHeader header) {
-        final ArrayList<String> dict = BCF2Utils.makeDictionary(header);
-
-        // if we got here we never found a dictionary, or there are no elements in the dictionary
-        if ( dict.isEmpty() )
-            error("Dictionary header element was absent or empty");
+    private BCF2Dictionary makeContigDictionary(final BCFVersion bcfVersion) {
+        // create the contig offsets
+        if (header.getContigLines().isEmpty())
+            error("Didn't find any contig lines in BCF2 file header");
 
-        return dict;
+        return BCF2Dictionary.makeBCF2ContigDictionary(header, bcfVersion);
     }
 
     /**
@@ -501,8 +553,9 @@ protected BCF2GenotypeFieldDecoders.Decoder getGenotypeFieldDecoder(final String
     protected void error(final String message) throws RuntimeException {
         throw new TribbleException(String.format("%s, at record %d with position %d:", message, recordNo, pos));
     }
-    
-    /** try to read a BCFVersion from an uncompressed BufferedInputStream.
+
+    /**
+     * Try to read a BCFVersion from an uncompressed BufferedInputStream.
      * The buffer must be large enough to contain {@link #SIZEOF_BCF_HEADER}
      * 
      * @param uncompressedBufferedInput the uncompressed input stream
@@ -515,5 +568,8 @@ public static BCFVersion tryReadBCFVersion(final BufferedInputStream uncompresse
         uncompressedBufferedInput.reset();
         return bcfVersion;
     }
-    
+
+    public BCFVersion getBCFVersion() {
+        return bcfVersion;
+    }
 }
diff --git a/src/main/java/htsjdk/variant/bcf2/BCF2Decoder.java b/src/main/java/htsjdk/variant/bcf2/BCF2Decoder.java
index 0dd166eef6..1544d9ed6c 100644
--- a/src/main/java/htsjdk/variant/bcf2/BCF2Decoder.java
+++ b/src/main/java/htsjdk/variant/bcf2/BCF2Decoder.java
@@ -31,24 +31,35 @@
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
 
-public final class BCF2Decoder {
-    byte[] recordBytes = null;
-    ByteArrayInputStream recordStream = null;
+public abstract class BCF2Decoder {
+    protected byte[] recordBytes = null;
+    protected ByteArrayInputStream recordStream = null;
 
-    public BCF2Decoder() {
+    private BCF2Decoder() {
         // nothing to do
     }
 
-    /**
-     * Create a new decoder ready to read BCF2 data from the byte[] recordBytes, for testing purposes
-     *
-     * @param recordBytes
-     */
-    protected BCF2Decoder(final byte[] recordBytes) {
-        setRecordBytes(recordBytes);
+    public static BCF2Decoder getDecoder(final BCFVersion version) {
+        switch (version.getMinorVersion()) {
+            case 1:
+                return new BCF2Decoder.BCF2_1Decoder();
+            case 2:
+                return new BCF2Decoder.BCF2_2Decoder();
+            default:
+                throw new TribbleException("BCF2Codec can only process BCF2 files with minor version <= " + BCF2Codec.ALLOWED_MINOR_VERSION + " but this file has minor version " + version.getMinorVersion());
+        }
+    }
+
+    public static BCF2Decoder getDecoder(final BCFVersion version, final byte[] recordBytes) {
+        final BCF2Decoder decoder = BCF2Decoder.getDecoder(version);
+        decoder.setRecordBytes(recordBytes);
+        return decoder;
     }
 
     // ----------------------------------------------------------------------
@@ -63,7 +74,7 @@ protected BCF2Decoder(final byte[] recordBytes) {
      * @param stream
      */
     public void readNextBlock(final int blockSizeInBytes, final InputStream stream) {
-        if ( blockSizeInBytes < 0 ) throw new TribbleException("Invalid block size " + blockSizeInBytes);
+        if (blockSizeInBytes < 0) throw new TribbleException("Invalid block size " + blockSizeInBytes);
         setRecordBytes(readRecordBytes(blockSizeInBytes, stream));
     }
 
@@ -74,9 +85,9 @@ public void readNextBlock(final int blockSizeInBytes, final InputStream stream)
      */
     public void skipNextBlock(final int blockSizeInBytes, final InputStream stream) {
         try {
-            final int bytesRead = (int)stream.skip(blockSizeInBytes);
+            final int bytesRead = (int) stream.skip(blockSizeInBytes);
             validateReadBytes(bytesRead, 1, blockSizeInBytes);
-        } catch ( IOException e ) {
+        } catch (final IOException e) {
             throw new TribbleException("I/O error while reading BCF2 file", e);
         }
         this.recordBytes = null;
@@ -85,6 +96,7 @@ public void skipNextBlock(final int blockSizeInBytes, final InputStream stream)
 
     /**
      * Returns the byte[] for the block of data we are currently decoding
+     *
      * @return
      */
     public byte[] getRecordBytes() {
@@ -131,41 +143,54 @@ public final Object decodeTypedValue(final byte typeDescriptor) throws IOExcepti
     }
 
     public final Object decodeTypedValue(final byte typeDescriptor, final int size) throws IOException {
-        if ( size == 0 ) {
+        if (size == 0) {
             // missing value => null in java
             return null;
         } else {
             final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
-            if ( type == BCF2Type.CHAR ) { // special case string decoding for efficiency
-                return decodeLiteralString(size);
-            } else if ( size == 1 ) {
-                return decodeSingleValue(type);
+            if (type == BCF2Type.CHAR) { // special case string decoding for efficiency
+                final List<String> strings = decodeExplodedStrings(size, ',');
+                if (strings.isEmpty()) {
+                    return null;
+                } else if (strings.size() == 1) {
+                    return strings.get(0);
+                } else {
+                    return strings;
+                }
+            } else if (size == 1) {
+                final Object o = decodeSingleValue(type);
+                return o == BCF2Type.EOVValue() ? null : o;
             } else {
-                final ArrayList<Object> ints = new ArrayList<Object>(size);
-                for ( int i = 0; i < size; i++ ) {
+                final ArrayList<Object> ints = new ArrayList<>(size);
+                for (int i = 0; i < size; i++) {
                     final Object val = decodeSingleValue(type);
-                    if ( val == null ) continue; // auto-pruning.  We remove trailing nulls
+                    if (val == BCF2Type.EOVValue()) continue;
                     ints.add(val);
                 }
-                return ints.isEmpty() ? null : ints; // return null when all of the values are null
+                return ints.isEmpty() ? null : ints;
             }
         }
     }
 
     public final Object decodeSingleValue(final BCF2Type type) throws IOException {
-        // TODO -- decodeTypedValue should integrate this routine
         final int value = decodeInt(type);
 
-        if ( value == type.getMissingBytes() )
+        if (value == type.getMissingBytes()) {
             return null;
-        else {
+        } else if (value == type.getEOVBytes()) {
+            return BCF2Type.EOVValue();
+        } else {
             switch (type) {
                 case INT8:
                 case INT16:
-                case INT32: return value;
-                case FLOAT: return rawFloatToFloat(value);
-                case CHAR:  return value & 0xFF; // TODO -- I cannot imagine why we'd get here, as string needs to be special cased
-                default:    throw new TribbleException("BCF2 codec doesn't know how to decode type " + type );
+                case INT32:
+                    return value;
+                case FLOAT:
+                    return rawFloatToFloat(value);
+                case CHAR:
+                    return value & 0xFF; // TODO -- I cannot imagine why we'd get here, as string needs to be special cased
+                default:
+                    throw new TribbleException("BCF2 codec doesn't know how to decode type " + type);
             }
         }
     }
@@ -176,31 +201,8 @@ public final Object decodeSingleValue(final BCF2Type type) throws IOException {
     //
     // ----------------------------------------------------------------------
 
-    private final Object decodeLiteralString(final int size) {
-        assert size > 0;
-
-        // TODO -- assumes size > 0
-        final byte[] bytes = new byte[size]; // TODO -- in principle should just grab bytes from underlying array
-        try {
-            recordStream.read(bytes);
-
-            int goodLength = 0;
-            for ( ; goodLength < bytes.length ; goodLength++ )
-                if ( bytes[goodLength] == 0 ) break;
-
-            if ( goodLength == 0 )
-                return null;
-            else {
-                final String s = new String(bytes, 0, goodLength);
-                return BCF2Utils.isCollapsedString(s) ? BCF2Utils.explodeStringList(s) : s;
-            }
-        } catch ( IOException e ) {
-            throw new TribbleException("readByte failure", e);
-        }
-    }
-
     public final int decodeNumberOfElements(final byte typeDescriptor) throws IOException {
-        if ( BCF2Utils.sizeIsOverflow(typeDescriptor) )
+        if (BCF2Utils.sizeIsOverflow(typeDescriptor))
             // -1 ensures we explode immediately with a bad size if the result is missing
             return decodeInt(readTypeDescriptor(), -1);
         else
@@ -228,14 +230,22 @@ public final int decodeInt(final BCF2Type type) throws IOException {
 
     /**
      * Low-level reader for int[]
-     *
+     * <p>
      * Requires a typeDescriptor so the function knows how many elements to read,
      * and how they are encoded.
-     *
+     * <p>
+     * Note that this method is only suitable for reading arrays which are known
+     * to not contain any internal MISSING values (e.g. filter or GT). In the
+     * case of GT in BCF 2.1, the vector may be MISSING padded if the
+     * sample ploidy is less than the maximum, but these missing values are
+     * not considered to be part of the array, and will not be returned.
+     * Parts of the decoder that require missing values to be preserved should
+     * use {@code decodeTypedValue} instead.
+     * <p>
      * If size == 0 =&gt; result is null
      * If size &gt; 0 =&gt; result depends on the actual values in the stream
-     *      -- If the first element read is MISSING, result is null (all values are missing)
-     *      -- Else result = int[N] where N is the first N non-missing values decoded
+     * -- If the first element read is the padding value, result is null (the whole vector is padding)
+     * -- Else result = int[N] where N is the number of values decoded before the first padding value
      *
      * @param maybeDest if not null we'll not allocate space for the vector, but instead use
      *                  the externally allocated array of ints to store values.  If the
@@ -244,45 +254,131 @@ public final int decodeInt(final BCF2Type type) throws IOException {
      *                  int elements are still forced to do a fresh allocation as well.
      * @return see description
      */
-    public final int[] decodeIntArray(final int size, final BCF2Type type, int[] maybeDest) throws IOException {
-        if ( size == 0 ) {
+    public int[] decodeIntArray(final int size, final BCF2Type type, int[] maybeDest) throws IOException {
+        if (size == 0) {
             return null;
         } else {
-            if ( maybeDest != null && maybeDest.length < size )
+            if (maybeDest != null && maybeDest.length < size)
                 maybeDest = null; // by nulling this out we ensure that we do fresh allocations as maybeDest is too small
 
             final int val1 = decodeInt(type);
-            if ( val1 == type.getMissingBytes() ) {
-                // fast path for first element being missing
-                for ( int i = 1; i < size; i++ ) decodeInt(type);
+            if (val1 == getPaddingValue(type)) {
+                // Fast path for first element being padding, meaning the whole array is empty
+                final int bytesToDrop = type.getSizeInBytes() * (size - 1);
+                // Skip the rest of the padding values
+                recordStream.skip(bytesToDrop);
                 return null;
             } else {
                 // we know we will have at least 1 element, so making the int[] is worth it
                 final int[] ints = maybeDest == null ? new int[size] : maybeDest;
-                ints[0] = val1; // we already read the first one
-                for ( int i = 1; i < size; i++ ) {
+                ints[0] = val1;
+                for (int i = 1; i < size; i++) {
                     ints[i] = decodeInt(type);
-                    if ( ints[i] == type.getMissingBytes() ) {
-                        // read the rest of the missing values, dropping them
-                        for ( int j = i + 1; j < size; j++ ) decodeInt(type);
+                    if (ints[i] == getPaddingValue(type)) {
+                        final int bytesToDrop = type.getSizeInBytes() * (size - (i + 1));
+                        // Skip the rest of the padding values
+                        recordStream.skip(bytesToDrop);
                         // deal with auto-pruning by returning an int[] containing
-                        // only the non-MISSING values.  We do this by copying the first
+                        // only the non-padding values.  We do this by copying the first
                         // i elements, as i itself is missing
                         return Arrays.copyOf(ints, i);
                     }
                 }
-                return ints; // all of the elements were non-MISSING
+                return ints; // all of the elements were non-padding
             }
         }
     }
 
+    public byte[] decodeRawBytes(final int size) throws IOException {
+        final byte[] bytes = new byte[size];
+        recordStream.read(bytes);
+        return bytes;
+    }
+
+    /**
+     * Decode a single ASCII encoded string which may be padded with NULL bytes.
+     * Multiple strings which were encoded as a single comma separated string are
+     * returned unexploded.
+     * <p>
+     * Reads directly from underlying byte buffer to avoid unnecessary array copies.
+     *
+     * @param size
+     * @return
+     */
+    public String decodeUnexplodedString(final int size) {
+        // Get our current position in the buffer so we can index directly into it
+        final int currentBufferPosition = recordBytes.length - recordStream.available();
+
+        // Jump over all bytes, including NULL padding
+        recordStream.skip(size);
+
+        // Scan for first NULL padding byte
+        int realLength = 0;
+        for (; realLength < size; realLength++)
+            if (recordBytes[currentBufferPosition + realLength] == '\0') break;
+
+        // The BCF spec states that strings are ASCII encoded, but we use UTF-8 for future proofing
+        return new String(recordBytes, currentBufferPosition, realLength, StandardCharsets.UTF_8);
+    }
+
+    public String decodeUnexplodedString() throws IOException {
+        final byte typeDescriptor = readTypeDescriptor();
+        final int size = decodeNumberOfElements(typeDescriptor);
+
+        return size > 0 ? decodeUnexplodedString(size) : "";
+    }
+
+    /**
+     * Decode a list of ASCII encoded strings.
+     * Multiple strings encoded as a single separator-delimited string are
+     * exploded. If only a single string was encoded with no separators, returns a
+     * list of length 1.
+     * <p>
+     * Reads directly from underlying byte buffer to avoid unnecessary array copies.
+     *
+     * @param size
+     * @return
+     */
+    public List<String> decodeExplodedStrings(final int size, final char separator) {
+        // Get our current position in the buffer so we can index directly into it
+        final int currentBufferPosition = recordBytes.length - recordStream.available();
+
+        // Jump over all bytes
+        recordStream.skip(size);
+
+        if (size == 0 || recordBytes[currentBufferPosition] == '\0') return Collections.emptyList();
+
+        int numStrings = 1;
+        // Start at offset 1 to avoid counting optional leading separator
+        // Real length may be shorter than provided one because of NULL padding
+        int realLength = 1;
+        for (; realLength < size; realLength++) {
+            final byte currentByte = recordBytes[currentBufferPosition + realLength];
+            if (currentByte == separator) numStrings++;
+            else if (currentByte == '\0') break;
+        }
+
+        final List<String> strings = new ArrayList<>(numStrings);
+        int currentStringStart = recordBytes[currentBufferPosition] == separator ? 1 : 0;
+        for (int i = 1; i < realLength; i++) {
+            if (recordBytes[currentBufferPosition + i] == separator) {
+                strings.add(new String(recordBytes, currentBufferPosition + currentStringStart, i - currentStringStart, StandardCharsets.UTF_8));
+                currentStringStart = i + 1;
+            }
+        }
+        // Add final string
+        strings.add(new String(recordBytes, currentBufferPosition + currentStringStart, realLength - currentStringStart, StandardCharsets.UTF_8));
+
+        return strings;
+    }
+
     public final int[] decodeIntArray(final byte typeDescriptor, final int size) throws IOException {
         final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
         return decodeIntArray(size, type, null);
     }
 
     private double rawFloatToFloat(final int rawFloat) {
-        return (double)Float.intBitsToFloat(rawFloat);
+        return Float.intBitsToFloat(rawFloat);
     }
 
     // ----------------------------------------------------------------------
@@ -303,11 +399,11 @@ public final int readBlockSize(final InputStream inputStream) throws IOException
 
     /**
      * Read all bytes for a BCF record block into a byte[], and return it
-     *
+     * <p>
      * Is smart about reading from the stream multiple times to fill the buffer, if necessary
      *
      * @param blockSizeInBytes number of bytes to read
-     * @param inputStream the stream to read from
+     * @param inputStream      the stream to read from
      * @return a non-null byte[] containing exactly blockSizeInBytes bytes from the inputStream
      */
     private static byte[] readRecordBytes(final int blockSizeInBytes, final InputStream inputStream) {
@@ -316,23 +412,23 @@ private static byte[] readRecordBytes(final int blockSizeInBytes, final InputStr
         final byte[] record = new byte[blockSizeInBytes];
         try {
             int bytesRead = 0;
-            int nReadAttempts = 0; // keep track of how many times we've read
+            final int nReadAttempts = 0; // keep track of how many times we've read
 
             // because we might not read enough bytes from the file in a single go, do it in a loop until we get EOF
-            while ( bytesRead < blockSizeInBytes ) {
+            while (bytesRead < blockSizeInBytes) {
                 final int read1 = inputStream.read(record, bytesRead, blockSizeInBytes - bytesRead);
-                if ( read1 == -1 )
+                if (read1 == -1)
                     validateReadBytes(bytesRead, nReadAttempts, blockSizeInBytes);
                 else
                     bytesRead += read1;
             }
 
-            if ( GeneralUtils.DEBUG_MODE_ENABLED && nReadAttempts > 1 ) { // TODO -- remove me
+            if (GeneralUtils.DEBUG_MODE_ENABLED && nReadAttempts > 1) { // TODO -- remove me
                 System.err.println("Required multiple read attempts to actually get the entire BCF2 block, unexpected behavior");
             }
 
             validateReadBytes(bytesRead, nReadAttempts, blockSizeInBytes);
-        } catch ( IOException e ) {
+        } catch (final IOException e) {
             throw new TribbleException("I/O error while reading BCF2 file", e);
         }
 
@@ -349,14 +445,40 @@ private static byte[] readRecordBytes(final int blockSizeInBytes, final InputStr
     private static void validateReadBytes(final int actuallyRead, final int nReadAttempts, final int expected) {
         assert expected >= 0;
 
-        if ( actuallyRead < expected ) {
+        if (actuallyRead < expected) {
             throw new TribbleException(
-                    String.format("Failed to read next complete record: expected %d bytes but read only %d after %d iterations",
-                            expected, actuallyRead, nReadAttempts));
+                String.format("Failed to read next complete record: expected %d bytes but read only %d after %d iterations",
+                    expected, actuallyRead, nReadAttempts));
         }
     }
 
     public final byte readTypeDescriptor() throws IOException {
-        return BCF2Utils.readByte(recordStream);
+        return (byte) recordStream.read();
+    }
+
+
+    // ----------------------------------------------------------------------
+    //
+    // Version specific behavior
+    //
+    // ----------------------------------------------------------------------
+
+
+    public abstract int getPaddingValue(final BCF2Type type);
+
+    public static class BCF2_1Decoder extends BCF2Decoder {
+
+        @Override
+        public int getPaddingValue(final BCF2Type type) {
+            return type.getMissingBytes();
+        }
+    }
+
+    public static class BCF2_2Decoder extends BCF2Decoder {
+
+        @Override
+        public int getPaddingValue(final BCF2Type type) {
+            return type.getEOVBytes();
+        }
     }
 }
diff --git a/src/main/java/htsjdk/variant/bcf2/BCF2Dictionary.java b/src/main/java/htsjdk/variant/bcf2/BCF2Dictionary.java
new file mode 100644
index 0000000000..7b30da8643
--- /dev/null
+++ b/src/main/java/htsjdk/variant/bcf2/BCF2Dictionary.java
@@ -0,0 +1,283 @@
+package htsjdk.variant.bcf2;
+
+import htsjdk.tribble.TribbleException;
+import htsjdk.variant.vcf.VCFConstants;
+import htsjdk.variant.vcf.VCFHeader;
+import htsjdk.variant.vcf.VCFHeaderLine;
+import htsjdk.variant.vcf.VCFSimpleHeaderLine;
+
+import java.util.AbstractMap;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.function.BiConsumer;
+import java.util.stream.Collectors;
+
+/**
+ * Dictionary of strings or contigs for use with a BCF file.
+ * <p>
+ * Provides an Integer -> String map interface, but determines during construction whether
+ * mapping can be stored as an array (if it can be stored as a dense array) or
+ * it must be stored using a map.
+ * <p>
+ * This class validates that IDX fields are used as required by the BCF 2.2 spec, namely
+ * that either all lines of a given dictionary type (contig or FORMAT/INFO/FILTER) have
+ * IDX fields or none do.
+ * <p>
+ * The spec does not require a 1-to-1 IDX-to-string mapping, but logically a header with a
+ * 1-to-n IDX-to-string mapping would be unparsable, and we reject such headers, while an
+ * n-to-1 IDX-to-string mapping might result from tools that do not deduplicate IDXs, so
+ * we accept them.
+ */
+public abstract class BCF2Dictionary extends AbstractMap<Integer, String> {
+
+    /**
+     * Create and return a BCF string dictionary
+     * The dictionary is an ordered list of common VCF identifiers (FILTER, INFO, and FORMAT) fields.
+     * <p>
+     * Note that it is critical that the list be deduplicated and ordered in a consistent manner each time,
+     * as the BCF2 offsets are encoded relative to this dictionary; if it is not determined exactly the
+     * same way as in the header each time, offsets will no longer refer to the intended strings.
+     *
+     * @param vcfHeader VCFHeader containing the strings to be stored
+     * @param version   BCF version for which the dictionary will be used
+     * @return BCF2Dictionary suitable for use with a BCF file
+     */
+    public static BCF2Dictionary makeBCF2StringDictionary(final VCFHeader vcfHeader, final BCFVersion version) {
+        final List<VCFSimpleHeaderLine> definingLines = new ArrayList<>();
+        for (final VCFHeaderLine candidate : vcfHeader.getMetaDataInInputOrder()) {
+            // Keep only the lines accepted by isStringDictionaryDefining, preserving header order
+            if (isStringDictionaryDefining(candidate)) definingLines.add((VCFSimpleHeaderLine) candidate);
+        }
+        return makeDictionary(definingLines, version, true);
+    }
+
+    private static boolean isStringDictionaryDefining(final VCFHeaderLine line) {
+        // A line contributes to the string dictionary exactly when its key is
+        // the INFO, FORMAT or FILTER header key
+        final String key = line.getKey();
+        final boolean isInfo = key.equals(VCFConstants.INFO_HEADER_KEY);
+        final boolean isFormat = key.equals(VCFConstants.FORMAT_HEADER_KEY);
+        final boolean isFilter = key.equals(VCFConstants.FILTER_HEADER_KEY);
+
+        return isInfo || isFormat || isFilter;
+    }
+
+    /**
+     * Build the dictionary of contigs declared by a header's contig lines.
+     * @param vcfHeader header whose contig lines supply the dictionary entries
+     * @param version   BCF version the dictionary is built for
+     * @return BCF2Dictionary built from the contig lines, without the string dictionary's PASS seeding
+     */
+    public static BCF2Dictionary makeBCF2ContigDictionary(final VCFHeader vcfHeader, final BCFVersion version) {
+        final boolean isStringDictionary = false;
+        return makeDictionary(vcfHeader.getContigLines(), version, isStringDictionary);
+    }
+
+    /**
+     * Build a dictionary from header lines, using their IDX fields when the version allows them.
+     * @param headerLines      dictionary defining header lines, in header order
+     * @param version          BCF version; IDX fields are only honored when its minor version is above 1
+     * @param stringDictionary whether to seed the dictionary with the implicit PASS string at index 0
+     * @return a dense dictionary if the indices are exactly 0..n-1, otherwise a sparse dictionary
+     * @throws TribbleException.InvalidHeader on mixed IDX usage or an IDX naming two different strings
+     */
+    private static BCF2Dictionary makeDictionary(
+        final List<? extends VCFSimpleHeaderLine> headerLines,
+        final BCFVersion version,
+        final boolean stringDictionary
+    ) {
+        if (headerLines.isEmpty()) {
+            return new BCF2DenseDictionary(Collections.emptyList());
+        }
+        // Note that we count FILTER/FORMAT/INFO header lines with the same ID but different key
+        // (e.g. a FORMAT line and an INFO line both with ID "A") to define the same string
+        // for the purposes of building the dictionary
+        // c.f. https://github.com/samtools/hts-specs/issues/591#issuecomment-904487133
+        final Set<String> seen = new HashSet<>(headerLines.size() + 1);
+        if (stringDictionary) {
+            // Special case the special PASS field which may not show up in the FILTER field definitions
+            seen.add(VCFConstants.PASSES_FILTERS_v4);
+        }
+
+        // Check version and possibly peek at first value to see if lines should contain IDX fields or not
+        final boolean shouldHaveIDX = version.getMinorVersion() > 1 &&
+            headerLines.get(0).getGenericFieldValue(BCF2Codec.IDXField) != null;
+
+        // Validate: if any line had an IDX then they all should
+        for (final VCFSimpleHeaderLine headerLine : headerLines) {
+            final String idxString = headerLine.getGenericFieldValue(BCF2Codec.IDXField);
+            if ((idxString == null) == shouldHaveIDX) {
+                throw new TribbleException.InvalidHeader(String.format(
+                    "Inconsistent IDX field usage in BCF file %s header line %s, %s",
+                    headerLine.getKey(), headerLine.getID(),
+                    shouldHaveIDX ? "did not find expected IDX field" : "unexpected IDX field"
+                ));
+            }
+        }
+
+        if (shouldHaveIDX) {
+            final HashMap<Integer, String> strings = new HashMap<>(headerLines.size() + 1);
+            int maxIDX = 0;
+            if (stringDictionary) {
+                strings.put(0, VCFConstants.PASSES_FILTERS_v4);
+            }
+            for (final VCFSimpleHeaderLine line : headerLines) {
+                final String id = line.getID();
+                final int IDX = Integer.parseUnsignedInt(line.getGenericFieldValue(BCF2Codec.IDXField));
+                // Look for a different string already owning this IDX BEFORE storing anything:
+                // checking after the put would only ever compare the new string with itself.
+                final String oldString = strings.get(IDX);
+                if (oldString != null && !oldString.equals(id)) {
+                    throw new TribbleException.InvalidHeader(String.format(
+                        "IDX %d associated with multiple dictionary defining strings: %s and %s",
+                        IDX, oldString, id
+                    ));
+                }
+                if (seen.add(id)) {
+                    maxIDX = Math.max(maxIDX, IDX);
+                    strings.put(IDX, id);
+                }
+            }
+            if (maxIDX == seen.size() - 1) {
+                // By the pigeonhole principle, if we have N unique non-negative IDXs numbered starting from 0
+                // (possibly including 0 -> PASS implicitly) and (N - 1) is the highest IDX we have seen,
+                // we have all the IDXs in [0, N), which we can represent as a length N dense array.
+                // This check is useful because bcftools will always add IDX fields to headers even when not
+                // strictly necessary, so we can avoid the cost of the hash map in many cases.
+                final ArrayList<String> stringsList = new ArrayList<>(seen.size());
+                for (int i = 0; i <= maxIDX; i++) {
+                    stringsList.add(strings.get(i)); // explicit index order, independent of HashMap iteration order
+                }
+                return new BCF2DenseDictionary(stringsList);
+            } else {
+                return new BCF2SparseDictionary(strings);
+            }
+        } else {
+            final ArrayList<String> strings = new ArrayList<>(headerLines.size() + 1);
+            if (stringDictionary) {
+                strings.add(VCFConstants.PASSES_FILTERS_v4);
+            }
+            for (final VCFSimpleHeaderLine line : headerLines) {
+                final String id = line.getID();
+                if (seen.add(id)) {
+                    strings.add(id);
+                }
+            }
+            return new BCF2DenseDictionary(strings);
+        }
+    }
+
+    /**
+     * Additional method in interface to avoid boxing when indexing into a
+     * dictionary backed by a List
+     *
+     * @param i index
+     * @return the string associated with the index or null
+     */
+    public abstract String get(final int i);
+
+    /**
+     * BCF 2.2 dense sequence dictionary. Strings are assigned an index corresponding to its position in a 0-indexed
+     * array. This dictionary is used if no IDX fields are present in the header, or they are present, but they
+     * represent a set of indices that are of the form 0, 1, ..., n, that is, the set has no gaps and is numbered
+     * starting at 0.
+     */
+    private static class BCF2DenseDictionary extends BCF2Dictionary {
+        // Position i of this list holds the string whose dictionary index is i
+        private final List<String> dictionary;
+
+        private BCF2DenseDictionary(final List<String> dictionary) {
+            this.dictionary = dictionary;
+        }
+        // Builds a fresh set on every call; the returned set is not backed by this dictionary
+        @Override
+        public Set<Entry<Integer, String>> entrySet() {
+            final Set<Entry<Integer, String>> set = new HashSet<>(dictionary.size());
+            int i = 0;
+            for (final String s : dictionary) {
+                set.add(new AbstractMap.SimpleEntry<>(i, s));
+                i++;
+            }
+            return set;
+        }
+        // Bounds-checked lookup: any index outside [0, size) yields null
+        @Override
+        public String get(final int i) {
+            return i < 0 || i >= dictionary.size() ? null : dictionary.get(i);
+        }
+        // Absent or non-Integer keys yield null (no IndexOutOfBoundsException), matching get(int)
+        @Override
+        public String get(final Object key) {
+            return key instanceof Integer ? get(((Integer) key).intValue()) : null;
+        }
+
+        @Override
+        public int size() {
+            return dictionary.size();
+        }
+
+        @Override
+        public boolean isEmpty() {
+            return dictionary.isEmpty();
+        }
+        // Visits (index, string) pairs in list order
+        @Override
+        public void forEach(final BiConsumer<? super Integer, ? super String> action) {
+            int i = 0;
+            for (final String s : dictionary) {
+                action.accept(i, s);
+                i++;
+            }
+        }
+    }
+
+    /**
+     * BCF 2.2 sparse dictionary. Strings are assigned an index corresponding to its line's IDX field.
+     * This dictionary is used if IDX fields are present in the header, and they represent a set of
+     * indices that is not of the form 0, 1, ..., n, that is, the set has gaps or is not numbered starting
+     * at 0.
+     */
+    private static class BCF2SparseDictionary extends BCF2Dictionary {
+        // Maps each index to its string; a map rather than a list because indices may have gaps
+        private final Map<Integer, String> dictionary;
+        // The map is stored as given, not copied
+        private BCF2SparseDictionary(final Map<Integer, String> dictionary) {
+            this.dictionary = dictionary;
+        }
+        // Exposes the backing map's own entry set rather than a copy
+        @Override
+        public Set<Entry<Integer, String>> entrySet() {
+            return dictionary.entrySet();
+        }
+        // The int index is boxed for the map lookup
+        @Override
+        public String get(final int i) {
+            return dictionary.get(i);
+        }
+
+        @Override
+        public String get(final Object key) {
+            return dictionary.get(key);
+        }
+
+        @Override
+        public int size() {
+            return dictionary.size();
+        }
+
+        @Override
+        public boolean isEmpty() {
+            return dictionary.isEmpty();
+        }
+
+        @Override
+        public void forEach(final BiConsumer<? super Integer, ? super String> action) {
+            dictionary.forEach(action);
+        }
+    }
+}
diff --git a/src/main/java/htsjdk/variant/bcf2/BCF2Encoder.java b/src/main/java/htsjdk/variant/bcf2/BCF2Encoder.java
new file mode 100644
index 0000000000..ae99f8e7e1
--- /dev/null
+++ b/src/main/java/htsjdk/variant/bcf2/BCF2Encoder.java
@@ -0,0 +1,386 @@
+/*
+* Copyright (c) 2012 The Broad Institute
+* 
+* Permission is hereby granted, free of charge, to any person
+* obtaining a copy of this software and associated documentation
+* files (the "Software"), to deal in the Software without
+* restriction, including without limitation the rights to use,
+* copy, modify, merge, publish, distribute, sublicense, and/or sell
+* copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following
+* conditions:
+* 
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+* 
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+package htsjdk.variant.bcf2;
+
+import htsjdk.samtools.util.ListByteBufferOutputStream;
+import htsjdk.tribble.TribbleException;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+
+/**
+ * See #BCFWriter for documentation on this class's role in encoding BCF2 files
+ *
+ * @author Mark DePristo
+ * @since 06/12
+ */
+public abstract class BCF2Encoder {
+    // TODO -- increase default size?
+    public static final int WRITE_BUFFER_INITIAL_SIZE = 16_384;
+    protected final ListByteBufferOutputStream encodeStream = new ListByteBufferOutputStream(WRITE_BUFFER_INITIAL_SIZE);
+
+    /** Pick the encoder implementation matching the minor version of {@code version}. */
+    public static BCF2Encoder getEncoder(final BCFVersion version) {
+        final int minor = version.getMinorVersion();
+        if (minor == 1) {
+            return new BCF2_1Encoder();
+        } else if (minor == 2) {
+            return new BCF2_2Encoder();
+        }
+        throw new TribbleException("BCF2Codec can only process BCF2 files with minor version <= " + 2 + " but this file has minor version " + minor);
+    }
+
+
+    // --------------------------------------------------------------------------------
+    //
+    // Functions to return the data being encoded here
+    //
+    // --------------------------------------------------------------------------------
+
+    /**
+     * This allocates a new array and copies the stream's contents over so it
+     * should not be used in the actual encoder, but may be useful for testing
+     */
+    public byte[] getRecordBytes() {
+        final byte[] bytes = encodeStream.toByteArray();
+        encodeStream.reset();
+        return bytes;
+    }
+
+    public final int getSize() {
+        return encodeStream.size();
+    }
+
+    public final void write(final OutputStream out) throws IOException {
+        encodeStream.writeTo(out);
+        encodeStream.reset();
+    }
+
+
+    // --------------------------------------------------------------------------------
+    //
+    // Writing typed values (writes out typing byte(s) first)
+    //
+    // --------------------------------------------------------------------------------
+
+    public final void encodeTypedMissing(final BCF2Type type) throws IOException {
+        encodeType(0, type);
+    }
+
+    /** Write a typed scalar: null becomes a typed missing, otherwise value is cast according to type. */
+    public final void encodeTyped(final Object value, final BCF2Type type) throws IOException {
+        if (value == null) {
+            encodeTypedMissing(type);
+            return;
+        }
+        // Each branch returns right after writing, so no case falls through to the next
+        switch (type) {
+            case INT8: case INT16: case INT32:
+                encodeTypedInt((Integer) value, type);
+                return;
+            case FLOAT:
+                encodeTypedFloat((Double) value);
+                return;
+            case CHAR:
+                encodeTypedString((String) value);
+                return;
+            default:
+                throw new IllegalArgumentException("Illegal type encountered " + type);
+        }
+    }
+
+    public final void encodeTypedInt(final int v) throws IOException {
+        final BCF2Type type = BCF2Utils.determineIntegerType(v);
+        encodeTypedInt(v, type);
+    }
+
+    public final void encodeTypedInt(final int v, final BCF2Type type) throws IOException {
+        encodeType(1, type);
+        encodeRawInt(v, type);
+    }
+
+    public final void encodeTypedFloat(final double v) throws IOException {
+        encodeType(1, BCF2Type.FLOAT);
+        encodeRawFloat(v);
+    }
+
+    public final void encodeTypedString(final String s) throws IOException {
+        encodeTypedString(s.getBytes(StandardCharsets.UTF_8));
+    }
+
+    public final void encodeTypedString(final byte[] s) throws IOException {
+        encodeType(s.length, BCF2Type.CHAR);
+        encodeStream.write(s);
+    }
+
+    public final void encodeTypedVecInt(final int[] vs) throws IOException {
+        final int size = vs.length;
+        final BCF2Type type = BCF2Utils.determineIntegerType(vs);
+        encodeType(size, type);
+        encodeRawVecInt(vs, size, type);
+    }
+
+
+    public final void encodeTypedVecInt(final int[] vs, final int paddedSize) throws IOException {
+        final BCF2Type type = BCF2Utils.determineIntegerType(vs);
+        encodeType(paddedSize, type);
+        encodeRawVecInt(vs, paddedSize, type);
+    }
+
+    // TODO only used in testing, should remove and update tests
+    public final void encodeTyped(final List<?> v, final BCF2Type type) throws IOException {
+        if (type == BCF2Type.CHAR && !v.isEmpty()) {
+            encodeTypedString(compactStrings((List<String>) v));
+        } else {
+            encodeType(v.size(), type);
+            encodeRawValues(v, type);
+        }
+    }
+
+
+    // --------------------------------------------------------------------------------
+    //
+    // Writing raw values (does not write out typing byte(s))
+    //
+    // --------------------------------------------------------------------------------
+
+    public final <T> void encodeRawValues(final Collection<T> v, final BCF2Type type) throws IOException {
+        for (final T v1 : v) {
+            encodeRawValue(v1, type);
+        }
+    }
+
+    /** Write one untyped value, or the type's missing marker if value is type.getMissingJavaValue(). */
+    public final <T> void encodeRawValue(final T value, final BCF2Type type) throws IOException {
+        try {
+            if (value == type.getMissingJavaValue()) {
+                encodeRawMissingValue(type);
+                return;
+            }
+            switch (type) {
+                case INT8: case INT16: case INT32:
+                    encodeRawBytes((Integer) value, type);
+                    break;
+                case FLOAT:
+                    encodeRawFloat((Double) value);
+                    break;
+                case CHAR:
+                    encodeRawChar((Byte) value);
+                    break;
+                default:
+                    throw new IllegalArgumentException("Illegal type encountered " + type);
+            }
+        } catch (final ClassCastException e) {
+            // Chain the original failure so its stack trace is not lost (ClassCastException has no cause ctor)
+            throw (ClassCastException) new ClassCastException("BUG: invalid type cast to " + type + " from " + value).initCause(e);
+        }
+    }
+
+    public final void encodeRawMissingValue(final BCF2Type type) throws IOException {
+        encodeRawBytes(type.getMissingBytes(), type);
+    }
+
+
+    // --------------------------------------------------------------------------------
+    //
+    // Low-level encoders
+    //
+    // --------------------------------------------------------------------------------
+
+    /** Write the typing byte for {@code size} elements of {@code type}, spilling large sizes into a typed int. */
+    public final void encodeType(final int size, final BCF2Type type) throws IOException {
+        final boolean fitsInline = size <= BCF2Utils.MAX_INLINE_ELEMENTS;
+        final int declaredSize = fitsInline ? size : BCF2Utils.OVERFLOW_ELEMENT_MARKER;
+        final int typeByte = BCF2Utils.encodeTypeDescriptor(declaredSize, type);
+        encodeStream.write(typeByte);
+        if (!fitsInline) {
+            // The descriptor only carried the overflow marker, so the real size follows as a typed int
+            encodeTypedInt(size);
+        }
+    }
+
+    public final void encodeRawBytes(final int v, final BCF2Type type) throws IOException {
+        type.write(v, encodeStream);
+    }
+
+    public final void encodeRawInt(final int v, final BCF2Type type) throws IOException {
+        type.write(v, encodeStream);
+    }
+
+    public final void encodeRawFloat(final double v) throws IOException {
+        encodeRawBytes(Float.floatToIntBits((float) v), BCF2Type.FLOAT);
+    }
+
+    public final void encodeRawChar(final byte c) {
+        encodeStream.write(c);
+    }
+
+    public final void encodeRawString(final byte[] s, final int paddedSize) {
+        encodeStream.write(s);
+        final int padding = paddedSize - s.length;
+        if (padding > 0) {
+            // Pad with zeros, see https://github.com/samtools/hts-specs/issues/232
+            encodeStream.write((byte) 0, padding);
+        }
+    }
+
+    public final void encodeRawVecInt(final int[] vs, final int paddedSize, final BCF2Type type) throws IOException {
+        for (final int v : vs) {
+            type.write(v, encodeStream);
+        }
+        encodePaddingValues(paddedSize - vs.length, type);
+    }
+
+    /** Write every element of {@code vs} using {@code type}, without padding and without a typing byte. */
+    public final void encodeRawVecInt(final List<Integer> vs, final BCF2Type type) throws IOException {
+        for (final Integer element : vs) {
+            // A null element is written as the type's missing bytes,
+            // any other element as its own int value
+            final int rawBits = (element == null) ? type.getMissingBytes() : element.intValue();
+            type.write(rawBits, encodeStream);
+        }
+    }
+
+    public final void encodeRawVecInt(final List<Integer> vs, final int paddedSize, final BCF2Type type) throws IOException {
+        encodeRawVecInt(vs, type);
+        encodePaddingValues(paddedSize - vs.size(), type);
+    }
+
+    public final void encodeRawVecFloat(final double[] vs, final int paddedSize) throws IOException {
+        for (final double v : vs) {
+            encodeRawFloat(v);
+        }
+        encodePaddingValues(paddedSize - vs.length, BCF2Type.FLOAT);
+    }
+
+    public final void encodeRawVecFloat(final List<Double> vs) throws IOException {
+        for (final Double v : vs) {
+            if (v == null) {
+                encodeRawMissingValue(BCF2Type.FLOAT);
+            } else {
+                encodeRawFloat(v);
+            }
+        }
+    }
+
+    public final void encodeRawVecFloat(final List<Double> vs, final int paddedSize) throws IOException {
+        encodeRawVecFloat(vs);
+        encodePaddingValues(paddedSize - vs.size(), BCF2Type.FLOAT);
+    }
+
+    public final void encodePaddingValues(final int size, final BCF2Type type) throws IOException {
+        for (int i = 0; i < size; i++) {
+            encodePaddingValue(type);
+        }
+    }
+
+    public abstract void encodePaddingValue(final BCF2Type type) throws IOException;
+
+    // --------------------------------------------------------------------------------
+    //
+    // Utility Functions
+    //
+    // --------------------------------------------------------------------------------
+
+    public final byte[] compactStrings(final String[] strings) {
+        return compactStrings(Arrays.asList(strings));
+    }
+
+    public abstract byte[] compactStrings(final List<String> strings);
+
+
+    // --------------------------------------------------------------------------------
+    //
+    // Version specific behavior
+    //
+    // --------------------------------------------------------------------------------
+
+    public static class BCF2_1Encoder extends BCF2Encoder {
+        /** Writes the type's missing bytes as the padding value. */
+        @Override
+        public void encodePaddingValue(final BCF2Type type) throws IOException {
+            type.write(type.getMissingBytes(), encodeStream);
+        }
+
+        /** Join strings into one byte array in which every string, including the first, follows a comma. */
+        @Override
+        public byte[] compactStrings(final List<String> strings) {
+            final int count = strings.size();
+            if (count == 0) return new byte[0];
+            final byte[][] encoded = new byte[count][];
+            int total = count; // one comma per string
+            int next = 0;
+            for (final String string : strings) {
+                encoded[next] = string.getBytes(StandardCharsets.UTF_8);
+                total += encoded[next++].length;
+            }
+            final byte[] joined = new byte[total];
+            int offset = 0;
+            for (final byte[] chunk : encoded) {
+                joined[offset++] = (byte) ',';
+                System.arraycopy(chunk, 0, joined, offset, chunk.length);
+                offset += chunk.length;
+            }
+            return joined;
+        }
+    }
+
+    public static class BCF2_2Encoder extends BCF2Encoder {
+
+        /** Writes the type's end-of-vector bytes as the padding value. */
+        @Override
+        public void encodePaddingValue(final BCF2Type type) throws IOException {
+            type.write(type.getEOVBytes(), encodeStream);
+        }
+
+        /** Join strings into one byte array with a comma between (not before) consecutive strings. */
+        @Override
+        public byte[] compactStrings(final List<String> strings) {
+            final int count = strings.size();
+            if (count == 0) return new byte[0];
+            final byte[][] encoded = new byte[count][];
+            int total = count - 1; // one comma between each pair of neighbours
+            int next = 0;
+            for (final String string : strings) {
+                encoded[next] = string.getBytes(StandardCharsets.UTF_8);
+                total += encoded[next++].length;
+            }
+            final byte[] joined = new byte[total];
+            int offset = 0;
+            for (int j = 0; j < count; j++) {
+                if (j > 0) joined[offset++] = (byte) ',';
+                System.arraycopy(encoded[j], 0, joined, offset, encoded[j].length);
+                offset += encoded[j].length;
+            }
+            return joined;
+        }
+    }
+}
diff --git a/src/main/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldEncoder.java b/src/main/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldEncoder.java
new file mode 100644
index 0000000000..3a6aeae2cb
--- /dev/null
+++ b/src/main/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldEncoder.java
@@ -0,0 +1,314 @@
+package htsjdk.variant.bcf2.BCF2FieldWriter;
+
+import htsjdk.tribble.TribbleException;
+import htsjdk.variant.bcf2.BCF2Encoder;
+import htsjdk.variant.bcf2.BCF2Type;
+import htsjdk.variant.bcf2.BCF2Utils;
+import htsjdk.variant.variantcontext.VariantContext;
+import htsjdk.variant.vcf.VCFCompoundHeaderLine;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.List;
+
+abstract class BCF2FieldEncoder {
+
+    final BCF2Encoder encoder;
+
+    BCF2Type type;
+
+    /*
+    The number of VCF values this encoder has seen, taking the maximum over all objects loaded.
+    This value is not identical to either the number of Java objects loaded or the BCF2 typing byte length
+    but is primarily useful for checking that the number of VCF values matches the header's declared count.
+
+    For example, for a writer of type Character having loaded the String "abc", nValues is 3 matching its typing byte,
+    while for a writer of type String having loaded the String "abc", nValues is 1, but its typing byte length is 3.
+     */
+    int nValues;
+
+    BCF2FieldEncoder(final BCF2Encoder encoder) {
+        this.encoder = encoder;
+    }
+
+    abstract void load(final Object o);
+
+    void encodeType() throws IOException {
+        encoder.encodeType(nValues, type);
+    }
+
+    void checkNValues(final VCFCompoundHeaderLine headerLine, final VariantContext vc) { // compare seen vs. header count
+        final int expectedValues = headerLine.getCount(vc);
+        if (nValues > expectedValues)
+            throw BCF2FieldWriter.tooManyValues(nValues, expectedValues, headerLine.getKey(), vc);
+        nValues = expectedValues; // nValues now holds the header's count, even if fewer values were seen
+    }
+
+    abstract void encode() throws IOException;
+
+
+    static class AtomicIntFieldEncoder extends BCF2FieldEncoder {
+        // Loaded values in load order; a null element records a null input
+        private final List<Integer> vs = new ArrayList<>();
+
+        AtomicIntFieldEncoder(final BCF2Encoder encoder) {
+            super(encoder);
+            nValues = 1;
+            type = BCF2Type.INT8;
+        }
+
+        @Override
+        void load(final Object o) {
+            if (o != null && !(o instanceof Integer)) {
+                throw BCF2FieldEncoder.incompatibleType(o, type);
+            }
+            final Integer boxed = (Integer) o;
+            if (boxed != null) {
+                // Combine the type so far with the type determined for this value
+                type = BCF2Utils.maxIntegerType(type, BCF2Utils.determineIntegerType(boxed));
+            }
+            vs.add(boxed);
+        }
+
+        @Override
+        void encode() throws IOException {
+            encoder.encodeRawVecInt(vs, type);
+            vs.clear();
+            type = BCF2Type.INT8; // back to the constructor's starting type
+        }
+    }
+
+    static class AtomicFloatFieldEncoder extends BCF2FieldEncoder {
+        // Loaded values in load order; a null element records a null input
+        private final List<Double> vs = new ArrayList<>();
+
+        AtomicFloatFieldEncoder(final BCF2Encoder encoder) {
+            super(encoder);
+            nValues = 1;
+            type = BCF2Type.FLOAT;
+        }
+
+        @Override
+        void load(final Object o) {
+            if (o != null && !(o instanceof Double)) {
+                throw BCF2FieldEncoder.incompatibleType(o, type);
+            }
+            // Past the guard, o is either null or a Double;
+            // both are stored unchanged
+            final Double boxed = (Double) o;
+            vs.add(boxed);
+        }
+
+        @Override
+        void encode() throws IOException {
+            encoder.encodeRawVecFloat(vs);
+            vs.clear();
+        }
+    }
+
+    static class CharFieldEncoder extends BCF2FieldEncoder {
+        // Shared zero-length array stored whenever a null is loaded
+        private static final byte[] EMPTY = new byte[0];
+        private final List<byte[]> vs = new ArrayList<>(); // UTF-8 bytes of each loaded value, in load order
+
+        CharFieldEncoder(final BCF2Encoder encoder) {
+            super(encoder);
+            nValues = 0;
+            type = BCF2Type.CHAR;
+        }
+
+        @Override
+        void load(final Object o) {
+            if (o == null) {
+                vs.add(EMPTY);
+                return;
+            }
+            if (!(o instanceof String)) {
+                throw BCF2FieldEncoder.incompatibleType(o, type);
+            }
+            final byte[] utf8 = ((String) o).getBytes(StandardCharsets.UTF_8);
+            nValues = Math.max(nValues, utf8.length);
+            vs.add(utf8);
+        }
+
+        @Override
+        void encode() {
+            for (final byte[] utf8 : vs) {
+                encoder.encodeRawString(utf8, nValues);
+            }
+            nValues = 0;
+            vs.clear();
+        }
+    }
+
+    static class StringFieldEncoder extends BCF2FieldEncoder {
+        // Shared zero-length array stored whenever a null is loaded
+        private static final byte[] EMPTY = new byte[0];
+        // Encoded bytes of every loaded value, in load order
+        private final List<byte[]> vs = new ArrayList<>();
+        private int charLength; // longest encoded value loaded so far, in bytes
+
+        StringFieldEncoder(final BCF2Encoder encoder) {
+            super(encoder);
+            type = BCF2Type.CHAR;
+            nValues = 0;
+            charLength = 0;
+        }
+        // Accepts null, String, List (assumed to hold Strings; the cast is unchecked) or String[]
+        @Override
+        void load(final Object o) {
+            if (o == null) {
+                vs.add(EMPTY); // a null leaves nValues and charLength untouched
+            } else {
+                final byte[] v;
+                final int stringsSeen;
+                if (o instanceof String) {
+                    v = ((String) o).getBytes(StandardCharsets.UTF_8);
+                    stringsSeen = 1;
+                } else if (o instanceof List) {
+                    final List<String> strings = (List<String>) o;
+                    v = encoder.compactStrings(strings);
+                    stringsSeen = strings.size();
+                } else if (o instanceof String[]) {
+                    final String[] strings = (String[]) o;
+                    v = encoder.compactStrings(strings);
+                    stringsSeen = strings.length;
+                } else {
+                    throw BCF2FieldEncoder.incompatibleType(o, type);
+                }
+                // Track the maxima over everything loaded: string count in nValues, byte length in charLength
+                vs.add(v);
+                nValues = Math.max(nValues, stringsSeen);
+                charLength = Math.max(charLength, v.length);
+            }
+        }
+        // The typing byte is written with the byte length (charLength), not the string count (nValues)
+        @Override
+        void encodeType() throws IOException {
+            encoder.encodeType(charLength, type);
+        }
+        // Writes each value with charLength as its padded size, then clears the values and both maxima
+        @Override
+        void encode() {
+            for (final byte[] v : vs) {
+                encoder.encodeRawString(v, charLength);
+            }
+            vs.clear();
+            nValues = 0;
+            charLength = 0;
+        }
+    }
+
+    static class VecIntFieldEncoder extends BCF2FieldEncoder {
+        // Encoder for Integer fields that may carry several values per entry; null entries are buffered too
+        private final List<Object> vs = new ArrayList<>();
+
+        VecIntFieldEncoder(final BCF2Encoder encoder) {
+            super(encoder);
+            type = BCF2Type.INT8;
+            nValues = 0;
+        }
+        /** Buffers o and raises type/nValues to cover it; accepts null, List, Integer and int[], rejects anything else. */
+        @Override
+        void load(final Object o) {
+            if (o != null) {
+                if (o instanceof List) {
+                    final List<Integer> v = (List<Integer>) o;
+                    type = BCF2Utils.maxIntegerType(type, BCF2Utils.determineIntegerType(v));
+                    nValues = Math.max(nValues, v.size());
+                } else if (o instanceof Integer) {
+                    final Integer v = (Integer) o;
+                    type = BCF2Utils.maxIntegerType(type, BCF2Utils.determineIntegerType(v));
+                    nValues = Math.max(nValues, 1);
+                } else if (o instanceof int[]) {
+                    final int[] v = (int[]) o;
+                    type = BCF2Utils.maxIntegerType(type, BCF2Utils.determineIntegerType(v));
+                    nValues = Math.max(nValues, v.length);
+                } else {
+                    // TODO do we need to support Integer[] ?
+                    throw BCF2FieldEncoder.incompatibleType(o, type);
+                }
+            }
+            vs.add(o);
+        }
+        /** Writes every buffered entry with the shared type and nValues (nValues padding values for null), then resets. */
+        @Override
+        void encode() throws IOException {
+            for (final Object o : vs) {
+                if (o == null) {
+                    encoder.encodePaddingValues(nValues, type);
+                } else if (o instanceof List) {
+                    final List<Integer> v = (List<Integer>) o;
+                    encoder.encodeRawVecInt(v, nValues, type);
+                } else if (o instanceof Integer) {
+                    final Integer v = (Integer) o;
+                    encoder.encodeRawInt(v, type);
+                    encoder.encodePaddingValues(nValues - 1, type);
+                } else if (o instanceof int[]) {
+                    final int[] v = (int[]) o;
+                    encoder.encodeRawVecInt(v, nValues, type);
+                }
+            }
+            vs.clear();
+            type = BCF2Type.INT8;
+            nValues = 0;
+        }
+    }
+
+    static class VecFloatFieldEncoder extends BCF2FieldEncoder {
+        // Encoder for Float fields that may carry several values per entry; null entries are buffered too
+        private final List<Object> vs = new ArrayList<>();
+
+        VecFloatFieldEncoder(final BCF2Encoder encoder) {
+            super(encoder);
+            type = BCF2Type.FLOAT;
+            nValues = 0;
+        }
+        /** Buffers o and raises nValues to cover it; accepts null, List, Double and double[], rejects anything else. */
+        @Override
+        void load(final Object o) {
+            if (o != null) {
+                if (o instanceof List) {
+                    final List<Double> v = (List<Double>) o;
+                    nValues = Math.max(nValues, v.size());
+                } else if (o instanceof Double) {
+                    nValues = Math.max(nValues, 1);
+                } else if (o instanceof double[]) {
+                    final double[] v = (double[]) o;
+                    nValues = Math.max(nValues, v.length);
+                } else {
+                    // TODO do we need to support Double[] ?
+                    throw BCF2FieldEncoder.incompatibleType(o, type);
+                }
+            }
+            vs.add(o);
+        }
+        /** Writes every buffered entry with the shared nValues (nValues padding values for null), then resets. */
+        @Override
+        void encode() throws IOException {
+            for (final Object o : vs) {
+                if (o == null) {
+                    encoder.encodePaddingValues(nValues, type);
+                } else if (o instanceof List) {
+                    final List<Double> v = (List<Double>) o;
+                    encoder.encodeRawVecFloat(v, nValues);
+                } else if (o instanceof Double) {
+                    final Double v = (Double) o;
+                    encoder.encodeRawFloat(v);
+                    encoder.encodePaddingValues(nValues - 1, BCF2Type.FLOAT);
+                } else if (o instanceof double[]) {
+                    final double[] v = (double[]) o;
+                    encoder.encodeRawVecFloat(v, nValues);
+                }
+            }
+            vs.clear();
+            nValues = 0;
+        }
+    }
+
+    // Builds (does not throw) the exception reported when an object cannot be written as the given BCF2 type
+    static TribbleException incompatibleType(final Object o, final BCF2Type type) {
+        return new TribbleException(String.format("Could not write object: %s whose type is incompatible with declared header of type: %s", o, type));
+    }
+}
diff --git a/src/main/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldWriter.java b/src/main/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldWriter.java
new file mode 100644
index 0000000000..3b645bf981
--- /dev/null
+++ b/src/main/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldWriter.java
@@ -0,0 +1,515 @@
+package htsjdk.variant.bcf2.BCF2FieldWriter;
+
+import htsjdk.tribble.TribbleException;
+import htsjdk.variant.bcf2.BCF2Type;
+import htsjdk.variant.bcf2.BCF2Utils;
+import htsjdk.variant.variantcontext.Allele;
+import htsjdk.variant.variantcontext.Genotype;
+import htsjdk.variant.variantcontext.VariantContext;
+import htsjdk.variant.bcf2.BCF2Encoder;
+import htsjdk.variant.vcf.VCFCompoundHeaderLine;
+import htsjdk.variant.vcf.VCFConstants;
+import htsjdk.variant.vcf.VCFFormatHeaderLine;
+import htsjdk.variant.vcf.VCFHeaderLineCount;
+import htsjdk.variant.vcf.VCFHeaderLineType;
+import htsjdk.variant.vcf.VCFInfoHeaderLine;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+/**
+ * Base class of the INFO and FORMAT writers; each instance writes the single field described by its header line
+ */
+class BCF2FieldWriter {
+    final VCFCompoundHeaderLine headerLine;
+    final int dictionaryOffset;
+    final BCF2Type dictionaryOffsetType;
+    final String key;
+    final BCF2Encoder encoder;
+    /** Binds this writer to one header line and precomputes the integer type used to write its dictionary offset. */
+    BCF2FieldWriter(final VCFCompoundHeaderLine headerLine, final int dictionaryOffset, final BCF2Encoder encoder) {
+        this.headerLine = headerLine;
+        this.dictionaryOffset = dictionaryOffset;
+        this.dictionaryOffsetType = BCF2Utils.determineIntegerType(dictionaryOffset);
+        this.key = headerLine.getID();
+        this.encoder = encoder;
+    }
+
+    /**
+     * Writes this field's dictionary offset as a typed int; call before encoding every VariantContext in both INFO and FORMAT writers
+     */
+    void encodeKey() throws IOException {
+        encoder.encodeTypedInt(dictionaryOffset, dictionaryOffsetType);
+    }
+
+
+    //////////////////////////////////////////////////
+    // Factory Methods                              //
+    //////////////////////////////////////////////////
+    /**
+     * Creates the INFO writer for {@code line}: a {@link SiteFlagWriter} for Flag lines, else a {@link SiteAttributeWriter}
+     */
+    static SiteWriter createSiteWriter(final VCFInfoHeaderLine line, final int offset, final BCF2Encoder encoder) {
+        if (line.getType() == VCFHeaderLineType.Flag) {
+            return new SiteFlagWriter(line, offset, encoder);
+        }
+        return new SiteAttributeWriter(line, offset, encoder);
+    }
+
+    static GenotypeWriter createGenotypeWriter(
+        final VCFFormatHeaderLine line,
+        final int offset,
+        final BCF2Encoder encoder
+    ) {
+        // Specialized writers, selected by key, for fields stored inline in the Genotype and not in its attributes map
+        switch (line.getID()) {
+            case VCFConstants.GENOTYPE_KEY:
+                return new GTWriter(line, offset, encoder);
+            case VCFConstants.GENOTYPE_FILTER_KEY:
+                return new FTWriter(line, offset, encoder);
+            case VCFConstants.DEPTH_KEY:
+                return new DPWriter(line, offset, encoder);
+            case VCFConstants.GENOTYPE_QUALITY_KEY:
+                return new GQWriter(line, offset, encoder);
+            case VCFConstants.GENOTYPE_ALLELE_DEPTHS:
+                return new ADWriter(line, offset, encoder);
+            case VCFConstants.GENOTYPE_PL_KEY:
+                return new PLWriter(line, offset, encoder);
+        }
+        // Any other key is read from the Genotype's extended attributes; a FORMAT line of type Flag is rejected
+        if (line.getType() == VCFHeaderLineType.Flag) {
+            throw new TribbleException("Format lines cannot have type Flag");
+        } else {
+            return new GenotypeAttributeWriter(line, offset, encoder);
+        }
+    }
+
+    /**
+     * Picks the value encoder for a header line; Integer/Float lines with a fixed count of exactly 1 get the atomic encoders
+     */
+    private static BCF2FieldEncoder getEncoder(final VCFCompoundHeaderLine line, final BCF2Encoder encoder) {
+        final boolean atomic = line.isFixedCount() && line.getCount() == 1;
+        switch (line.getType()) {
+            case Integer:
+                return atomic ? new BCF2FieldEncoder.AtomicIntFieldEncoder(encoder) : new BCF2FieldEncoder.VecIntFieldEncoder(encoder);
+            case Float:
+                return atomic ? new BCF2FieldEncoder.AtomicFloatFieldEncoder(encoder) : new BCF2FieldEncoder.VecFloatFieldEncoder(encoder);
+            case String:
+                return new BCF2FieldEncoder.StringFieldEncoder(encoder);
+            case Character:
+                return new BCF2FieldEncoder.CharFieldEncoder(encoder);
+            default:
+                throw new TribbleException("Unrecognized line type: " + line.getType());
+        }
+    }
+
+
+    /**
+     * Class that writes one field specified by a {@link VCFInfoHeaderLine}
+     * contained in the attributes map of a {@link VariantContext}
+     */
+    abstract static class SiteWriter extends BCF2FieldWriter {
+
+        SiteWriter(final VCFInfoHeaderLine headerLine, final int dictionaryOffset, final BCF2Encoder encoder) {
+            super(headerLine, dictionaryOffset, encoder);
+        }
+        /** Encodes this field's value for vc; the key is written separately through encodeKey(). */
+        abstract void encode(final VariantContext vc) throws IOException;
+    }
+
+    /**
+     * INFO writer that accesses variant context fields stored in the VC's attributes map
+     */
+    static class SiteAttributeWriter extends SiteWriter {
+        // siteEncoder buffers and writes the value; boundedNonAtomic records whether the value count must be checked
+        private final BCF2FieldEncoder siteEncoder;
+        private final boolean boundedNonAtomic;
+
+        SiteAttributeWriter(final VCFInfoHeaderLine headerLine, final int dictionaryOffset, final BCF2Encoder encoder) {
+            super(headerLine, dictionaryOffset, encoder);
+            this.siteEncoder = BCF2FieldWriter.getEncoder(headerLine, encoder);
+
+            // If this line's count is unbounded, or the inner encoder is one of the atomic specializations,
+            // the inner encoder can always figure out the correct number of BCF2 values to write out by itself.
+            // Otherwise we need to inspect the context to determine the number of values to encode
+            // and possibly error if too many values were provided
+            this.boundedNonAtomic = headerLine.getCountType() != VCFHeaderLineCount.UNBOUNDED && !(
+                siteEncoder instanceof BCF2FieldEncoder.AtomicIntFieldEncoder || siteEncoder instanceof BCF2FieldEncoder.AtomicFloatFieldEncoder
+            );
+        }
+        /** Writes a typed missing value if the attribute is absent, else loads it and writes its type then its value(s). */
+        @Override
+        void encode(final VariantContext vc) throws IOException {
+            final Object o = vc.getAttribute(key);
+            if (o == null) {
+                encoder.encodeTypedMissing(siteEncoder.type);
+            } else {
+                siteEncoder.load(o);
+                if (boundedNonAtomic) {
+                    siteEncoder.checkNValues(headerLine, vc);
+                }
+
+                siteEncoder.encodeType();
+                siteEncoder.encode();
+            }
+        }
+    }
+
+    /**
+     * INFO writer for Flag fields; the flag's value in the VariantContext's attributes map is never read
+     */
+    static class SiteFlagWriter extends SiteWriter {
+
+        SiteFlagWriter(final VCFInfoHeaderLine headerLine, final int dictionaryOffset, final BCF2Encoder encoder) {
+            super(headerLine, dictionaryOffset, encoder);
+        }
+        /** Writes the same raw 0 (at INT8 width) for every record; vc is not inspected. */
+        @Override
+        void encode(final VariantContext vc) throws IOException {
+            // Writing a raw 0 rather than a value seems unintuitive, but it matches the behavior of htslib/bcftools
+            // See https://github.com/samtools/hts-specs/issues/384
+            encoder.encodeRawBytes(0, BCF2Type.INT8);
+        }
+    }
+
+
+    // TODO in the genotype writers, a missing genotype (one where variantContext.getGenotype(sampleName) == null)
+    //  is treated like one where all its attributes/inline fields are missing. This matches the behavior
+    //  of the old writer, which created a new empty Genotype object for each missing genotype. Is this right?
+    //  For example, should the FT string of a missing genotype be PASS or a padded empty string?
+
+    /**
+     * Class that writes one field specified by a {@link VCFFormatHeaderLine}
+     * from all Genotypes contained inside a {@link VariantContext}, iterating through each Genotype in order.
+     * <p>
+     * Writing occurs in two passes: first all the attribute objects are loaded into the lower level
+     * {@link BCF2FieldEncoder} then the attributes are written out. This is necessary as some aspects of the BCF
+     * encoding such as type and sometimes count can only be determined by inspecting all elements to be written.
+     */
+    abstract static class GenotypeWriter extends BCF2FieldWriter {
+
+        GenotypeWriter(final VCFFormatHeaderLine headerLine, final int dictionaryOffset, final BCF2Encoder encoder) {
+            super(headerLine, dictionaryOffset, encoder);
+        }
+        /** Encodes this field once per entry of sampleNames, in the order given; the key is written by encodeKey(). */
+        abstract void encode(final VariantContext vc, final List<String> sampleNames) throws IOException;
+    }
+
+    /**
+     * FORMAT writer that accesses genotype fields stored in the Genotype object's attributes map
+     */
+    static class GenotypeAttributeWriter extends GenotypeWriter {
+        // Despite its name, siteEncoder here buffers one value per sample rather than a single site-level value
+        private final BCF2FieldEncoder siteEncoder;
+        private final boolean boundedNonAtomic;
+
+        GenotypeAttributeWriter(final VCFFormatHeaderLine headerLine, final int dictionaryOffset, final BCF2Encoder encoder) {
+            super(headerLine, dictionaryOffset, encoder);
+            this.siteEncoder = BCF2FieldWriter.getEncoder(headerLine, encoder);
+
+            // If this line's count is unbounded, or the inner encoder is one of the atomic specializations,
+            // the inner encoder can always figure out the correct number of BCF2 values to write out by itself.
+            // Otherwise we need to inspect the context to determine the number of values to encode
+            // and possibly error if too many values were provided
+            this.boundedNonAtomic = headerLine.getCountType() != VCFHeaderLineCount.UNBOUNDED && !(
+                siteEncoder instanceof BCF2FieldEncoder.AtomicIntFieldEncoder || siteEncoder instanceof BCF2FieldEncoder.AtomicFloatFieldEncoder
+            );
+        }
+        /** Loads the attribute of every sample (null when the sample has no Genotype), then writes the type and all values. */
+        @Override
+        void encode(final VariantContext vc, final List<String> sampleNames) throws IOException {
+            for (final String s : sampleNames) {
+                final Genotype g = vc.getGenotype(s);
+                siteEncoder.load(g == null ? null : g.getExtendedAttribute(key));
+            }
+
+            if (boundedNonAtomic) {
+                siteEncoder.checkNValues(headerLine, vc);
+            }
+
+            siteEncoder.encodeType();
+            siteEncoder.encode();
+        }
+    }
+
+    /**
+     * Base class for FORMAT writers that access genotype fields stored directly
+     * as int fields in the Genotype object and not inside the attributes map.
+     */
+    abstract static class GenotypeInlineAtomicIntWriter extends GenotypeWriter {
+
+        // Scratch buffer reused across calls (reallocated when there are more samples) to avoid boxing the values
+        private int[] vs;
+
+        GenotypeInlineAtomicIntWriter(final VCFFormatHeaderLine headerLine, final int dictionaryOffset, final BCF2Encoder encoder) {
+            super(headerLine, dictionaryOffset, encoder);
+        }
+        /** Writes one int per sample; -1 (returned by get, or used for a sample with no Genotype) is written as missing. */
+        @Override
+        void encode(final VariantContext vc, final List<String> sampleNames) throws IOException {
+            if (vs == null || vs.length < sampleNames.size()) {
+                vs = new int[sampleNames.size()];
+            }
+            // First pass: buffer each sample's value and fold the integer type of every non-missing value into type
+            BCF2Type type = BCF2Type.INT8;
+            int i = 0;
+
+            for (final String s : sampleNames) {
+                final Genotype g = vc.getGenotype(s);
+                final int v = g == null ? -1 : get(g);
+                if (v != -1) {
+                    type = BCF2Utils.maxIntegerType(type, BCF2Utils.determineIntegerType(v));
+                }
+                vs[i++] = v;
+            }
+            // Second pass: exactly one value per sample, so the type descriptor always declares a size of 1
+            encoder.encodeType(1, type);
+
+            for (int j = 0; j < i; j++) {
+                final int v = vs[j];
+                if (v == -1) {
+                    encoder.encodeRawMissingValue(type);
+                } else {
+                    encoder.encodeRawInt(v, type);
+                }
+            }
+        }
+        /** Returns the inline int value to write for g; a return of -1 is interpreted by encode as missing. */
+        abstract int get(final Genotype g);
+    }
+
+    static class DPWriter extends GenotypeInlineAtomicIntWriter {
+        // Writer selected for VCFConstants.DEPTH_KEY; the value comes from the Genotype's inline DP accessor
+        DPWriter(final VCFFormatHeaderLine headerLine, final int dictionaryOffset, final BCF2Encoder encoder) {
+            super(headerLine, dictionaryOffset, encoder);
+        }
+        /** Returns g.getDP(); a value of -1 is written by the base class as missing. */
+        @Override
+        int get(final Genotype g) {
+            return g.getDP();
+        }
+    }
+
+    static class GQWriter extends GenotypeInlineAtomicIntWriter {
+        // Writer selected for VCFConstants.GENOTYPE_QUALITY_KEY; the value comes from the Genotype's inline GQ accessor
+        GQWriter(final VCFFormatHeaderLine headerLine, final int dictionaryOffset, final BCF2Encoder encoder) {
+            super(headerLine, dictionaryOffset, encoder);
+        }
+        /** Returns g.getGQ(); a value of -1 is written by the base class as missing. */
+        @Override
+        int get(final Genotype g) {
+            return g.getGQ();
+        }
+    }
+
+    /**
+     * Base class for FORMAT writers that access genotype fields stored directly
+     * as int[] fields in the Genotype object and not inside the attributes map.
+     */
+    abstract static class GenotypeInlineVecIntWriter extends GenotypeWriter {
+        // Per-sample arrays buffered between the two passes of encode(); null marks a sample with nothing to write
+        private final List<int[]> vs = new ArrayList<>();
+
+        GenotypeInlineVecIntWriter(final VCFFormatHeaderLine headerLine, final int dictionaryOffset, final BCF2Encoder encoder) {
+            super(headerLine, dictionaryOffset, encoder);
+        }
+        /** Writes headerLine.getCount(vc) ints per sample (all padding for null); throws if a sample has more values. */
+        @Override
+        void encode(final VariantContext vc, final List<String> sampleNames) throws IOException {
+            // Start from an empty buffer even if an earlier call threw before reaching the clear() at the end
+            vs.clear();
+            BCF2Type type = BCF2Type.INT8;
+            // For both vector of int types represented as inline fields by htsjdk (AD and PL),
+            // the count type can be determined by inspecting the header
+            final int nValues = headerLine.getCount(vc);
+            // Find narrowest integer type that fits all values
+            for (final String s : sampleNames) {
+                final Genotype g = vc.getGenotype(s);
+                final int[] v = g == null ? null : get(g);
+                vs.add(v);
+
+                if (v == null) continue;
+                if (v.length > nValues) {
+                    throw BCF2FieldWriter.tooManyValues(v.length, nValues, key, vc);
+                }
+                type = BCF2Utils.maxIntegerType(type, BCF2Utils.determineIntegerType(v));
+            }
+
+            encoder.encodeType(nValues, type);
+
+            for (final int[] v : vs) {
+                if (v == null) {
+                    encoder.encodePaddingValues(nValues, type);
+                } else {
+                    encoder.encodeRawVecInt(v, nValues, type);
+                }
+            }
+            vs.clear();
+        }
+        /** Returns the inline int[] to write for g, or null when the genotype has no value for this field. */
+        abstract int[] get(final Genotype g);
+    }
+
+    static class ADWriter extends GenotypeInlineVecIntWriter {
+        // Writer selected for VCFConstants.GENOTYPE_ALLELE_DEPTHS; the values come from the Genotype's inline AD accessor
+        ADWriter(final VCFFormatHeaderLine headerLine, final int dictionaryOffset, final BCF2Encoder encoder) {
+            super(headerLine, dictionaryOffset, encoder);
+        }
+        /** Returns g.getAD(); a null result is written by the base class as padding only. */
+        @Override
+        int[] get(final Genotype g) {
+            return g.getAD();
+        }
+    }
+
+    static class PLWriter extends GenotypeInlineVecIntWriter {
+        // Writer selected for VCFConstants.GENOTYPE_PL_KEY; the values come from the Genotype's inline PL accessor
+        PLWriter(final VCFFormatHeaderLine headerLine, final int dictionaryOffset, final BCF2Encoder encoder) {
+            super(headerLine, dictionaryOffset, encoder);
+        }
+        /** Returns g.getPL(); a null result is written by the base class as padding only. */
+        @Override
+        int[] get(final Genotype g) {
+            return g.getPL();
+        }
+    }
+
+    /**
+     * Writer for the FT or filter field. This is a special case of the String writer
+     * where the type of the value is known to be String (and not {@code List<String>})
+     * and null values must be specially handled by encoding them as PASS.
+     */
+    static class FTWriter extends GenotypeWriter {
+
+        private static final byte[] PASS = "PASS".getBytes(StandardCharsets.US_ASCII);
+        // Per-sample filter bytes, buffered until the longest one is known
+        private final List<byte[]> vs = new ArrayList<>();
+
+        FTWriter(final VCFFormatHeaderLine headerLine, final int dictionaryOffset, final BCF2Encoder encoder) {
+            super(headerLine, dictionaryOffset, encoder);
+        }
+        /** Writes each sample's filter string, using PASS when the sample has no Genotype or its filters are null. */
+        @Override
+        void encode(final VariantContext vc, final List<String> sampleNames) throws IOException {
+            int nValues = 0;
+            // nValues ends up as the longest filter string in bytes and is the size passed for every sample
+            for (final String s : sampleNames) {
+                final Genotype g = vc.getGenotype(s);
+                final String f;
+                final byte[] v;
+                if (g == null || (f = g.getFilters()) == null) {
+                    v = FTWriter.PASS;
+                } else {
+                    v = f.getBytes(StandardCharsets.UTF_8);
+                }
+                nValues = Math.max(nValues, v.length);
+                vs.add(v);
+            }
+
+            encoder.encodeType(nValues, BCF2Type.CHAR);
+            for (final byte[] v : vs) {
+                encoder.encodeRawString(v, nValues);
+            }
+            vs.clear();
+        }
+    }
+
+    /**
+     * Specialized writer for GT field.
+     */
+    static class GTWriter extends GenotypeWriter {
+        // Each allele is written as (allele index + 1) << 1, with the low bit set for phased alleles after the first
+        private final HashMap<Allele, Integer> alleleMapForTriPlus = new HashMap<>(5);
+        private Allele ref, alt1;
+
+        GTWriter(final VCFFormatHeaderLine headerLine, final int dictionaryOffset, final BCF2Encoder encoder) {
+            super(headerLine, dictionaryOffset, encoder);
+        }
+        /** Writes vc.getMaxPloidy(2) values per sample: encoded alleles plus padding, or all no-calls for a null Genotype. */
+        @Override
+        void encode(final VariantContext vc, final List<String> sampleNames) throws IOException {
+            buildAlleleMap(vc);
+            final int nValues = vc.getMaxPloidy(2);
+            // Offsets should always fit into a signed 8-bit integer but do this check anyway for spec compliance
+            final BCF2Type type = BCF2Utils.determineIntegerType(vc.getNAlleles() << 1);
+
+            encoder.encodeType(nValues, type);
+
+            for (final String s : sampleNames) {
+                final Genotype g = vc.getGenotype(s);
+                if (g != null) {
+                    boolean notFirst = false;
+                    for (final Allele a : g.getAlleles()) {
+                        // TODO Genotype and Allele classes can't properly store phasing information for ploidy > 2
+                        //  Currently all non ref alleles are assumed to have the same phasing
+                        final int encoded = encodeAlleleWithoutPhasing(a) | ((g.isPhased() && notFirst) ? 0x01 : 0x00);
+                        encoder.encodeRawInt(encoded, type);
+                        notFirst = true;
+                    }
+                    // Pad with missing values if sample ploidy is less than maximum
+                    final int padding = nValues - g.getPloidy();
+                    if (padding > 0) {
+                        encoder.encodePaddingValues(padding, type);
+                    }
+                } else {
+                    // TODO read the spec more closely, look at htslib, this may not be correct
+                    // Entirely missing genotype, which we encode as vector of no call
+                    for (int i = 0; i < nValues; i++) {
+                        encoder.encodeRawInt(0, type);
+                    }
+                }
+            }
+        }
+
+        /**
+         * Fast path code to encode an allele without phasing information.
+         * Inline tests for == against ref (most common, first test)
+         * == alt1 (second most common, second test)
+         * == NO_CALL (third)
+         * and finally in the map from allele => offset for all alt 2+ alleles
+         *
+         * @param a the allele we want to encode
+         * @return the encoded allele without phasing information
+         */
+        private int encodeAlleleWithoutPhasing(final Allele a) {
+            if (a == ref) return 2;                 // ( 0 + 1) << 1
+            else if (a == alt1) return 4;           // ( 1 + 1) << 1
+            else if (a == Allele.NO_CALL) return 0; // (-1 + 1) << 1
+            else {
+                final Integer i = alleleMapForTriPlus.get(a);
+                if (i == null) throw new IllegalStateException("BUG: Couldn't find allele offset for allele " + a);
+                return i;
+            }
+        }
+        /** Caches ref and alt1 for the identity fast path and rebuilds the offset map for any further alternate alleles. */
+        private void buildAlleleMap(final VariantContext vc) {
+            // ref and alt1 are handled by a fast path when determining the offset
+            // so they do not need to be placed in the map
+            final int nAlleles = vc.getNAlleles();
+            ref = vc.getReference();
+            alt1 = nAlleles > 1 ? vc.getAlternateAllele(0) : null;
+            // Always drop the previous site's entries so a stale allele can never be resolved against this site
+            alleleMapForTriPlus.clear();
+            if (nAlleles > 2) {
+                // for multi-allelics we need to add lookups for the alt 2+ alleles
+                final List<Allele> alleles = vc.getAlleles();
+                for (int i = 2; i < alleles.size(); i++) {
+                    // Perform encoding here so we only do it once instead of after every lookup
+                    alleleMapForTriPlus.put(alleles.get(i), (i + 1) << 1);
+                }
+            }
+        }
+    }
+
+
+    //////////////////////////////////////////////////
+    // Exception utilities                          //
+    //////////////////////////////////////////////////
+    // Builds (does not throw) the exception for a field carrying more values than expected for its header line
+    static TribbleException tooManyValues(final int observed, final int expected, final String key, final VariantContext vc) {
+        return new TribbleException(String.format("Observed number of values: %d exceeds expected number: %d for attribute: %s in VariantContext: %s", observed, expected, key, vc));
+    }
+}
diff --git a/src/main/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldWriterManager.java b/src/main/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldWriterManager.java
new file mode 100644
index 0000000000..b73a88036d
--- /dev/null
+++ b/src/main/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldWriterManager.java
@@ -0,0 +1,106 @@
+package htsjdk.variant.bcf2.BCF2FieldWriter;
+
+import htsjdk.samtools.util.Log;
+import htsjdk.tribble.TribbleException;
+import htsjdk.variant.bcf2.BCF2Encoder;
+import htsjdk.variant.variantcontext.VariantContext;
+import htsjdk.variant.vcf.VCFCompoundHeaderLine;
+import htsjdk.variant.vcf.VCFFormatHeaderLine;
+import htsjdk.variant.vcf.VCFHeader;
+import htsjdk.variant.vcf.VCFHeaderLineCount;
+import htsjdk.variant.vcf.VCFHeaderLineType;
+import htsjdk.variant.vcf.VCFInfoHeaderLine;
+import htsjdk.variant.vcf.VCFStandardHeaderLines;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Holds one writer per INFO and FORMAT line of a VCF header and dispatches the fields
+ * of each VariantContext to them, writing every field's key immediately before its value(s).
+ */
+public class BCF2FieldWriterManager {
+    private static final Log log = Log.getInstance(BCF2FieldWriterManager.class);
+
+    private final Map<String, BCF2FieldWriter.SiteWriter> infoWriters;
+    private final Map<String, BCF2FieldWriter.GenotypeWriter> formatWriters;
+    private final List<String> sampleNames;
+
+    /** Creates a writer for every INFO and FORMAT line of header; throws TribbleException if dict lacks a line's ID. */
+    public BCF2FieldWriterManager(final VCFHeader header, final Map<String, Integer> dict, final BCF2Encoder encoder) {
+        infoWriters = new HashMap<>(header.getInfoHeaderLines().size());
+        for (final VCFInfoHeaderLine line : header.getInfoHeaderLines()) {
+            final String field = line.getID();
+            validateStandardHeader(line, VCFStandardHeaderLines.getInfoLine(field, false));
+            final Integer offset = dict.get(field);
+            if (offset == null) throw new TribbleException("No BCF2 dictionary entry for INFO field " + field + " defined in the VCFHeader");
+            infoWriters.put(field, BCF2FieldWriter.createSiteWriter(line, offset, encoder));
+        }
+
+        formatWriters = new HashMap<>(header.getFormatHeaderLines().size());
+        for (final VCFFormatHeaderLine line : header.getFormatHeaderLines()) {
+            final String field = line.getID();
+            validateStandardHeader(line, VCFStandardHeaderLines.getFormatLine(field, false));
+            final Integer offset = dict.get(field);
+            if (offset == null) throw new TribbleException("No BCF2 dictionary entry for FORMAT field " + field + " defined in the VCFHeader");
+            formatWriters.put(field, BCF2FieldWriter.createGenotypeWriter(line, offset, encoder));
+        }
+
+        sampleNames = header.getGenotypeSamples();
+    }
+
+    /** Writes key and value for every attribute of vc; throws TribbleException for a key with no INFO header line. */
+    public void writeInfo(final VariantContext vc) throws IOException {
+        for (final String field : vc.getAttributes().keySet()) {
+            final BCF2FieldWriter.SiteWriter writer = infoWriters.get(field);
+            if (writer == null) throw errorUnexpectedFieldToWrite(vc, field, "INFO");
+            writer.encodeKey();
+            writer.encode(vc);
+        }
+    }
+
+    /** Writes key and per-sample values for each of genotypeFields; throws TribbleException for an undeclared field. */
+    public void writeFormat(final VariantContext vc, final List<String> genotypeFields) throws IOException {
+        for (final String field : genotypeFields) {
+            final BCF2FieldWriter.GenotypeWriter writer = formatWriters.get(field);
+            if (writer == null) throw errorUnexpectedFieldToWrite(vc, field, "FORMAT");
+            writer.encodeKey();
+            writer.encode(vc, sampleNames);
+        }
+    }
+
+    /** Logs (never throws) when a line using a standard key deviates in type or count; no-op if expectedLine is null. */
+    private static <T extends VCFCompoundHeaderLine> void validateStandardHeader(final T actualLine, final T expectedLine) {
+        if (expectedLine == null) return;
+        final VCFHeaderLineType actualType = actualLine.getType();
+        final VCFHeaderLineType expectedType = expectedLine.getType();
+        if (actualType != expectedType) {
+            log.error(String.format(
+                "Header with standard key: `%s` has type: %s which does not match standard type: %s",
+                actualLine.getID(), actualType, expectedType
+            ));
+        }
+
+        final VCFHeaderLineCount actualCountType = actualLine.getCountType();
+        final VCFHeaderLineCount expectedCountType = expectedLine.getCountType();
+        if (actualCountType != expectedCountType || (actualLine.isFixedCount() && actualLine.getCount() != expectedLine.getCount())) {
+            log.error(String.format(
+                "Header with standard key: `%s` has count: %s which does not match standard count: %s",
+                actualLine.getID(),
+                actualLine.isFixedCount() ? actualLine.getCount() : actualCountType,
+                expectedLine.isFixedCount() ? expectedLine.getCount() : expectedCountType
+            ));
+        }
+    }
+
+    /** Builds (does not throw) the exception for a field present in vc but not declared in the VCFHeader. */
+    private static TribbleException errorUnexpectedFieldToWrite(final VariantContext vc, final String field, final String fieldType) {
+        return new TribbleException(String.format(
+            "Found %s field %s of VariantContext at %s:%d from %s that has not been defined in the VCFHeader",
+            fieldType, field,
+            vc.getContig(), vc.getStart(), vc.getSource()
+        ));
+    }
+}
diff --git a/src/main/java/htsjdk/variant/bcf2/BCF2GenotypeFieldDecoders.java b/src/main/java/htsjdk/variant/bcf2/BCF2GenotypeFieldDecoders.java
index c406b6602d..173e095687 100644
--- a/src/main/java/htsjdk/variant/bcf2/BCF2GenotypeFieldDecoders.java
+++ b/src/main/java/htsjdk/variant/bcf2/BCF2GenotypeFieldDecoders.java
@@ -33,6 +33,7 @@
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 
@@ -50,14 +51,13 @@ public class BCF2GenotypeFieldDecoders {
     private final static int MIN_SAMPLES_FOR_FASTPATH_GENOTYPES = 0; // TODO -- update to reasonable number
 
     // initialized once per writer to allow parallel writers to work
-    private final HashMap<String, Decoder> genotypeFieldDecoder = new HashMap<String, Decoder>();
+    private final HashMap<String, Decoder> genotypeFieldDecoder = new HashMap<>();
     private final Decoder defaultDecoder = new GenericDecoder();
 
     public BCF2GenotypeFieldDecoders(final VCFHeader header) {
         // TODO -- fill in appropriate decoders for each FORMAT field in the header
 
         genotypeFieldDecoder.put(VCFConstants.GENOTYPE_KEY, new GTDecoder());
-        // currently the generic decoder handles FILTER values properly, in so far as we don't tolerate multiple filter field values per genotype
         genotypeFieldDecoder.put(VCFConstants.GENOTYPE_FILTER_KEY, new FTDecoder());
         genotypeFieldDecoder.put(VCFConstants.DEPTH_KEY, new DPDecoder());
         genotypeFieldDecoder.put(VCFConstants.GENOTYPE_ALLELE_DEPTHS, new ADDecoder());
@@ -74,41 +74,41 @@ public BCF2GenotypeFieldDecoders(final VCFHeader header) {
     /**
      * Return decoder appropriate for field, or the generic decoder if no
      * specialized one is bound
+     *
      * @param field the GT field to decode
      * @return a non-null decoder
      */
     public Decoder getDecoder(final String field) {
-        final Decoder d = genotypeFieldDecoder.get(field);
-        return d == null ? defaultDecoder : d;
+        return genotypeFieldDecoder.getOrDefault(field, defaultDecoder);
     }
 
     /**
      * Decoder a field (implicit from creation) encoded as
      * typeDescriptor in the decoder object in the GenotypeBuilders
      * one for each sample in order.
-     *
+     * <p>
      * The way this works is that this decode method
      * iterates over the builders, decoding a genotype field
      * in BCF2 for each sample from decoder.
-     *
+     * <p>
      * This system allows us to easily use specialized
      * decoders for specific genotype field values. For example,
      * we use a special decoder to directly read the BCF2 data for
      * the PL field into a int[] rather than the generic List of Integer
      */
     public interface Decoder {
-        public void decode(final List<Allele> siteAlleles,
-                           final String field,
-                           final BCF2Decoder decoder,
-                           final byte typeDescriptor,
-                           final int numElements,
-                           final GenotypeBuilder[] gbs) throws IOException;
+        void decode(final List<Allele> siteAlleles,
+                    final String field,
+                    final BCF2Decoder decoder,
+                    final byte typeDescriptor,
+                    final int numElements,
+                    final GenotypeBuilder[] gbs) throws IOException;
     }
 
-    private class GTDecoder implements Decoder {
+    private static class GTDecoder implements Decoder {
         @Override
         public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException {
-            if ( ENABLE_FASTPATH_GT && siteAlleles.size() == 2 && numElements == 2 && gbs.length >= MIN_SAMPLES_FOR_FASTPATH_GENOTYPES )
+            if (ENABLE_FASTPATH_GT && siteAlleles.size() == 2 && numElements == 2 && gbs.length >= MIN_SAMPLES_FOR_FASTPATH_GENOTYPES)
                 fastBiallelicDiploidDecode(siteAlleles, decoder, typeDescriptor, gbs);
             else {
                 generalDecode(siteAlleles, numElements, decoder, typeDescriptor, gbs);
@@ -117,44 +117,47 @@ public void decode(final List<Allele> siteAlleles, final String field, final BCF
 
         /**
          * fast path for many samples with diploid genotypes
-         *
+         * <p>
          * The way this would work is simple.  Create a List<Allele> diploidGenotypes[] object
          * After decoding the offset, if that sample is diploid compute the
          * offset into the alleles vector which is simply offset = allele0 * nAlleles + allele1
          * if there's a value at diploidGenotypes[offset], use it, otherwise create the genotype
          * cache it and use that
-         *
+         * <p>
          * Some notes.  If there are nAlleles at the site, there are implicitly actually
-         * n + 1 options including
+         * n + 1 options including no-call
          */
         @SuppressWarnings({"unchecked"})
-        private final void fastBiallelicDiploidDecode(final List<Allele> siteAlleles,
-                                                      final BCF2Decoder decoder,
-                                                      final byte typeDescriptor,
-                                                      final GenotypeBuilder[] gbs) throws IOException {
+        private void fastBiallelicDiploidDecode(final List<Allele> siteAlleles,
+                                                final BCF2Decoder decoder,
+                                                final byte typeDescriptor,
+                                                final GenotypeBuilder[] gbs) throws IOException {
             final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
 
             final int nPossibleGenotypes = 3 * 3;
-            final Object allGenotypes[] = new Object[nPossibleGenotypes];
+            final Object[] allGenotypes = new Object[nPossibleGenotypes];
 
-            for ( final GenotypeBuilder gb : gbs ) {
+            for (final GenotypeBuilder gb : gbs) {
                 final int a1 = decoder.decodeInt(type);
                 final int a2 = decoder.decodeInt(type);
 
-                if ( a1 == type.getMissingBytes() ) {
-                    assert a2 == type.getMissingBytes();
+                final boolean phased;
+                if (a1 == decoder.getPaddingValue(type)) {
+                    assert a2 == decoder.getPaddingValue(type);
                     // no called sample GT = .
                     gb.alleles(null);
-                } else if ( a2 == type.getMissingBytes() ) {
-                    gb.alleles(Arrays.asList(getAlleleFromEncoded(siteAlleles, a1)));
+                    phased = false;
+                } else if (a2 == decoder.getPaddingValue(type)) {
+                    gb.alleles(Collections.singletonList(getAlleleFromEncoded(siteAlleles, a1)));
+                    phased = (a1 & 0x01) == 1;
                 } else {
                     // downshift to remove phase
                     final int offset = (a1 >> 1) * 3 + (a2 >> 1);
                     assert offset < allGenotypes.length;
 
                     // TODO -- how can I get rid of this cast?
-                    List<Allele> gt = (List<Allele>)allGenotypes[offset];
-                    if ( gt == null ) {
+                    List<Allele> gt = (List<Allele>) allGenotypes[offset];
+                    if (gt == null) {
                         final Allele allele1 = getAlleleFromEncoded(siteAlleles, a1);
                         final Allele allele2 = getAlleleFromEncoded(siteAlleles, a2);
                         gt = Arrays.asList(allele1, allele2);
@@ -162,116 +165,120 @@ private final void fastBiallelicDiploidDecode(final List<Allele> siteAlleles,
                     }
 
                     gb.alleles(gt);
+                    phased = (a2 & 0x01) == 1;
                 }
 
-                final boolean phased = (a2 & 0x01) == 1;
                 gb.phased(phased);
             }
         }
 
-        private final void generalDecode(final List<Allele> siteAlleles,
-                                         final int ploidy,
-                                         final BCF2Decoder decoder,
-                                         final byte typeDescriptor,
-                                         final GenotypeBuilder[] gbs) throws IOException {
+        private void generalDecode(final List<Allele> siteAlleles,
+                                   final int ploidy,
+                                   final BCF2Decoder decoder,
+                                   final byte typeDescriptor,
+                                   final GenotypeBuilder[] gbs) throws IOException {
             final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
 
             // a single cache for the encoded genotypes, since we don't actually need this vector
             final int[] tmp = new int[ploidy];
 
-            for ( final GenotypeBuilder gb : gbs ) {
+            for (final GenotypeBuilder gb : gbs) {
                 final int[] encoded = decoder.decodeIntArray(ploidy, type, tmp);
-                if ( encoded == null )
+                if (encoded == null)
                     // no called sample GT = .
                     gb.alleles(null);
                 else {
                     assert encoded.length > 0;
 
                     // we have at least some alleles to decode
-                    final List<Allele> gt = new ArrayList<Allele>(encoded.length);
+                    final List<Allele> gt = new ArrayList<>(encoded.length);
 
                     // note that the auto-pruning of fields magically handles different
                     // ploidy per sample at a site
-                    for ( final int encode : encoded )
+                    for (final int encode : encoded)
                         gt.add(getAlleleFromEncoded(siteAlleles, encode));
 
                     gb.alleles(gt);
+                    // TODO htsjdk's Genotype class cannot properly encode phasing for ploidy > 2
+                    //  See https://github.com/samtools/htsjdk/issues/1044
                     final boolean phased = ((encoded.length > 1 ? encoded[1] : encoded[0]) & 0x01) == 1;
                     gb.phased(phased);
                 }
             }
         }
 
-        private final Allele getAlleleFromEncoded(final List<Allele> siteAlleles, final int encode) {
-            final int offset = encode >> 1;
+        private Allele getAlleleFromEncoded(final List<Allele> siteAlleles, final int encode) {
+            final int offset = encode >>> 1;
             return offset == 0 ? Allele.NO_CALL : siteAlleles.get(offset - 1);
         }
     }
 
-    private class DPDecoder implements Decoder {
+    private static class DPDecoder implements Decoder {
         @Override
         public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException {
-            for ( final GenotypeBuilder gb : gbs ) {
+            for (final GenotypeBuilder gb : gbs) {
                 // the -1 is for missing
                 gb.DP(decoder.decodeInt(typeDescriptor, -1));
             }
         }
     }
 
-    private class GQDecoder implements Decoder {
+    private static class GQDecoder implements Decoder {
         @Override
         public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException {
-            for ( final GenotypeBuilder gb : gbs ) {
+            for (final GenotypeBuilder gb : gbs) {
                 // the -1 is for missing
                 gb.GQ(decoder.decodeInt(typeDescriptor, -1));
             }
         }
     }
 
-    private class ADDecoder implements Decoder {
+    private static class ADDecoder implements Decoder {
         @Override
         public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException {
-            for ( final GenotypeBuilder gb : gbs ) {
+            for (final GenotypeBuilder gb : gbs) {
                 gb.AD(decoder.decodeIntArray(typeDescriptor, numElements));
             }
         }
     }
 
-    private class PLDecoder implements Decoder {
+    private static class PLDecoder implements Decoder {
         @Override
         public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException {
-            for ( final GenotypeBuilder gb : gbs ) {
+            for (final GenotypeBuilder gb : gbs) {
                 gb.PL(decoder.decodeIntArray(typeDescriptor, numElements));
             }
         }
     }
 
-    private class GenericDecoder implements Decoder {
+    private static class GenericDecoder implements Decoder {
         @Override
         public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException {
-            for ( final GenotypeBuilder gb : gbs ) {
-                Object value = decoder.decodeTypedValue(typeDescriptor, numElements);
-                if ( value != null ) { // don't add missing values
-                    if ( value instanceof List && ((List)value).size() == 1) {
-                        // todo -- I really hate this, and it suggests that the code isn't completely right
-                        // the reason it's here is that it's possible to prune down a vector to a singleton
-                        // value and there we have the contract that the value comes back as an atomic value
-                        // not a vector of size 1
-                        value = ((List)value).get(0);
-                    }
+            for (final GenotypeBuilder gb : gbs) {
+                final Object value = decoder.decodeTypedValue(typeDescriptor, numElements);
+                if (value == null) continue;
+                if (value instanceof List && ((List) value).size() == 1) {
+                    // TODO not sure what this refers to, htsjdk itself doesn't make any assumptions about
+                    //  the concrete type of the data contained in the attributes map.
+                    //  Maybe there are upstream consumers who have this contract.
+
+                    // todo -- I really hate this, and it suggests that the code isn't completely right
+                    // the reason it's here is that it's possible to prune down a vector to a singleton
+                    // value and there we have the contract that the value comes back as an atomic value
+                    // not a vector of size 1
+                    gb.attribute(field, ((List) value).get(0));
+                } else {
                     gb.attribute(field, value);
                 }
             }
         }
     }
 
-    private class FTDecoder implements Decoder {
+    private static class FTDecoder implements Decoder {
         @Override
         public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException {
-            for ( final GenotypeBuilder gb : gbs ) {
-                Object value = decoder.decodeTypedValue(typeDescriptor, numElements);
-                assert value == null || value instanceof String;
-                gb.filter((String)value);
+            for (final GenotypeBuilder gb : gbs) {
+                gb.filters(decoder.decodeExplodedStrings(numElements, ';'));
             }
         }
     }
diff --git a/src/main/java/htsjdk/variant/bcf2/BCF2LazyGenotypesDecoder.java b/src/main/java/htsjdk/variant/bcf2/BCF2LazyGenotypesDecoder.java
index aadea53dfb..a23c74c091 100644
--- a/src/main/java/htsjdk/variant/bcf2/BCF2LazyGenotypesDecoder.java
+++ b/src/main/java/htsjdk/variant/bcf2/BCF2LazyGenotypesDecoder.java
@@ -65,7 +65,8 @@ public LazyGenotypesContext.LazyData parse(final Object data) {
         try {
 
             // load our byte[] data into the decoder
-            final BCF2Decoder decoder = new BCF2Decoder(((BCF2Codec.LazyData)data).bytes);
+            final BCF2Decoder decoder = BCF2Decoder.getDecoder(codec.getBCFVersion());
+            decoder.setRecordBytes(((BCF2Codec.LazyData)data).bytes);
 
             for ( int i = 0; i < nSamples; i++ )
                 builders[i].reset(true);
diff --git a/src/main/java/htsjdk/variant/bcf2/BCF2Type.java b/src/main/java/htsjdk/variant/bcf2/BCF2Type.java
index 11c8edf6c5..89610c7569 100644
--- a/src/main/java/htsjdk/variant/bcf2/BCF2Type.java
+++ b/src/main/java/htsjdk/variant/bcf2/BCF2Type.java
@@ -1,27 +1,27 @@
 /*
-* Copyright (c) 2012 The Broad Institute
-* 
-* Permission is hereby granted, free of charge, to any person
-* obtaining a copy of this software and associated documentation
-* files (the "Software"), to deal in the Software without
-* restriction, including without limitation the rights to use,
-* copy, modify, merge, publish, distribute, sublicense, and/or sell
-* copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following
-* conditions:
-* 
-* The above copyright notice and this permission notice shall be
-* included in all copies or substantial portions of the Software.
-* 
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
-* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-*/
+ * Copyright (c) 2012 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+ * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
 
 package htsjdk.variant.bcf2;
 
@@ -39,62 +39,73 @@
 public enum BCF2Type {
     // the actual values themselves
     MISSING(0, 0, 0x00) {
-        @Override public int read(final InputStream in) throws IOException {
+        @Override
+        public int read(final InputStream in) throws IOException {
             throw new IllegalArgumentException("Cannot read MISSING type");
         }
-        @Override public void write(final int value, final OutputStream out) throws IOException {
+
+        @Override
+        public void write(final int value, final OutputStream out) throws IOException {
             throw new IllegalArgumentException("Cannot write MISSING type");
         }
     },
 
-    INT8 (1, 1, 0xFFFFFF80,        -127,        127) {
+    INT8(1, 1, 0xFFFFFF80, 0xFFFFFF81, -120, 127) {
         @Override
         public int read(final InputStream in) throws IOException {
-            return BCF2Utils.readByte(in);
+            // The cast to byte followed by the implicit widening back to int is needed so that
+            // negative values are sign-extended to their proper 32-bit representation.
+            // The value read from the stream, before truncation to byte, is a 32-bit int
+            // whose 3 high bytes are 0; the widening conversion then performs the sign extension.
+            // The same applies to the read method of INT16.
+            return (byte) in.read();
         }
 
         @Override
         public void write(final int value, final OutputStream out) throws IOException {
-            out.write(0xFF & value);   // TODO -- do we need this operation?
+            // There is no need to mask off the higher bytes: OutputStream.write(int) only
+            // writes the low-order byte of the int it is given. The same applies to the
+            // write methods of the larger int sizes below.
+            out.write(value);
         }
     },
 
-    INT16(2, 2, 0xFFFF8000,      -32767,      32767) {
+    INT16(2, 2, 0xFFFF8000, 0xFFFF8001, -32760, 32767) {
         @Override
         public int read(final InputStream in) throws IOException {
-            final int b2 = BCF2Utils.readByte(in) & 0xFF;
-            final int b1 = BCF2Utils.readByte(in) & 0xFF;
-            return (short)((b1 << 8) | b2);
+            final int b2 = in.read();
+            final int b1 = in.read();
+            return (short) ((b1 << 8) | b2);
         }
 
         @Override
         public void write(final int value, final OutputStream out) throws IOException {
             // TODO -- optimization -- should we put this in a local buffer?
-            out.write((0x00FF & value));
-            out.write((0xFF00 & value) >> 8);
+            out.write(value);
+            out.write(value >> 8);
         }
     },
 
-    INT32(3, 4, 0x80000000, -2147483647, 2147483647) {
+    INT32(3, 4, 0x80000000, 0x80000001, -2147483640, 2147483647) {
         @Override
         public int read(final InputStream in) throws IOException {
-            final int b4 = BCF2Utils.readByte(in) & 0xFF;
-            final int b3 = BCF2Utils.readByte(in) & 0xFF;
-            final int b2 = BCF2Utils.readByte(in) & 0xFF;
-            final int b1 = BCF2Utils.readByte(in) & 0xFF;
-            return (int)(b1 << 24 | b2 << 16 | b3 << 8 | b4);
+            final int b4 = in.read();
+            final int b3 = in.read();
+            final int b2 = in.read();
+            final int b1 = in.read();
+            return b1 << 24 | b2 << 16 | b3 << 8 | b4;
         }
 
         @Override
         public void write(final int value, final OutputStream out) throws IOException {
-            out.write((0x000000FF & value));
-            out.write((0x0000FF00 & value) >> 8);
-            out.write((0x00FF0000 & value) >> 16);
-            out.write((0xFF000000 & value) >> 24);
+            out.write(value);
+            out.write(value >> 8);
+            out.write(value >> 16);
+            out.write(value >> 24);
         }
     },
 
-    FLOAT(5, 4, 0x7F800001) {
+    FLOAT(5, 4, 0x7F800001, 0x7F800002, 0, 0) {
         @Override
         public int read(final InputStream in) throws IOException {
             return INT32.read(in);
@@ -106,7 +117,10 @@ public void write(final int value, final OutputStream out) throws IOException {
         }
     },
 
-    CHAR (7, 1, 0x00000000) {
+    // CHAR isn't given a MISSING or EOV value in the spec, but for the purposes of
+    // padding strings (i.e. variable length vectors of chars), it is treated as if
+    // '\0' or NULL is both the MISSING and EOV value of CHAR
+    CHAR(7, 1, 0x00000000) {
         @Override
         public int read(final InputStream in) throws IOException {
             return INT8.read(in);
@@ -120,25 +134,40 @@ public void write(final int value, final OutputStream out) throws IOException {
 
     private final int id;
     private final Object missingJavaValue;
+
+    /*
+    Note that the values of these fields for INT8 and INT16 differ from those given in the spec.
+    The values given here are as if they had been sign-extended to 32 bits from their native
+    integer width (meaning they have all bits above that width set, as the missing and EOV
+    values all have their highest bit set in their native width).
+
+    This is so that they compare equal to the values returned by the various
+    integer types' read methods, which must also sign-extend their return values so
+    that a uniformly sized 32-bit int can be returned.
+     */
     private final int missingBytes;
+    private final int EOVBytes;
     private final int sizeInBytes;
+
     private final long minValue, maxValue;
 
     BCF2Type(final int id, final int sizeInBytes, final int missingBytes) {
-        this(id, sizeInBytes, missingBytes, 0, 0);
+        this(id, sizeInBytes, missingBytes, 0, 0, 0);
     }
 
-    BCF2Type(final int id, final int sizeInBytes, final int missingBytes, final long minValue, final long maxValue) {
+    BCF2Type(final int id, final int sizeInBytes, final int missingBytes, final int EOVBytes, final long minValue, final long maxValue) {
         this.id = id;
         this.sizeInBytes = sizeInBytes;
         this.missingJavaValue = null;
         this.missingBytes = missingBytes;
+        this.EOVBytes = EOVBytes;
         this.minValue = minValue;
         this.maxValue = maxValue;
     }
 
     /**
      * How many bytes are used to represent this type on disk?
+     *
      * @return
      */
     public int getSizeInBytes() {
@@ -147,19 +176,24 @@ public int getSizeInBytes() {
 
     /**
      * The ID according to the BCF2 specification
+     *
      * @return
      */
-    public int getID() { return id; }
+    public int getID() {
+        return id;
+    }
 
     /**
      * Can we encode value v in this type, according to its declared range.
-     *
+     * <p>
      * Only makes sense for integer values
      *
      * @param v
      * @return
      */
-    public final boolean withinRange(final long v) { return v >= minValue && v <= maxValue; }
+    public final boolean withinRange(final long v) {
+        return v <= maxValue && v >= minValue;
+    }
 
     /**
      * Return the java object (aka null) that is used to represent a missing value for this
@@ -167,7 +201,9 @@ public int getSizeInBytes() {
      *
      * @return
      */
-    public Object getMissingJavaValue() { return missingJavaValue; }
+    public Object getMissingJavaValue() {
+        return missingJavaValue;
+    }
 
     /**
      * The bytes (encoded as an int) that are used to represent a missing value
@@ -175,7 +211,19 @@ public int getSizeInBytes() {
      *
      * @return
      */
-    public int getMissingBytes() { return missingBytes; }
+    public int getMissingBytes() {
+        return missingBytes;
+    }
+
+    /**
+     * The bytes (encoded as an int) that are used to represent an end of vector value
+     * for this type in BCF2
+     *
+     * @return the end-of-vector bit pattern for this type, encoded as an int
+     */
+    public int getEOVBytes() {
+        return EOVBytes;
+    }
 
     /**
      * An enum set of the types that might represent Integer values
@@ -195,7 +243,7 @@ public boolean isIntegerType() {
 
     /**
      * Read a value from in stream of this BCF2 type as an int [32 bit] collection of bits
-     *
+     * <p>
      * For intX and char values this is just the int / byte value of the underlying data represented as a 32 bit int
      * For a char the result must be converted to a char by (char)(byte)(0x0F &amp; value)
      * For doubles it's necessary to convert subsequently this value to a double via Double.bitsToDouble()
@@ -211,4 +259,16 @@ public int read(final InputStream in) throws IOException {
     public void write(final int value, final OutputStream out) throws IOException {
         throw new IllegalArgumentException("Not implemented");
     }
+
+    private enum Special {
+        MISSING,
+        EOV,
+    }
+
+    /**
+     * @return a unique End Of Vector object used by the low level decoder
+     */
+    public static Object EOVValue() {
+        return Special.EOV;
+    }
 }
diff --git a/src/main/java/htsjdk/variant/bcf2/BCF2Utils.java b/src/main/java/htsjdk/variant/bcf2/BCF2Utils.java
index 545ede7497..f64f49a9b6 100644
--- a/src/main/java/htsjdk/variant/bcf2/BCF2Utils.java
+++ b/src/main/java/htsjdk/variant/bcf2/BCF2Utils.java
@@ -27,36 +27,27 @@
 
 import htsjdk.samtools.util.FileExtensions;
 import htsjdk.tribble.TribbleException;
-import htsjdk.variant.vcf.VCFConstants;
 import htsjdk.variant.vcf.VCFHeader;
 import htsjdk.variant.vcf.VCFHeaderLine;
 import htsjdk.variant.vcf.VCFIDHeaderLine;
 import htsjdk.variant.vcf.VCFSimpleHeaderLine;
 
 import java.io.File;
-import java.io.FileNotFoundException;
 import java.io.FileOutputStream;
 import java.io.IOException;
-import java.io.InputStream;
-import java.lang.reflect.Array;
-import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collections;
-import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
-import java.util.Set;
 
 /**
  * Common utilities for working with BCF2 files
- *
+ * <p>
  * Includes convenience methods for encoding, decoding BCF2 type descriptors (size + type)
  *
  * @author depristo
  * @since 5/12
  */
 public final class BCF2Utils {
-    public static final int MAX_ALLELES_IN_GENOTYPES = 127;
 
     public static final int OVERFLOW_ELEMENT_MARKER = 15;
     public static final int MAX_INLINE_ELEMENTS = 14;
@@ -66,55 +57,16 @@ public final class BCF2Utils {
 
     static {
         int maxID = -1;
-        for ( BCF2Type v : BCF2Type.values() ) maxID = Math.max(v.getID(), maxID);
-        ID_TO_ENUM = new BCF2Type[maxID+1];
-        for ( BCF2Type v : BCF2Type.values() ) ID_TO_ENUM[v.getID()] = v;
+        for (final BCF2Type v : BCF2Type.values()) maxID = Math.max(v.getID(), maxID);
+        ID_TO_ENUM = new BCF2Type[maxID + 1];
+        for (final BCF2Type v : BCF2Type.values()) ID_TO_ENUM[v.getID()] = v;
     }
 
-    private BCF2Utils() {}
-
-    /**
-     * Create a strings dictionary from the VCF header
-     *
-     * The dictionary is an ordered list of common VCF identifers (FILTER, INFO, and FORMAT)
-     * fields.
-     *
-     * Note that its critical that the list be dedupped and sorted in a consistent manner each time,
-     * as the BCF2 offsets are encoded relative to this dictionary, and if it isn't determined exactly
-     * the same way as in the header each time it's very bad
-     *
-     * @param header the VCFHeader from which to build the dictionary
-     * @return a non-null dictionary of elements, may be empty
-     */
-    public static ArrayList<String> makeDictionary(final VCFHeader header) {
-        final Set<String> seen = new HashSet<String>();
-        final ArrayList<String> dict = new ArrayList<String>();
-
-        // special case the special PASS field which doesn't show up in the FILTER field definitions
-        seen.add(VCFConstants.PASSES_FILTERS_v4);
-        dict.add(VCFConstants.PASSES_FILTERS_v4);
-
-        // set up the strings dictionary
-        for ( VCFHeaderLine line : header.getMetaDataInInputOrder() ) {
-            if ( line.shouldBeAddedToDictionary() ) {
-                if (!line.isIDHeaderLine()) {
-                    //is there a better way to ensure that shouldBeAddedToDictionary==true only when isIDHeaderLine==true
-                    throw new TribbleException(String.format(
-                            "The header line %s cannot be added to the BCF dictionary since its not an ID header line",
-                            line));
-                }
-                if ( ! seen.contains(line.getID())) {
-                    dict.add(line.getID());
-                    seen.add(line.getID());
-                }
-            }
-        }
-
-        return dict;
+    private BCF2Utils() {
     }
 
-    public static byte encodeTypeDescriptor(final int nElements, final BCF2Type type ) {
-        return (byte)((0x0F & nElements) << 4 | (type.getID() & 0x0F));
+    public static byte encodeTypeDescriptor(final int nElements, final BCF2Type type) {
+        return (byte) ((0x0F & nElements) << 4 | (type.getID() & 0x0F));
     }
 
     public static int decodeSize(final byte typeDescriptor) {
@@ -133,58 +85,12 @@ public static boolean sizeIsOverflow(final byte typeDescriptor) {
         return decodeSize(typeDescriptor) == OVERFLOW_ELEMENT_MARKER;
     }
 
-    public static byte readByte(final InputStream stream) throws IOException {
-        return (byte)(stream.read() & 0xFF);
-    }
-
-    /**
-     * Collapse multiple strings into a comma separated list
-     *
-     * ["s1", "s2", "s3"] =&gt; ",s1,s2,s3"
-     *
-     * @param strings size &gt; 1 list of strings
-     * @return
-     */
-    public static String collapseStringList(final List<String> strings) {
-        if ( strings.isEmpty() ) return "";
-        else if ( strings.size() == 1 ) return strings.get(0);
-        else {
-            final StringBuilder b = new StringBuilder();
-            for ( final String s : strings ) {
-                if ( s != null ) {
-                    assert s.indexOf(",") == -1; // no commas in individual strings
-                    b.append(',').append(s);
-                }
-            }
-            return b.toString();
-        }
-    }
-
-    /**
-     * Inverse operation of collapseStringList.
-     *
-     * ",s1,s2,s3" =&gt; ["s1", "s2", "s3"]
-     *
-     *
-     * @param collapsed
-     * @return
-     */
-    public static List<String> explodeStringList(final String collapsed) {
-        assert isCollapsedString(collapsed);
-        final String[] exploded = collapsed.substring(1).split(",");
-        return Arrays.asList(exploded);
-    }
-
-    public static boolean isCollapsedString(final String s) {
-        return !s.isEmpty() && s.charAt(0) == ',';
-    }
-
     /**
      * Returns a good name for a shadow BCF file for vcfFile.
-     *
+     * <p>
      * foo.vcf =&gt; foo.bcf
      * foo.xxx =&gt; foo.xxx.bcf
-     *
+     * <p>
      * If the resulting BCF file cannot be written, return null.  Happens
      * when vcfFile = /dev/null for example
      *
@@ -193,11 +99,11 @@ public static boolean isCollapsedString(final String s) {
      */
     public static final File shadowBCF(final File vcfFile) {
         final String path = vcfFile.getAbsolutePath();
-        if ( path.contains(FileExtensions.VCF) )
+        if (path.contains(FileExtensions.VCF))
             return new File(path.replace(FileExtensions.VCF, FileExtensions.BCF));
         else {
-            final File bcf = new File( path + FileExtensions.BCF );
-            if ( bcf.canRead() )
+            final File bcf = new File(path + FileExtensions.BCF);
+            if (bcf.canRead())
                 return bcf;
             else {
                 try {
@@ -206,9 +112,7 @@ public static final File shadowBCF(final File vcfFile) {
                     o.close();
                     bcf.delete();
                     return bcf;
-                } catch ( FileNotFoundException e ) {
-                    return null;
-                } catch ( IOException e ) {
+                } catch (final IOException e) {
                     return null;
                 }
             }
@@ -216,8 +120,8 @@ public static final File shadowBCF(final File vcfFile) {
     }
 
     public static BCF2Type determineIntegerType(final int value) {
-        for ( final BCF2Type potentialType : INTEGER_TYPES_BY_SIZE) {
-            if ( potentialType.withinRange(value) )
+        for (final BCF2Type potentialType : INTEGER_TYPES_BY_SIZE) {
+            if (potentialType.withinRange(value))
                 return potentialType;
         }
 
@@ -227,9 +131,9 @@ public static BCF2Type determineIntegerType(final int value) {
     public static BCF2Type determineIntegerType(final int[] values) {
         // find the min and max values in the array
         int max = 0, min = 0;
-        for ( final int v : values ) {
-            if ( v > max ) max = v;
-            if ( v < min ) min = v;
+        for (final int v : values) {
+            if (v > max) max = v;
+            if (v < min) min = v;
         }
 
         final BCF2Type maxType = determineIntegerType(max);
@@ -241,7 +145,7 @@ public static BCF2Type determineIntegerType(final int[] values) {
 
     /**
      * Returns the maximum BCF2 integer size of t1 and t2
-     *
+     * <p>
      * For example, if t1 == INT8 and t2 == INT16 returns INT16
      *
      * @param t1
@@ -249,64 +153,49 @@ public static BCF2Type determineIntegerType(final int[] values) {
      * @return
      */
     public static BCF2Type maxIntegerType(final BCF2Type t1, final BCF2Type t2) {
-        switch ( t1 ) {
-            case INT8: return t2;
-            case INT16: return t2 == BCF2Type.INT32 ? t2 : t1;
-            case INT32: return t1;
-            default: throw new TribbleException("BUG: unexpected BCF2Type " + t1);
+        switch (t1) {
+            case INT8:
+                return t2;
+            case INT16:
+                return t2 == BCF2Type.INT32 ? t2 : t1;
+            case INT32:
+                return t1;
+            default:
+                throw new TribbleException("BUG: unexpected BCF2Type " + t1);
         }
     }
 
     public static BCF2Type determineIntegerType(final List<Integer> values) {
         BCF2Type maxType = BCF2Type.INT8;
-        for ( final int value : values ) {
+        for (final Integer value : values) {
+            if (value == null) continue;
             final BCF2Type type1 = determineIntegerType(value);
-            switch ( type1 ) {
-                case INT8: break;
-                case INT16: maxType = BCF2Type.INT16; break;
-                case INT32: return BCF2Type.INT32; // fast path for largest possible value
-                default: throw new TribbleException("Unexpected integer type " + type1 );
+            switch (type1) {
+                case INT8:
+                    break;
+                case INT16:
+                    maxType = BCF2Type.INT16;
+                    break;
+                case INT32:
+                    return BCF2Type.INT32; // fast path for largest possible value
+                default:
+                    throw new TribbleException("Unexpected integer type " + type1);
             }
         }
         return maxType;
     }
 
-    /**
-     * Helper function that takes an object and returns a list representation
-     * of it:
-     *
-     * o == null =&gt; []
-     * o is a list =&gt; o
-     * else =&gt; [o]
-     *
-     * @param c  the class of the object
-     * @param o  the object to convert to a Java List
-     * @return
-     */
-    public static <T> List<T> toList(final Class<T> c, final Object o) {
-        if ( o == null ) return Collections.emptyList();
-        else if ( o instanceof List ) return (List<T>)o;
-        else if ( o.getClass().isArray() ) {
-            final int arraySize = Array.getLength(o);
-            final List<T> list = new ArrayList<T>(arraySize);
-            for (int i=0; i<arraySize; i++)
-                list.add((T)Array.get(o, i));
-            return list;
-        }
-        else return Collections.singletonList((T)o);
-    }
-
     /**
      * Are the elements and their order in the output and input headers consistent so that
      * we can write out the raw genotypes block without decoding and recoding it?
-     *
+     * <p>
      * If the order of INFO, FILTER, or contig elements in the output header is different than
      * in the input header we must decode the blocks using the input header and then recode them
      * based on the new output order.
-     *
+     * <p>
      * If they are consistent, we can simply pass through the raw genotypes block bytes, which is
      * a *huge* performance win for large blocks.
-     *
+     * <p>
      * Many common operations on BCF2 files (merging them for -nt, selecting a subset of records, etc)
      * don't modify the ordering of the header fields and so can safely pass through the genotypes
      * undecoded.  Some operations -- those at add filters or info fields -- can change the ordering
@@ -314,28 +203,25 @@ else if ( o.getClass().isArray() ) {
      */
     public static boolean headerLinesAreOrderedConsistently(final VCFHeader outputHeader, final VCFHeader genotypesBlockHeader) {
         // first, we have to have the same samples in the same order
-        if ( ! nullAsEmpty(outputHeader.getSampleNamesInOrder()).equals(nullAsEmpty(genotypesBlockHeader.getSampleNamesInOrder())) )
+        if (!nullAsEmpty(outputHeader.getSampleNamesInOrder()).equals(nullAsEmpty(genotypesBlockHeader.getSampleNamesInOrder())))
             return false;
 
-        final Iterator<VCFSimpleHeaderLine> outputLinesIt = outputHeader.getIDHeaderLines().iterator();
-        final Iterator<VCFSimpleHeaderLine> inputLinesIt = genotypesBlockHeader.getIDHeaderLines().iterator();
+        final Iterator<? extends VCFHeaderLine> outputLinesIt = outputHeader.getIDHeaderLines().iterator();
 
-        while ( inputLinesIt.hasNext() ) {
-            if ( ! outputLinesIt.hasNext() ) // missing lines in output
+        for (final VCFHeaderLine headerLine : genotypesBlockHeader.getIDHeaderLines()) {
+            if (!outputLinesIt.hasNext()) // missing lines in output
                 return false;
 
-            final VCFSimpleHeaderLine outputLine = outputLinesIt.next();
-            final VCFSimpleHeaderLine inputLine = inputLinesIt.next();
-
-            if ( ! inputLine.getClass().equals(outputLine.getClass()) || ! inputLine.getID().equals(outputLine.getID()) )
+            final VCFHeaderLine outputLine = outputLinesIt.next();
+            if (!headerLine.getClass().equals(outputLine.getClass()) || !headerLine.getID().equals(outputLine.getID()))
                 return false;
         }
 
         return true;
     }
 
-    private static <T> List<T> nullAsEmpty(List<T> l) {
-        if ( l == null )
+    private static <T> List<T> nullAsEmpty(final List<T> l) {
+        if (l == null)
             return Collections.emptyList();
         else
             return l;
diff --git a/src/main/java/htsjdk/variant/bcf2/BCFVersion.java b/src/main/java/htsjdk/variant/bcf2/BCFVersion.java
index b18b83e4aa..7bec9ef192 100644
--- a/src/main/java/htsjdk/variant/bcf2/BCFVersion.java
+++ b/src/main/java/htsjdk/variant/bcf2/BCFVersion.java
@@ -29,6 +29,8 @@
 import java.io.InputStream;
 import java.io.OutputStream;
 import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
 
 /**
  * Simple holder for BCF version information
@@ -44,6 +46,12 @@ public final class BCFVersion {
      */
     public static final byte[] MAGIC_HEADER_START = "BCF".getBytes();
 
+    public static final BCFVersion BCF2_1Version = new BCFVersion(2, 1);
+    public static final BCFVersion BCF2_2Version = new BCFVersion(2, 2);
+
+    public static final Set<BCFVersion> SUPPORTED_VERSIONS = new HashSet<>(Arrays.asList(BCF2_1Version, BCF2_2Version));
+
+
     final int majorVersion;
     final int minorVersion;
 
diff --git a/src/main/java/htsjdk/variant/variantcontext/VariantContext.java b/src/main/java/htsjdk/variant/variantcontext/VariantContext.java
index a63d940670..a2c47f9e7a 100644
--- a/src/main/java/htsjdk/variant/variantcontext/VariantContext.java
+++ b/src/main/java/htsjdk/variant/variantcontext/VariantContext.java
@@ -273,7 +273,6 @@ public class VariantContext implements HtsRecord, Feature, Serializable {
      * Determine which genotype fields are in use in the genotypes in VC
      * @return an ordered list of genotype fields in use in VC.  If vc has genotypes this will always include GT first
      */
-
     public List<String> calcVCFGenotypeKeys(final VCFHeader header) {
         final Set<String> keys = new HashSet<>();
 
@@ -299,21 +298,23 @@ public List<String> calcVCFGenotypeKeys(final VCFHeader header) {
         if ( sawPL ) keys.add(VCFConstants.GENOTYPE_PL_KEY);
         if ( sawGenotypeFilter ) keys.add(VCFConstants.GENOTYPE_FILTER_KEY);
 
-        List<String> sortedList = ParsingUtils.sortList(new ArrayList<>(keys));
-
-        // make sure the GT is first
+        final List<String> list = new ArrayList<>(6 + keys.size());
+        // Make sure the GT is first if present
         if (sawGoodGT) {
-            final List<String> newList = new ArrayList<>(sortedList.size() + 1);
-            newList.add(VCFConstants.GENOTYPE_KEY);
-            newList.addAll(sortedList);
-            sortedList = newList;
+            list.add(VCFConstants.GENOTYPE_KEY);
+            list.addAll(keys);
+            // Sort, skipping GT which will be at the first position of the list
+            Collections.sort(list.subList(1, list.size()));
+        } else {
+            list.addAll(keys);
+            Collections.sort(list);
         }
 
-        if (sortedList.isEmpty() && header.hasGenotypingData()) {
+        if (list.isEmpty() && header.hasGenotypingData()) {
             // this needs to be done in case all samples are no-calls
             return Collections.singletonList(VCFConstants.GENOTYPE_KEY);
         } else {
-            return sortedList;
+            return list;
         }
     }
 
@@ -469,7 +470,9 @@ protected VariantContext(final String source,
         this.stop = stop;
 
         // intern for efficiency.  equals calls will generate NPE if ID is inappropriately passed in as null
-        if ( ID == null || ID.equals("") ) throw new IllegalArgumentException("ID field cannot be the null or the empty string");
+        if ( ID == null || ID.equals("") ) {
+            throw new IllegalArgumentException("ID field cannot be the null or the empty string");
+        }
         this.ID = ID.equals(VCFConstants.EMPTY_ID_FIELD) ? VCFConstants.EMPTY_ID_FIELD : ID;
 
         this.commonInfo = new CommonInfo(source, log10PError, filters, attributes);
diff --git a/src/main/java/htsjdk/variant/variantcontext/writer/BCF2Encoder.java b/src/main/java/htsjdk/variant/variantcontext/writer/BCF2Encoder.java
deleted file mode 100644
index 495cd93ec9..0000000000
--- a/src/main/java/htsjdk/variant/variantcontext/writer/BCF2Encoder.java
+++ /dev/null
@@ -1,261 +0,0 @@
-/*
-* Copyright (c) 2012 The Broad Institute
-* 
-* Permission is hereby granted, free of charge, to any person
-* obtaining a copy of this software and associated documentation
-* files (the "Software"), to deal in the Software without
-* restriction, including without limitation the rights to use,
-* copy, modify, merge, publish, distribute, sublicense, and/or sell
-* copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following
-* conditions:
-* 
-* The above copyright notice and this permission notice shall be
-* included in all copies or substantial portions of the Software.
-* 
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
-* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-*/
-
-package htsjdk.variant.variantcontext.writer;
-
-import htsjdk.variant.bcf2.BCF2Type;
-import htsjdk.variant.bcf2.BCF2Utils;
-
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.List;
-
-/**
- * See #BCFWriter for documentation on this classes role in encoding BCF2 files
- *
- * @author Mark DePristo
- * @since 06/12
- */
-public final class BCF2Encoder {
-    // TODO -- increase default size?
-    public static final int WRITE_BUFFER_INITIAL_SIZE = 16384;
-    private ByteArrayOutputStream encodeStream = new ByteArrayOutputStream(WRITE_BUFFER_INITIAL_SIZE);
-
-    // --------------------------------------------------------------------------------
-    //
-    // Functions to return the data being encoded here
-    //
-    // --------------------------------------------------------------------------------
-
-    public byte[] getRecordBytes() {
-        byte[] bytes = encodeStream.toByteArray();
-        encodeStream.reset();
-        return bytes;
-    }
-
-    // --------------------------------------------------------------------------------
-    //
-    // Writing typed values (have type byte)
-    //
-    // --------------------------------------------------------------------------------
-
-    public final void encodeTypedMissing(final BCF2Type type) throws IOException {
-        encodeType(0, type);
-    }
-
-    public final void encodeTyped(final Object value, final BCF2Type type) throws IOException {
-        if ( value == null )
-            encodeTypedMissing(type);
-        else {
-            switch ( type ) {
-                case INT8:
-                case INT16:
-                case INT32: encodeTypedInt((Integer)value, type); break;
-                case FLOAT: encodeTypedFloat((Double) value); break;
-                case CHAR:  encodeTypedString((String) value); break;
-                default:    throw new IllegalArgumentException("Illegal type encountered " + type);
-            }
-        }
-    }
-
-    public final void encodeTypedInt(final int v) throws IOException {
-        final BCF2Type type = BCF2Utils.determineIntegerType(v);
-        encodeTypedInt(v, type);
-    }
-
-    public final void encodeTypedInt(final int v, final BCF2Type type) throws IOException {
-        encodeType(1, type);
-        encodeRawInt(v, type);
-    }
-
-    public final void encodeTypedString(final String s) throws IOException {
-        encodeTypedString(s.getBytes());
-    }
-
-    public final void encodeTypedString(final byte[] s) throws IOException {
-        if ( s == null )
-            encodeType(0, BCF2Type.CHAR);
-        else {
-            encodeType(s.length, BCF2Type.CHAR);
-            for ( int i = 0; i < s.length; i++ ) {
-                encodeRawChar(s[i]);
-            }
-        }
-    }
-
-    public final void encodeTypedFloat(final double d) throws IOException {
-        encodeType(1, BCF2Type.FLOAT);
-        encodeRawFloat(d);
-    }
-
-    public final void encodeTyped(List<? extends Object> v, final BCF2Type type) throws IOException {
-        if ( type == BCF2Type.CHAR && !v.isEmpty()) {
-            final String s = BCF2Utils.collapseStringList((List<String>) v);
-            v = stringToBytes(s);
-        }
-
-        encodeType(v.size(), type);
-        encodeRawValues(v, type);
-    }
-
-    // --------------------------------------------------------------------------------
-    //
-    // Writing raw values (don't have a type byte)
-    //
-    // --------------------------------------------------------------------------------
-
-    public final <T extends Object> void encodeRawValues(final Collection<T> v, final BCF2Type type) throws IOException {
-        for ( final T v1 : v ) {
-            encodeRawValue(v1, type);
-        }
-    }
-
-    public final <T extends Object> void encodeRawValue(final T value, final BCF2Type type) throws IOException {
-        try {
-            if ( value == type.getMissingJavaValue() )
-                encodeRawMissingValue(type);
-            else {
-                switch (type) {
-                    case INT8:
-                    case INT16:
-                    case INT32: encodeRawBytes((Integer) value, type); break;
-                    case FLOAT: encodeRawFloat((Double) value); break;
-                    case CHAR:  encodeRawChar((Byte) value); break;
-                    default:    throw new IllegalArgumentException("Illegal type encountered " + type);
-                }
-            }
-        } catch ( ClassCastException e ) {
-            throw new ClassCastException("BUG: invalid type cast to " + type + " from " + value);
-        }
-    }
-
-    public final void encodeRawMissingValue(final BCF2Type type) throws IOException {
-        encodeRawBytes(type.getMissingBytes(), type);
-    }
-
-    public final void encodeRawMissingValues(final int size, final BCF2Type type) throws IOException {
-        for ( int i = 0; i < size; i++ )
-            encodeRawMissingValue(type);
-    }
-
-    // --------------------------------------------------------------------------------
-    //
-    // low-level encoders
-    //
-    // --------------------------------------------------------------------------------
-
-    public final void encodeRawChar(final byte c) throws IOException {
-        encodeStream.write(c);
-    }
-
-    public final void encodeRawFloat(final double value) throws IOException {
-        encodeRawBytes(Float.floatToIntBits((float) value), BCF2Type.FLOAT);
-    }
-
-    public final void encodeType(final int size, final BCF2Type type) throws IOException {
-        if ( size <= BCF2Utils.MAX_INLINE_ELEMENTS ) {
-            final int typeByte = BCF2Utils.encodeTypeDescriptor(size, type);
-            encodeStream.write(typeByte);
-        } else {
-            final int typeByte = BCF2Utils.encodeTypeDescriptor(BCF2Utils.OVERFLOW_ELEMENT_MARKER, type);
-            encodeStream.write(typeByte);
-            // write in the overflow size
-            encodeTypedInt(size);
-        }
-    }
-
-    public final void encodeRawInt(final int value, final BCF2Type type) throws IOException {
-        type.write(value, encodeStream);
-    }
-
-    public final void encodeRawBytes(final int value, final BCF2Type type) throws IOException {
-        type.write(value, encodeStream);
-    }
-
-    // --------------------------------------------------------------------------------
-    //
-    // utility functions
-    //
-    // --------------------------------------------------------------------------------
-
-    public void encodeRawString(final String s, final int sizeToWrite) throws IOException {
-        final byte[] bytes = s.getBytes();
-        for ( int i = 0; i < sizeToWrite; i++ )
-            if ( i < bytes.length )
-                encodeRawChar(bytes[i]);
-            else
-                encodeRawMissingValue(BCF2Type.CHAR);
-    }
-
-    /**
-     * Totally generic encoder that examines o, determines the best way to encode it, and encodes it
-     *
-     * This method is incredibly slow, but it's only used for UnitTests so it doesn't matter
-     *
-     * @param o
-     * @return
-     */
-    public final BCF2Type encode(final Object o) throws IOException {
-        if ( o == null ) throw new IllegalArgumentException("Generic encode cannot deal with null values");
-
-        if ( o instanceof List ) {
-            final BCF2Type type = determineBCFType(((List) o).get(0));
-            encodeTyped((List) o, type);
-            return type;
-        } else {
-            final BCF2Type type = determineBCFType(o);
-            encodeTyped(o, type);
-            return type;
-        }
-    }
-
-    private final BCF2Type determineBCFType(final Object arg) {
-        final Object toType = arg instanceof List ? ((List)arg).get(0) : arg;
-
-        if ( toType instanceof Integer )
-            return BCF2Utils.determineIntegerType((Integer) toType);
-        else if ( toType instanceof String )
-            return BCF2Type.CHAR;
-        else if ( toType instanceof Double )
-            return BCF2Type.FLOAT;
-        else
-            throw new IllegalArgumentException("No native encoding for Object of type " + arg.getClass().getSimpleName());
-    }
-
-    private final List<Byte> stringToBytes(final String v) throws IOException {
-        if ( v == null || v.equals("") )
-            return Collections.emptyList();
-        else {
-            // TODO -- this needs to be optimized away for efficiency
-            final byte[] bytes = v.getBytes();
-            final List<Byte> l = new ArrayList<Byte>(bytes.length);
-            for ( int i = 0; i < bytes.length; i++) l.add(bytes[i]);
-            return l;
-        }
-    }
-}
\ No newline at end of file
diff --git a/src/main/java/htsjdk/variant/variantcontext/writer/BCF2FieldEncoder.java b/src/main/java/htsjdk/variant/variantcontext/writer/BCF2FieldEncoder.java
deleted file mode 100644
index 7d1f0de43d..0000000000
--- a/src/main/java/htsjdk/variant/variantcontext/writer/BCF2FieldEncoder.java
+++ /dev/null
@@ -1,455 +0,0 @@
-/*
-* Copyright (c) 2012 The Broad Institute
-* 
-* Permission is hereby granted, free of charge, to any person
-* obtaining a copy of this software and associated documentation
-* files (the "Software"), to deal in the Software without
-* restriction, including without limitation the rights to use,
-* copy, modify, merge, publish, distribute, sublicense, and/or sell
-* copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following
-* conditions:
-* 
-* The above copyright notice and this permission notice shall be
-* included in all copies or substantial portions of the Software.
-* 
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
-* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-*/
-
-package htsjdk.variant.variantcontext.writer;
-
-import htsjdk.variant.bcf2.BCF2Type;
-import htsjdk.variant.bcf2.BCF2Utils;
-import htsjdk.variant.variantcontext.VariantContext;
-import htsjdk.variant.vcf.VCFCompoundHeaderLine;
-import htsjdk.variant.vcf.VCFHeaderLineCount;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-import java.util.Map;
-
-/**
- * See #BCFWriter for documentation on this classes role in encoding BCF2 files
- *
- * @author Mark DePristo
- * @since 06/12
- */
-public abstract class BCF2FieldEncoder {
-    /**
-     * The header line describing the field we will encode values of
-     */
-    final VCFCompoundHeaderLine headerLine;
-
-    /**
-     * The BCF2 type we'll use to encoder this field, if it can be determined statically.
-     * If not, this variable must be null
-     */
-    final BCF2Type staticType;
-
-    /**
-     * The integer offset into the strings map of the BCF2 file corresponding to this
-     * field.
-     */
-    final int dictionaryOffset;
-
-    /**
-     * The integer type we use to encode our dictionary offset in the BCF2 file
-     */
-    final BCF2Type dictionaryOffsetType;
-
-    // ----------------------------------------------------------------------
-    //
-    // Constructor
-    //
-    // ----------------------------------------------------------------------
-
-    private BCF2FieldEncoder(final VCFCompoundHeaderLine headerLine, final Map<String, Integer> dict, final BCF2Type staticType) {
-        this.headerLine = headerLine;
-        this.staticType = staticType;
-
-        final Integer offset = dict.get(getField());
-        if ( offset == null ) throw new IllegalStateException("Format error: could not find string " + getField() + " in header as required by BCF");
-        this.dictionaryOffset = offset;
-        dictionaryOffsetType = BCF2Utils.determineIntegerType(offset);
-    }
-
-    // ----------------------------------------------------------------------
-    //
-    // Basic accessors
-    //
-    // ----------------------------------------------------------------------
-
-    public final String getField() { return headerLine.getID(); }
-
-    /**
-     * Write the field key (dictionary offset and type) into the BCF2Encoder stream
-     *
-     * @param encoder where we write our dictionary offset
-     * @throws IOException
-     */
-    public final void writeFieldKey(final BCF2Encoder encoder) throws IOException {
-        encoder.encodeTypedInt(dictionaryOffset, dictionaryOffsetType);
-    }
-
-    @Override
-    public String toString() {
-        return "BCF2FieldEncoder for " + getField() + " with count " + getCountType() + " encoded with " + getClass().getSimpleName();
-    }
-
-    // ----------------------------------------------------------------------
-    //
-    // methods to determine the number of encoded elements
-    //
-    // ----------------------------------------------------------------------
-
-    protected final VCFHeaderLineCount getCountType() {
-        return headerLine.getCountType();
-    }
-
-    /**
-     * @return True if this field has a constant, fixed number of elements (such as 1 for an atomic integer)
-     */
-    public boolean hasConstantNumElements() {
-        return getCountType() == VCFHeaderLineCount.INTEGER;
-    }
-
-    /**
-     * @return True if the only way to determine how many elements this field contains is by
-     * inspecting the actual value directly, such as when the number of elements
-     * is a variable length list per site or per genotype.
-     */
-    public boolean hasValueDeterminedNumElements() {
-        return getCountType() == VCFHeaderLineCount.UNBOUNDED;
-    }
-
-    /**
-     * @return True if this field has a non-fixed number of elements that depends only on the properties
-     * of the current VariantContext, such as one value per Allele or per genotype configuration.
-     */
-    public boolean hasContextDeterminedNumElements() {
-        return ! hasConstantNumElements() && ! hasValueDeterminedNumElements();
-    }
-
-    /**
-     * @return the number of elements, assuming this field has a constant number of elements.
-     */
-    public int numElements() {
-        return headerLine.getCount();
-    }
-
-    /**
-     * @return the number of elements by looking at the actual value provided
-     */
-    public int numElements(final Object value) {
-        return numElementsFromValue(value);
-    }
-
-    /**
-     * @return the number of elements, assuming this field has context-determined number of elements.
-     */
-    public int numElements(final VariantContext vc) {
-        return headerLine.getCount(vc);
-    }
-
-    /**
-     * A convenience access for the number of elements.
-     * @param vc
-     * @param value
-     * @return the number of encoded elements, either from the fixed number
-     * it has, from the VC, or from the value itself.
-     */
-    public final int numElements(final VariantContext vc, final Object value) {
-        if ( hasConstantNumElements() ) return numElements();
-        else if ( hasContextDeterminedNumElements() ) return numElements(vc);
-        else return numElements(value);
-    }
-
-    /**
-     * Given a value, return the number of elements we will encode for it.
-     *
-     * Assumes the value is encoded as a List
-     *
-     * @param value
-     * @return the number of elements we will encode for {@param value}.
-     */
-    protected int numElementsFromValue(final Object value) {
-        if ( value == null ) return 0;
-        else if ( value instanceof List ) return ((List) value).size();
-        else return 1;
-    }
-
-    // ----------------------------------------------------------------------
-    //
-    // methods to determine the BCF2 type of the encoded values
-    //
-    // ----------------------------------------------------------------------
-
-    /**
-     * Is the BCF2 type of this field static, or does it have to be determine from
-     * the actual field value itself?
-     * @return true if the field is static
-     */
-    public final boolean isStaticallyTyped() { return ! isDynamicallyTyped(); }
-
-    /**
-     * Is the BCF2 type of this field static, or does it have to be determine from
-     * the actual field value itself?
-     * @return true if the field is not static
-     */
-    public final boolean isDynamicallyTyped() { return staticType == null; }
-
-    /**
-     * Get the BCF2 type for this field, either from the static type of the
-     * field itself or by inspecting the value itself.
-     *
-     * @return the BCF2 type for this field
-     */
-    public final BCF2Type getType(final Object value) {
-        return isDynamicallyTyped() ? getDynamicType(value) : getStaticType();
-    }
-
-    public final BCF2Type getStaticType() {
-        return staticType;
-    }
-
-    public BCF2Type getDynamicType(final Object value) {
-        throw new IllegalStateException("BUG: cannot get dynamic type for statically typed BCF2 field " + getField());
-    }
-
-    // ----------------------------------------------------------------------
-    //
-    // methods to encode values, including the key abstract method
-    //
-    // ----------------------------------------------------------------------
-
-    /**
-     * Key abstract method that should encode a value of the given type into the encoder.
-     *
-     * Value will be of a type appropriate to the underlying encoder.  If the genotype field is represented as
-     * an int[], this will be value, and the encoder needs to handle encoding all of the values in the int[].
-     *
-     * The argument should be used, not the getType() method in the superclass as an outer loop might have
-     * decided a more general type (int16) to use, even through this encoder could have been done with int8.
-     *
-     * If minValues &gt; 0, then encodeValue must write in at least minValues items from value.  If value is atomic,
-     * this means that minValues - 1 MISSING values should be added to the encoder.  If minValues is a collection
-     * type (int[]) then minValues - values.length should be added.  This argument is intended to handle padding
-     * of values in genotype fields.
-     *
-     * @param encoder
-     * @param value
-     * @param type
-     * @param minValues
-     * @throws IOException
-     */
-    public abstract void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type, final int minValues) throws IOException;
-
-    // ----------------------------------------------------------------------
-    //
-    // Subclass to encode Strings
-    //
-    // ----------------------------------------------------------------------
-
-    public static class StringOrCharacter extends BCF2FieldEncoder {
-        public StringOrCharacter(final VCFCompoundHeaderLine headerLine, final Map<String, Integer> dict ) {
-            super(headerLine, dict, BCF2Type.CHAR);
-        }
-
-        @Override
-        public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type, final int minValues) throws IOException {
-            final String s = javaStringToBCF2String(value);
-            encoder.encodeRawString(s, Math.max(s.length(), minValues));
-        }
-
-        //
-        // Regardless of what the header says, BCF2 strings and characters are always encoded
-        // as arrays of CHAR type, which has a variable number of elements depending on the
-        // exact string being encoded
-        //
-        @Override public boolean hasConstantNumElements()          { return false; }
-        @Override public boolean hasContextDeterminedNumElements() { return false; }
-        @Override public boolean hasValueDeterminedNumElements()   { return true; }
-        @Override protected int numElementsFromValue(final Object value) {
-            return value == null ? 0 : javaStringToBCF2String(value).length();
-        }
-
-        /**
-         * Recode the incoming object to a String, compacting it into a
-         * BCF2 string if the value is a list.
-         *
-         * @param value a String or List<String> to encode, or null
-         * @return a non-null string to encode
-         */
-        private String javaStringToBCF2String(final Object value) {
-            if ( value == null )
-                return "";
-            else if (value instanceof List) {
-                final List<String> l = (List<String>)value;
-                return BCF2Utils.collapseStringList(l);
-            } else if ( value.getClass().isArray() ) {
-                final List<String> l = new ArrayList<String>();
-                Collections.addAll(l, (String[])value);
-                return BCF2Utils.collapseStringList(l);
-            } else
-                return (String)value;
-        }
-    }
-
-    // ----------------------------------------------------------------------
-    //
-    // Subclass to encode FLAG
-    //
-    // ----------------------------------------------------------------------
-
-    public static class Flag extends BCF2FieldEncoder {
-        public Flag(final VCFCompoundHeaderLine headerLine, final Map<String, Integer> dict ) {
-            super(headerLine, dict, BCF2Type.INT8);
-            if ( ! headerLine.isFixedCount() || headerLine.getCount() != 0 )
-                throw new IllegalStateException("Flag encoder only supports atomic flags for field " + getField());
-        }
-
-        @Override
-        public int numElements() {
-            return 1; // the header says 0 but we will write 1 value
-        }
-
-        @Override
-        public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type, final int minValues) throws IOException {
-            encoder.encodeRawBytes(1, getStaticType());
-        }
-    }
-
-    // ----------------------------------------------------------------------
-    //
-    // Subclass to encode FLOAT
-    //
-    // ----------------------------------------------------------------------
-
-    public static class Float extends BCF2FieldEncoder {
-        final boolean isAtomic;
-
-        public Float(final VCFCompoundHeaderLine headerLine, final Map<String, Integer> dict ) {
-            super(headerLine, dict, BCF2Type.FLOAT);
-            isAtomic = hasConstantNumElements() && numElements() == 1;
-        }
-
-        @Override
-        public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type, final int minValues) throws IOException {
-            int count = 0;
-            // TODO -- can be restructured to avoid toList operation
-            if ( isAtomic ) {
-                // fast path for fields with 1 fixed float value
-                if ( value != null ) {
-                    encoder.encodeRawFloat((Double)value);
-                    count++;
-                }
-            } else {
-                // handle generic case
-                final List<Double> doubles = BCF2Utils.toList(Double.class, value);
-                for ( final Double d : doubles ) {
-                    if ( d != null ) { // necessary because .,. => [null, null] in VC
-                        encoder.encodeRawFloat(d);
-                        count++;
-                    }
-                }
-            }
-            for ( ; count < minValues; count++ ) encoder.encodeRawMissingValue(type);
-        }
-    }
-
-    // ----------------------------------------------------------------------
-    //
-    // Subclass to encode int[]
-    //
-    // ----------------------------------------------------------------------
-
-    public static class IntArray extends BCF2FieldEncoder {
-        public IntArray(final VCFCompoundHeaderLine headerLine, final Map<String, Integer> dict ) {
-            super(headerLine, dict, null);
-        }
-
-        @Override
-        protected int numElementsFromValue(final Object value) {
-            return value == null ? 0 : ((int[])value).length;
-        }
-
-        @Override
-        public BCF2Type getDynamicType(final Object value) {
-            return value == null ? BCF2Type.INT8 : BCF2Utils.determineIntegerType((int[])value);
-        }
-
-        @Override
-        public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type, final int minValues) throws IOException {
-            int count = 0;
-            if ( value != null ) {
-                for ( final int i : (int[])value ) {
-                    encoder.encodeRawInt(i, type);
-                    count++;
-                }
-            }
-            for ( ; count < minValues; count++ ) encoder.encodeRawMissingValue(type);
-        }
-    }
-
-    // ----------------------------------------------------------------------
-    //
-    // Subclass to encode List<Integer>
-    //
-    // ----------------------------------------------------------------------
-
-    /**
-     * Specialized int encoder for atomic (non-list) integers
-     */
-    public static class AtomicInt extends BCF2FieldEncoder {
-        public AtomicInt(final VCFCompoundHeaderLine headerLine, final Map<String, Integer> dict ) {
-            super(headerLine, dict, null);
-        }
-
-        @Override
-        public BCF2Type getDynamicType(final Object value) {
-            return value == null ? BCF2Type.INT8 : BCF2Utils.determineIntegerType((Integer)value);
-        }
-
-        @Override
-        public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type, final int minValues) throws IOException {
-            int count = 0;
-            if ( value != null ) {
-                encoder.encodeRawInt((Integer)value, type);
-                count++;
-            }
-            for ( ; count < minValues; count++ ) encoder.encodeRawMissingValue(type);
-        }
-    }
-
-    public static class GenericInts extends BCF2FieldEncoder {
-        public GenericInts(final VCFCompoundHeaderLine headerLine, final Map<String, Integer> dict ) {
-            super(headerLine, dict, null);
-        }
-
-        @Override
-        public BCF2Type getDynamicType(final Object value) {
-            return value == null ? BCF2Type.INT8 : BCF2Utils.determineIntegerType(BCF2Utils.toList(Integer.class, value));
-        }
-
-        @Override
-        public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type, final int minValues) throws IOException {
-            int count = 0;
-            for ( final Integer i : BCF2Utils.toList(Integer.class, value) ) {
-                if ( i != null ) { // necessary because .,. => [null, null] in VC
-                    encoder.encodeRawInt(i, type);
-                    count++;
-                }
-            }
-            for ( ; count < minValues; count++ ) encoder.encodeRawMissingValue(type);
-        }
-    }
-}
diff --git a/src/main/java/htsjdk/variant/variantcontext/writer/BCF2FieldWriter.java b/src/main/java/htsjdk/variant/variantcontext/writer/BCF2FieldWriter.java
deleted file mode 100644
index 857cedfe3a..0000000000
--- a/src/main/java/htsjdk/variant/variantcontext/writer/BCF2FieldWriter.java
+++ /dev/null
@@ -1,324 +0,0 @@
-/*
-* Copyright (c) 2012 The Broad Institute
-* 
-* Permission is hereby granted, free of charge, to any person
-* obtaining a copy of this software and associated documentation
-* files (the "Software"), to deal in the Software without
-* restriction, including without limitation the rights to use,
-* copy, modify, merge, publish, distribute, sublicense, and/or sell
-* copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following
-* conditions:
-* 
-* The above copyright notice and this permission notice shall be
-* included in all copies or substantial portions of the Software.
-* 
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
-* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-*/
-
-package htsjdk.variant.variantcontext.writer;
-
-import htsjdk.variant.bcf2.BCF2Type;
-import htsjdk.variant.bcf2.BCF2Utils;
-import htsjdk.variant.variantcontext.Allele;
-import htsjdk.variant.variantcontext.Genotype;
-import htsjdk.variant.variantcontext.VariantContext;
-import htsjdk.variant.vcf.VCFHeader;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-/**
- * See #BCFWriter for documentation on this classes role in encoding BCF2 files
- *
- * @author Mark DePristo
- * @since 06/12
- */
-public abstract class BCF2FieldWriter {
-    private final VCFHeader header;
-    private final BCF2FieldEncoder fieldEncoder;
-
-    protected BCF2FieldWriter(final VCFHeader header, final BCF2FieldEncoder fieldEncoder) {
-        this.header = header;
-        this.fieldEncoder = fieldEncoder;
-    }
-
-    protected VCFHeader getHeader() { return header; }
-    protected BCF2FieldEncoder getFieldEncoder() {
-        return fieldEncoder;
-    }
-    protected String getField() { return getFieldEncoder().getField(); }
-
-    public void start(final BCF2Encoder encoder, final VariantContext vc) throws IOException {
-        fieldEncoder.writeFieldKey(encoder);
-    }
-
-    public void done(final BCF2Encoder encoder, final VariantContext vc) throws IOException { } // TODO -- overload done so that we null out values and test for correctness
-
-    @Override
-    public String toString() {
-        return "BCF2FieldWriter " + getClass().getSimpleName() + " with encoder " + getFieldEncoder();
-    }
-
-    // --------------------------------------------------------------------------------
-    //
-    // Sites writers
-    //
-    // --------------------------------------------------------------------------------
-
-    public static abstract class SiteWriter extends BCF2FieldWriter {
-        protected SiteWriter(final VCFHeader header, final BCF2FieldEncoder fieldEncoder) {
-            super(header, fieldEncoder);
-        }
-
-        public abstract void site(final BCF2Encoder encoder, final VariantContext vc) throws IOException;
-    }
-
-    public static class GenericSiteWriter extends SiteWriter {
-        public GenericSiteWriter(final VCFHeader header, final BCF2FieldEncoder fieldEncoder) {
-            super(header, fieldEncoder);
-        }
-
-        @Override
-        public void site(final BCF2Encoder encoder, final VariantContext vc) throws IOException {
-            final Object rawValue = vc.getAttribute(getField(), null);
-            final BCF2Type type = getFieldEncoder().getType(rawValue);
-            if ( rawValue == null ) {
-                // the value is missing, just write in null
-                encoder.encodeType(0, type);
-            } else {
-                final int valueCount = getFieldEncoder().numElements(vc, rawValue);
-                encoder.encodeType(valueCount, type);
-                getFieldEncoder().encodeValue(encoder, rawValue, type, valueCount);
-            }
-        }
-    }
-
-    // --------------------------------------------------------------------------------
-    //
-    // Genotypes writers
-    //
-    // --------------------------------------------------------------------------------
-
-    public static abstract class GenotypesWriter extends BCF2FieldWriter {
-        int nValuesPerGenotype = -1;
-        BCF2Type encodingType = null;
-
-        protected GenotypesWriter(final VCFHeader header, final BCF2FieldEncoder fieldEncoder) {
-            super(header, fieldEncoder);
-
-            if ( fieldEncoder.hasConstantNumElements() ) {
-                nValuesPerGenotype = getFieldEncoder().numElements();
-            }
-        }
-
-        @Override
-        public void start(final BCF2Encoder encoder, final VariantContext vc) throws IOException {
-            // writes the key information
-            super.start(encoder, vc);
-
-            // only update if we need to
-            if ( ! getFieldEncoder().hasConstantNumElements() ) {
-                if ( getFieldEncoder().hasContextDeterminedNumElements() )
-                    // we are cheap -- just depends on genotype of allele counts
-                    nValuesPerGenotype = getFieldEncoder().numElements(vc);
-                else
-                    // we have to go fishing through the values themselves (expensive)
-                    nValuesPerGenotype = computeMaxSizeOfGenotypeFieldFromValues(vc);
-            }
-
-            encoder.encodeType(nValuesPerGenotype, encodingType);
-        }
-
-        public void addGenotype(final BCF2Encoder encoder, final VariantContext vc, final Genotype g) throws IOException {
-            final Object fieldValue = g.getExtendedAttribute(getField(), null);
-            getFieldEncoder().encodeValue(encoder, fieldValue, encodingType, nValuesPerGenotype);
-        }
-
-        protected int numElements(final VariantContext vc, final Genotype g) {
-            return getFieldEncoder().numElements(vc, g.getExtendedAttribute(getField()));
-        }
-
-        private final int computeMaxSizeOfGenotypeFieldFromValues(final VariantContext vc) {
-            int size = -1;
-
-            for ( final Genotype g : vc.getGenotypes() ) {
-                size = Math.max(size, numElements(vc, g));
-            }
-
-            return size;
-        }
-    }
-
-    public static class StaticallyTypeGenotypesWriter extends GenotypesWriter {
-        public StaticallyTypeGenotypesWriter(final VCFHeader header, final BCF2FieldEncoder fieldEncoder) {
-            super(header, fieldEncoder);
-            encodingType = getFieldEncoder().getStaticType();
-        }
-    }
-
-    public static class IntegerTypeGenotypesWriter extends GenotypesWriter {
-        public IntegerTypeGenotypesWriter(final VCFHeader header, final BCF2FieldEncoder fieldEncoder) {
-            super(header, fieldEncoder);
-        }
-
-        @Override
-        public void start(final BCF2Encoder encoder, final VariantContext vc) throws IOException {
-            // the only value that is dynamic are integers
-            final List<Integer> values = new ArrayList<Integer>(vc.getNSamples());
-            for ( final Genotype g : vc.getGenotypes() ) {
-                for ( final Integer i : BCF2Utils.toList(Integer.class, g.getExtendedAttribute(getField(), null)) ) {
-                    if ( i != null ) values.add(i);
-                }
-            }
-
-            encodingType = BCF2Utils.determineIntegerType(values);
-            super.start(encoder, vc);
-        }
-    }
-
-    public static class IGFGenotypesWriter extends GenotypesWriter {
-        final IntGenotypeFieldAccessors.Accessor ige;
-
-        public IGFGenotypesWriter(final VCFHeader header, final BCF2FieldEncoder fieldEncoder, final IntGenotypeFieldAccessors.Accessor ige) {
-            super(header, fieldEncoder);
-            this.ige = ige;
-
-            if ( ! (fieldEncoder instanceof BCF2FieldEncoder.IntArray) )
-                throw new IllegalArgumentException("BUG: IntGenotypesWriter requires IntArray encoder for field " + getField());
-        }
-
-        @Override
-        public void start(final BCF2Encoder encoder, final VariantContext vc) throws IOException {
-            // TODO
-            // TODO this piece of code consumes like 10% of the runtime alone because fo the vc.getGenotypes() iteration
-            // TODO
-            encodingType = BCF2Type.INT8;
-            for ( final Genotype g : vc.getGenotypes() ) {
-                final int[] pls = ige.getValues(g);
-                final BCF2Type plsType = getFieldEncoder().getType(pls);
-                encodingType = BCF2Utils.maxIntegerType(encodingType, plsType);
-                if ( encodingType == BCF2Type.INT32 )
-                    break; // stop early
-            }
-
-            super.start(encoder, vc);
-        }
-
-        @Override
-        public void addGenotype(final BCF2Encoder encoder, final VariantContext vc, final Genotype g) throws IOException {
-            getFieldEncoder().encodeValue(encoder, ige.getValues(g), encodingType, nValuesPerGenotype);
-        }
-
-        @Override
-        protected int numElements(final VariantContext vc, final Genotype g) {
-            return ige.getSize(g);
-        }
-    }
-
-    public static class FTGenotypesWriter extends StaticallyTypeGenotypesWriter {
-        public FTGenotypesWriter(final VCFHeader header, final BCF2FieldEncoder fieldEncoder) {
-            super(header, fieldEncoder);
-        }
-
-        @Override
-        public void addGenotype(final BCF2Encoder encoder, final VariantContext vc, final Genotype g) throws IOException {
-            final String fieldValue = g.getFilters();
-            getFieldEncoder().encodeValue(encoder, fieldValue, encodingType, nValuesPerGenotype);
-        }
-
-        @Override
-        protected int numElements(final VariantContext vc, final Genotype g) {
-            return getFieldEncoder().numElements(vc, g.getFilters());
-        }
-    }
-
-    public static class GTWriter extends GenotypesWriter {
-        final Map<Allele, Integer> alleleMapForTriPlus = new HashMap<Allele, Integer>(5);
-        Allele ref, alt1;
-
-        public GTWriter(final VCFHeader header, final BCF2FieldEncoder fieldEncoder) {
-            super(header, fieldEncoder);
-        }
-
-        @Override
-        public void start(final BCF2Encoder encoder, final VariantContext vc) throws IOException {
-            if ( vc.getNAlleles() > BCF2Utils.MAX_ALLELES_IN_GENOTYPES )
-                throw new IllegalStateException("Current BCF2 encoder cannot handle sites " +
-                        "with > " + BCF2Utils.MAX_ALLELES_IN_GENOTYPES + " alleles, but you have "
-                        + vc.getNAlleles() + " at " + vc.getContig() + ":" + vc.getStart());
-
-            encodingType = BCF2Type.INT8;
-            buildAlleleMap(vc);
-            nValuesPerGenotype = vc.getMaxPloidy(2);
-
-            super.start(encoder, vc);
-        }
-
-        @Override
-        public void addGenotype(final BCF2Encoder encoder, final VariantContext vc, final Genotype g) throws IOException {
-            final int samplePloidy = g.getPloidy();
-            for ( int i = 0; i < nValuesPerGenotype; i++ ) {
-                if ( i < samplePloidy ) {
-                    // we encode the actual allele
-                    final Allele a = g.getAllele(i);
-                    final int offset = getAlleleOffset(a);
-                    final int encoded = ((offset+1) << 1) | ((g.isPhased() && i!=0) ? 0x01 : 0x00);
-                    encoder.encodeRawBytes(encoded, encodingType);
-                } else {
-                    // we need to pad with missing as we have ploidy < max for this sample
-                    encoder.encodeRawBytes(encodingType.getMissingBytes(), encodingType);
-                }
-            }
-        }
-
-        /**
-         * Fast path code to determine the offset.
-         *
-         * Inline tests for == against ref (most common, first test)
-         * == alt1 (second most common, second test)
-         * == NO_CALL (third)
-         * and finally in the map from allele => offset for all alt 2+ alleles
-         *
-         * @param a the allele whose offset we wish to determine
-         * @return the offset (from 0) of the allele in the list of variant context alleles (-1 means NO_CALL)
-         */
-        private final int getAlleleOffset(final Allele a) {
-            if ( a == ref ) return 0;
-            else if ( a == alt1 ) return 1;
-            else if ( a == Allele.NO_CALL ) return -1;
-            else {
-                final Integer o = alleleMapForTriPlus.get(a);
-                if ( o == null ) throw new IllegalStateException("BUG: Couldn't find allele offset for allele " + a);
-                return o;
-            }
-        }
-
-        private final void buildAlleleMap(final VariantContext vc) {
-            // these are fast path options to determine the offsets for
-            final int nAlleles = vc.getNAlleles();
-            ref = vc.getReference();
-            alt1 = nAlleles > 1 ? vc.getAlternateAllele(0) : null;
-
-            if ( nAlleles > 2 ) {
-                // for multi-allelics we need to clear the map, and add additional looks
-                alleleMapForTriPlus.clear();
-                final List<Allele> alleles = vc.getAlleles();
-                for ( int i = 2; i < alleles.size(); i++ ) {
-                    alleleMapForTriPlus.put(alleles.get(i), i);
-                }
-            }
-        }
-    }
-}
-
diff --git a/src/main/java/htsjdk/variant/variantcontext/writer/BCF2FieldWriterManager.java b/src/main/java/htsjdk/variant/variantcontext/writer/BCF2FieldWriterManager.java
deleted file mode 100644
index 20f9ce6aa4..0000000000
--- a/src/main/java/htsjdk/variant/variantcontext/writer/BCF2FieldWriterManager.java
+++ /dev/null
@@ -1,180 +0,0 @@
-/*
-* Copyright (c) 2012 The Broad Institute
-* 
-* Permission is hereby granted, free of charge, to any person
-* obtaining a copy of this software and associated documentation
-* files (the "Software"), to deal in the Software without
-* restriction, including without limitation the rights to use,
-* copy, modify, merge, publish, distribute, sublicense, and/or sell
-* copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following
-* conditions:
-* 
-* The above copyright notice and this permission notice shall be
-* included in all copies or substantial portions of the Software.
-* 
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
-* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-*/
-
-package htsjdk.variant.variantcontext.writer;
-
-import htsjdk.variant.utils.GeneralUtils;
-import htsjdk.variant.vcf.VCFCompoundHeaderLine;
-import htsjdk.variant.vcf.VCFConstants;
-import htsjdk.variant.vcf.VCFFormatHeaderLine;
-import htsjdk.variant.vcf.VCFHeader;
-import htsjdk.variant.vcf.VCFHeaderLineType;
-import htsjdk.variant.vcf.VCFInfoHeaderLine;
-
-import java.util.HashMap;
-import java.util.Map;
-
-/**
- * See #BCFWriter for documentation on this classes role in encoding BCF2 files
- *
- * @author Mark DePristo
- * @since 06/12
- */
-public class BCF2FieldWriterManager {
-    final Map<String, BCF2FieldWriter.SiteWriter> siteWriters = new HashMap<String, BCF2FieldWriter.SiteWriter>();
-    final Map<String, BCF2FieldWriter.GenotypesWriter> genotypesWriters = new HashMap<String, BCF2FieldWriter.GenotypesWriter>();
-    final IntGenotypeFieldAccessors intGenotypeFieldAccessors = new IntGenotypeFieldAccessors();
-
-    public BCF2FieldWriterManager() { }
-
-    /**
-     * Setup the FieldWriters appropriate to each INFO and FORMAT in the VCF header
-     *
-     * Must be called before any of the getter methods will work
-     *
-     * @param header a VCFHeader containing description for every INFO and FORMAT field we'll attempt to write out to BCF
-     * @param encoder the encoder we are going to use to write out the BCF2 data
-     * @param stringDictionary a map from VCFHeader strings to their offsets for encoding
-     */
-    public void setup(final VCFHeader header, final BCF2Encoder encoder, final Map<String, Integer> stringDictionary) {
-        for (final VCFInfoHeaderLine line : header.getInfoHeaderLines()) {
-            final String field = line.getID();
-            final BCF2FieldWriter.SiteWriter writer = createInfoWriter(header, line, encoder, stringDictionary);
-            add(siteWriters, field, writer);
-        }
-
-        for (final VCFFormatHeaderLine line : header.getFormatHeaderLines()) {
-            final String field = line.getID();
-            final BCF2FieldWriter.GenotypesWriter writer = createGenotypesWriter(header, line, encoder, stringDictionary);
-            add(genotypesWriters, field, writer);
-        }
-    }
-
-    private final <T> void add(final Map<String, T> map, final String field, final T writer) {
-        if ( map.containsKey(field) )
-            throw new IllegalStateException("BUG: field " + field + " already seen in VCFHeader while building BCF2 field encoders");
-        map.put(field, writer);
-    }
-
-    // -----------------------------------------------------------------
-    //
-    // Master routine to look at the header, a specific line, and
-    // build an appropriate SiteWriter for that header element
-    //
-    // -----------------------------------------------------------------
-
-    private BCF2FieldWriter.SiteWriter createInfoWriter(final VCFHeader header,
-                                                        final VCFInfoHeaderLine line,
-                                                        final BCF2Encoder encoder,
-                                                        final Map<String, Integer> dict) {
-        return new BCF2FieldWriter.GenericSiteWriter(header, createFieldEncoder(line, encoder, dict, false));
-    }
-
-    private BCF2FieldEncoder createFieldEncoder(final VCFCompoundHeaderLine line,
-                                                final BCF2Encoder encoder,
-                                                final Map<String, Integer> dict,
-                                                final boolean createGenotypesEncoders ) {
-
-        if ( createGenotypesEncoders && intGenotypeFieldAccessors.getAccessor(line.getID()) != null ) {
-            if ( GeneralUtils.DEBUG_MODE_ENABLED && line.getType() != VCFHeaderLineType.Integer )
-                System.err.println("Warning: field " + line.getID() + " expected to encode an integer but saw " + line.getType() + " for record " + line);
-            return new BCF2FieldEncoder.IntArray(line, dict);
-        } else if ( createGenotypesEncoders && line.getID().equals(VCFConstants.GENOTYPE_KEY) ) {
-            return new BCF2FieldEncoder.GenericInts(line, dict);
-        } else {
-            switch ( line.getType() ) {
-                case Character:
-                case String:
-                    return new BCF2FieldEncoder.StringOrCharacter(line, dict);
-                case Flag:
-                    return new BCF2FieldEncoder.Flag(line, dict);
-                case Float:
-                    return new BCF2FieldEncoder.Float(line, dict);
-                case Integer:
-                    if ( line.isFixedCount() && line.getCount() == 1 )
-                        return new BCF2FieldEncoder.AtomicInt(line, dict);
-                    else
-                        return new BCF2FieldEncoder.GenericInts(line, dict);
-                default:
-                    throw new IllegalArgumentException("Unexpected type for field " + line.getID());
-            }
-        }
-    }
-
-    // -----------------------------------------------------------------
-    //
-    // Master routine to look at the header, a specific line, and
-    // build an appropriate Genotypes for that header element
-    //
-    // -----------------------------------------------------------------
-
-    private BCF2FieldWriter.GenotypesWriter createGenotypesWriter(final VCFHeader header,
-                                                                  final VCFFormatHeaderLine line,
-                                                                  final BCF2Encoder encoder,
-                                                                  final Map<String, Integer> dict) {
-        final String field = line.getID();
-        final BCF2FieldEncoder fieldEncoder = createFieldEncoder(line, encoder, dict, true);
-
-        if ( field.equals(VCFConstants.GENOTYPE_KEY) ) {
-            return new BCF2FieldWriter.GTWriter(header, fieldEncoder);
-        } else if ( line.getID().equals(VCFConstants.GENOTYPE_FILTER_KEY) ) {
-            return new BCF2FieldWriter.FTGenotypesWriter(header, fieldEncoder);
-        } else if ( intGenotypeFieldAccessors.getAccessor(field) != null ) {
-            return new BCF2FieldWriter.IGFGenotypesWriter(header, fieldEncoder, intGenotypeFieldAccessors.getAccessor(field));
-        } else if ( line.getType() == VCFHeaderLineType.Integer ) {
-            return new BCF2FieldWriter.IntegerTypeGenotypesWriter(header, fieldEncoder);
-        } else {
-            return new BCF2FieldWriter.StaticallyTypeGenotypesWriter(header, fieldEncoder);
-        }
-    }
-
-    // -----------------------------------------------------------------
-    //
-    // Accessors to get site / genotype writers
-    //
-    // -----------------------------------------------------------------
-
-    /**
-     * Get a site writer specialized to encode values for site info field
-     * @param field key found in the VCF header INFO records
-     * @return non-null writer if one can be found, or null if none exists for field
-     */
-    public BCF2FieldWriter.SiteWriter getSiteFieldWriter(final String field) {
-        return getWriter(field, siteWriters);
-    }
-
-    /**
-     * Get a genotypes writer specialized to encode values for genotypes field
-     * @param field key found in the VCF header FORMAT records
-     * @return non-null writer if one can be found, or null if none exists for field
-     */
-    public BCF2FieldWriter.GenotypesWriter getGenotypeFieldWriter(final String field) {
-        return getWriter(field, genotypesWriters);
-    }
-
-    public <T> T getWriter(final String key, final Map<String, T> map) {
-        return map.get(key);
-    }
-}
diff --git a/src/main/java/htsjdk/variant/variantcontext/writer/BCF2Writer.java b/src/main/java/htsjdk/variant/variantcontext/writer/BCF2Writer.java
index 78990f5f3f..fd95161be2 100644
--- a/src/main/java/htsjdk/variant/variantcontext/writer/BCF2Writer.java
+++ b/src/main/java/htsjdk/variant/variantcontext/writer/BCF2Writer.java
@@ -28,22 +28,22 @@
 import htsjdk.samtools.SAMSequenceDictionary;
 import htsjdk.samtools.util.IOUtil;
 import htsjdk.samtools.util.RuntimeIOException;
+import htsjdk.tribble.TribbleException;
 import htsjdk.tribble.index.IndexCreator;
 import htsjdk.variant.bcf2.BCF2Codec;
+import htsjdk.variant.bcf2.BCF2Dictionary;
+import htsjdk.variant.bcf2.BCF2Encoder;
 import htsjdk.variant.bcf2.BCF2Type;
 import htsjdk.variant.bcf2.BCF2Utils;
 import htsjdk.variant.bcf2.BCFVersion;
 import htsjdk.variant.utils.GeneralUtils;
 import htsjdk.variant.variantcontext.Allele;
-import htsjdk.variant.variantcontext.Genotype;
-import htsjdk.variant.variantcontext.GenotypeBuilder;
 import htsjdk.variant.variantcontext.LazyGenotypesContext;
 import htsjdk.variant.variantcontext.VariantContext;
 import htsjdk.variant.variantcontext.VariantContextBuilder;
-import htsjdk.variant.vcf.VCFConstants;
+import htsjdk.variant.bcf2.BCF2FieldWriter.BCF2FieldWriterManager;
 import htsjdk.variant.vcf.VCFContigHeaderLine;
 import htsjdk.variant.vcf.VCFHeader;
-import htsjdk.variant.vcf.VCFHeaderVersion;
 import htsjdk.variant.vcf.VCFUtils;
 
 import java.io.ByteArrayOutputStream;
@@ -52,19 +52,16 @@
 import java.io.OutputStream;
 import java.io.OutputStreamWriter;
 import java.nio.file.Path;
-import java.util.ArrayList;
 import java.util.Collection;
-import java.util.Collections;
 import java.util.HashMap;
-import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 
 /**
  * VariantContextWriter that emits BCF2 binary encoding
- *
+ * <p>
  * Overall structure of this writer is complex for efficiency reasons
- *
+ * <p>
  * -- The BCF2Writer manages the low-level BCF2 encoder, the mappings
  * from contigs and strings to offsets, the VCF header, and holds the
  * lower-level encoders that map from VC and Genotype fields to their
@@ -72,29 +69,23 @@
  * like POS, contig, the size of info and genotype data, QUAL, etc.  It
  * has loops over the INFO and GENOTYPES to encode each individual datum
  * with the generic field encoders, but the actual encoding work is
- * done with by the FieldWriters classes themselves
- *
+ * done by the FieldWriters classes themselves.  The piece of code
+ * that determines which FieldWriters to associate with each SITE and
+ * GENOTYPE field is the BCF2FieldWriterManager.
+ * <p>
  * -- BCF2FieldWriter are specialized classes for writing out SITE and
  * genotype information for specific SITE/GENOTYPE fields (like AC for
  * sites and GQ for genotypes).  These are objects in themselves because
- * the manage all of the complexity of relating the types in the VCF header
+ * they manage all of the complexity of relating the types in the VCF header
  * with the proper encoding in BCF as well as the type representing this
  * in java.  Relating all three of these pieces of information together
- * is the main complexity challenge in the encoder.  The piece of code
- * that determines which FieldWriters to associate with each SITE and
- * GENOTYPE field is the BCF2FieldWriterManager.  These FieldWriters
- * are specialized for specific combinations of encoders (see below)
- * and contexts (genotypes) for efficiency, so they smartly manage
- * the writing of PLs (encoded as int[]) directly into the lowest
- * level BCFEncoder.
- *
- * -- At the third level is the BCF2FieldEncoder, relatively simple
- * pieces of code that handle the task of determining the right
- * BCF2 type for specific field values, as well as reporting back
- * information such as the number of elements used to encode it
- * (simple for atomic values like Integer but complex for PLs
- * or lists of strings)
- *
+ * is the main complexity challenge in the encoder.  These classes are
+ * responsible for extracting the necessary data from the VariantContext
+ * or Genotype, determining its BCF type and size, and writing it out.
+ * These FieldWriters are specialized for specific combinations of VCF type
+ * and contexts for efficiency, so they smartly manage the writing of PLs
+ * (encoded as int[]) directly into the lowest level BCFEncoder.
+ * <p>
  * -- At the lowest level is the BCF2Encoder itself.  This provides
  * just the limited encoding methods specified by the BCF2 specification.  This encoder
  * doesn't do anything but make it possible to conveniently write out valid low-level
@@ -105,19 +96,22 @@
  */
 class BCF2Writer extends IndexingVariantContextWriter {
     public static final int MAJOR_VERSION = 2;
-    public static final int MINOR_VERSION = 1;
+    public static final int MINOR_VERSION = 2;
+
+    public static final BCFVersion VERSION = new BCFVersion(MAJOR_VERSION, MINOR_VERSION);
 
     final private static boolean ALLOW_MISSING_CONTIG_LINES = false;
 
     private final OutputStream outputStream;      // Note: do not flush until completely done writing, to avoid issues with eventual BGZF support
     private VCFHeader header;
-    private final Map<String, Integer> contigDictionary = new HashMap<String, Integer>();
-    private final Map<String, Integer> stringDictionaryMap = new LinkedHashMap<String, Integer>();
+    private final Map<String, Integer> contigDictionary = new HashMap<>();
+    private final Map<String, Integer> stringDictionaryMap = new HashMap<>();
     private final boolean doNotWriteGenotypes;
-    private String[] sampleNames = null;
+    private final Map<VariantContext, List<String>> genotypeKeys = new HashMap<>();
+
+    private BCF2Encoder encoder; // initialized after the header arrives
 
-    private final BCF2Encoder encoder = new BCF2Encoder(); // initialized after the header arrives
-    final BCF2FieldWriterManager fieldManager = new BCF2FieldWriterManager();
+    private BCF2FieldWriterManager fieldWriterManager;
 
     /**
      * cached results for whether we can write out raw genotypes data.
@@ -135,15 +129,15 @@ public BCF2Writer(final File location, final OutputStream output, final SAMSeque
     }
 
     public BCF2Writer(final Path location, final OutputStream output, final SAMSequenceDictionary refDict,
-        final boolean enableOnTheFlyIndexing, final boolean doNotWriteGenotypes) {
+                      final boolean enableOnTheFlyIndexing, final boolean doNotWriteGenotypes) {
         super(writerName(location, output), location, output, refDict, enableOnTheFlyIndexing);
         this.outputStream = getOutputStream();
         this.doNotWriteGenotypes = doNotWriteGenotypes;
     }
 
     public BCF2Writer(final File location, final OutputStream output, final SAMSequenceDictionary refDict,
-        final IndexCreator indexCreator,
-        final boolean enableOnTheFlyIndexing, final boolean doNotWriteGenotypes) {
+                      final IndexCreator indexCreator,
+                      final boolean enableOnTheFlyIndexing, final boolean doNotWriteGenotypes) {
         this(IOUtil.toPath(location), output, refDict, indexCreator, enableOnTheFlyIndexing,
             doNotWriteGenotypes);
     }
@@ -163,7 +157,7 @@ public BCF2Writer(final Path location, final OutputStream output, final SAMSeque
     // --------------------------------------------------------------------------------
 
     @Override
-    public void writeHeader(VCFHeader header) {
+    public void writeHeader(final VCFHeader header) {
         setHeader(header);
 
         try {
@@ -175,33 +169,54 @@ public void writeHeader(VCFHeader header) {
             writer.close();
 
             final byte[] headerBytes = capture.toByteArray();
-            new BCFVersion(MAJOR_VERSION, MINOR_VERSION).write(outputStream);
+            BCF2Writer.VERSION.write(outputStream);
             BCF2Type.INT32.write(headerBytes.length, outputStream);
             outputStream.write(headerBytes);
             outputHasBeenWritten = true;
-        } catch (IOException e) {
+        } catch (final IOException e) {
             throw new RuntimeIOException("BCF2 stream: Got IOException while trying to write BCF2 header", e);
         }
     }
 
     @Override
-    public void add( VariantContext vc ) {
-        if ( doNotWriteGenotypes )
+    public void add(VariantContext vc) {
+        if (doNotWriteGenotypes)
             vc = new VariantContextBuilder(vc).noGenotypes().make();
         vc = vc.fullyDecode(header, false);
 
         super.add(vc); // allow on the fly indexing
 
         try {
-            final byte[] infoBlock = buildSitesData(vc);
-            final byte[] genotypesBlock = buildSamplesData(vc);
+            // Sites data
+            buildSitesData(vc);
+            final int sitesLength = encoder.getSize();
+
+            // Genotypes data
+            final int genotypesLength;
+            final BCF2Codec.LazyData lazyData = getLazyData(vc);  // has critical side effects
+            if (lazyData != null) {
+                // we never decoded any data from this BCF file so we don't need to re-encode the samples data
+                genotypesLength = lazyData.bytes.length;
+            } else {
+                // we have to do work to convert the VC into a BCF2 byte stream
+                buildSamplesData(vc);
+                genotypesLength = encoder.getSize() - sitesLength;
+            }
+
+            // Write lengths
+            BCF2Type.INT32.write(sitesLength, outputStream);
+            BCF2Type.INT32.write(genotypesLength, outputStream);
 
-            // write the two blocks to disk
-            writeBlock(infoBlock, genotypesBlock);
+            // Write the encoder's buffer into the output stream
+            // If there was no lazy data, this also contains the genotypes data
+            encoder.write(outputStream);
+            if (lazyData != null) {
+                // The encoder only contained sites data, so we need to write the lazy data
+                outputStream.write(lazyData.bytes);
+            }
             outputHasBeenWritten = true;
-        }
-        catch ( IOException e ) {
-            throw new RuntimeIOException("Error writing record to BCF2 file: " + vc.toString(), e);
+        } catch (final IOException e) {
+            throw new RuntimeIOException("Error writing record to BCF2 file: " + vc, e);
         }
     }
 
@@ -209,8 +224,7 @@ public void add( VariantContext vc ) {
     public void close() {
         try {
             outputStream.flush();
-        }
-        catch ( IOException e ) {
+        } catch (final IOException e) {
             throw new RuntimeIOException("Failed to flush BCF2 file");
         }
         super.close();
@@ -221,39 +235,50 @@ public void setHeader(final VCFHeader header) {
         if (outputHasBeenWritten) {
             throw new IllegalStateException("The header cannot be modified after the header or variants have been written to the output stream.");
         }
+
+        // TODO we default to 2.2 here, is this alright?
+        encoder = BCF2Encoder.getEncoder(BCF2Codec.ALLOWED_BCF_VERSION);
+
         // make sure the header is sorted correctly
-        this.header = doNotWriteGenotypes ? new VCFHeader(header.getMetaDataInSortedOrder()) : new VCFHeader(
-                header.getMetaDataInSortedOrder(), header.getGenotypeSamples());
+        this.header = doNotWriteGenotypes
+            ? new VCFHeader(header.getMetaDataInSortedOrder())
+            : new VCFHeader(header.getMetaDataInSortedOrder(), header.getGenotypeSamples());
+
+        // TODO should follow up on hts-specs and clarify the relationship between ##dictionary and IDX fields
+        // Error on ##dictionary lines, we don't know what to do with them
+        if (this.header.getMetaDataInInputOrder().stream().anyMatch(line -> line.getKey().equals("dictionary"))) {
+            throw new TribbleException("Use of the ##dictionary line is not supported");
+        }
+
         // create the config offsets map
-        if ( this.header.getContigLines().isEmpty() ) {
-            if ( ALLOW_MISSING_CONTIG_LINES ) {
-                if ( GeneralUtils.DEBUG_MODE_ENABLED ) {
+        if (this.header.getContigLines().isEmpty()) {
+            if (ALLOW_MISSING_CONTIG_LINES) {
+                if (GeneralUtils.DEBUG_MODE_ENABLED) {
                     System.err.println("No contig dictionary found in header, falling back to reference sequence dictionary");
                 }
+                // The reference sequence dictionary should never contain IDX fields
                 createContigDictionary(VCFUtils.makeContigHeaderLines(getRefDict(), null));
             } else {
                 throw new IllegalStateException("Cannot write BCF2 file with missing contig lines");
             }
         } else {
-            createContigDictionary(this.header.getContigLines());
-        }
-        // set up the map from dictionary string values -> offset
-        final ArrayList<String> dict = BCF2Utils.makeDictionary(this.header);
-        for ( int i = 0; i < dict.size(); i++ ) {
-            stringDictionaryMap.put(dict.get(i), i);
+            final BCF2Dictionary dict = BCF2Dictionary.makeBCF2ContigDictionary(header, BCF2Writer.VERSION);
+            dict.forEach((offset, string) -> contigDictionary.put(string, offset));
         }
 
-        sampleNames = this.header.getGenotypeSamples().toArray(new String[this.header.getNGenotypeSamples()]);
-        // setup the field encodings
-        fieldManager.setup(this.header, encoder, stringDictionaryMap);
+        // Create offset -> string map then turn inside-out
+        final BCF2Dictionary dict = BCF2Dictionary.makeBCF2StringDictionary(this.header, BCF2Writer.VERSION);
+        dict.forEach((offset, string) -> stringDictionaryMap.put(string, offset));
 
+        // Set up the field encodings
+        fieldWriterManager = new BCF2FieldWriterManager(header, stringDictionaryMap, encoder);
     }
 
     // --------------------------------------------------------------------------------
     //
     // implicit block
     //
-    // The first four records of BCF are inline untype encoded data of:
+    // The first four records of BCF are inline untyped encoded data of:
     //
     // 4 byte integer chrom offset
     // 4 byte integer start
@@ -261,23 +286,23 @@ public void setHeader(final VCFHeader header) {
     // 4 byte float qual
     //
     // --------------------------------------------------------------------------------
-    private byte[] buildSitesData( VariantContext vc ) throws IOException {
+    private void buildSitesData(final VariantContext vc) throws IOException {
         final int contigIndex = contigDictionary.get(vc.getContig());
-        if ( contigIndex == -1 )
+        if (contigIndex == -1)
             throw new IllegalStateException(String.format("Contig %s not found in sequence dictionary from reference", vc.getContig()));
 
-        // note use of encodeRawValue to not insert the typing byte
-        encoder.encodeRawValue(contigIndex, BCF2Type.INT32);
+        // note use of encodeRawInt to not insert the typing byte
+        encoder.encodeRawInt(contigIndex, BCF2Type.INT32);
 
         // pos.  GATK is 1 based, BCF2 is 0 based
-        encoder.encodeRawValue(vc.getStart() - 1, BCF2Type.INT32);
+        encoder.encodeRawInt(vc.getStart() - 1, BCF2Type.INT32);
 
         // ref length.  GATK is closed, but BCF2 is open so the ref length is GATK end - GATK start + 1
         // for example, a SNP is in GATK at 1:10-10, which has ref length 10 - 10 + 1 = 1
-        encoder.encodeRawValue(vc.getEnd() - vc.getStart() + 1, BCF2Type.INT32);
+        encoder.encodeRawInt(vc.getEnd() - vc.getStart() + 1, BCF2Type.INT32);
 
         // qual
-        if ( vc.hasLog10PError() )
+        if (vc.hasLog10PError())
             encoder.encodeRawFloat((float) vc.getPhredScaledQual());
         else
             encoder.encodeRawMissingValue(BCF2Type.FLOAT);
@@ -295,14 +320,12 @@ private byte[] buildSitesData( VariantContext vc ) throws IOException {
         buildAlleles(vc);
         buildFilter(vc);
         buildInfo(vc);
-
-        return encoder.getRecordBytes();
     }
 
 
     /**
      * Can we safely write on the raw (undecoded) genotypes of an input VC?
-     *
+     * <p>
      * The cache depends on the undecoded lazy data header == lastVCFHeaderOfUnparsedGenotypes, in
      * which case we return the previous result.  If it's not cached, we use the BCF2Util to
      * compare the VC header with our header (expensive) and cache it.
@@ -311,9 +334,9 @@ private byte[] buildSitesData( VariantContext vc ) throws IOException {
      * @return
      */
     private boolean canSafelyWriteRawGenotypesBytes(final BCF2Codec.LazyData lazyData) {
-        if ( lazyData.header != lastVCFHeaderOfUnparsedGenotypes ) {
+        if (lazyData.header != lastVCFHeaderOfUnparsedGenotypes) {
             // result is already cached
-            canPassOnUnparsedGenotypeDataForLastVCFHeader = BCF2Utils.headerLinesAreOrderedConsistently(this.header,lazyData.header);
+            canPassOnUnparsedGenotypeDataForLastVCFHeader = BCF2Utils.headerLinesAreOrderedConsistently(this.header, lazyData.header);
             lastVCFHeaderOfUnparsedGenotypes = lazyData.header;
         }
 
@@ -321,12 +344,12 @@ private boolean canSafelyWriteRawGenotypesBytes(final BCF2Codec.LazyData lazyDat
     }
 
     private BCF2Codec.LazyData getLazyData(final VariantContext vc) {
-        if ( vc.getGenotypes().isLazyWithData() ) {
-            final LazyGenotypesContext lgc = (LazyGenotypesContext)vc.getGenotypes();
+        if (vc.getGenotypes().isLazyWithData()) {
+            final LazyGenotypesContext lgc = (LazyGenotypesContext) vc.getGenotypes();
 
-            if ( lgc.getUnparsedGenotypeData() instanceof BCF2Codec.LazyData &&
-                    canSafelyWriteRawGenotypesBytes((BCF2Codec.LazyData) lgc.getUnparsedGenotypeData())) {
-                return (BCF2Codec.LazyData)lgc.getUnparsedGenotypeData();
+            if (lgc.getUnparsedGenotypeData() instanceof BCF2Codec.LazyData &&
+                canSafelyWriteRawGenotypesBytes((BCF2Codec.LazyData) lgc.getUnparsedGenotypeData())) {
+                return (BCF2Codec.LazyData) lgc.getUnparsedGenotypeData();
             } else {
                 lgc.decode(); // WARNING -- required to avoid keeping around bad lazy data for too long
             }
@@ -337,7 +360,7 @@ private BCF2Codec.LazyData getLazyData(final VariantContext vc) {
 
     /**
      * Try to get the nGenotypeFields as efficiently as possible.
-     *
+     * <p>
      * If this is a lazy BCF2 object just grab the field count from there,
      * otherwise do the whole counting by types test in the actual data
      *
@@ -346,80 +369,46 @@ private BCF2Codec.LazyData getLazyData(final VariantContext vc) {
      */
     private int getNGenotypeFormatFields(final VariantContext vc) {
         final BCF2Codec.LazyData lazyData = getLazyData(vc);
-        return lazyData != null ? lazyData.nGenotypeFields : vc.calcVCFGenotypeKeys(header).size();
+        if (lazyData == null) {
+            // Calculate genotype keys of a VariantContext and cache result
+            // This computation can be expensive as it needs to inspect every genotype in the VC,
+            // so we cache the result as it will be needed again when writing the genotype information
+            return genotypeKeys.computeIfAbsent(vc, v -> v.calcVCFGenotypeKeys(header)).size();
+        } else {
+            return lazyData.nGenotypeFields;
+        }
     }
 
-    private void buildID( VariantContext vc ) throws IOException {
+    private void buildID(final VariantContext vc) throws IOException {
         encoder.encodeTypedString(vc.getID());
     }
 
-    private void buildAlleles( VariantContext vc ) throws IOException {
-        for ( Allele allele : vc.getAlleles() ) {
+    private void buildAlleles(final VariantContext vc) throws IOException {
+        for (final Allele allele : vc.getAlleles()) {
             final byte[] s = allele.getDisplayBases();
-            if ( s == null )
+            if (s == null)
                 throw new IllegalStateException("BUG: BCF2Writer encountered null padded allele" + allele);
             encoder.encodeTypedString(s);
         }
     }
 
-    private void buildFilter( VariantContext vc ) throws IOException {
-        if ( vc.isFiltered() ) {
+    private void buildFilter(final VariantContext vc) throws IOException {
+        if (vc.isFiltered()) {
             encodeStringsByRef(vc.getFilters());
-        } else if ( vc.filtersWereApplied() ) {
-            encodeStringsByRef(Collections.singleton(VCFConstants.PASSES_FILTERS_v4));
+        } else if (vc.filtersWereApplied()) {
+            // PASS is always implicitly encoded as 0
+            encoder.encodeTypedInt(0, BCF2Type.INT8);
         } else {
             encoder.encodeTypedMissing(BCF2Type.INT8);
         }
     }
 
-    private void buildInfo( VariantContext vc ) throws IOException {
-        for ( Map.Entry<String, Object> infoFieldEntry : vc.getAttributes().entrySet() ) {
-            final String field = infoFieldEntry.getKey();
-            final BCF2FieldWriter.SiteWriter writer = fieldManager.getSiteFieldWriter(field);
-            if ( writer == null ) errorUnexpectedFieldToWrite(vc, field, "INFO");
-            writer.start(encoder, vc);
-            writer.site(encoder, vc);
-            writer.done(encoder, vc);
-        }
+    private void buildInfo(final VariantContext vc) throws IOException {
+        fieldWriterManager.writeInfo(vc);
     }
 
-    private byte[] buildSamplesData(final VariantContext vc) throws IOException {
-        final BCF2Codec.LazyData lazyData = getLazyData(vc);  // has critical side effects
-        if ( lazyData != null ) {
-            // we never decoded any data from this BCF file, so just pass it back
-            return lazyData.bytes;
-        }
-
-        // we have to do work to convert the VC into a BCF2 byte stream
-        final List<String> genotypeFields = vc.calcVCFGenotypeKeys(header);
-        for ( final String field : genotypeFields ) {
-            final BCF2FieldWriter.GenotypesWriter writer = fieldManager.getGenotypeFieldWriter(field);
-            if ( writer == null ) errorUnexpectedFieldToWrite(vc, field, "FORMAT");
-
-            assert writer != null;
-
-            writer.start(encoder, vc);
-            for ( final String name : sampleNames ) {
-                Genotype g = vc.getGenotype(name);
-                if ( g == null ) g = GenotypeBuilder.createMissing(name, writer.nValuesPerGenotype);
-                writer.addGenotype(encoder, vc, g);
-            }
-            writer.done(encoder, vc);
-        }
-        return encoder.getRecordBytes();
-    }
-
-    /**
-     * Throws a meaningful error message when a field (INFO or FORMAT) is found when writing out a file
-     * but there's no header line for it.
-     *
-     * @param vc
-     * @param field
-     * @param fieldType
-     */
-    private void errorUnexpectedFieldToWrite(final VariantContext vc, final String field, final String fieldType) {
-        throw new IllegalStateException("Found field " + field + " in the " + fieldType + " fields of VariantContext at " +
-                vc.getContig() + ":" + vc.getStart() + " from " + vc.getSource() + " but this hasn't been defined in the VCFHeader");
+    private void buildSamplesData(final VariantContext vc) throws IOException {
+        fieldWriterManager.writeFormat(vc, genotypeKeys.remove(vc)); // single-use cache entry: remove it so written VCs are not retained
     }
 
     // --------------------------------------------------------------------------------
@@ -428,34 +417,20 @@ private void errorUnexpectedFieldToWrite(final VariantContext vc, final String f
     //
     // --------------------------------------------------------------------------------
 
-    /**
-     * Write the data in the encoder to the outputstream as a length encoded
-     * block of data.  After this call the encoder stream will be ready to
-     * start a new data block
-     *
-     * @throws IOException
-     */
-    private void writeBlock(final byte[] infoBlock, final byte[] genotypesBlock) throws IOException {
-        BCF2Type.INT32.write(infoBlock.length, outputStream);
-        BCF2Type.INT32.write(genotypesBlock.length, outputStream);
-        outputStream.write(infoBlock);
-        outputStream.write(genotypesBlock);
-    }
-
-    private BCF2Type encodeStringsByRef(final Collection<String> strings) throws IOException {
-        final List<Integer> offsets = new ArrayList<Integer>(strings.size());
+    private void encodeStringsByRef(final Collection<String> strings) throws IOException {
+        final int[] offsets = new int[strings.size()];
+        int i = 0;
 
-        // iterate over strings until we find one that needs 16 bits, and break
-        for ( final String string : strings ) {
+        // Map strings to their position in string dictionary
+        for (final String string : strings) {
             final Integer got = stringDictionaryMap.get(string);
-            if ( got == null ) throw new IllegalStateException("Format error: could not find string " + string + " in header as required by BCF");
-            final int offset = got;
-            offsets.add(offset);
+            if (got == null)
+                throw new IllegalStateException("Format error: could not find string " + string + " in header as required by BCF");
+            offsets[i] = got;
+            i++;
         }
 
-        final BCF2Type type = BCF2Utils.determineIntegerType(offsets);
-        encoder.encodeTyped(offsets, type);
-        return type;
+        encoder.encodeTypedVecInt(offsets);
     }
 
     /**
@@ -465,7 +440,7 @@ private BCF2Type encodeStringsByRef(final Collection<String> strings) throws IOE
      */
     private void createContigDictionary(final Collection<VCFContigHeaderLine> contigLines) {
         int offset = 0;
-        for ( VCFContigHeaderLine contig : contigLines )
+        for (final VCFContigHeaderLine contig : contigLines)
             contigDictionary.put(contig.getID(), offset++);
     }
 }
diff --git a/src/main/java/htsjdk/variant/variantcontext/writer/VariantContextWriterBuilder.java b/src/main/java/htsjdk/variant/variantcontext/writer/VariantContextWriterBuilder.java
index 215eaf996b..0dd3e9d77c 100644
--- a/src/main/java/htsjdk/variant/variantcontext/writer/VariantContextWriterBuilder.java
+++ b/src/main/java/htsjdk/variant/variantcontext/writer/VariantContextWriterBuilder.java
@@ -481,7 +481,8 @@ else if (STREAM_TYPES.contains(this.outType))
                 if ((refDict == null) && (options.contains(Options.INDEX_ON_THE_FLY)))
                     throw new IllegalArgumentException("A reference dictionary is required for creating Tribble indices on the fly");
 
-                writer = createBCFWriter(outPath, outStreamFromFile);
+                // BCFs are always bgzipped, but the compression level can be set to 0 to only apply trivial compression
+                writer = createBCFWriter(outPath, new BlockCompressedOutputStream(outStreamFromFile, outPath));
                 break;
             case VCF_STREAM:
                 writer = createVCFWriter(null, outStreamFromFile);
@@ -492,7 +493,7 @@ else if (STREAM_TYPES.contains(this.outType))
                     options.remove(Options.INDEX_ON_THE_FLY);
                 }
 
-                writer = createBCFWriter(null, outStream);
+                writer = createBCFWriter(null, new BlockCompressedOutputStream(outStreamFromFile, outPath));
                 break;
         }
 
diff --git a/src/main/java/htsjdk/variant/vcf/AbstractVCFCodec.java b/src/main/java/htsjdk/variant/vcf/AbstractVCFCodec.java
index cacff036b5..753a1c16f1 100644
--- a/src/main/java/htsjdk/variant/vcf/AbstractVCFCodec.java
+++ b/src/main/java/htsjdk/variant/vcf/AbstractVCFCodec.java
@@ -405,7 +405,12 @@ private Set<String> parsePrimaryHeaderLine(final String headerLine) {
         }
 
         while ( col < columns.length ) {
-            sampleNames.add(columns[col++]);
+            // Sample names must be unique
+            if (sampleNames.contains(columns[col])) {
+                throw new TribbleException.InvalidHeader("duplicate sample name: " + columns[col]);
+            } else {
+                sampleNames.add(columns[col++]);
+            }
         }
 
         if ( sawFormatTag && sampleNames.isEmpty())
diff --git a/src/main/java/htsjdk/variant/vcf/VCFFileReader.java b/src/main/java/htsjdk/variant/vcf/VCFFileReader.java
index 2ab29ddcb4..7deade9374 100644
--- a/src/main/java/htsjdk/variant/vcf/VCFFileReader.java
+++ b/src/main/java/htsjdk/variant/vcf/VCFFileReader.java
@@ -55,14 +55,21 @@ public class VCFFileReader implements VCFReader {
      * Returns true if the given file appears to be a BCF file.
      */
     public static boolean isBCF(final File file) {
-        return isBCF(file.toPath());
+        return isBCF(file.toString());
     }
 
     /**
      * Returns true if the given path appears to be a BCF file.
      */
     public static boolean isBCF(final Path path) {
-        return path.toUri().getRawPath().endsWith(FileExtensions.BCF);
+        return isBCF(path.toUri().getRawPath());
+    }
+
+    /**
+     * Returns true if the given path appears to be a BCF file.
+     */
+    public static boolean isBCF(final String path) {
+        return path.endsWith(FileExtensions.BCF) || path.endsWith(FileExtensions.COMPRESSED_BCF);
     }
 
     /**
diff --git a/src/main/java/htsjdk/variant/vcf/VCFFilterHeaderLine.java b/src/main/java/htsjdk/variant/vcf/VCFFilterHeaderLine.java
index 1b890db1b1..101ff304c6 100644
--- a/src/main/java/htsjdk/variant/vcf/VCFFilterHeaderLine.java
+++ b/src/main/java/htsjdk/variant/vcf/VCFFilterHeaderLine.java
@@ -87,11 +87,6 @@ private void validate() {
         }
     }
 
-    @Override
-    public boolean shouldBeAddedToDictionary() {
-        return true;
-    }
-
     /**
      * get the "Description" field
      * @return the "Description" field
diff --git a/src/main/java/htsjdk/variant/vcf/VCFFormatHeaderLine.java b/src/main/java/htsjdk/variant/vcf/VCFFormatHeaderLine.java
index fc75ee5291..1e927b7d05 100644
--- a/src/main/java/htsjdk/variant/vcf/VCFFormatHeaderLine.java
+++ b/src/main/java/htsjdk/variant/vcf/VCFFormatHeaderLine.java
@@ -93,12 +93,8 @@ public static VCFFormatHeaderLine getMergedFormatHeaderLine(
 
     private void validate() {
         if (this.getType() == VCFHeaderLineType.Flag) {
-            throw new TribbleException(String.format("Flag is an unsupported type for format fields: ", this.toStringEncoding()));
+            throw new TribbleException("Flag is an unsupported type for format fields: " + this.toStringEncoding());
         }
     }
 
-    @Override
-    public boolean shouldBeAddedToDictionary() {
-        return true;
-    }
 }
\ No newline at end of file
diff --git a/src/main/java/htsjdk/variant/vcf/VCFHeaderLine.java b/src/main/java/htsjdk/variant/vcf/VCFHeaderLine.java
index 9214f7095f..c1bec06d47 100644
--- a/src/main/java/htsjdk/variant/vcf/VCFHeaderLine.java
+++ b/src/main/java/htsjdk/variant/vcf/VCFHeaderLine.java
@@ -134,7 +134,7 @@ public Optional<VCFValidationFailure<VCFHeaderLine>> getValidationFailure(final
     }
 
     /**
-     * Validate that the header line conforms to {@code vcfTargetVersion.
+     * Validate that the header line conforms to {@code vcfTargetVersion}.
      * @param vcfTargetVersion
      * @throws {@link TribbleException.VersionValidationFailure} if this header line fails to conform
      */
@@ -160,16 +160,6 @@ protected Optional<String> validateKeyOrID(final String keyString) {
         }
     }
 
-    /**
-     * By default the header lines won't be added to the BCF dictionary, unless this method is overriden
-     * (for example in FORMAT, INFO or FILTER header lines).
-     *
-     * @return false
-     */
-    public boolean shouldBeAddedToDictionary() {
-        return false;
-    }
-
     public String toString() {
         return toStringEncoding();
     }
diff --git a/src/main/java/htsjdk/variant/vcf/VCFHeaderLineTranslator.java b/src/main/java/htsjdk/variant/vcf/VCFHeaderLineTranslator.java
index a22ecd2102..2397e28641 100644
--- a/src/main/java/htsjdk/variant/vcf/VCFHeaderLineTranslator.java
+++ b/src/main/java/htsjdk/variant/vcf/VCFHeaderLineTranslator.java
@@ -159,14 +159,21 @@ public Map<String, String> parseLine(String valueLine, List<String> expectedTagO
             throw new TribbleException.InvalidHeader("Unclosed quote in header line value " + valueLine);
         }
 
+
         // Validate the order of all discovered tags against requiredTagOrder. All tags are treated as
         // "optional". Succeeding does not mean that all expected tags in the list were seen. Also, all
         // structured header lines can have "extra" tags, with no order specified, so additional tags
         // are tolerated.
         if ( expectedTagOrder != null ) {
+            // If there are N expected tags present in the parsed header, the first N tags must exactly
+            // match the order of the expected tags list, the remaining tags are considered optional
+            int numExpectedTagsPresent = 0;
+            for (final String expectedTag : expectedTagOrder) {
+                if (ret.containsKey(expectedTag)) numExpectedTagsPresent++;
+            }
             index = 0;
-            for (String str : ret.keySet()) {
-                if (index >= expectedTagOrder.size()) {
+            for (final String str : ret.keySet()) {
+                if (index == numExpectedTagsPresent) {
                     break; // done - end of requiredTagOrder list
                 } else if (!expectedTagOrder.get(index).equals(str)) {
                     throw new TribbleException.InvalidHeader(
diff --git a/src/main/java/htsjdk/variant/vcf/VCFInfoHeaderLine.java b/src/main/java/htsjdk/variant/vcf/VCFInfoHeaderLine.java
index 4a116e1381..410409ca12 100644
--- a/src/main/java/htsjdk/variant/vcf/VCFInfoHeaderLine.java
+++ b/src/main/java/htsjdk/variant/vcf/VCFInfoHeaderLine.java
@@ -110,9 +110,4 @@ protected Optional<String> validateKeyOrID(final String id) {
             : super.validateKeyOrID(id);
     }
 
-    @Override
-    public boolean shouldBeAddedToDictionary() {
-        return true;
-    }
-
 }
diff --git a/src/main/java/htsjdk/variant/vcf/VCFSimpleHeaderLine.java b/src/main/java/htsjdk/variant/vcf/VCFSimpleHeaderLine.java
index 2c53899f1d..a5271114d4 100644
--- a/src/main/java/htsjdk/variant/vcf/VCFSimpleHeaderLine.java
+++ b/src/main/java/htsjdk/variant/vcf/VCFSimpleHeaderLine.java
@@ -233,10 +233,6 @@ private void validate() {
             throw new TribbleException(
                     String.format("The required ID tag is missing or not the first attribute: key=%s", super.getKey()));
         }
-        final Optional<String> validationFailure = validateKeyOrID(getGenericFieldValue(ID_ATTRIBUTE));
-        if (validationFailure.isPresent()) {
-            throw new TribbleException.VersionValidationFailure(validationFailure.get());
-        }
     }
 
     // Perform all text transformations required to encode an attribute value
diff --git a/src/main/java/htsjdk/variant/vcf/VCFStandardHeaderLines.java b/src/main/java/htsjdk/variant/vcf/VCFStandardHeaderLines.java
index 0d61cf35e4..6dd5f3906f 100644
--- a/src/main/java/htsjdk/variant/vcf/VCFStandardHeaderLines.java
+++ b/src/main/java/htsjdk/variant/vcf/VCFStandardHeaderLines.java
@@ -207,7 +207,17 @@ public T repair(final T line) {
                                            + (badCount ? " -- counts disagree; header has " + line.getCount() + " but standard is " + standard.getCount() : "")
                                            + (badDesc ? " -- descriptions disagree; header has '" + line.getDescription() + "' but standard is '" + standard.getDescription() + "'": ""));
                     }
-                    return standard;
+                    // Create a new set so we can modify it without mutating the standard line
+                    final Set<String> additionalFields = new HashSet<>(line.getGenericFields().keySet());
+                    additionalFields.removeAll(standard.getGenericFields().keySet());
+
+                    if (additionalFields.isEmpty()) {
+                        return standard;
+                    } else {
+                        // We need to handle the case where a line has nonstandard attributes, but also additional
+                        // attributes of its own that would be lost if we simply returned the standard line
+                        return mergeStandardLine(standard, line, additionalFields);
+                    }
                 } else {
                     return line;
                 }
@@ -216,6 +226,26 @@ public T repair(final T line) {
             }
         }
 
+        // Copies the standard line and carries over the named extra attributes found on the original line.
+        private T mergeStandardLine(final T standard, final T line, final Set<String> additionalFields) {
+            final boolean isFormat = standard instanceof VCFFormatHeaderLine;
+            final String id = standard.getID();
+            final VCFCompoundHeaderLine copy;
+            if (standard.isFixedCount()) {
+                final int count = standard.getCount();
+                copy = isFormat
+                    ? new VCFFormatHeaderLine(id, count, standard.getType(), standard.getDescription())
+                    : new VCFInfoHeaderLine(id, count, standard.getType(), standard.getDescription());
+            } else {
+                copy = isFormat
+                    ? new VCFFormatHeaderLine(id, standard.getCountType(), standard.getType(), standard.getDescription())
+                    : new VCFInfoHeaderLine(id, standard.getCountType(), standard.getType(), standard.getDescription());
+            }
+            final Map<String, String> lineFields = line.getGenericFields();
+            additionalFields.forEach(name -> copy.updateGenericField(name, lineFields.get(name)));
+            return (T) copy;
+        }
+
         public Set<String> addToHeader(final Set<VCFHeaderLine> headerLines, final Collection<String> IDs, final boolean throwErrorForMissing) {
             final Set<String> missing = new HashSet<String>();
             for ( final String ID : IDs ) {
diff --git a/src/test/java/htsjdk/samtools/SamStreamsTest.java b/src/test/java/htsjdk/samtools/SamStreamsTest.java
index d08a14dabf..7611c762f3 100644
--- a/src/test/java/htsjdk/samtools/SamStreamsTest.java
+++ b/src/test/java/htsjdk/samtools/SamStreamsTest.java
@@ -28,6 +28,7 @@
 import htsjdk.samtools.seekablestream.SeekableFileStream;
 import htsjdk.samtools.seekablestream.SeekableStream;
 import htsjdk.samtools.seekablestream.SeekableStreamFactory;
+import htsjdk.samtools.util.IOUtil;
 import org.testng.Assert;
 import org.testng.annotations.DataProvider;
 import org.testng.annotations.Test;
@@ -44,7 +45,7 @@ public class SamStreamsTest extends HtsjdkTest {
     public void testDataFormat(final String inputFile, final boolean isGzippedSAMFile, final boolean isBAMFile, final boolean isCRAMFile) throws Exception {
         final File input = new File(TEST_DATA_DIR, inputFile);
         try(final InputStream fis = new BufferedInputStream(new FileInputStream(input))) { //must be buffered or the isGzippedSAMFile will blow up
-            Assert.assertEquals(SamStreams.isGzippedSAMFile(fis), isGzippedSAMFile, "isGzippedSAMFile:" + inputFile);
+            Assert.assertEquals(IOUtil.isGZIPInputStream(fis), isGzippedSAMFile, "isGzippedSAMFile:" + inputFile);
             Assert.assertEquals(SamStreams.isBAMFile(fis), isBAMFile,   "isBAMFile:" + inputFile);
             Assert.assertEquals(SamStreams.isCRAMFile(fis), isCRAMFile, "isCRAMFile:" + inputFile);
         }
diff --git a/src/test/java/htsjdk/utils/BCFToolsTestUtils.java b/src/test/java/htsjdk/utils/BCFToolsTestUtils.java
new file mode 100644
index 0000000000..8193791e93
--- /dev/null
+++ b/src/test/java/htsjdk/utils/BCFToolsTestUtils.java
@@ -0,0 +1,136 @@
+package htsjdk.utils;
+
+import htsjdk.samtools.util.FileExtensions;
+import htsjdk.samtools.util.ProcessExecutor;
+import htsjdk.samtools.util.RuntimeIOException;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+public class BCFToolsTestUtils {
+    private static final String BCFTOOLS_BINARY_ENV_VARIABLE = "HTSJDK_BCFTOOLS_BIN";
+    public static final String expectedBCFtoolsVersion = "1.13";
+
+    /**
+     * @return true if bcftools is available, otherwise false
+     */
+    public static boolean isBCFToolsAvailable() {
+        final String binPath = getBCFToolsBin();
+        final Path binFile = Paths.get(binPath);
+        return Files.exists(binFile);
+    }
+
+    /**
+     * @throws RuntimeException if bcftools executable is not available
+     */
+    public static void assertBCFToolsAvailable() {
+        if (!isBCFToolsAvailable()) {
+            throw new RuntimeException(String.format(
+                "No bcftools executable can be found." +
+                    " The %s environment variable must be set to the name of the local bcftools executable.",
+                BCFTOOLS_BINARY_ENV_VARIABLE
+            ));
+        }
+    }
+
+    /**
+     * @return the name and location of the local bcftools executable as specified by the environment
+     * variable HTSJDK_BCFTOOLS_BIN, or the default value of "/usr/local/bin/bcftools" if the environment
+     * variable is not set
+     */
+    public static String getBCFToolsBin() {
+        final String bcftoolsPath = System.getenv(BCFTOOLS_BINARY_ENV_VARIABLE);
+        return bcftoolsPath == null ? "/usr/local/bin/bcftools" : bcftoolsPath;
+    }
+
+    /**
+     * Execute a bcftools command line if a local bcftools executable is available see {@link #isBCFToolsAvailable()}.
+     *
+     * @param commandLine bcftools command line string, excluding the "bcftools" prefix. For example:
+     *                    {@code "view my.vcf > my.bcf"}
+     * @return the {@link ProcessExecutor.ExitStatusAndOutput} resulting from the command execution, if
+     * the command succeeds
+     * @throws RuntimeException if the command fails, or if a local bcftools executable is not available.
+     */
+    public static ProcessExecutor.ExitStatusAndOutput executeBCFToolsCommand(final String commandLine) {
+        assertBCFToolsAvailable();
+        final String commandString = String.format("%s %s", getBCFToolsBin(), commandLine);
+        final ProcessExecutor.ExitStatusAndOutput processStatus =
+            ProcessExecutor.executeAndReturnInterleavedOutput(commandString);
+        if (processStatus.exitStatus != 0) {
+            // bcftools seems to write some errors to stdout
+            throw new RuntimeException(
+                String.format(
+                    "Failure code %d returned from bcftools command %s\n (stderr: %.500s)\n (stdout: %.500s)\n",
+                    processStatus.exitStatus,
+                    commandString,
+                    processStatus.stderr == null ? "" : processStatus.stderr,
+                    processStatus.stdout == null ? "" : processStatus.stdout
+                )
+            );
+        }
+        return processStatus;
+    }
+
+    /**
+     * Convert an input VCF file to a temporary BCF file using the bcftools "view" command. The temp
+     * file will be deleted when the process exits. Use {@link #isBCFToolsAvailable()} to determine if it's safe
+     * to use this method.
+     *
+     * @param inputVCF           input file to convert
+     * @param commandLineOptions additional command line options (--input-fmt-option or --output-fmt-option)
+     * @return a temporary file containing the bcftools-generated results.
+     */
+    public static File VCFtoBCF(
+        final File inputVCF,
+        final String commandLineOptions
+    ) {
+        assertBCFToolsAvailable();
+        try {
+            final File tempBCFFile = File.createTempFile("bcftoolsTemporaryBCF", FileExtensions.BCF);
+            tempBCFFile.deleteOnExit();
+            final String commandString = String.format(
+                "view %s %s -o %s",
+                commandLineOptions == null ? "" : commandLineOptions,
+                inputVCF.getAbsolutePath(),
+                tempBCFFile.getAbsolutePath()
+            );
+            executeBCFToolsCommand(commandString);
+            return tempBCFFile;
+        } catch (final IOException e) {
+            throw new RuntimeIOException(e);
+        }
+    }
+
+    /**
+     * Convert an input BCF file to a temporary VCF file using the bcftools "view" command. The temp
+     * file will be deleted when the process exits. Use {@link #isBCFToolsAvailable()} to determine if it's safe
+     * to use this method.
+     *
+     * @param inputBCF           input file to convert
+     * @param commandLineOptions additional command line options (--input-fmt-option or --output-fmt-option)
+     * @return a temporary file containing the bcftools-generated results.
+     */
+    public static File BCFToVCF(
+        final File inputBCF,
+        final String commandLineOptions
+    ) {
+        assertBCFToolsAvailable();
+        try {
+            final File tempVCFFile = File.createTempFile("bcftoolsTemporaryVCF", FileExtensions.VCF); // fixed prefix, no input path
+            tempVCFFile.deleteOnExit(); // honor the documented delete-at-exit contract, as VCFtoBCF does
+            executeBCFToolsCommand(String.format(
+                "view %s %s -o %s",
+                commandLineOptions == null ? "" : commandLineOptions,
+                inputBCF.getAbsolutePath(),
+                tempVCFFile.getAbsolutePath()
+            ));
+            return tempVCFFile;
+        } catch (final IOException e) {
+            throw new RuntimeIOException(e);
+        }
+    }
+}
diff --git a/src/test/java/htsjdk/utils/BCFToolsTestUtilsTest.java b/src/test/java/htsjdk/utils/BCFToolsTestUtilsTest.java
new file mode 100644
index 0000000000..9fd5451f55
--- /dev/null
+++ b/src/test/java/htsjdk/utils/BCFToolsTestUtilsTest.java
@@ -0,0 +1,35 @@
+package htsjdk.utils;
+
+import htsjdk.HtsjdkTest;
+import htsjdk.samtools.util.ProcessExecutor;
+import org.testng.Assert;
+import org.testng.SkipException;
+import org.testng.annotations.Test;
+
+public class BCFToolsTestUtilsTest extends HtsjdkTest {
+
+    @Test
+    public void testBCFToolsIsAvailable() {
+        Assert.assertTrue(BCFToolsTestUtils.isBCFToolsAvailable());
+    }
+
+    @Test
+    public void testBCFToolsVersion() {
+        if (!BCFToolsTestUtils.isBCFToolsAvailable()) {
+            throw new SkipException("bcftools not available on local device");
+        }
+        // If this test runs, but fails because version validation fails, then the local bcftools version is
+        // not the one expected by the htsjdk tests
+        final ProcessExecutor.ExitStatusAndOutput processStatus = BCFToolsTestUtils.executeBCFToolsCommand("--version");
+        Assert.assertTrue(processStatus.stdout.contains(BCFToolsTestUtils.expectedBCFtoolsVersion), "expected bcftools " + BCFToolsTestUtils.expectedBCFtoolsVersion + " but --version reported: " + processStatus.stdout);
+    }
+
+    // Asserted inline, not via expectedExceptions: SkipException is itself a RuntimeException and could be mistaken for it
+    @Test
+    public void testBCFToolsPresentButCommandFails() {
+        if (!BCFToolsTestUtils.isBCFToolsAvailable()) {
+            throw new SkipException("bcftools not available on local device");
+        }
+        Assert.assertThrows(RuntimeException.class, () -> BCFToolsTestUtils.executeBCFToolsCommand("--notABcftoolsCommand"));
+    }
+}
diff --git a/src/test/java/htsjdk/variant/bcf2/BCF2DictionaryTest.java b/src/test/java/htsjdk/variant/bcf2/BCF2DictionaryTest.java
new file mode 100644
index 0000000000..9d5b09a0ec
--- /dev/null
+++ b/src/test/java/htsjdk/variant/bcf2/BCF2DictionaryTest.java
@@ -0,0 +1,101 @@
+package htsjdk.variant.bcf2;
+
+import htsjdk.tribble.TribbleException;
+import htsjdk.variant.VariantBaseTest;
+import htsjdk.variant.vcf.VCFContigHeaderLine;
+import htsjdk.variant.vcf.VCFFilterHeaderLine;
+import htsjdk.variant.vcf.VCFFormatHeaderLine;
+import htsjdk.variant.vcf.VCFHeader;
+import htsjdk.variant.vcf.VCFHeaderLine;
+import htsjdk.variant.vcf.VCFHeaderLineCount;
+import htsjdk.variant.vcf.VCFHeaderLineType;
+import htsjdk.variant.vcf.VCFHeaderVersion;
+import htsjdk.variant.vcf.VCFInfoHeaderLine;
+import htsjdk.variant.vcf.VCFSimpleHeaderLine;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.LinkedHashSet;
+import java.util.List;
+
+public class BCF2DictionaryTest extends VariantBaseTest {
+
+    @DataProvider(name = "dictionaryProvider")
+    public Object[][] dictionaryProvider() {
+        final List<Object[]> cases = new ArrayList<>();
+
+        final List<VCFHeaderLine> inputLines = new ArrayList<>();
+        int counter = 0;
+        inputLines.add(new VCFHeaderLine(VCFHeader.DEFAULT_VCF_VERSION.getFormatString(), VCFHeader.DEFAULT_VCF_VERSION.getVersionString()));
+        inputLines.add(new VCFFilterHeaderLine("l" + counter++));
+        inputLines.add(new VCFFilterHeaderLine("l" + counter++));
+        inputLines.add(new VCFContigHeaderLine(Collections.singletonMap("ID", String.valueOf(counter++)), counter));
+        inputLines.add(new VCFContigHeaderLine(Collections.singletonMap("ID", String.valueOf(counter++)), counter));
+        inputLines.add(new VCFInfoHeaderLine("A" + counter++, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x"));
+        inputLines.add(new VCFInfoHeaderLine("A" + counter++, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x"));
+        inputLines.add(new VCFHeaderLine("x", "misc"));
+        inputLines.add(new VCFHeaderLine("y", "misc"));
+        inputLines.add(new VCFFilterHeaderLine("aFilter", "misc"));
+        inputLines.add(new VCFFormatHeaderLine("A" + counter++, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x"));
+        inputLines.add(new VCFFormatHeaderLine("A" + counter++, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x"));
+        final VCFHeader inputHeader = new VCFHeader(new LinkedHashSet<>(inputLines));
+
+        for (final BCFVersion version : BCFVersion.SUPPORTED_VERSIONS) {
+            final BCF2Dictionary dict = BCF2Dictionary.makeBCF2StringDictionary(inputHeader, version);
+            cases.add(new Object[]{dict});
+        }
+
+        return cases.toArray(new Object[0][]);
+    }
+
+    @Test(dataProvider = "dictionaryProvider")
+    public void testCreateDictionary(final BCF2Dictionary dict) {
+        // TestNG order is (actual, expected): the 7 FILTER/INFO/FORMAT IDs above plus, presumably, the implicit PASS entry
+        Assert.assertEquals(dict.size(), 8);
+    }
+
+    /*
+    @DataProvider(name = "inconsistentIDXProvider")
+    public Object[][] inconsistentIDXProvider() {
+        final List<Object[]> cases = new ArrayList<>();
+
+        // TODO can't create FILTER/FORMAT/INFO lines with arbitrary attributes
+        //  should probably be addressed as part of refactoring, would be simpler and more consistent
+        for (final BCFVersion version : BCFVersion.SUPPORTED_VERSIONS) {
+            // String lines with inconsistent IDX
+            {
+                int counter = 0;
+                final List<VCFHeaderLine> inputLines = new ArrayList<>();
+                inputLines.add(new VCFFilterHeaderLine(String.valueOf(counter++)));
+                inputLines.add(new VCFFilterHeaderLine(String.valueOf(counter++)).getGenericFieldValue());
+
+                new VCFSimpleHeaderLine()
+
+
+                final VCFHeader header = new VCFHeader(new LinkedHashSet<>(inputLines));
+                final BCF2Dictionary dict = BCF2Dictionary.makeBCF2StringDictionary(header, version);
+                cases.add(new Object[]{dict});
+            }
+
+            // Contig lines with inconsistent IDX
+            {
+
+            }
+        }
+
+        return cases.toArray(new Object[0][]);
+    }
+
+    @Test(expectedExceptions = {TribbleException.class})
+    public void inconsistentIDX(final VCFHeader header, final BCFVersion version, final boolean string) {
+        if (string) {
+            BCF2Dictionary.makeBCF2StringDictionary(header, version);
+        } else {
+            BCF2Dictionary.makeBCF2ContigDictionary(header, version);
+        }
+    }
+     */
+}
diff --git a/src/test/java/htsjdk/variant/bcf2/BCF2EncoderDecoderUnitTest.java b/src/test/java/htsjdk/variant/bcf2/BCF2EncoderDecoderUnitTest.java
index d0d3a88fe2..050931444b 100644
--- a/src/test/java/htsjdk/variant/bcf2/BCF2EncoderDecoderUnitTest.java
+++ b/src/test/java/htsjdk/variant/bcf2/BCF2EncoderDecoderUnitTest.java
@@ -1,33 +1,33 @@
 /*
-* Copyright (c) 2012 The Broad Institute
-* 
-* Permission is hereby granted, free of charge, to any person
-* obtaining a copy of this software and associated documentation
-* files (the "Software"), to deal in the Software without
-* restriction, including without limitation the rights to use,
-* copy, modify, merge, publish, distribute, sublicense, and/or sell
-* copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following
-* conditions:
-* 
-* The above copyright notice and this permission notice shall be
-* included in all copies or substantial portions of the Software.
-* 
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
-* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-*/
+ * Copyright (c) 2012 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+ * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
 
 package htsjdk.variant.bcf2;
 
 // the imports for unit testing.
+
 import htsjdk.variant.VariantBaseTest;
-import htsjdk.variant.variantcontext.writer.BCF2Encoder;
 import org.testng.Assert;
 import org.testng.annotations.BeforeSuite;
 import org.testng.annotations.DataProvider;
@@ -45,9 +45,9 @@
 
 public class BCF2EncoderDecoderUnitTest extends VariantBaseTest {
     private final double FLOAT_TOLERANCE = 1e-6;
-    final List<BCF2TypedValue> primitives = new ArrayList<BCF2TypedValue>();
-    final List<BCF2TypedValue> basicTypes = new ArrayList<BCF2TypedValue>();
-    final List<BCF2TypedValue> forCombinations = new ArrayList<BCF2TypedValue>();
+    final List<BCF2TypedValue> primitives = new ArrayList<>();
+    final List<BCF2TypedValue> basicTypes = new ArrayList<>();
+    final List<BCF2TypedValue> forCombinations = new ArrayList<>();
 
     @BeforeSuite
     public void before() {
@@ -63,23 +63,23 @@ public void before() {
         primitives.add(new BCF2TypedValue(-1, BCF2Type.INT8));
         primitives.add(new BCF2TypedValue(100, BCF2Type.INT8));
         primitives.add(new BCF2TypedValue(-100, BCF2Type.INT8));
-        primitives.add(new BCF2TypedValue(-127, BCF2Type.INT8));    // last value in range
-        primitives.add(new BCF2TypedValue( 127, BCF2Type.INT8));    // last value in range
+        primitives.add(new BCF2TypedValue(-120, BCF2Type.INT8));    // last value in range
+        primitives.add(new BCF2TypedValue(127, BCF2Type.INT8));    // last value in range
 
         // medium ints
         primitives.add(new BCF2TypedValue(-1000, BCF2Type.INT16));
         primitives.add(new BCF2TypedValue(1000, BCF2Type.INT16));
         primitives.add(new BCF2TypedValue(-128, BCF2Type.INT16));    // first value in range
-        primitives.add(new BCF2TypedValue( 128, BCF2Type.INT16));    // first value in range
-        primitives.add(new BCF2TypedValue(-32767, BCF2Type.INT16)); // last value in range
-        primitives.add(new BCF2TypedValue( 32767, BCF2Type.INT16)); // last value in range
+        primitives.add(new BCF2TypedValue(128, BCF2Type.INT16));    // first value in range
+        primitives.add(new BCF2TypedValue(-32760, BCF2Type.INT16)); // last value in range
+        primitives.add(new BCF2TypedValue(32767, BCF2Type.INT16)); // last value in range
 
         // larger ints
         primitives.add(new BCF2TypedValue(-32768, BCF2Type.INT32)); // first value in range
-        primitives.add(new BCF2TypedValue( 32768, BCF2Type.INT32)); // first value in range
+        primitives.add(new BCF2TypedValue(32768, BCF2Type.INT32)); // first value in range
         primitives.add(new BCF2TypedValue(-100000, BCF2Type.INT32));
         primitives.add(new BCF2TypedValue(100000, BCF2Type.INT32));
-        primitives.add(new BCF2TypedValue(-2147483647, BCF2Type.INT32));
+        primitives.add(new BCF2TypedValue(-2147483640, BCF2Type.INT32));
         primitives.add(new BCF2TypedValue(2147483647, BCF2Type.INT32));
 
         // floats
@@ -116,7 +116,7 @@ public void before() {
         primitives.add(new BCF2TypedValue("ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ", BCF2Type.CHAR));
 
         // missing values
-        for ( BCF2Type type : BCF2Type.values() ) {
+        for (final BCF2Type type : BCF2Type.values()) {
             primitives.add(new BCF2TypedValue(null, type));
         }
 
@@ -124,7 +124,7 @@ public void before() {
         forCombinations.add(new BCF2TypedValue(100, BCF2Type.INT8));
         forCombinations.add(new BCF2TypedValue(-100, BCF2Type.INT8));
         forCombinations.add(new BCF2TypedValue(-128, BCF2Type.INT16));    // first value in range
-        forCombinations.add(new BCF2TypedValue( 128, BCF2Type.INT16));    // first value in range
+        forCombinations.add(new BCF2TypedValue(128, BCF2Type.INT16));    // first value in range
         forCombinations.add(new BCF2TypedValue(-100000, BCF2Type.INT32));
         forCombinations.add(new BCF2TypedValue(100000, BCF2Type.INT32));
         forCombinations.add(new BCF2TypedValue(0.0, BCF2Type.FLOAT));
@@ -135,7 +135,7 @@ public void before() {
         forCombinations.add(new BCF2TypedValue("ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ", BCF2Type.CHAR));
 
         // missing values
-        for ( BCF2Type type : BCF2Type.values() ) {
+        for (final BCF2Type type : BCF2Type.values()) {
             forCombinations.add(new BCF2TypedValue(null, type));
         }
     }
@@ -146,16 +146,16 @@ public void before() {
     //
     // --------------------------------------------------------------------------------
 
-    private class BCF2TypedValue {
+    private static class BCF2TypedValue {
         final BCF2Type type;
         final Object value;
 
         private BCF2TypedValue(final int value, final BCF2Type type) {
-            this(new Integer(value), type);
+            this(Integer.valueOf(value), type);
         }
 
         private BCF2TypedValue(final double value, final BCF2Type type) {
-            this(new Double(value), type);
+            this(Double.valueOf(value), type);
         }
 
         private BCF2TypedValue(final Object value, final BCF2Type type) {
@@ -163,7 +163,9 @@ private BCF2TypedValue(final Object value, final BCF2Type type) {
             this.value = value;
         }
 
-        public boolean isMissing() { return value == null; }
+        public boolean isMissing() {
+            return value == null;
+        }
 
         @Override
         public String toString() {
@@ -179,68 +181,56 @@ public String toString() {
 
     @DataProvider(name = "BCF2EncodingTestProviderBasicTypes")
     public Object[][] BCF2EncodingTestProviderBasicTypes() {
-        List<Object[]> tests = new ArrayList<Object[]>();
-        for ( BCF2TypedValue tv : basicTypes )
-            tests.add(new Object[]{Arrays.asList(tv)});
+        final List<Object[]> tests = new ArrayList<>();
+        for (final BCFVersion version : BCFVersion.SUPPORTED_VERSIONS)
+            for (final BCF2TypedValue tv : basicTypes)
+                tests.add(new Object[]{Collections.singletonList(tv), version});
         return tests.toArray(new Object[][]{});
     }
 
     private interface EncodeMe {
-        public void encode(final BCF2Encoder encoder, final BCF2TypedValue tv) throws IOException;
+        void encode(final BCF2Encoder encoder, final BCF2TypedValue tv) throws IOException;
     }
 
 
     @Test(dataProvider = "BCF2EncodingTestProviderBasicTypes")
-    public void testBCF2BasicTypesWithStaticCalls(final List<BCF2TypedValue> toEncode) throws IOException {
-        testBCF2BasicTypesWithEncodeMe(toEncode,
-                new EncodeMe() {
-                    @Override
-                    public void encode(final BCF2Encoder encoder, final BCF2TypedValue tv) throws IOException {
-                        switch ( tv.type ) {
-                            case INT8:
-                            case INT16:
-                            case INT32:
-                                encoder.encodeTypedInt((Integer)tv.value, tv.type);
-                                break;
-                            case FLOAT:
-                                encoder.encodeTypedFloat((Double)tv.value);
-                                break;
-                            case CHAR:
-                                encoder.encodeTypedString((String)tv.value);
-                                break;
-                        }
-                    }
-                });
-    }
-
-    @Test(dataProvider = "BCF2EncodingTestProviderBasicTypes")
-    public void testBCF2BasicTypesWithObjectType(final List<BCF2TypedValue> toEncode) throws IOException {
-        testBCF2BasicTypesWithEncodeMe(toEncode,
-                new EncodeMe() {
-                    @Override
-                    public void encode(final BCF2Encoder encoder, final BCF2TypedValue tv) throws IOException {
-                        encoder.encodeTyped(tv.value, tv.type);
-                    }
-                });
+    public void testBCF2BasicTypesWithStaticCalls(final List<BCF2TypedValue> toEncode, final BCFVersion version) throws IOException {
+        testBCF2BasicTypesWithEncodeMe(
+            toEncode,
+            (encoder, tv) -> {
+                switch (tv.type) {
+                    case INT8:
+                    case INT16:
+                    case INT32:
+                        encoder.encodeTypedInt((Integer) tv.value, tv.type);
+                        break;
+                    case FLOAT:
+                        encoder.encodeTypedFloat((Double) tv.value);
+                        break;
+                    case CHAR:
+                        encoder.encodeTypedString((String) tv.value);
+                        break;
+                }
+            },
+            version
+        );
     }
 
     @Test(dataProvider = "BCF2EncodingTestProviderBasicTypes")
-    public void testBCF2BasicTypesWithObjectNoType(final List<BCF2TypedValue> toEncode) throws IOException {
-        testBCF2BasicTypesWithEncodeMe(toEncode,
-                new EncodeMe() {
-                    @Override
-                    public void encode(final BCF2Encoder encoder, final BCF2TypedValue tv) throws IOException {
-                        encoder.encode(tv.value);
-                    }
-                });
+    public void testBCF2BasicTypesWithObjectType(final List<BCF2TypedValue> toEncode, final BCFVersion version) throws IOException {
+        testBCF2BasicTypesWithEncodeMe(
+            toEncode,
+            (encoder, tv) -> encoder.encodeTyped(tv.value, tv.type),
+            version
+        );
     }
 
-    public void testBCF2BasicTypesWithEncodeMe(final List<BCF2TypedValue> toEncode, final EncodeMe func) throws IOException {
-        for ( final BCF2TypedValue tv : toEncode ) {
-            BCF2Encoder encoder = new BCF2Encoder();
+    public void testBCF2BasicTypesWithEncodeMe(final List<BCF2TypedValue> toEncode, final EncodeMe func, final BCFVersion version) throws IOException {
+        for (final BCF2TypedValue tv : toEncode) {
+            final BCF2Encoder encoder = BCF2Encoder.getEncoder(version);
             func.encode(encoder, tv);
 
-            BCF2Decoder decoder = new BCF2Decoder(encoder.getRecordBytes());
+            final BCF2Decoder decoder = BCF2Decoder.getDecoder(version, encoder.getRecordBytes());
             final Object decoded = decoder.decodeTypedValue();
 
             Assert.assertNotNull(decoded);
@@ -250,20 +240,20 @@ public void testBCF2BasicTypesWithEncodeMe(final List<BCF2TypedValue> toEncode,
     }
 
     @Test(dataProvider = "BCF2EncodingTestProviderBasicTypes")
-    public void testBCF2EncodingVectors(final List<BCF2TypedValue> toEncode) throws IOException {
-        for ( final BCF2TypedValue tv : toEncode ) {
-            for ( final int length : Arrays.asList(2, 5, 10, 15, 20, 25) ) {
-                BCF2Encoder encoder = new BCF2Encoder();
-                List<Object> expected = Collections.nCopies(length, tv.value);
+    public void testBCF2EncodingVectors(final List<BCF2TypedValue> toEncode, final BCFVersion version) throws IOException {
+        for (final BCF2TypedValue tv : toEncode) {
+            for (final int length : Arrays.asList(2, 5, 10, 15, 20, 25)) {
+                final BCF2Encoder encoder = BCF2Encoder.getEncoder(version);
+                final List<Object> expected = Collections.nCopies(length, tv.value);
                 encoder.encodeTyped(expected, tv.type);
 
-                BCF2Decoder decoder = new BCF2Decoder(encoder.getRecordBytes());
+                final BCF2Decoder decoder = BCF2Decoder.getDecoder(version, encoder.getRecordBytes());
                 final Object decoded = decoder.decodeTypedValue();
 
                 Assert.assertTrue(decoded instanceof List);
-                final List<Object> decodedList = (List<Object>)decoded;
+                final List<Object> decodedList = (List<Object>) decoded;
                 Assert.assertEquals(decodedList.size(), expected.size());
-                for ( Object decodedValue : decodedList )
+                for (final Object decodedValue : decodedList)
                     myAssertEquals(tv, decodedValue);
             }
         }
@@ -271,16 +261,17 @@ public void testBCF2EncodingVectors(final List<BCF2TypedValue> toEncode) throws
 
     @DataProvider(name = "BCF2EncodingTestProviderSingletons")
     public Object[][] BCF2EncodingTestProviderSingletons() {
-        List<Object[]> tests = new ArrayList<Object[]>();
-        for ( BCF2TypedValue tv : primitives )
-            tests.add(new Object[]{Arrays.asList(tv)});
+        final List<Object[]> tests = new ArrayList<>();
+        for (final BCFVersion version : BCFVersion.SUPPORTED_VERSIONS)
+            for (final BCF2TypedValue tv : primitives)
+                tests.add(new Object[]{Collections.singletonList(tv), version});
         return tests.toArray(new Object[][]{});
     }
 
     @Test(dataProvider = "BCF2EncodingTestProviderSingletons")
-    public void testBCF2EncodingSingletons(final List<BCF2TypedValue> toEncode) throws IOException {
-        final byte[] record = encodeRecord(toEncode);
-        decodeRecord(toEncode, record);
+    public void testBCF2EncodingSingletons(final List<BCF2TypedValue> toEncode, final BCFVersion version) throws IOException {
+        final byte[] record = encodeRecord(toEncode, version);
+        decodeRecord(toEncode, record, version);
     }
 
     // -----------------------------------------------------------------
@@ -291,29 +282,30 @@ public void testBCF2EncodingSingletons(final List<BCF2TypedValue> toEncode) thro
 
     @DataProvider(name = "BCF2EncodingTestProviderSequences")
     public Object[][] BCF2EncodingTestProviderSequences() {
-        List<Object[]> tests = new ArrayList<Object[]>();
-        for ( BCF2TypedValue tv1 : forCombinations )
-            for ( BCF2TypedValue tv2 : forCombinations )
-                for ( BCF2TypedValue tv3 : forCombinations )
-                    tests.add(new Object[]{Arrays.asList(tv1, tv2, tv3)});
+        final List<Object[]> tests = new ArrayList<>();
+        for (final BCFVersion version : BCFVersion.SUPPORTED_VERSIONS)
+            for (final BCF2TypedValue tv1 : forCombinations)
+                for (final BCF2TypedValue tv2 : forCombinations)
+                    for (final BCF2TypedValue tv3 : forCombinations)
+                        tests.add(new Object[]{Arrays.asList(tv1, tv2, tv3), version});
         return tests.toArray(new Object[][]{});
     }
 
     @Test(dataProvider = "BCF2EncodingTestProviderBasicTypes")
-    public void testBCF2EncodingVectorsWithMissing(final List<BCF2TypedValue> toEncode) throws IOException {
-        for ( final BCF2TypedValue tv : toEncode ) {
-            if ( tv.type != BCF2Type.CHAR ) {
-                for ( final int length : Arrays.asList(2, 5, 10, 15, 20, 25) ) {
+    public void testBCF2EncodingVectorsWithMissing(final List<BCF2TypedValue> toEncode, final BCFVersion version) throws IOException {
+        for (final BCF2TypedValue tv : toEncode) {
+            if (tv.type != BCF2Type.CHAR) {
+                for (final int length : Arrays.asList(2, 5, 10, 15, 20, 25)) {
                     final byte td = BCF2Utils.encodeTypeDescriptor(1, tv.type);
 
-                    final BCF2Encoder encoder = new BCF2Encoder();
-                    for ( int i = 0; i < length; i++ ) {
+                    final BCF2Encoder encoder = BCF2Encoder.getEncoder(version);
+                    for (int i = 0; i < length; i++) {
                         encoder.encodeRawValue(i % 2 == 0 ? null : tv.value, tv.type);
                     }
 
-                    final BCF2Decoder decoder = new BCF2Decoder(encoder.getRecordBytes());
+                    final BCF2Decoder decoder = BCF2Decoder.getDecoder(version, encoder.getRecordBytes());
 
-                    for ( int i = 0; i < length; i++ ) {
+                    for (int i = 0; i < length; i++) {
                         final Object decoded = decoder.decodeTypedValue(td);
                         myAssertEquals(i % 2 == 0 ? new BCF2TypedValue(null, tv.type) : tv, decoded);
                     }
@@ -323,9 +315,9 @@ public void testBCF2EncodingVectorsWithMissing(final List<BCF2TypedValue> toEnco
     }
 
     @Test(dataProvider = "BCF2EncodingTestProviderSequences", dependsOnMethods = "testBCF2EncodingSingletons")
-    public void testBCF2EncodingTestProviderSequences(final List<BCF2TypedValue> toEncode) throws IOException {
-        final byte[] record = encodeRecord(toEncode);
-        decodeRecord(toEncode, record);
+    public void testBCF2EncodingTestProviderSequences(final List<BCF2TypedValue> toEncode, final BCFVersion version) throws IOException {
+        final byte[] record = encodeRecord(toEncode, version);
+        decodeRecord(toEncode, record, version);
     }
 
     // -----------------------------------------------------------------
@@ -334,20 +326,58 @@ public void testBCF2EncodingTestProviderSequences(final List<BCF2TypedValue> toE
     //
     // -----------------------------------------------------------------
 
+    @DataProvider(name = "Strings")
+    public Object[][] stringsProvider() {
+        final List<Object[]> tests = new ArrayList<>();
+        for (final BCFVersion version : BCFVersion.SUPPORTED_VERSIONS) {
+            tests.add(new Object[]{"", version});
+            tests.add(new Object[]{" ", version});
+            tests.add(new Object[]{"s", version});
+            tests.add(new Object[]{"sss", version});
+        }
+        return tests.toArray(new Object[][]{});
+    }
+
+    @Test(dataProvider = "Strings")
+    public void testEncodingOfListOfString(final String s, final BCFVersion version) throws IOException {
+        final BCF2Encoder encoder = BCF2Encoder.getEncoder(version);
+        encoder.encodeTypedString(s);
+
+        final BCF2Decoder decoder = BCF2Decoder.getDecoder(version, encoder.getRecordBytes());
+        final String decoded = decoder.decodeUnexplodedString();
+
+        Assert.assertEquals(decoded, s); // TestNG argument order is (actual, expected)
+    }
+
     @DataProvider(name = "ListOfStrings")
-    public Object[][] listOfStringsProvider() {
-        List<Object[]> tests = new ArrayList<Object[]>();
-        tests.add(new Object[]{Arrays.asList("s1", "s2"), ",s1,s2"});
-        tests.add(new Object[]{Arrays.asList("s1", "s2", "s3"), ",s1,s2,s3"});
-        tests.add(new Object[]{Arrays.asList("s1", "s2", "s3", "s4"), ",s1,s2,s3,s4"});
+    public Object[][] listofStringsProvider() {
+        final List<Object[]> tests = new ArrayList<>();
+        for (final BCFVersion version : BCFVersion.SUPPORTED_VERSIONS) {
+            for (final int padding : Arrays.asList(0, 1, 5)) {
+                tests.add(new Object[]{Collections.emptyList(), padding, version});
+                tests.add(new Object[]{Collections.singletonList("s"), padding, version});
+                tests.add(new Object[]{Arrays.asList("s", ""), padding, version});
+                tests.add(new Object[]{Arrays.asList("s", "ss", "sss"), padding, version});
+            }
+        }
         return tests.toArray(new Object[][]{});
     }
 
     @Test(dataProvider = "ListOfStrings")
-    public void testEncodingListOfString(List<String> strings, String expected) throws IOException {
-        final String collapsed = BCF2Utils.collapseStringList(strings);
-        Assert.assertEquals(collapsed, expected);
-        Assert.assertEquals(BCF2Utils.explodeStringList(collapsed), strings);
+    public void testEncodingOfListOfString(final List<String> strings, final int padding, final BCFVersion version) {
+        final BCF2Encoder encoder = BCF2Encoder.getEncoder(version);
+        final byte[] bytes = encoder.compactStrings(strings);
+        final int paddedSize = bytes.length + padding;
+        encoder.encodeRawString(bytes, paddedSize);
+
+        final BCF2Decoder decoder = BCF2Decoder.getDecoder(version, encoder.getRecordBytes());
+        final List<String> decodedStrings = decoder.decodeExplodedStrings(paddedSize, ',');
+
+        // Padding values not included (TestNG argument order is actual, expected)
+        Assert.assertEquals(decodedStrings, strings);
+
+        // The decoder should have drained all the remaining padding values from the stream
+        Assert.assertTrue(decoder.blockIsFullyDecoded());
     }
 
     // -----------------------------------------------------------------
@@ -358,16 +388,16 @@ public void testEncodingListOfString(List<String> strings, String expected) thro
 
     @DataProvider(name = "BestIntTypeTests")
     public Object[][] BestIntTypeTests() {
-        List<Object[]> tests = new ArrayList<Object[]>();
-        tests.add(new Object[]{Arrays.asList(1), BCF2Type.INT8});
+        final List<Object[]> tests = new ArrayList<>();
+        tests.add(new Object[]{Collections.singletonList(1), BCF2Type.INT8});
         tests.add(new Object[]{Arrays.asList(1, 10), BCF2Type.INT8});
         tests.add(new Object[]{Arrays.asList(1, 10, 100), BCF2Type.INT8});
         tests.add(new Object[]{Arrays.asList(1, -1), BCF2Type.INT8});
         tests.add(new Object[]{Arrays.asList(1, 1000), BCF2Type.INT16});
         tests.add(new Object[]{Arrays.asList(1, 1000, 10), BCF2Type.INT16});
         tests.add(new Object[]{Arrays.asList(1, 1000, 100), BCF2Type.INT16});
-        tests.add(new Object[]{Arrays.asList(1000), BCF2Type.INT16});
-        tests.add(new Object[]{Arrays.asList(100000), BCF2Type.INT32});
+        tests.add(new Object[]{Collections.singletonList(1000), BCF2Type.INT16});
+        tests.add(new Object[]{Collections.singletonList(100000), BCF2Type.INT32});
         tests.add(new Object[]{Arrays.asList(100000, 10), BCF2Type.INT32});
         tests.add(new Object[]{Arrays.asList(100000, 100), BCF2Type.INT32});
         tests.add(new Object[]{Arrays.asList(100000, 1, -10), BCF2Type.INT32});
@@ -376,22 +406,21 @@ public Object[][] BestIntTypeTests() {
     }
 
     @Test(dataProvider = "BestIntTypeTests")
-    public void determineBestEncoding(final List<Integer> ints, final BCF2Type expectedType) throws IOException {
+    public void determineBestEncoding(final List<Integer> ints, final BCF2Type expectedType) {
         Assert.assertEquals(BCF2Utils.determineIntegerType(ints), expectedType);
         Assert.assertEquals(BCF2Utils.determineIntegerType(toPrimitive(ints.toArray(new Integer[0]))), expectedType);
     }
 
-    private static int[] toPrimitive ( final Integer[] array ) {
-        if ( array == null ) {
+    private static int[] toPrimitive(final Integer[] array) {
+        if (array == null) {
             return null;
-        }
-        else if ( array.length == 0 ) {
+        } else if (array.length == 0) {
             return new int[0];
         }
 
         final int[] result = new int[array.length];
         for (int i = 0; i < array.length; i++) {
-            result[i] = array[i].intValue();
+            result[i] = array[i];
         }
         return result;
     }
@@ -403,20 +432,20 @@ else if ( array.length == 0 ) {
     // -----------------------------------------------------------------
 
     @Test(dataProvider = "BCF2EncodingTestProviderSequences", dependsOnMethods = "testBCF2EncodingTestProviderSequences")
-    public void testReadAndSkipWithMultipleBlocks(final List<BCF2TypedValue> block) throws IOException {
-        testReadAndSkipWithMultipleBlocks(block, forCombinations);
-        testReadAndSkipWithMultipleBlocks(forCombinations, block);
+    public void testReadAndSkipWithMultipleBlocks(final List<BCF2TypedValue> block, final BCFVersion version) throws IOException {
+        testReadAndSkipWithMultipleBlocks(block, forCombinations, version);
+        testReadAndSkipWithMultipleBlocks(forCombinations, block, version);
     }
 
-    public void testReadAndSkipWithMultipleBlocks(final List<BCF2TypedValue> block1, final List<BCF2TypedValue> block2) throws IOException {
-        final byte[] record1 = encodeRecord(block1);
-        final byte[] record2 = encodeRecord(block2);
+    public void testReadAndSkipWithMultipleBlocks(final List<BCF2TypedValue> block1, final List<BCF2TypedValue> block2, final BCFVersion version) throws IOException {
+        final byte[] record1 = encodeRecord(block1, version);
+        final byte[] record2 = encodeRecord(block2, version);
 
         // each record is individually good
-        decodeRecord(block1, record1);
-        decodeRecord(block2, record2);
+        decodeRecord(block1, record1, version);
+        decodeRecord(block2, record2, version);
 
-        BCF2Decoder decoder = new BCF2Decoder();
+        final BCF2Decoder decoder = BCF2Decoder.getDecoder(version);
 
         // test setting
         decoder.setRecordBytes(record1);
@@ -426,7 +455,7 @@ public void testReadAndSkipWithMultipleBlocks(final List<BCF2TypedValue> block1,
 
         // test combining the streams
         final byte[] combined = combineRecords(record1, record2);
-        final List<BCF2TypedValue> combinedObjects = new ArrayList<BCF2TypedValue>(block1);
+        final List<BCF2TypedValue> combinedObjects = new ArrayList<>(block1);
         combinedObjects.addAll(block2);
 
         // the combined bytes is the same as the combined objects
@@ -447,70 +476,60 @@ public void testReadAndSkipWithMultipleBlocks(final List<BCF2TypedValue> block1,
     //
     // Test encoding / decoding arrays of ints
     //
-    // This checks that we can encode and decode correctly with the
-    // low-level decodeIntArray function arrays of values.  This
-    // has to be pretty comprehensive as decodeIntArray is a highly optimized
+    // This checks that we can correctly encode and decode int[] arrays
+    // with the low-level decodeIntArray function. This has to be
+    // pretty comprehensive as decodeIntArray is a highly optimized
     // piece of code with lots of edge cases.  The values we are encoding
     // don't really matter -- just that the values come back as expected.
     //
+    // decodeIntArray is only meant to decode arrays that are guaranteed
+    // to not have internal missing values, but may be missing (or EOV)
+    // padded, so we are interested in whether the decoder correctly
+    // truncates padded arrays while draining the stream.
     // -----------------------------------------------------------------
 
-    @DataProvider(name = "IntArrays")
-    public Object[][] makeIntArrays() {
-        List<Object[]> tests = new ArrayList<Object[]>();
+    @DataProvider(name = "BCF2_2IntArrays")
+    public Object[][] IntArrays() {
+        final List<Object[]> tests = new ArrayList<>();
+        for (final BCFVersion version : BCFVersion.SUPPORTED_VERSIONS) {
+            for (final int nValues : Arrays.asList(0, 1, 2, 5, 10, 100)) {
+                for (final int nPad : Arrays.asList(0, 1, 2, 5, 10, 100)) {
+                    final int nElements = nValues + nPad;
 
-        for ( int nValues : Arrays.asList(0, 1, 2, 5, 10, 100) ) {
-            for ( int nPad : Arrays.asList(0, 1, 2, 5, 10, 100) ) {
-                int nElements = nValues + nPad;
+                    final int[] vs = new int[nValues];
 
-                List<Integer> values = new ArrayList<Integer>(nElements);
+                    // add nValues from 0 to nValues - 1
+                    for (int i = 0; i < nValues; i++)
+                        vs[i] = i;
 
-                // add nValues from 0 to nValues - 1
-                for ( int i = 0; i < nValues; i++ )
-                    values.add(i);
-
-                // add nPad nulls
-                for ( int i = 0; i < nPad; i++ )
-                    values.add(null);
-
-                tests.add(new Object[]{values});
+                    tests.add(new Object[]{vs, nElements, version});
+                }
             }
         }
 
         return tests.toArray(new Object[][]{});
     }
 
-    @Test(dataProvider = "IntArrays")
-    public void testIntArrays(final List<Integer> ints) throws IOException {
-        final BCF2Encoder encoder = new BCF2Encoder();
-        encoder.encodeTyped(ints, BCF2Type.INT16);
+    @Test(dataProvider = "BCF2_2IntArrays")
+    public void testBCF2_2IntArrays(final int[] ints, final int paddedSize, final BCFVersion version) throws IOException {
+        final BCF2Encoder encoder = BCF2Encoder.getEncoder(version);
+        encoder.encodeTypedVecInt(ints, paddedSize);
 
-        final BCF2Decoder decoder = new BCF2Decoder(encoder.getRecordBytes());
-
-        final byte typeDescriptor = decoder.readTypeDescriptor();
+        final BCF2Decoder decoder = BCF2Decoder.getDecoder(version, encoder.getRecordBytes());
 
         // read the int[] with the low-level version
+        final byte typeDescriptor = decoder.readTypeDescriptor();
         final int size = decoder.decodeNumberOfElements(typeDescriptor);
         final int[] decoded = decoder.decodeIntArray(typeDescriptor, size);
 
-        if ( isMissing(ints) ) {
-            // we expect that the result is null in this case
-            Assert.assertNull(decoded, "Encoded all missing values -- expected null");
+        if (ints.length == 0) {
+            Assert.assertNull(decoded);
         } else {
-            // we expect at least some values to come back
-            Assert.assertTrue(decoded.length > 0, "Must have at least 1 element for non-null encoded data");
-
-            // check corresponding values
-            for ( int i = 0; i < ints.size(); i++ ) {
-                final Integer expected = ints.get(i);
-
-                if ( expected == null ) {
-                    Assert.assertTrue(decoded.length <= i, "we expect decoded to be truncated for missing values");
-                } else {
-                    Assert.assertTrue(decoded.length > i, "we expected at least " + i + " values in decoded array");
-                    Assert.assertEquals(decoded[i], (int)expected);
-                }
-            }
+            // Padding values not included: both the length and the values must round-trip
+            Assert.assertEquals(decoded, ints);
+
+            // The decoder should have drained all the remaining padding values from the stream
+            Assert.assertTrue(decoder.blockIsFullyDecoded());
         }
     }
 
@@ -520,24 +539,17 @@ public void testIntArrays(final List<Integer> ints) throws IOException {
     //
     // -----------------------------------------------------------------
 
-    private final byte[] combineRecords(final byte[] record1, final byte[] record2) throws IOException {
-        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    private byte[] combineRecords(final byte[] record1, final byte[] record2) throws IOException {
+        final ByteArrayOutputStream baos = new ByteArrayOutputStream();
         baos.write(record1);
         baos.write(record2);
         return baos.toByteArray();
     }
 
-    private final byte[] encodeRecord(final List<BCF2TypedValue> toEncode) throws IOException {
-        BCF2Encoder encoder = new BCF2Encoder();
-
-        for ( final BCF2TypedValue tv : toEncode ) {
-            if ( tv.isMissing() )
-                encoder.encodeTypedMissing(tv.type);
-            else {
-                final BCF2Type encodedType = encoder.encode(tv.value);
-                if ( tv.type != null ) // only if we have an expectation
-                    Assert.assertEquals(encodedType, tv.type);
-            }
+    private byte[] encodeRecord(final List<BCF2TypedValue> toEncode, final BCFVersion version) throws IOException {
+        final BCF2Encoder encoder = BCF2Encoder.getEncoder(version);
+        for (final BCF2TypedValue tv : toEncode) {
+            encoder.encodeTyped(tv.value, tv.type);
         }
 
         // check output
@@ -547,12 +559,12 @@ private final byte[] encodeRecord(final List<BCF2TypedValue> toEncode) throws IO
         return record;
     }
 
-    private final void decodeRecord(final List<BCF2TypedValue> toEncode, final byte[] record) throws IOException {
-        decodeRecord(toEncode, new BCF2Decoder(record));
+    private void decodeRecord(final List<BCF2TypedValue> toEncode, final byte[] record, final BCFVersion version) throws IOException {
+        decodeRecord(toEncode, BCF2Decoder.getDecoder(version, record));
     }
 
-    private final void decodeRecord(final List<BCF2TypedValue> toEncode, final BCF2Decoder decoder) throws IOException {
-        for ( final BCF2TypedValue tv : toEncode ) {
+    private void decodeRecord(final List<BCF2TypedValue> toEncode, final BCF2Decoder decoder) throws IOException {
+        for (final BCF2TypedValue tv : toEncode) {
             Assert.assertFalse(decoder.blockIsFullyDecoded());
             final Object decoded = decoder.decodeTypedValue();
 
@@ -562,25 +574,17 @@ private final void decodeRecord(final List<BCF2TypedValue> toEncode, final BCF2D
         Assert.assertTrue(decoder.blockIsFullyDecoded());
     }
 
-    private final void myAssertEquals(final BCF2TypedValue tv, final Object decoded) {
-        if ( tv.value == null ) { // special needs for instanceof double
-            Assert.assertEquals(decoded, tv.value);
-        } else if ( tv.type == BCF2Type.FLOAT ) { // need tolerance for floats, and they aren't null
+    private void myAssertEquals(final BCF2TypedValue tv, final Object decoded) {
+        if (tv.value == null) { // special needs for instanceof double
+            Assert.assertNull(decoded);
+        } else if (tv.type == BCF2Type.FLOAT) { // need tolerance for floats, and they aren't null
             Assert.assertTrue(decoded instanceof Double);
 
-            final double valueFloat = (Double)tv.value;
-            final double decodedFloat = (Double)decoded;
+            final double valueFloat = (Double) tv.value;
+            final double decodedFloat = (Double) decoded;
 
             VariantBaseTest.assertEqualsDoubleSmart(decodedFloat, valueFloat, FLOAT_TOLERANCE);
         } else
             Assert.assertEquals(decoded, tv.value);
     }
-
-    private final boolean isMissing(final List<Integer> values) {
-        if ( values != null )
-            for ( Integer value : values )
-                if ( value != null )
-                    return false;
-        return true;
-    }
 }
\ No newline at end of file
diff --git a/src/test/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldEncoderTest.java b/src/test/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldEncoderTest.java
new file mode 100644
index 0000000000..afb198286e
--- /dev/null
+++ b/src/test/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldEncoderTest.java
@@ -0,0 +1,464 @@
+package htsjdk.variant.bcf2.BCF2FieldWriter;
+
+import htsjdk.variant.VariantBaseTest;
+import htsjdk.variant.bcf2.BCF2Type;
+import htsjdk.variant.bcf2.BCF2Utils;
+import htsjdk.variant.variantcontext.Allele;
+import htsjdk.variant.variantcontext.Genotype;
+import htsjdk.variant.variantcontext.GenotypeBuilder;
+import htsjdk.variant.variantcontext.VariantContext;
+import htsjdk.variant.variantcontext.VariantContextBuilder;
+import htsjdk.variant.bcf2.BCF2Encoder;
+import htsjdk.variant.vcf.VCFFormatHeaderLine;
+import htsjdk.variant.vcf.VCFHeaderLineCount;
+import htsjdk.variant.vcf.VCFHeaderLineType;
+import htsjdk.variant.vcf.VCFInfoHeaderLine;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.stream.Collectors;
+
+public class BCF2FieldEncoderTest extends VariantBaseTest {
+
+    private static final BCF2Encoder.BCF2_2Encoder ENCODER = new BCF2Encoder.BCF2_2Encoder();
+    private static final BCF2FieldEncoder.AtomicIntFieldEncoder ATOMIC_INT = new BCF2FieldEncoder.AtomicIntFieldEncoder(ENCODER);
+    private static final BCF2FieldEncoder.AtomicFloatFieldEncoder ATOMIC_FLOAT = new BCF2FieldEncoder.AtomicFloatFieldEncoder(ENCODER);
+    private static final BCF2FieldEncoder.CharFieldEncoder CHAR = new BCF2FieldEncoder.CharFieldEncoder(ENCODER);
+    private static final BCF2FieldEncoder.StringFieldEncoder STRING = new BCF2FieldEncoder.StringFieldEncoder(ENCODER);
+    private static final BCF2FieldEncoder.VecIntFieldEncoder VEC_INT = new BCF2FieldEncoder.VecIntFieldEncoder(ENCODER);
+    private static final BCF2FieldEncoder.VecFloatFieldEncoder VEC_FLOAT = new BCF2FieldEncoder.VecFloatFieldEncoder(ENCODER);
+
+
+    @DataProvider(name = "fieldEncoderCases")
+    public static Object[][] fieldEncoderCases() {
+        final List<Object[]> cases = new ArrayList<>();
+
+        // Integer encoding
+        {
+            for (final BCF2Type intType : BCF2Utils.INTEGER_TYPES_BY_SIZE) {
+                final int byteWidth = intType.getSizeInBytes();
+                final List<Object> intsToEncode = Arrays.asList(1, -1, null, 1 << (byteWidth * 8 - 2));
+                final ByteBuffer bytes = ByteBuffer.allocate(intsToEncode.size() * byteWidth);
+                for (final Object o : intsToEncode) {
+                    final int i = o == null ? intType.getMissingBytes() : (Integer) o;
+                    for (int shift = 0; shift < byteWidth; shift++) {
+                        bytes.put((byte) (i >> (shift * 8)));
+                    }
+                }
+                cases.add(new Object[]{
+                    ATOMIC_INT,
+                    intsToEncode,
+                    bytes.array(),
+                });
+            }
+        }
+
+        // Float encoding
+        {
+            final int byteWidth = BCF2Type.FLOAT.getSizeInBytes();
+            final List<Object> floatsToEncode = Arrays.asList(1.0, -1.0, null, Double.NaN, Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY);
+            final ByteBuffer bytes = ByteBuffer.allocate(floatsToEncode.size() * byteWidth);
+            for (final Object o : floatsToEncode) {
+                final int i = o == null ? BCF2Type.FLOAT.getMissingBytes() : Float.floatToRawIntBits((float) (double) (Double) o);
+                for (int shift = 0; shift < byteWidth; shift++) {
+                    bytes.put((byte) (i >> (shift * 8)));
+                }
+            }
+            cases.add(new Object[]{
+                ATOMIC_FLOAT,
+                floatsToEncode,
+                bytes.array(),
+            });
+        }
+
+        // Char encoding
+        {
+            final List<Object> stringsToEncode = Arrays.asList("str", null, "\0a\0");
+            final int maxByteWidth = stringsToEncode
+                .stream()
+                .mapToInt(o -> o == null ? 0 : ((String) o).getBytes(StandardCharsets.UTF_8).length)
+                .max().getAsInt();
+            final ByteBuffer bytes = ByteBuffer.allocate(stringsToEncode.size() * maxByteWidth);
+            for (final Object o : stringsToEncode) {
+                final byte[] b = o == null ? new byte[0] : ((String) o).getBytes(StandardCharsets.UTF_8);
+                bytes.put(b);
+                for (int i = maxByteWidth - b.length; i > 0; i--) bytes.put((byte) 0);
+            }
+            cases.add(new Object[]{
+                CHAR,
+                stringsToEncode,
+                bytes.array(),
+            });
+        }
+
+        // String encoding
+        {
+            final List<Object> stringsToEncode = Arrays.asList("st", null, Arrays.asList("a", "b"), new String[]{"a", "b"});
+            final byte[] bytes = new byte[]{
+                's', 't', '\0',   // padding
+                '\0', '\0', '\0', // null values should be encoded as all NULL bytes
+                'a', ',', 'b',    // lists of strings joined with ,
+                'a', ',', 'b',    // arrays of strings joined with ,
+            };
+            cases.add(new Object[]{
+                STRING,
+                stringsToEncode,
+                bytes,
+            });
+        }
+
+        // Vector of integers encoding
+        {
+            for (final BCF2Type intType : BCF2Utils.INTEGER_TYPES_BY_SIZE) {
+                final int byteWidth = intType.getSizeInBytes();
+                final List<Object> vecsToEncode = Arrays.asList(
+                    Arrays.asList(null, 1),  // Internal null should be missing bytes, not EOV
+                    new int[]{1},            // Short vector should be EOV padded
+                    null,                    // Entirely missing vector should be all EOV
+                    1 << (byteWidth * 8 - 2) // Atomic value should be treated as vector of size 1
+                );
+                final int nValues = 2;
+                final ByteBuffer bytes = ByteBuffer.allocate(nValues * vecsToEncode.size() * byteWidth);
+                final int[] ints = new int[]{
+                    intType.getMissingBytes(), 1,
+                    1, intType.getEOVBytes(),
+                    intType.getEOVBytes(), intType.getEOVBytes(),
+                    1 << (byteWidth * 8 - 2), intType.getEOVBytes(),
+                };
+                for (final int i : ints) {
+                    for (int shift = 0; shift < byteWidth; shift++) {
+                        bytes.put((byte) (i >> (shift * 8)));
+                    }
+                }
+                cases.add(new Object[]{
+                    VEC_INT,
+                    vecsToEncode,
+                    bytes.array(),
+                });
+            }
+        }
+
+        // Vector of floats encoding
+        {
+            final int byteWidth = BCF2Type.FLOAT.getSizeInBytes();
+            final List<Object> vecsToEncode = Arrays.asList(
+                Arrays.asList(null, 1.0), // Internal null should be missing bytes, not EOV
+                new double[]{1.0},        // Short vector should be EOV padded
+                null,                     // Entirely missing vector should be all EOV
+                Double.NaN                // Atomic value should be treated as vector of size 1
+            );
+            final int nValues = 2;
+            final ByteBuffer bytes = ByteBuffer.allocate(nValues * vecsToEncode.size() * byteWidth);
+            final int[] ints = new int[]{
+                BCF2Type.FLOAT.getMissingBytes(), Float.floatToRawIntBits(1.0f),
+                Float.floatToRawIntBits(1.0f), BCF2Type.FLOAT.getEOVBytes(),
+                BCF2Type.FLOAT.getEOVBytes(), BCF2Type.FLOAT.getEOVBytes(),
+                Float.floatToRawIntBits((float) Double.NaN), BCF2Type.FLOAT.getEOVBytes(),
+            };
+            for (final int i : ints) {
+                for (int shift = 0; shift < byteWidth; shift++) {
+                    bytes.put((byte) (i >> (shift * 8)));
+                }
+            }
+            cases.add(new Object[]{
+                VEC_FLOAT,
+                vecsToEncode,
+                bytes.array(),
+            });
+        }
+
+        return cases.toArray(new Object[0][]);
+    }
+
+    @Test(dataProvider = "fieldEncoderCases") // each case: {encoder under test, values to load, expected encoded bytes}
+    public static void testFieldEncoders(
+        final BCF2FieldEncoder encoder,
+        final List<Object> objects,
+        final byte[] expectedBytes
+    ) throws IOException {
+        for (final Object o : objects) { // every value is loaded before encode() is called once for the whole batch
+            encoder.load(o);
+        }
+        encoder.encode();
+        Assert.assertEquals(ENCODER.getRecordBytes(), expectedBytes); // TestNG argument order is (actual, expected)
+    }
+
+
+    @DataProvider(name = "siteWriterCases") // each case: {site writer, VariantContext to encode, expected encoded bytes}
+    public static Object[][] siteWriterCases() {
+        final List<Object[]> cases = new ArrayList<>();
+
+        // Generic site attribute writer for an Integer INFO line declared with count 2
+        {
+            final VCFInfoHeaderLine info = new VCFInfoHeaderLine("genericKey", 2, VCFHeaderLineType.Integer, "test");
+            final BCF2FieldWriter.SiteAttributeWriter writer = new BCF2FieldWriter.SiteAttributeWriter(info, 1, ENCODER);
+            final VariantContext vc1 = new VariantContextBuilder() // attribute present, but with only one value
+                .attribute("genericKey", 1)
+                .chr("dummy")
+                .alleles("A")
+                .make();
+            final byte[] bytes1 = new byte[]{
+                0x21, // 2 8-bit ints
+                1, (byte) BCF2Type.INT8.getEOVBytes() // Field writer should pad out array to 2 elements to match header count
+            };
+            cases.add(new Object[]{
+                writer, vc1, bytes1,
+            });
+
+            final VariantContext vc2 = new VariantContextBuilder() // attribute absent from the VariantContext
+                .chr("dummy")
+                .alleles("A")
+                .make();
+            final byte[] bytes2 = new byte[]{
+                0x01, // Field writer should directly write typed missing, ignoring header count
+            };
+            cases.add(new Object[]{
+                writer, vc2, bytes2,
+            });
+        }
+
+        // Flag writer: a Flag INFO line (count 0) whose attribute is set to true on the VariantContext
+        {
+            final VCFInfoHeaderLine info = new VCFInfoHeaderLine("genericKey", 0, VCFHeaderLineType.Flag, "test");
+            final BCF2FieldWriter.SiteFlagWriter writer = new BCF2FieldWriter.SiteFlagWriter(info, 1, ENCODER);
+            final VariantContext vc = new VariantContextBuilder()
+                .attribute("genericKey", true)
+                .chr("dummy")
+                .alleles("A")
+                .make();
+            final byte[] bytes = new byte[]{
+                0x00, // MISSING type just used as a filler value
+            };
+            cases.add(new Object[]{
+                writer, vc, bytes,
+            });
+        }
+        return cases.toArray(new Object[0][]);
+    }
+
+    @Test(dataProvider = "siteWriterCases") // each case: {site writer, VariantContext to encode, expected encoded bytes}
+    public void testSiteWriters(
+        final BCF2FieldWriter.SiteWriter writer,
+        final VariantContext vc,
+        final byte[] expectedBytes
+    ) throws IOException {
+        // Skip starting so we don't get key in output
+        writer.encode(vc);
+        Assert.assertEquals(ENCODER.getRecordBytes(), expectedBytes); // TestNG argument order is (actual, expected)
+    }
+
+
+    @DataProvider(name = "genotypeWriterCases") // each case: {genotype writer, VariantContext, sample names to encode, expected bytes}
+    public static Object[][] genotypeWriterCases() {
+        final List<Object[]> cases = new ArrayList<>();
+
+        // Generic genotype attribute writer for an Integer FORMAT line declared with count 2
+        {
+            final VCFFormatHeaderLine info = new VCFFormatHeaderLine("genericKey", 2, VCFHeaderLineType.Integer, "test");
+            final BCF2FieldWriter.GenotypeAttributeWriter writer = new BCF2FieldWriter.GenotypeAttributeWriter(info, 1, ENCODER);
+            final VariantContext vc = new VariantContextBuilder()
+                .attribute("genericKey", 1) // NOTE(review): site-level attribute; presumably not read by the genotype writer - confirm
+                .chr("dummy")
+                .genotypes(new GenotypeBuilder()
+                    .name("sample")
+                    .attribute("genericKey", 1)
+                    .make()
+                )
+                .alleles("A")
+                .make();
+            final byte[] bytes = new byte[]{
+                0x21, // 2 8-bit ints
+                1, (byte) BCF2Type.INT8.getEOVBytes() // Field writer should pad out array to 2 elements to match header count
+            };
+            cases.add(new Object[]{
+                writer, vc, Collections.singletonList("sample"), bytes,
+            });
+        }
+
+        // FT encoder: one genotype carrying filter "f" and one explicitly unfiltered genotype
+        {
+            final VCFFormatHeaderLine info = new VCFFormatHeaderLine("FT", 1, VCFHeaderLineType.String, "test");
+            final BCF2FieldWriter writer = BCF2FieldWriter.createGenotypeWriter(info, 1, ENCODER);
+            final VariantContext vc = new VariantContextBuilder()
+                .chr("dummy")
+                .genotypes(
+                    new GenotypeBuilder()
+                        .name("hasFilter")
+                        .filter("f")
+                        .make(),
+                    new GenotypeBuilder()
+                        .name("noFilter")
+                        .unfiltered() // should be encoded as PASS
+                        .make()
+                )
+                .alleles("A")
+                .make();
+            final byte[] bytes = new byte[]{
+                0x47, // Strings of length 4
+                'f', 0, 0, 0, // hasFilter: "f" followed by zero bytes up to length 4
+                'P', 'A', 'S', 'S', // noFilter
+            };
+            cases.add(new Object[]{
+                writer, vc, Arrays.asList("hasFilter", "noFilter"), bytes,
+            });
+        }
+
+        // GT encoder: the same writer is reused for both GT cases below
+        {
+            final VCFFormatHeaderLine format = new VCFFormatHeaderLine("GT", 1, VCFHeaderLineType.String, "test");
+            final Allele ref = Allele.REF_A;
+            final Allele alt = Allele.ALT_T;
+
+            final BCF2FieldWriter writer = BCF2FieldWriter.createGenotypeWriter(format, 1, ENCODER);
+            {
+                final VariantContext vc = new VariantContextBuilder()
+                    .chr("dummy")
+                    .alleles(Arrays.asList(ref, alt))
+                    .genotypes(
+                        new GenotypeBuilder()
+                            .name("refAlt")
+                            .alleles(Arrays.asList(ref, alt))
+                            .make(),
+                        new GenotypeBuilder()
+                            .name("refAltPhased")
+                            .alleles(Arrays.asList(ref, alt))
+                            .phased(true)
+                            .make(),
+                        new GenotypeBuilder()
+                            .name("missingMissing")
+                            .alleles(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL))
+                            .make(),
+                        new GenotypeBuilder()
+                            .name("haploid")
+                            .alleles(Collections.singletonList(ref))
+                            .make()
+                    )
+                    .make();
+                final byte[] bytes = new byte[]{ // one pair per sample; presumably in the order the genotypes were added above - confirm
+                    0x21, // 2 8-bit ints
+                    0x02, 0x04, // refAlt
+                    0x02, 0x05, // refAltPhased
+                    0x00, 0x00, // missingMissing
+                    0x02, (byte) 0x81, // haploid; 0x81 is presumably the INT8 end-of-vector padding value - confirm
+                };
+                cases.add(new Object[]{
+                    writer, vc,
+                    vc.getGenotypes().stream().map(Genotype::getSampleName).collect(Collectors.toList()),
+                    bytes,
+                });
+            }
+
+            // TODO revisit this test once the correct behavior is determined
+            // Test encoding when the requested sample name has no genotype in the VC
+            {
+                final VariantContext vcMissingGenotypes = new VariantContextBuilder()
+                    .chr("dummy")
+                    .alleles(Arrays.asList(ref, alt))
+                    .genotypes(
+                        new GenotypeBuilder()
+                            .name("refAlt")
+                            .alleles(Arrays.asList(ref, alt))
+                            .make()
+                    )
+                    .make();
+                final byte[] bytes = new byte[]{
+                    0x21, // 2 8-bit ints
+                    (byte) BCF2Type.INT8.getMissingBytes(), (byte) BCF2Type.INT8.getMissingBytes(),
+                };
+                cases.add(new Object[]{
+                    writer, vcMissingGenotypes,
+                    Collections.singletonList("sampleNameNotPresentInGenotype"),
+                    bytes,
+                });
+            }
+        }
+
+        // Inline integer encoder (DP): 256 does not fit in 8 bits, and the expected bytes encode both samples as 16-bit
+        {
+            final VCFFormatHeaderLine format = new VCFFormatHeaderLine("DP", 1, VCFHeaderLineType.Integer, "test");
+            final BCF2FieldWriter writer = BCF2FieldWriter.createGenotypeWriter(format, 1, ENCODER);
+            final VariantContext vc = new VariantContextBuilder()
+                .chr("dummy")
+                .genotypes(
+                    new GenotypeBuilder()
+                        .name("small")
+                        .DP(2)
+                        .make(),
+                    new GenotypeBuilder()
+                        .name("big")
+                        .DP(256)
+                        .make()
+                )
+                .alleles("A")
+                .make();
+
+            final byte[] bytes = new byte[]{
+                0x12, // 1 16-bit int
+                0x02, 0x00, // 2, low byte first
+                (byte) 256, 256 >> 8, // 256, low byte first: 0x00, 0x01
+            };
+
+            cases.add(new Object[]{
+                writer, vc,
+                vc.getGenotypes().stream().map(Genotype::getSampleName).collect(Collectors.toList()),
+                bytes,
+            });
+        }
+
+        // Inline vector of integer encoder (PL, count G): expected bytes hold 3 values per sample, short arrays padded with EOV
+        {
+            final VCFFormatHeaderLine format = new VCFFormatHeaderLine("PL", VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "test");
+            final Allele ref = Allele.REF_A;
+            final Allele alt = Allele.ALT_T;
+            final BCF2FieldWriter writer = BCF2FieldWriter.createGenotypeWriter(format, 1, ENCODER);
+            final VariantContext vc = new VariantContextBuilder()
+                .chr("dummy")
+                .alleles(Arrays.asList(ref, alt))
+                .genotypes(
+                    new GenotypeBuilder()
+                        .name("small")
+                        .alleles(Arrays.asList(ref, alt))
+                        .PL(new int[]{1, 2})
+                        .make(),
+                    new GenotypeBuilder()
+                        .name("big")
+                        .alleles(Arrays.asList(ref, alt))
+                        .PL(new int[]{256}) // should pad out
+                        .make()
+                )
+                .make();
+
+            final byte[] bytes = new byte[]{
+                0x32, // 3 16-bit ints
+                0x01, 0x00, 0x02, 0x00, (byte) BCF2Type.INT16.getEOVBytes(), (byte) (BCF2Type.INT16.getEOVBytes() >> 8), // small: 1, 2, EOV
+                (byte) 256, 256 >> 8, (byte) BCF2Type.INT16.getEOVBytes(), (byte) (BCF2Type.INT16.getEOVBytes() >> 8), (byte) BCF2Type.INT16.getEOVBytes(), (byte) (BCF2Type.INT16.getEOVBytes() >> 8) // big: 256, EOV, EOV
+            };
+
+            cases.add(new Object[]{
+                writer, vc,
+                vc.getGenotypes().stream().map(Genotype::getSampleName).collect(Collectors.toList()),
+                bytes,
+            });
+        }
+        return cases.toArray(new Object[0][]);
+    }
+
+    @Test(dataProvider = "genotypeWriterCases") // each case: {genotype writer, VariantContext, sample names to encode, expected bytes}
+    public void testGenotypeWriters(
+        final BCF2FieldWriter.GenotypeWriter writer,
+        final VariantContext vc,
+        final List<String> sampleNames,
+        final byte[] expectedBytes
+    ) throws IOException {
+        writer.encode(vc, sampleNames); // start() is not called here; only the encoded values are compared
+        Assert.assertEquals(ENCODER.getRecordBytes(), expectedBytes); // TestNG argument order is (actual, expected)
+    }
+}
diff --git a/src/test/java/htsjdk/variant/bcf2/BCF2UtilsUnitTest.java b/src/test/java/htsjdk/variant/bcf2/BCF2UtilsUnitTest.java
index e18c0d9309..5f658bd69b 100644
--- a/src/test/java/htsjdk/variant/bcf2/BCF2UtilsUnitTest.java
+++ b/src/test/java/htsjdk/variant/bcf2/BCF2UtilsUnitTest.java
@@ -50,47 +50,6 @@
  * Tests for BCF2Utils
  */
 public final class BCF2UtilsUnitTest extends VariantBaseTest {
-    @DataProvider(name = "CollapseExpandTest")
-    public Object[][] makeCollapseExpandTest() {
-        List<Object[]> tests = new ArrayList<Object[]>();
-        tests.add(new Object[]{Arrays.asList("A"), "A", false});
-        tests.add(new Object[]{Arrays.asList("A", "B"), ",A,B", true});
-        tests.add(new Object[]{Arrays.asList("AB"), "AB", false});
-        tests.add(new Object[]{Arrays.asList("AB", "C"), ",AB,C", true});
-        tests.add(new Object[]{Arrays.asList(), "", false});
-        return tests.toArray(new Object[][]{});
-    }
-
-    @Test(dataProvider = "CollapseExpandTest")
-    public void testCollapseExpandTest(final List<String> in, final String expectedCollapsed, final boolean isCollapsed) {
-        final String actualCollapsed = BCF2Utils.collapseStringList(in);
-        Assert.assertEquals(actualCollapsed, expectedCollapsed);
-        Assert.assertEquals(BCF2Utils.isCollapsedString(actualCollapsed), isCollapsed);
-        if ( isCollapsed )
-            Assert.assertEquals(BCF2Utils.explodeStringList(actualCollapsed), in);
-    }
-
-    @Test
-    public void testCreateDictionary() {
-        final List<VCFHeaderLine> inputLines = new ArrayList<VCFHeaderLine>();
-        int counter = 0;
-        inputLines.add(VCFHeader.makeHeaderVersionLine(VCFHeader.DEFAULT_VCF_VERSION));
-        inputLines.add(new VCFFilterHeaderLine("l" + counter++));
-        inputLines.add(new VCFFilterHeaderLine("l" + counter++));
-        inputLines.add(new VCFContigHeaderLine(Collections.singletonMap("ID", "l" + counter++), counter));
-        inputLines.add(new VCFContigHeaderLine(Collections.singletonMap("ID", "l" + counter++), counter));
-        inputLines.add(new VCFInfoHeaderLine(String.valueOf("A"+counter++), VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x"));
-        inputLines.add(new VCFInfoHeaderLine(String.valueOf("A"+counter++), VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x"));
-        inputLines.add(new VCFHeaderLine("x", "misc"));
-        inputLines.add(new VCFHeaderLine("y", "misc"));
-        inputLines.add(new VCFFilterHeaderLine("aFilter", "misc"));
-        inputLines.add(new VCFFormatHeaderLine(String.valueOf("A"+counter++), VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x"));
-        inputLines.add(new VCFFormatHeaderLine(String.valueOf("A"+counter++), VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x"));
-        final VCFHeader inputHeader = new VCFHeader(new LinkedHashSet<>(inputLines));
-        final ArrayList<String> dict = BCF2Utils.makeDictionary(inputHeader);
-        final int dict_size = dict.size();
-        Assert.assertEquals(8,dict_size);
-    }
 
     /**
      * Wrapper class for HeaderOrderTestProvider test cases to prevent TestNG from calling toString()
@@ -101,7 +60,7 @@ private static class HeaderOrderTestCase {
         public final VCFHeader testHeader;
         public final boolean expectedConsistent;
 
-        public HeaderOrderTestCase( final VCFHeader inputHeader, final VCFHeader testHeader, final boolean expectedConsistent ) {
+        public HeaderOrderTestCase(final VCFHeader inputHeader, final VCFHeader testHeader, final boolean expectedConsistent) {
             this.inputHeader = inputHeader;
             this.testHeader = testHeader;
             this.expectedConsistent = expectedConsistent;
@@ -110,8 +69,8 @@ public HeaderOrderTestCase( final VCFHeader inputHeader, final VCFHeader testHea
 
     @DataProvider(name = "HeaderOrderTestProvider")
     public Object[][] makeHeaderOrderTestProvider() {
-        final List<VCFHeaderLine> inputLines = new ArrayList<VCFHeaderLine>();
-        final List<VCFHeaderLine> extraLines = new ArrayList<VCFHeaderLine>();
+        final List<VCFHeaderLine> inputLines = new ArrayList<>();
+        final List<VCFHeaderLine> extraLines = new ArrayList<>();
 
         int counter = 0;
         inputLines.add(VCFHeader.makeHeaderVersionLine(VCFHeader.DEFAULT_VCF_VERSION));
@@ -125,7 +84,7 @@ public Object[][] makeHeaderOrderTestProvider() {
         inputLines.add(new VCFFormatHeaderLine("l" + counter++, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x"));
         inputLines.add(new VCFFormatHeaderLine("l" + counter++, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x"));
         final int inputLineCounter = counter;
-        final VCFHeader inputHeader = new VCFHeader(new LinkedHashSet<VCFHeaderLine>(inputLines));
+        final VCFHeader inputHeader = new VCFHeader(new LinkedHashSet<>(inputLines));
 
         extraLines.add(new VCFFilterHeaderLine("l" + counter++));
         extraLines.add(new VCFContigHeaderLine(Collections.singletonMap("ID", "l" + counter++), counter));
@@ -134,19 +93,20 @@ public Object[][] makeHeaderOrderTestProvider() {
         extraLines.add(new VCFHeaderLine("x", "misc"));
         extraLines.add(new VCFHeaderLine("y", "misc"));
 
-        List<Object[]> tests = new ArrayList<Object[]>();
-        for ( final int extrasToTake : Arrays.asList(0, 1, 2, 3) ) {
+        final List<Object[]> tests = new ArrayList<>();
+        for (final int extrasToTake : Arrays.asList(0, 1, 2, 3)) {
             final List<VCFHeaderLine> empty = Collections.emptyList();
             final List<List<VCFHeaderLine>> permutations = extrasToTake == 0
-                    ? Collections.singletonList(empty)
-                    : GeneralUtils.makePermutations(extraLines, extrasToTake, false);
-            for ( final List<VCFHeaderLine> permutation : permutations ) {
-                for ( int i = -1; i < inputLines.size(); i++ ) {
-                    final List<VCFHeaderLine> allLines = new ArrayList<VCFHeaderLine>(inputLines);
-                    if ( i >= 0 && !VCFHeaderVersion.isFormatString(allLines.get(i).getKey()) )
+                ? Collections.singletonList(empty)
+                : GeneralUtils.makePermutations(extraLines, extrasToTake, false);
+            for (final List<VCFHeaderLine> permutation : permutations) {
+                for (int i = -1; i < inputLines.size(); i++) {
+                    final List<VCFHeaderLine> allLines = new ArrayList<>(inputLines);
+                    if (i >= 0)
                         allLines.remove(i);
                     allLines.addAll(permutation);
-                    final VCFHeader testHeader = new VCFHeader(new LinkedHashSet<VCFHeaderLine>(allLines));
+                    allLines.add(new VCFHeaderLine(VCFHeader.DEFAULT_VCF_VERSION.getFormatString(), VCFHeader.DEFAULT_VCF_VERSION.getVersionString()));
+                    final VCFHeader testHeader = new VCFHeader(new LinkedHashSet<>(allLines));
                     final boolean expectedConsistent = expectedConsistent(testHeader, inputLineCounter);
                     tests.add(new Object[]{new HeaderOrderTestCase(inputHeader, testHeader, expectedConsistent)});
                 }
@@ -155,18 +115,18 @@ public Object[][] makeHeaderOrderTestProvider() {
 
         // sample name tests
         final List<List<String>> sampleNameTests = Arrays.asList(
-                new ArrayList<String>(),
-                Arrays.asList("A"),
-                Arrays.asList("A", "B"),
-                Arrays.asList("A", "B", "C"));
-        for ( final List<String> inSamples : sampleNameTests ) {
-            for ( final List<String> testSamples : sampleNameTests ) {
+            new ArrayList<>(),
+            Collections.singletonList("A"),
+            Arrays.asList("A", "B"),
+            Arrays.asList("A", "B", "C"));
+        for (final List<String> inSamples : sampleNameTests) {
+            for (final List<String> testSamples : sampleNameTests) {
                 final VCFHeader inputHeaderWithSamples = new VCFHeader(inputHeader.getMetaDataInInputOrder(), inSamples);
 
                 final List<List<String>> permutations = testSamples.isEmpty()
-                        ? Collections.singletonList(testSamples)
-                        : GeneralUtils.makePermutations(testSamples, testSamples.size(), false);
-                for ( final List<String> testSamplesPermutation : permutations ) {
+                    ? Collections.singletonList(testSamples)
+                    : GeneralUtils.makePermutations(testSamples, testSamples.size(), false);
+                for (final List<String> testSamplesPermutation : permutations) {
                     final VCFHeader testHeaderWithSamples = new VCFHeader(inputHeader.getMetaDataInInputOrder(), testSamplesPermutation);
                     final boolean expectedConsistent = testSamples.equals(inSamples);
                     tests.add(new Object[]{new HeaderOrderTestCase(inputHeaderWithSamples, testHeaderWithSamples, expectedConsistent)});
@@ -187,8 +147,8 @@ private static boolean expectedConsistent(final VCFHeader combinationHeader, fin
         }
 
         // as long as the start contains all of the ids up to minCounterForInputLines in order
-        for ( int i = 0; i < minCounterForInputLines; i++ )
-            if ( i >= ids.size() || ids.get(i) != i )
+        for (int i = 0; i < minCounterForInputLines; i++)
+            if (i >= ids.size() || ids.get(i) != i)
                 return false;
 
         return true;
@@ -199,32 +159,8 @@ private static boolean expectedConsistent(final VCFHeader combinationHeader, fin
     // even when the header file is slightly different
     //
     @Test(dataProvider = "HeaderOrderTestProvider")
-    public void testHeaderOrder( final HeaderOrderTestCase testCase ) {
+    public void testHeaderOrder(final HeaderOrderTestCase testCase) {
         final boolean actualOrderConsistency = BCF2Utils.headerLinesAreOrderedConsistently(testCase.testHeader, testCase.inputHeader);
         Assert.assertEquals(actualOrderConsistency, testCase.expectedConsistent);
     }
-
-
-    private void assertListsAreEquivalent(final List<?> a, final List<?> b) {
-        Assert.assertEquals(a.size(), b.size());
-        for (int i=0; i<a.size(); i++)
-            Assert.assertEquals(a.get(i), b.get(i));
-    }
-
-    @DataProvider(name = "toListTestProvider")
-    public Object[][] makeToListTest() {
-        final List<Object[]> tests = new ArrayList<Object[]>();
-        tests.add(new Object[]{Object.class, null, Collections.emptyList()});
-        tests.add(new Object[]{Integer.class, 1, Arrays.asList(1)});
-        tests.add(new Object[]{Integer.class, new int[]{1, 2, 3}, Arrays.asList(1, 2, 3)});
-        tests.add(new Object[]{String.class, Arrays.asList("X", "Y"), Arrays.asList("X", "Y")});
-        return tests.toArray(new Object[][]{});
-    }
-
-    @Test(dataProvider = "toListTestProvider")
-    public void testToList(final Class<?> cls, final Object input, final List<Object> expectedOutput) {
-        assertListsAreEquivalent(BCF2Utils.toList(cls, input), expectedOutput);
-    }
-
-
 }
diff --git a/src/test/java/htsjdk/variant/bcf2/BCF2WriterUnitTest.java b/src/test/java/htsjdk/variant/bcf2/BCF2WriterUnitTest.java
index 17e2ae3257..0bc493bf15 100644
--- a/src/test/java/htsjdk/variant/bcf2/BCF2WriterUnitTest.java
+++ b/src/test/java/htsjdk/variant/bcf2/BCF2WriterUnitTest.java
@@ -1,34 +1,36 @@
 /*
-* Copyright (c) 2017 The Broad Institute
-* 
-* Permission is hereby granted, free of charge, to any person
-* obtaining a copy of this software and associated documentation
-* files (the "Software"), to deal in the Software without
-* restriction, including without limitation the rights to use,
-* copy, modify, merge, publish, distribute, sublicense, and/or sell
-* copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following
-* conditions:
-* 
-* The above copyright notice and this permission notice shall be
-* included in all copies or substantial portions of the Software.
-* 
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
-* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-*/
+ * Copyright (c) 2017 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+ * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
 
 package htsjdk.variant.bcf2;
 
 import htsjdk.samtools.SAMSequenceDictionary;
 import htsjdk.samtools.util.TestUtil;
+import htsjdk.samtools.util.Tuple;
 import htsjdk.tribble.Tribble;
 import htsjdk.tribble.readers.PositionalBufferedStream;
+import htsjdk.utils.BCFToolsTestUtils;
 import htsjdk.variant.VariantBaseTest;
 import htsjdk.variant.variantcontext.Allele;
 import htsjdk.variant.variantcontext.Genotype;
@@ -37,30 +39,40 @@
 import htsjdk.variant.variantcontext.VariantContext;
 import htsjdk.variant.variantcontext.VariantContextBuilder;
 import htsjdk.variant.variantcontext.VariantContextTestProvider;
-import htsjdk.variant.variantcontext.writer.*;
-import htsjdk.variant.vcf.*;
+import htsjdk.variant.variantcontext.writer.Options;
+import htsjdk.variant.variantcontext.writer.VariantContextWriter;
+import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder;
+import htsjdk.variant.vcf.VCFFileReader;
+import htsjdk.variant.vcf.VCFFormatHeaderLine;
+import htsjdk.variant.vcf.VCFHeader;
+import htsjdk.variant.vcf.VCFHeaderLine;
+import htsjdk.variant.vcf.VCFHeaderLineType;
+import htsjdk.variant.vcf.VCFInfoHeaderLine;
 import org.testng.Assert;
 import org.testng.annotations.BeforeClass;
+import org.testng.annotations.DataProvider;
 import org.testng.annotations.Test;
 
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.IOException;
+import java.nio.file.Path;
 import java.util.ArrayList;
 import java.util.EnumSet;
 import java.util.HashMap;
 import java.util.HashSet;
-import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.stream.Collectors;
+import java.util.zip.GZIPInputStream;
 
 /**
  * @author amila
- *         <p/>
- *         Class BCF2WriterUnitTest
- *         <p/>
- *         This class tests out the ability of the BCF writer to correctly write BCF files
+ * <p/>
+ * Class BCF2WriterUnitTest
+ * <p/>
+ * This class tests out the ability of the BCF writer to correctly write BCF files
  */
 public class BCF2WriterUnitTest extends VariantBaseTest {
 
@@ -72,7 +84,7 @@ public class BCF2WriterUnitTest extends VariantBaseTest {
      * @return a fake VCF header
      */
     private static VCFHeader createFakeHeader() {
-        final SAMSequenceDictionary sequenceDict = createArtificialSequenceDictionary();
+        final SAMSequenceDictionary sequenceDict = VariantBaseTest.createArtificialSequenceDictionary();
         final Set<VCFHeaderLine> metaData = new HashSet<>();
         final Set<String> additionalColumns = new HashSet<>();
         metaData.add(VCFHeader.makeHeaderVersionLine(VCFHeader.DEFAULT_VCF_VERSION));
@@ -80,10 +92,10 @@ private static VCFHeader createFakeHeader() {
         additionalColumns.add("extra1");
         additionalColumns.add("extra2");
         final VCFHeader header = new VCFHeader(metaData, additionalColumns);
-        header.addMetaDataLine(new VCFInfoHeaderLine("DP", 1, VCFHeaderLineType.String, "x"));
+        header.addMetaDataLine(new VCFInfoHeaderLine("DP", 1, VCFHeaderLineType.Integer, "x"));
         header.addMetaDataLine(new VCFFormatHeaderLine("GT", 1, VCFHeaderLineType.String, "x"));
         header.addMetaDataLine(new VCFFormatHeaderLine("BB", 1, VCFHeaderLineType.String, "x"));
-        header.addMetaDataLine(new VCFFormatHeaderLine("GQ", 1, VCFHeaderLineType.String, "x"));
+        header.addMetaDataLine(new VCFFormatHeaderLine("GQ", 1, VCFHeaderLineType.Integer, "x"));
         header.setSequenceDictionary(sequenceDict);
         return header;
     }
@@ -102,25 +114,23 @@ private void createTemporaryDirectory() {
     public void testWriteAndReadBCF() throws IOException {
         final File bcfOutputFile = File.createTempFile("testWriteAndReadVCF.", ".bcf", tempDir);
         bcfOutputFile.deleteOnExit();
-        final VCFHeader header = createFakeHeader();
+        final VCFHeader header = BCF2WriterUnitTest.createFakeHeader();
         try (final VariantContextWriter writer = new VariantContextWriterBuilder()
-                .setOutputFile(bcfOutputFile).setReferenceDictionary(header.getSequenceDictionary())
-                .unsetOption(Options.INDEX_ON_THE_FLY)
-                .build()) {
+            .setOutputFile(bcfOutputFile).setReferenceDictionary(header.getSequenceDictionary())
+            .unsetOption(Options.INDEX_ON_THE_FLY)
+            .build()
+        ) {
             writer.writeHeader(header);
             writer.add(createVC(header));
             writer.add(createVC(header));
         }
-        VariantContextTestProvider.VariantContextContainer container = VariantContextTestProvider
-                .readAllVCs(bcfOutputFile, new BCF2Codec());
+        final VariantContextTestProvider.VariantContextContainer container = VariantContextTestProvider
+            .readAllVCs(bcfOutputFile, new BCF2Codec());
         int counter = 0;
-        final Iterator<VariantContext> it = container.getVCs().iterator();
-        while (it.hasNext()) {
-            it.next();
+        for (final VariantContext ignored : container.getVCs()) {
             counter++;
         }
         Assert.assertEquals(counter, 2);
-
     }
 
 
@@ -132,21 +142,20 @@ public void testWriteAndReadBCFWithIndex() throws IOException {
         final File bcfOutputFile = File.createTempFile("testWriteAndReadVCF.", ".bcf", tempDir);
         bcfOutputFile.deleteOnExit();
         Tribble.indexFile(bcfOutputFile).deleteOnExit();
-        final VCFHeader header = createFakeHeader();
+        final VCFHeader header = BCF2WriterUnitTest.createFakeHeader();
         try (final VariantContextWriter writer = new VariantContextWriterBuilder()
-                .setOutputFile(bcfOutputFile).setReferenceDictionary(header.getSequenceDictionary())
-                .setOptions(EnumSet.of(Options.INDEX_ON_THE_FLY))
-                .build()) {
+            .setOutputFile(bcfOutputFile).setReferenceDictionary(header.getSequenceDictionary())
+            .setOptions(EnumSet.of(Options.INDEX_ON_THE_FLY))
+            .build()
+        ) {
             writer.writeHeader(header);
             writer.add(createVC(header));
             writer.add(createVC(header));
         }
-        VariantContextTestProvider.VariantContextContainer container = VariantContextTestProvider
-                .readAllVCs(bcfOutputFile, new BCF2Codec());
+        final VariantContextTestProvider.VariantContextContainer container = VariantContextTestProvider
+            .readAllVCs(bcfOutputFile, new BCF2Codec());
         int counter = 0;
-        final Iterator<VariantContext> it = container.getVCs().iterator();
-        while (it.hasNext()) {
-            it.next();
+        for (final VariantContext ignored : container.getVCs()) {
             counter++;
         }
         Assert.assertEquals(counter, 2);
@@ -162,41 +171,43 @@ public void testWriteAndReadBCFHeaderless() throws IOException {
         final File bcfOutputHeaderlessFile = File.createTempFile("testWriteAndReadBCFHeaderless.", ".bcf", tempDir);
         bcfOutputHeaderlessFile.deleteOnExit();
 
-        final VCFHeader header = createFakeHeader();
+        final VCFHeader header = BCF2WriterUnitTest.createFakeHeader();
         // we write two files, bcfOutputFile with the header, and bcfOutputHeaderlessFile with just the body
         try (final VariantContextWriter fakeBCFFileWriter = new VariantContextWriterBuilder()
-                .setOutputFile(bcfOutputFile).setReferenceDictionary(header.getSequenceDictionary())
-                .unsetOption(Options.INDEX_ON_THE_FLY)
-                .build()) {
+            .setOutputFile(bcfOutputFile).setReferenceDictionary(header.getSequenceDictionary())
+            .unsetOption(Options.INDEX_ON_THE_FLY)
+            .build()
+        ) {
             fakeBCFFileWriter.writeHeader(header); // writes header
         }
 
         try (final VariantContextWriter fakeBCFBodyFileWriter = new VariantContextWriterBuilder()
-                .setOutputFile(bcfOutputHeaderlessFile).setReferenceDictionary(header.getSequenceDictionary())
-                .unsetOption(Options.INDEX_ON_THE_FLY)
-                .build()) {
+            .setOutputFile(bcfOutputHeaderlessFile).setReferenceDictionary(header.getSequenceDictionary())
+            .unsetOption(Options.INDEX_ON_THE_FLY)
+            .build()
+        ) {
             fakeBCFBodyFileWriter.setHeader(header); // does not write header
             fakeBCFBodyFileWriter.add(createVC(header));
             fakeBCFBodyFileWriter.add(createVC(header));
         }
 
-        VariantContextTestProvider.VariantContextContainer container;
-
-        try (final PositionalBufferedStream headerPbs = new PositionalBufferedStream(new FileInputStream(bcfOutputFile));
-        final PositionalBufferedStream bodyPbs = new PositionalBufferedStream(new FileInputStream(bcfOutputHeaderlessFile))) {
+        try (final PositionalBufferedStream headerPbs =
+                 new PositionalBufferedStream(new GZIPInputStream(new FileInputStream(bcfOutputFile)));
+             final PositionalBufferedStream bodyPbs =
+                 new PositionalBufferedStream(new GZIPInputStream(new FileInputStream(bcfOutputHeaderlessFile)))
+        ) {
 
-            BCF2Codec codec = new BCF2Codec();
+            final BCF2Codec codec = new BCF2Codec();
             codec.readHeader(headerPbs);
             // we use the header information read from identical file with header+body to read just the body of second file
 
             int counter = 0;
             while (!bodyPbs.isDone()) {
-                VariantContext vc = codec.decode(bodyPbs);
+                codec.decode(bodyPbs);
                 counter++;
             }
             Assert.assertEquals(counter, 2);
         }
-
     }
 
     /**
@@ -208,42 +219,45 @@ public void testReadAndWritePhasedBCF() throws IOException {
         final File bcfOutputFile = File.createTempFile("testWriteAndReadBCFHeaderless.", ".bcf", tempDir);
         bcfOutputFile.deleteOnExit();
 
-        try ( VCFFileReader vcfFile = new VCFFileReader(vcfInputFile);
-
-        VariantContextWriter bcfWriter = new VariantContextWriterBuilder().setOutputFile(bcfOutputFile).setReferenceDictionary(vcfFile.getFileHeader().getSequenceDictionary()).build();
-
-        ) {
-            bcfWriter.writeHeader(vcfFile.getFileHeader());
-
-            for (VariantContext vc : vcfFile.iterator().toList()) {
-                Assert.assertEquals(vc.getGenotypes().stream().filter(Genotype::isPhased).count(), 2);
-                bcfWriter.add(vc);
+        try (final VCFFileReader vcfFile = new VCFFileReader(vcfInputFile)) {
+            try (final VariantContextWriter bcfWriter = new VariantContextWriterBuilder()
+                .setOutputFile(bcfOutputFile)
+                .setReferenceDictionary(vcfFile.getFileHeader().getSequenceDictionary())
+                .build()
+            ) {
+                bcfWriter.writeHeader(vcfFile.getFileHeader());
+                for (final VariantContext vc : vcfFile.iterator().toList()) {
+                    Assert.assertEquals(vc.getGenotypes().stream().filter(Genotype::isPhased).count(), 2);
+                    bcfWriter.add(vc);
+                }
             }
-            bcfWriter.close();
 
             // Reading the VCF and writing it to a BCF
             final File vcfOutputFile = File.createTempFile("testWriteAndReadBCFHeaderless.", ".vcf", tempDir);
             vcfOutputFile.deleteOnExit();
 
-            try (final PositionalBufferedStream headerPbs = new PositionalBufferedStream(new FileInputStream(bcfOutputFile));
-                 VariantContextWriter vcfWriter = new VariantContextWriterBuilder().setOutputFile(vcfOutputFile).setReferenceDictionary(vcfFile.getFileHeader().getSequenceDictionary()).build();
-                 ) {
+            try (final PositionalBufferedStream headerPbs =
+                     new PositionalBufferedStream(new GZIPInputStream(new FileInputStream(bcfOutputFile)));
+                 final VariantContextWriter vcfWriter = new VariantContextWriterBuilder()
+                     .setOutputFile(vcfOutputFile)
+                     .setReferenceDictionary(vcfFile.getFileHeader().getSequenceDictionary())
+                     .build()
+            ) {
                 vcfWriter.writeHeader(vcfFile.getFileHeader());
 
-                BCF2Codec codec = new BCF2Codec();
+                final BCF2Codec codec = new BCF2Codec();
                 codec.readHeader(headerPbs);
                 // we use the header information read from identical file with header+body to read just the body of second file
 
                 while (!headerPbs.isDone()) {
-                    VariantContext vc = codec.decode(headerPbs);
+                    final VariantContext vc = codec.decode(headerPbs);
                     Assert.assertEquals(vc.getGenotypes().stream().filter(Genotype::isPhased).count(), 2);
                     vcfWriter.add(vc);
                 }
-                vcfWriter.close();
             }
 
-            try (VCFFileReader vcfOutput = new VCFFileReader(vcfInputFile);) {
-                for (VariantContext vc : vcfOutput.iterator().toList()) {
+            try (final VCFFileReader vcfOutput = new VCFFileReader(vcfInputFile)) {
+                for (final VariantContext vc : vcfOutput.iterator().toList()) {
                     Assert.assertEquals(vc.getGenotypes().stream().filter(Genotype::isPhased).count(), 2);
                 }
             }
@@ -255,12 +269,13 @@ public void testWriteHeaderTwice() throws IOException {
         final File bcfOutputFile = File.createTempFile("testWriteAndReadVCF.", ".bcf", tempDir);
         bcfOutputFile.deleteOnExit();
 
-        final VCFHeader header = createFakeHeader();
+        final VCFHeader header = BCF2WriterUnitTest.createFakeHeader();
         // prevent writing header twice
         try (final VariantContextWriter writer = new VariantContextWriterBuilder()
-                .setOutputFile(bcfOutputFile).setReferenceDictionary(header.getSequenceDictionary())
-                .unsetOption(Options.INDEX_ON_THE_FLY)
-                .build()) {
+            .setOutputFile(bcfOutputFile).setReferenceDictionary(header.getSequenceDictionary())
+            .unsetOption(Options.INDEX_ON_THE_FLY)
+            .build()
+        ) {
             writer.writeHeader(header);
             writer.writeHeader(header);
         }
@@ -271,12 +286,13 @@ public void testChangeHeaderAfterWritingHeader() throws IOException {
         final File bcfOutputFile = File.createTempFile("testWriteAndReadVCF.", ".bcf", tempDir);
         bcfOutputFile.deleteOnExit();
 
-        final VCFHeader header = createFakeHeader();
+        final VCFHeader header = BCF2WriterUnitTest.createFakeHeader();
         // prevent changing header if it's already written
         try (final VariantContextWriter writer = new VariantContextWriterBuilder()
-                .setOutputFile(bcfOutputFile).setReferenceDictionary(header.getSequenceDictionary())
-                .unsetOption(Options.INDEX_ON_THE_FLY)
-                .build()) {
+            .setOutputFile(bcfOutputFile).setReferenceDictionary(header.getSequenceDictionary())
+            .unsetOption(Options.INDEX_ON_THE_FLY)
+            .build()
+        ) {
             writer.writeHeader(header);
             writer.setHeader(header);
         }
@@ -287,12 +303,13 @@ public void testChangeHeaderAfterWritingBody() throws IOException {
         final File bcfOutputFile = File.createTempFile("testWriteAndReadVCF.", ".bcf", tempDir);
         bcfOutputFile.deleteOnExit();
 
-        final VCFHeader header = createFakeHeader();
+        final VCFHeader header = BCF2WriterUnitTest.createFakeHeader();
         // prevent changing header if part of body is already written
         try (final VariantContextWriter writer = new VariantContextWriterBuilder()
-                .setOutputFile(bcfOutputFile).setReferenceDictionary(header.getSequenceDictionary())
-                .unsetOption(Options.INDEX_ON_THE_FLY)
-                .build()) {
+            .setOutputFile(bcfOutputFile).setReferenceDictionary(header.getSequenceDictionary())
+            .unsetOption(Options.INDEX_ON_THE_FLY)
+            .build()
+        ) {
             writer.setHeader(header);
             writer.add(createVC(header));
             writer.setHeader(header);
@@ -305,7 +322,7 @@ public void testChangeHeaderAfterWritingBody() throws IOException {
      * @param header the VCF header
      * @return a VCFRecord
      */
-    private VariantContext createVC(final VCFHeader header) {
+    private static VariantContext createVC(final VCFHeader header) {
         final List<Allele> alleles = new ArrayList<>();
         final Map<String, Object> attributes = new HashMap<>();
         final GenotypesContext genotypes = GenotypesContext.create(header.getGenotypeSamples().size());
@@ -315,14 +332,104 @@ private VariantContext createVC(final VCFHeader header) {
 
         attributes.put("DP", "50");
         for (final String name : header.getGenotypeSamples()) {
-            final Genotype gt = new GenotypeBuilder(name, alleles.subList(1, 2)).GQ(0).attribute("BB", "1").phased(true)
-                    .make();
+            final Genotype gt = new GenotypeBuilder(name, alleles.subList(1, 2))
+                .GQ(0).attribute("BB", "1")
+                .phased(true)
+                .make();
             genotypes.add(gt);
         }
         return new VariantContextBuilder("RANDOM", "1", 1, 1, alleles)
-                .genotypes(genotypes).attributes(attributes).make();
+            .genotypes(genotypes).attributes(attributes).make();
+    }
+
+    @DataProvider
+    public Object[][] bcftoolsRoundTripProvider() {
+        return new Object[][]{
+            {"phased.vcf"},
+            {"test1.vcf"},
+            {"NA12891.vcf"},
+            {"NA12891.fp.vcf"},
+            {"dbsnp_135.b37.1000.vcf"},
+            {"structuralvariants.vcf"},
+            // TODO the test testBCFToolsReadsHtsjdkOutput fails for the following two files
+            //  due to what appears to be a bug in bcftools' VCF output where missing FORMAT
+            //  values are sometimes encoded as an empty string and not '.'
+            //  This seems to have something to do with the affected keys being in trailing
+            //  position in the original VCF (trailing missing values can be dropped), and
+            //  htsjdk reordering FORMAT keys by sorting them alphabetically
+//            {"ex2.vcf"},
+//            {"test.vcf.bgz"},
+        };
     }
 
+    @Test(dataProvider = "bcftoolsRoundTripProvider")
+    public void testBCFToolsReadsHtsjdkOutput(final String testFile) throws IOException {
+        // Take an input VCF and read it into memory as our expected output
+        // Take the same VCF and write it out as a BCF using htsjdk's BCF2Writer, use bcftools to convert from
+        // BCF back to VCF, and read the converted VCF into memory again as our actual output
+        final Path path = new File(VariantBaseTest.variantTestDataRoot + testFile).toPath();
+        final Tuple<VCFHeader, List<VariantContext>> expectedVCF = readEntireVCFIntoMemory(path);
+        final VCFHeader header = expectedVCF.a;
+        final List<VariantContext> expectedVariantContexts = expectedVCF.b;
+
+        final File bcfOutputFile = File.createTempFile("testBCFToolsRoundTrip" + testFile, ".bcf", tempDir);
+        bcfOutputFile.deleteOnExit();
+
+        try (final VariantContextWriter writer = new VariantContextWriterBuilder()
+            .setOutputFile(bcfOutputFile).setReferenceDictionary(header.getSequenceDictionary())
+            .unsetOption(Options.INDEX_ON_THE_FLY)
+            .build()
+        ) {
+            writer.writeHeader(header);
+            for (final VariantContext vc : expectedVariantContexts) {
+                writer.add(vc);
+            }
+        }
+
+        final Path converted = BCFToolsTestUtils.BCFToVCF(bcfOutputFile, "").toPath();
+        final Tuple<VCFHeader, List<VariantContext>> actualVCF = readEntireVCFIntoMemory(converted);
+        final List<VariantContext> actualVariantContexts = actualVCF.b;
+
+        // Don't compare the headers, since they might contain extraneous lines, and the BCF codec isn't responsible
+        // for headers; only the records are compared (TestNG argument order is actual, expected)
+        Assert.assertEquals(actualVariantContexts.size(), expectedVariantContexts.size());
+        final int length = expectedVariantContexts.size();
+        for (int i = 0; i < length; i++) {
+            // Fully decode both variant contexts so that we're comparing actual objects and not their string
+            // representations, which can be different without affecting semantics, e.g. number of digits in a double
+            VariantBaseTest.assertVariantContextsAreEqual(
+                actualVariantContexts.get(i).fullyDecode(header, false),
+                expectedVariantContexts.get(i).fullyDecode(header, false)
+            );
+        }
+    }
 
+    @Test(dataProvider = "bcftoolsRoundTripProvider")
+    public void testHtsjdkReadsBCFToolsOutput(final String testFile) {
+        // Take an input VCF and read it into memory as our expected output
+        // Take the same VCF and convert it to BCF using bcftools, then read the BCF into memory again as our actual output
+        final Path path = new File(VariantBaseTest.variantTestDataRoot + testFile).toPath();
+        final Tuple<VCFHeader, List<VariantContext>> expectedVCF = readEntireVCFIntoMemory(path);
+        final VCFHeader header = expectedVCF.a;
+        final List<VariantContext> expectedVariantContexts = expectedVCF.b;
+
+        final File converted = BCFToolsTestUtils.VCFtoBCF(path.toFile(), "");
+        final List<VariantContext> actualVariantContexts;
+        try (final VCFFileReader reader = new VCFFileReader(converted, false)) {
+            actualVariantContexts = reader.iterator().stream().collect(Collectors.toList());
+        }
+
+        // Headers are not compared (extraneous lines are possible and the BCF codec isn't responsible for them)
+        Assert.assertEquals(actualVariantContexts.size(), expectedVariantContexts.size());
+        final int length = expectedVariantContexts.size();
+        for (int i = 0; i < length; i++) {
+            // Fully decode both variant contexts so that we're comparing actual objects and not their string
+            // representations, which can be different without affecting semantics, e.g. number of digits in a double
+            VariantBaseTest.assertVariantContextsAreEqual(
+                actualVariantContexts.get(i).fullyDecode(header, false),
+                expectedVariantContexts.get(i).fullyDecode(header, false)
+            );
+        }
+    }
 }
 
diff --git a/src/test/java/htsjdk/variant/bcf2/BCFCodecTest.java b/src/test/java/htsjdk/variant/bcf2/BCFCodecTest.java
index 39fce34b18..5c0af6c761 100644
--- a/src/test/java/htsjdk/variant/bcf2/BCFCodecTest.java
+++ b/src/test/java/htsjdk/variant/bcf2/BCFCodecTest.java
@@ -13,24 +13,13 @@
 import java.io.IOException;
 
 public class BCFCodecTest extends VariantBaseTest {
-    final String TEST_DATA_DIR = "src/test/resources/htsjdk/variant/";
-
-    // should reject bcf v2.2 on read, see issue https://github.com/samtools/htsjdk/issues/1323
-    @Test(expectedExceptions = TribbleException.class)
-    private void testRejectBCFVersion22() throws IOException {
-        BCF2Codec bcfCodec = new BCF2Codec();
-        try (final FileInputStream fis = new FileInputStream(new File(TEST_DATA_DIR, "BCFVersion22Uncompressed.bcf"));
-             final PositionalBufferedStream pbs = new PositionalBufferedStream(fis)) {
-            bcfCodec.readHeader(pbs);
-        }
-    }
+    private static final String TEST_DATA_DIR = "src/test/resources/htsjdk/variant/";
 
     @Test
-    private void testBCFCustomVersionCompatibility() throws IOException {
+    public void testBCFCustomVersionCompatibility() throws IOException {
         final BCF2Codec bcfCodec = new BCF2Codec() {
             @Override
             protected void validateVersionCompatibility(final BCFVersion supportedVersion, final BCFVersion actualVersion) {
-                return;
             }
         };
 
diff --git a/src/test/java/htsjdk/variant/variantcontext/VariantContextTestProvider.java b/src/test/java/htsjdk/variant/variantcontext/VariantContextTestProvider.java
index e04910eb0e..34adb5a4b0 100644
--- a/src/test/java/htsjdk/variant/variantcontext/VariantContextTestProvider.java
+++ b/src/test/java/htsjdk/variant/variantcontext/VariantContextTestProvider.java
@@ -27,11 +27,9 @@
 
 import htsjdk.HtsjdkTest;
 import htsjdk.tribble.FeatureCodec;
-import htsjdk.tribble.FeatureCodecHeader;
 import htsjdk.tribble.Tribble;
 import htsjdk.tribble.readers.LineIterator;
 import htsjdk.tribble.readers.LineIteratorImpl;
-import htsjdk.tribble.readers.PositionalBufferedStream;
 import htsjdk.tribble.readers.SynchronousLineReader;
 import htsjdk.variant.VariantBaseTest;
 import htsjdk.variant.bcf2.BCF2Codec;
@@ -48,7 +46,8 @@
 import htsjdk.variant.vcf.VCFHeaderLineCount;
 import htsjdk.variant.vcf.VCFHeaderLineType;
 import htsjdk.variant.vcf.VCFInfoHeaderLine;
-
+import htsjdk.variant.vcf.VCFIterator;
+import htsjdk.variant.vcf.VCFIteratorBuilder;
 import org.testng.Assert;
 
 import java.io.BufferedInputStream;
@@ -234,6 +233,7 @@ private static void createSyntheticHeader() {
         addHeaderLine(metaData, "PL", VCFHeaderLineCount.G, VCFHeaderLineType.Integer);
         addHeaderLine(metaData, "GS", 2, VCFHeaderLineType.String);
         addHeaderLine(metaData, "GV", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String);
+        // TODO changed count type from UNBOUNDED to 1 to match VCF 4.3 spec, but might conflict with existing htsjdk code
         addHeaderLine(metaData, "FT", 1, VCFHeaderLineType.String);
 
         // prep the header
@@ -674,6 +674,7 @@ public static void testReaderWriterWithMissingGenotypes(final VariantContextIOTe
                         assertEquals(g, expected.getGenotype(g.getSampleName()));
                     } else {
                         // missing
+                        // TODO this may not be correct
                         Assert.assertTrue(g.isNoCall());
                     }
                 }
@@ -755,29 +756,9 @@ public void remove() { }
     }
 
     public static VariantContextContainer readAllVCs(final File input, final BCF2Codec codec) throws IOException {
-        PositionalBufferedStream headerPbs = new PositionalBufferedStream(new FileInputStream(input));
-        FeatureCodecHeader header = codec.readHeader(headerPbs);
-        headerPbs.close();
-
-        final PositionalBufferedStream pbs = new PositionalBufferedStream(new FileInputStream(input));
-        pbs.skip(header.getHeaderEnd());
-
-        final VCFHeader vcfHeader = (VCFHeader)header.getHeaderValue();
-        return new VariantContextTestProvider.VariantContextContainer(vcfHeader, new VariantContextTestProvider.VCIterable(codec, vcfHeader) {
-            @Override
-            public boolean hasNext() {
-                try {
-                    return !pbs.isDone();
-                } catch (IOException e) {
-                    throw new RuntimeException(e);
-                }
-            }
-
-            @Override
-            public Object nextSource() {
-                return pbs;
-            }
-        });
+        final VCFIterator iterator = new VCFIteratorBuilder().open(input);
+        final VCFHeader vcfHeader = iterator.getHeader();
+        return new VariantContextTestProvider.VariantContextContainer(vcfHeader, () -> iterator);
     }
 
     public static VariantContextContainer readAllVCs(final File input, final VCFCodec codec) throws FileNotFoundException {
@@ -868,7 +849,7 @@ public static void assertEquals(final Genotype actual, final Genotype expected)
 
         // inline attributes
         Assert.assertEquals(actual.getDP(), expected.getDP(), "Genotype dp");
-        Assert.assertTrue(Arrays.equals(actual.getAD(), expected.getAD()));
+        Assert.assertEquals(actual.getAD(), expected.getAD(), "Genotype ad");
         Assert.assertEquals(actual.getGQ(), expected.getGQ(), "Genotype gq");
         Assert.assertEquals(actual.hasPL(), expected.hasPL(), "Genotype hasPL");
         Assert.assertEquals(actual.hasAD(), expected.hasAD(), "Genotype hasAD");
diff --git a/src/test/java/htsjdk/variant/variantcontext/writer/VCFWriterUnitTest.java b/src/test/java/htsjdk/variant/variantcontext/writer/VCFWriterUnitTest.java
index ceac4f95a8..4931fd8b09 100644
--- a/src/test/java/htsjdk/variant/variantcontext/writer/VCFWriterUnitTest.java
+++ b/src/test/java/htsjdk/variant/variantcontext/writer/VCFWriterUnitTest.java
@@ -29,12 +29,9 @@
 import htsjdk.samtools.util.BlockCompressedInputStream;
 import htsjdk.samtools.util.FileExtensions;
 import htsjdk.samtools.util.TestUtil;
-import htsjdk.tribble.AbstractFeatureReader;
-import htsjdk.tribble.FeatureReader;
 import htsjdk.tribble.Tribble;
 import htsjdk.tribble.readers.AsciiLineReader;
 import htsjdk.tribble.readers.AsciiLineReaderIterator;
-import htsjdk.tribble.util.TabixUtils;
 import htsjdk.variant.VariantBaseTest;
 import htsjdk.variant.variantcontext.Allele;
 import htsjdk.variant.variantcontext.Genotype;
@@ -42,7 +39,19 @@
 import htsjdk.variant.variantcontext.GenotypesContext;
 import htsjdk.variant.variantcontext.VariantContext;
 import htsjdk.variant.variantcontext.VariantContextBuilder;
-import htsjdk.variant.vcf.*;
+import htsjdk.variant.vcf.VCFCodec;
+import htsjdk.variant.vcf.VCFConstants;
+import htsjdk.variant.vcf.VCFFileReader;
+import htsjdk.variant.vcf.VCFFormatHeaderLine;
+import htsjdk.variant.vcf.VCFHeader;
+import htsjdk.variant.vcf.VCFHeaderLine;
+import htsjdk.variant.vcf.VCFHeaderLineType;
+import htsjdk.variant.vcf.VCFHeaderVersion;
+import htsjdk.variant.vcf.VCFStandardHeaderLines;
+import org.testng.Assert;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
 
 import java.io.File;
 import java.io.FileInputStream;
@@ -52,16 +61,10 @@
 import java.util.EnumSet;
 import java.util.HashMap;
 import java.util.HashSet;
-import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
-import org.testng.Assert;
-import org.testng.annotations.BeforeClass;
-import org.testng.annotations.DataProvider;
-import org.testng.annotations.Test;
-
 /**
  * @author aaron
  *         <p/>
@@ -104,31 +107,23 @@ public void testBasicWriteAndRead(final String extension) throws IOException {
         writer.add(createVC(header));
         writer.add(createVC(header));
         writer.close();
-        final VCFCodec codec = new VCFCodec();
-        final FeatureReader<VariantContext> reader = AbstractFeatureReader.getFeatureReader(fakeVCFFile.getAbsolutePath(), codec, false);
-        final VCFHeader headerFromFile = (VCFHeader)reader.getHeader();
+        final VCFFileReader reader = new VCFFileReader(fakeVCFFile.toPath(), false, VCFVersionUpgradePolicy.DO_NOT_UPGRADE);
+        final VCFHeader headerFromFile = reader.getHeader();
 
         int counter = 0;
 
         // validate what we're reading in
         validateHeader(headerFromFile, sequenceDict);
 
-        try {
-            final Iterator<VariantContext> it = reader.iterator();
-            while(it.hasNext()) {
-                it.next();
-                counter++;
-            }
-            Assert.assertEquals(counter, 2);
-        }
-        catch (final IOException e ) {
-            throw new RuntimeException(e.getMessage());
+        for (final VariantContext variantContext : reader) {
+            counter++;
         }
+        Assert.assertEquals(counter, 2);
 
     }
 
     /** test, using the writer and reader, that we can output and input a VCF body without problems */
-    @Test(dataProvider = "vcfExtensionsDataProvider")
+    @Test(dataProvider = "vcfHeaderlessExtensionsDataProvider")
     public void testWriteAndReadVCFHeaderless(final String extension) throws IOException {
         final File fakeVCFFile = File.createTempFile("testWriteAndReadVCFHeaderless.", extension, tempDir);
         fakeVCFFile.deleteOnExit();
@@ -226,6 +221,12 @@ private static VCFHeader createFakeHeader(final Set<VCFHeaderLine> metaData, fin
                                              final SAMSequenceDictionary sequenceDict) {
         metaData.add(new VCFHeaderLine(VCFHeaderVersion.VCF4_2.getFormatString(), VCFHeaderVersion.VCF4_2.getVersionString()));
         metaData.add(new VCFHeaderLine("two", "2"));
+        // Explicitly add GT, GQ, DP, and BB keys because the .bcf tests that use this fake header require that the header
+        // contain INFO/FORMAT lines for all the attributes written
+        metaData.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_KEY));
+        metaData.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_QUALITY_KEY));
+        metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.DEPTH_KEY));
+        metaData.add(new VCFFormatHeaderLine("BB", 1, VCFHeaderLineType.Integer, "test key"));
         additionalColumns.add("extra1");
         additionalColumns.add("extra2");
         final VCFHeader ret = new VCFHeader(metaData, additionalColumns);
@@ -326,14 +327,21 @@ public void TestWritingLargeVCF(final String extension) throws FileNotFoundExcep
     @DataProvider(name = "vcfExtensionsDataProvider")
     public Object[][]vcfExtensionsDataProvider() {
         return new Object[][] {
-                //TODO: fix this BCF problem!
-                // TODO: BCF doesn't work because header is not properly constructed.
-                // {".bcf"},
+                {FileExtensions.BCF},
                 {FileExtensions.VCF},
                 {FileExtensions.COMPRESSED_VCF}
         };
     }
 
+    // Testing writing headerless files does not make sense for .bcf because BCF's strong typing makes writing
+    // bodies without headers impossible, so we only test VCF and compressed VCF with headerless writing
+    @DataProvider(name = "vcfHeaderlessExtensionsDataProvider")
+    public Object[][]vcfHeaderlessExtensionsDataProvider() {
+        return new Object[][] {
+            {FileExtensions.VCF},
+            {FileExtensions.COMPRESSED_VCF}
+        };
+    }
 
     /**
      * A test to ensure that if we add a line to a VCFHeader it will persist through
@@ -366,7 +374,7 @@ public void testModifyHeader() {
      *
      * A test to check that we can't write VCF with missing header.
      */
-    @Test(dataProvider = "vcfExtensionsDataProvider", expectedExceptions = IllegalStateException.class)
+    @Test(dataProvider = "vcfHeaderlessExtensionsDataProvider", expectedExceptions = IllegalStateException.class)
     public void testWriteWithEmptyHeader(final String extension) throws IOException {
         final File fakeVCFFile = File.createTempFile("testWriteAndReadVCFHeaderless.", extension, tempDir);
         metaData = new HashSet<>();
diff --git a/src/test/java/htsjdk/variant/vcf/AbstractVCFCodecTest.java b/src/test/java/htsjdk/variant/vcf/AbstractVCFCodecTest.java
index 9709af8cc6..8bdc321b51 100644
--- a/src/test/java/htsjdk/variant/vcf/AbstractVCFCodecTest.java
+++ b/src/test/java/htsjdk/variant/vcf/AbstractVCFCodecTest.java
@@ -85,7 +85,7 @@ public Object[][] otherHeaderLines() {
                 // technically, this is invalid due to the lack of an "ID" attribute, but it should still parse
                 // into a VCFHeaderLine (just not a VCFSimpleHeaderLine)
                 { "ID=<Description=\"ClinVar Variation ID\">",
-                        new VCFHeaderLine("ID", "<Description=\"ClinVar Variation ID\">") },
+                    new VCFHeaderLine("ID", "<Description=\"ClinVar Variation ID\">") },
 		};
 	}
 
diff --git a/src/test/java/htsjdk/variant/vcf/VCFCompoundHeaderLineUnitTest.java b/src/test/java/htsjdk/variant/vcf/VCFCompoundHeaderLineUnitTest.java
index 2ba980fbb9..d0cc69d565 100644
--- a/src/test/java/htsjdk/variant/vcf/VCFCompoundHeaderLineUnitTest.java
+++ b/src/test/java/htsjdk/variant/vcf/VCFCompoundHeaderLineUnitTest.java
@@ -142,6 +142,49 @@ public void testRepairFlagTypeWithNegativeCount() {
         Assert.assertEquals(infoLine.getCount(), 0);
     }
 
+    @DataProvider(name = "validHeaderIDs")
+    public Object[][] validHeaderIDs() {
+        return new Object[][] {
+            // 1000 Genomes ID key requires special handling
+            {new VCFInfoHeaderLine("<ID=1000G,Number=A,Type=Integer,Description=\"foo\">", VCFHeader.DEFAULT_VCF_VERSION)},
+            // Test all characters allowed after first character
+            {new VCFInfoHeaderLine("<ID=A_,Number=A,Type=Integer,Description=\"foo\">", VCFHeader.DEFAULT_VCF_VERSION)},
+            {new VCFInfoHeaderLine("<ID=A.,Number=A,Type=Integer,Description=\"foo\">", VCFHeader.DEFAULT_VCF_VERSION)},
+            {new VCFInfoHeaderLine("<ID=A0,Number=A,Type=Integer,Description=\"foo\">", VCFHeader.DEFAULT_VCF_VERSION)},
+            // ID can start with underscore _
+            {new VCFInfoHeaderLine("<ID=_A,Number=A,Type=Integer,Description=\"foo\">", VCFHeader.DEFAULT_VCF_VERSION)},
+        };
+    }
+
+    @Test(dataProvider = "validHeaderIDs")
+    public void testValidHeaderIDs(final VCFCompoundHeaderLine line) {
+        line.validateForVersion(VCFHeader.DEFAULT_VCF_VERSION);
+    }
+
+    @DataProvider(name = "invalidHeaderIDs")
+    public Object[][] invalidHeaderIDs() {
+        return new Object[][] {
+            // 1000G key is only allowed for INFO lines, not FORMAT
+            {new VCFFormatHeaderLine("<ID=1000G,Number=A,Type=Integer,Description=\"foo\">", VCFHeaderVersion.VCF4_2)},
+            // Key with 1000G key as prefix should be rejected
+            {new VCFInfoHeaderLine("<ID=1000GA,Number=A,Type=Integer,Description=\"foo\">", VCFHeaderVersion.VCF4_2)},
+            // Key cannot start with number
+            {new VCFInfoHeaderLine("<ID=1A,Number=A,Type=Integer,Description=\"foo\">", VCFHeaderVersion.VCF4_2)},
+            // Key containing invalid character -
+            {new VCFInfoHeaderLine("<ID=A-,Number=A,Type=Integer,Description=\"foo\">", VCFHeaderVersion.VCF4_2)},
+        };
+    }
+
+    @Test(dataProvider = "invalidHeaderIDs")
+    public void testPre43LenientHandling(final VCFCompoundHeaderLine line) {
+        line.validateForVersion(VCFHeaderVersion.VCF4_2);
+    }
+
+    @Test(dataProvider = "invalidHeaderIDs", expectedExceptions = TribbleException.class)
+    public void testInvalidHeaderIDs(final VCFCompoundHeaderLine line) {
+        line.validateForVersion(VCFHeader.DEFAULT_VCF_VERSION);
+    }
+
     @DataProvider (name = "equalsData")
     public Object[][] getEqualsData() {
         return new Object[][] {
diff --git a/src/test/java/htsjdk/variant/vcf/VCFFileReaderTest.java b/src/test/java/htsjdk/variant/vcf/VCFFileReaderTest.java
index 383d272a8d..b6835e1d25 100644
--- a/src/test/java/htsjdk/variant/vcf/VCFFileReaderTest.java
+++ b/src/test/java/htsjdk/variant/vcf/VCFFileReaderTest.java
@@ -75,7 +75,10 @@ Object[][] pathsData() {
                 {TEST_DATA_DIR + "Vcf4.2WithSourceVersionInfoFields.vcf", null, false, true},
 //
 //                // should reject bcf v2.2 on read, see issue https://github.com/samtools/htsjdk/issues/1323
-                {TEST_DATA_DIR + "BCFVersion22Uncompressed.bcf", null, false, false}
+                {TEST_DATA_DIR + "BCFVersion22Uncompressed.bcf", null, false, true},
+
+                // Test that gzipped BCFs can be read
+                {TEST_DATA_DIR + "bcfV22.bcf", null, false, true}
         };
     }
 
diff --git a/src/test/java/htsjdk/variant/vcf/VCFHeaderLineTranslatorUnitTest.java b/src/test/java/htsjdk/variant/vcf/VCFHeaderLineTranslatorUnitTest.java
index 94859c8717..0ddacd7ec7 100644
--- a/src/test/java/htsjdk/variant/vcf/VCFHeaderLineTranslatorUnitTest.java
+++ b/src/test/java/htsjdk/variant/vcf/VCFHeaderLineTranslatorUnitTest.java
@@ -103,8 +103,6 @@ private Object[][] getInvalidHeaderLines() {
         return new Object[][]{
                 // to parse, expected, recommended, error message
                 {"<Description=\"Y\",ID=X>", idDesc, none, "Unexpected tag or tag order for tag \"Description\""},
-                {"<ID=X,Desc=\"Y\">", idDesc, none, "Unexpected tag or tag order for tag \"Desc\""},
-                {"<>", idDesc, none, "Unexpected tag or tag order for tag \"\""},
 
                 {"<Source=\"source\",ID=X,Description=\"Y\">", idDesc, sourceVersion,
                         "Unexpected tag or tag order for tag \"Source\""},
diff --git a/src/test/java/htsjdk/variant/vcf/VCFIteratorTest.java b/src/test/java/htsjdk/variant/vcf/VCFIteratorTest.java
index 02ccdb2a33..6664501df6 100644
--- a/src/test/java/htsjdk/variant/vcf/VCFIteratorTest.java
+++ b/src/test/java/htsjdk/variant/vcf/VCFIteratorTest.java
@@ -50,21 +50,13 @@ public class VCFIteratorTest extends VariantBaseTest {
 
     @DataProvider(name = "VariantFiles")
     public Object[][] getVariantFiles() {
-        return new Object[][] { 
+        return new Object[][] {
                 new Object[] { "src/test/resources/htsjdk/tribble/tabix/testTabixIndex.vcf", 25 },
                 new Object[] { "src/test/resources/htsjdk/tribble/tabix/testTabixIndex.vcf.gz", 25 },
                 new Object[] { "src/test/resources/htsjdk/variant/serialization_test.bcf", 12 }
         };
     }
 
-    @DataProvider(name = "VcfFiles")
-    public Object[][] getVcfFiles() {
-        return new Object[][] {
-                new Object[] { "src/test/resources/htsjdk/tribble/tabix/testTabixIndex.vcf", 25 },
-                new Object[] { "src/test/resources/htsjdk/tribble/tabix/testTabixIndex.vcf.gz", 25 }
-        };
-    }
-
     private void assertExpectedNumberOfVariants(final VCFIterator r, final int expectVariants) {
         try {
             Assert.assertNotNull(r.getHeader());
@@ -91,32 +83,34 @@ public void testUsingFile(final String file, final int nVariants) throws IOExcep
     private void testUsingZippedInput(final String filepath, final int nVariants,
             final Function<File,OutputStream> outputStreamProvider) throws IOException {
     	File tmp =  new File(filepath);
+        // TODO I don't understand what problem the comment below is referencing
+        //  Does it mean the code paths for handling zipped/unzipped BCFs should be unified
+        //  under VCFFileReader once VCFFileReader supports zipped BCF?
+
         /* TODO fix this when VCFFileReader will support BCF see 
          * https://github.com/samtools/htsjdk/pull/837#discussion_r139490218
          * https://github.com/samtools/htsjdk/issues/946
          */
-        if( tmp.getName().endsWith(FileExtensions.VCF)) {
+        if(!tmp.getName().endsWith(FileExtensions.COMPRESSED_VCF)) {
             tmp = File.createTempFile("tmp",FileExtensions.COMPRESSED_VCF);
             tmp.deleteOnExit();
             try(    FileInputStream in = new FileInputStream(filepath);
                     OutputStream out =  outputStreamProvider.apply(tmp); ) {
                     IOUtil.copyStream(in, out);
                     out.flush();
-               } catch(final IOException err) {
-                   throw err;
-               }
             }
+        }
         try (final VCFIterator r = new VCFIteratorBuilder().open(tmp) ) {
             assertExpectedNumberOfVariants(r, nVariants);
         }
     }
 
-    @Test(dataProvider = "VcfFiles")
+    @Test(dataProvider = "VariantFiles")
     public void testUsingBGZippedInput(final String filepath, final int nVariants) throws IOException {
         testUsingZippedInput(filepath, nVariants, (F)-> new BlockCompressedOutputStream(F));
     }
 
-    @Test(dataProvider = "VcfFiles")
+    @Test(dataProvider = "VariantFiles")
     public void testUsingGZippedInput(final String filepath, final int nVariants) throws IOException {
         testUsingZippedInput(filepath, nVariants, (F)-> {
             try {
diff --git a/src/test/resources/htsjdk/variant/bcfV22.bcf b/src/test/resources/htsjdk/variant/bcfV22.bcf
new file mode 100644
index 0000000000000000000000000000000000000000..8ded3b5103f67cfbbfbc8d852204e12b44e4658e
GIT binary patch
literal 613
zcmV-r0-F6FiwFb&00000{{{d;LjnLu0>zR+Z__Xo$Fpn#rQ)(v4}cV!1Bb0OZ6_g3
z1gT4x5~<MAZi8FpCT{9Ajf?GcqY~mHaOA^qL;{Hq0$&3=70|@K6UmbGe?R}<%X{z4
z^G);AGz`a?;RMYHT|(|&dH(vO(X2R*AM`KINA9zr>kd2P@o^Wikm41`Bz0?@B(cRS
zjwrKNkTDe1j)U%7_sMhdG9)R-asINYRa?2*EUE2WZI#rMTy0Br8qPQ&37f1jy15dm
zWhu=@>uAndf~R$H%_}c(9$7A78oBar*NF`-nRp4{0;5fqf*?Y0NXaz{IRi1|3&3ay
zIAR=7#7IU%!~mf&m{I~GoHCAi#u1pKB$=W#LKNh=Kp^h7K7u@>$QS+0Jtk{NF-$p|
zi1tpfL>gzF-)d!PNR}&+L3FMm>b2_wJNy5&t;1O(-c%g##c1%-8V<%*(6vVAzSZep
zSg$WTeQUcFR`ALnR1Dkr_l4r@iSsp^dxl}=S`}7%)y|>uXove|Y@-Ek`ncDt_6`e%
z-*txJ*3jAEeu#5W=Axeb%9hnVjV0Pz)^@o20@^Qgv!@!z2Rg@i>$uB3)Nyl_`?;^-
zu*&_?ao1{dB&RfIp>0n4Kx3JxsXyqrrOJKQacPm0aqXOm%H1<H+^RS{8Fx(0@ZOb&
zC&`aGN2+H2qT^VRGk5p`$5rk~w+vZvcz-x$cxz>NGVn=fc~eX+oAO5YE4M0lN4H<^
zizQ0#u5Q0Je@pxgM#&zZWCj2LABzYC000000RIL6LPG)o8vp|U0000000000Jdz^x

literal 0
HcmV?d00001

diff --git a/src/test/resources/htsjdk/variant/bcfV22.bcf.gz b/src/test/resources/htsjdk/variant/bcfV22.bcf.gz
new file mode 100644
index 0000000000000000000000000000000000000000..8ded3b5103f67cfbbfbc8d852204e12b44e4658e
GIT binary patch
literal 613
zcmV-r0-F6FiwFb&00000{{{d;LjnLu0>zR+Z__Xo$Fpn#rQ)(v4}cV!1Bb0OZ6_g3
z1gT4x5~<MAZi8FpCT{9Ajf?GcqY~mHaOA^qL;{Hq0$&3=70|@K6UmbGe?R}<%X{z4
z^G);AGz`a?;RMYHT|(|&dH(vO(X2R*AM`KINA9zr>kd2P@o^Wikm41`Bz0?@B(cRS
zjwrKNkTDe1j)U%7_sMhdG9)R-asINYRa?2*EUE2WZI#rMTy0Br8qPQ&37f1jy15dm
zWhu=@>uAndf~R$H%_}c(9$7A78oBar*NF`-nRp4{0;5fqf*?Y0NXaz{IRi1|3&3ay
zIAR=7#7IU%!~mf&m{I~GoHCAi#u1pKB$=W#LKNh=Kp^h7K7u@>$QS+0Jtk{NF-$p|
zi1tpfL>gzF-)d!PNR}&+L3FMm>b2_wJNy5&t;1O(-c%g##c1%-8V<%*(6vVAzSZep
zSg$WTeQUcFR`ALnR1Dkr_l4r@iSsp^dxl}=S`}7%)y|>uXove|Y@-Ek`ncDt_6`e%
z-*txJ*3jAEeu#5W=Axeb%9hnVjV0Pz)^@o20@^Qgv!@!z2Rg@i>$uB3)Nyl_`?;^-
zu*&_?ao1{dB&RfIp>0n4Kx3JxsXyqrrOJKQacPm0aqXOm%H1<H+^RS{8Fx(0@ZOb&
zC&`aGN2+H2qT^VRGk5p`$5rk~w+vZvcz-x$cxz>NGVn=fc~eX+oAO5YE4M0lN4H<^
zizQ0#u5Q0Je@pxgM#&zZWCj2LABzYC000000RIL6LPG)o8vp|U0000000000Jdz^x

literal 0
HcmV?d00001

diff --git a/src/test/resources/htsjdk/variant/structuralvariants.vcf b/src/test/resources/htsjdk/variant/structuralvariants.vcf
index 5ffad2f94c..4de882ea49 100644
--- a/src/test/resources/htsjdk/variant/structuralvariants.vcf
+++ b/src/test/resources/htsjdk/variant/structuralvariants.vcf
@@ -7,7 +7,7 @@
 ##INFO=<ID=STRAND_1,Number=1,Type=String,Description="Strand Orientation of SV Start">
 ##INFO=<ID=STRAND_2,Number=1,Type=String,Description="Strand Orientation of SV End">
 ##INFO=<ID=METHOD,Number=1,Type=String,Description="SV Caller used to predict">
-##INFO=<ID=DP,Number=1,Type=String,Description="combined depth across samples">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="combined depth across samples">
 ##ALT=<ID=DEL,Description="Deletion">
 ##ALT=<ID=DUP,Description="Duplication">
 ##ALT=<ID=INS,Description="Insertion of novel sequence">
diff --git a/src/test/resources/htsjdk/variant/test1.vcf b/src/test/resources/htsjdk/variant/test1.vcf
index 39bed22e75..55566f3365 100644
--- a/src/test/resources/htsjdk/variant/test1.vcf
+++ b/src/test/resources/htsjdk/variant/test1.vcf
@@ -48,6 +48,6 @@
 #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA12878	NA12891	NA12892
 1	8216712	rs11121115	A	G	1540.26	PASS	AC=3;AF=0.500;AN=6;BaseQRankSum=0.917;DB;DP=131;Dels=0.00;FS=11.67;HaplotypeScore=3.35;MLEAC=3;MLEAF=0.500;MQ=57.74;MQ0=1;MQRankSum=0.427;QD=11.76;ReadPosRankSum=-2.190e-01;SB=-9.390e+02;VQSLOD=5.53;culprit=HaplotypeScore	GT:AD:DP:GQ:PL:TP	0/1:23,28:51:99:681,0,668:127	0/1:16,18:34:99:338,0,244:127	0/1:24,22:46:99:560,0,323:127
 1	17032814	rs2773183	T	C	2828.26	PASS	AC=3;AF=0.500;AN=6;BaseQRankSum=-3.879e+00;DB;DP=322;Dels=0.00;FS=2.43;HaplotypeScore=15.45;MLEAC=3;MLEAF=0.500;MQ=56.86;MQ0=0;MQRankSum=2.92;QD=8.78;ReadPosRankSum=-1.245e+00;SB=-1.943e+03;VQSLOD=-1.421e+00;culprit=HaplotypeScore	GT:AD:DP:GQ:PL:TP	0/1:63,59:122:99:1434,0,1831:127	0/1:53,56:109:99:910,0,871:127	0/1:61,30:91:99:523,0,1257:127
-1	17032818	rs2773183	T	C	2828.26	FILTER	AC=3;AF=0.500;AN=6;BaseQRankSum=-3.879e+00;DB;DP=322;Dels=0.00;FS=2.43;HaplotypeScore=15.45;MLEAC=3;MLEAF=0.500;MQ=56.86;MQ0=0;MQRankSum=2.92;QD=8.78;ReadPosRankSum=-1.245e+00;SB=-1.943e+03;VQSLOD=-1.421e+00;culprit=HaplotypeScore	GT:AD:DP:GQ:PL:TP	0/1:63,59:122:99:1434,0,1831:127	0/1:53,56:109:99:910,0,871:127	0/1:61,30:91:99:523,0,1257:127
+1	17032818	rs2773183	T	C	2828.26	LowQual	AC=3;AF=0.500;AN=6;BaseQRankSum=-3.879e+00;DB;DP=322;Dels=0.00;FS=2.43;HaplotypeScore=15.45;MLEAC=3;MLEAF=0.500;MQ=56.86;MQ0=0;MQRankSum=2.92;QD=8.78;ReadPosRankSum=-1.245e+00;SB=-1.943e+03;VQSLOD=-1.421e+00;culprit=HaplotypeScore	GT:AD:DP:GQ:PL:TP	0/1:63,59:122:99:1434,0,1831:127	0/1:53,56:109:99:910,0,871:127	0/1:61,30:91:99:523,0,1257:127
 2	1143476	rs4998209	C	T	1483.26	PASS	AC=2;AF=0.333;AN=6;BaseQRankSum=-4.814e+00;DB;DP=189;Dels=0.00;FS=5.61;HaplotypeScore=0.324;MLEAC=2;MLEAF=0.333;MQ=58.36;MQ0=0;MQRankSum=1.58;QD=12.06;ReadPosRankSum=0.326;SB=-9.320e+02;VQSLOD=6.81;culprit=HaplotypeScore	GT:AD:DP:GQ:PL:TP	0|0:66,0:66:99:0,178,2264:127	0|1:33,38:71:99:844,0,1024:127	0|1:26,26:52:99:678,0,719:127
 2	9240279	rs56249990	A	G	3978.01	PASS	AC=3;AF=0.500;AN=6;BaseQRankSum=1.70;DB;DP=213;Dels=0.00;FS=7.83;HaplotypeScore=1.19;MLEAC=3;MLEAF=0.500;MQ=59.40;MQ0=0;MQRankSum=0.143;QD=27.25;ReadPosRankSum=-9.700e-02;SB=-1.991e+03;VQSLOD=9.14;culprit=FS	GT:AD:DP:GQ:PL:TP	0|1:33,42:75:99:1400,0,1031:127	0|0:67,0:67:99:0,178,2277:127	1|1:0,71:71:99:2578,199,0:127

From 8c47db2b5873782ee53af08ec4298e8267d5fc27 Mon Sep 17 00:00:00 2001
From: Anders Leung <aleung@ml6062-aleung.lan>
Date: Mon, 29 Nov 2021 16:40:30 -0500
Subject: [PATCH 08/22] Make scripts/install-bcftools.sh executable

---
 scripts/install-bcftools.sh | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 mode change 100644 => 100755 scripts/install-bcftools.sh

diff --git a/scripts/install-bcftools.sh b/scripts/install-bcftools.sh
old mode 100644
new mode 100755

From eede351aca7e22d766f6ece473b72f3cc812dca6 Mon Sep 17 00:00:00 2001
From: Anders Leung <aleung@ml6062-aleung.lan>
Date: Mon, 29 Nov 2021 16:44:40 -0500
Subject: [PATCH 09/22] Add installing bcftools step in github workflow

---
 .github/workflows/tests.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index cbc1966dd6..38ba6b23ab 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -36,6 +36,8 @@ jobs:
         run: ./gradlew compileJava
       - name: Install Samtools
         run: scripts/install-samtools.sh
+      - name: Install Bcftools
+        run: scripts/install-bcftools.sh
       - name: Start the htsget server
         run: scripts/htsget-scripts/start-htsget-test-server.sh
       - name: Run tests

From ed64146fb4850abb4c274556d4c9eb7e6070a80d Mon Sep 17 00:00:00 2001
From: Anders Leung <aleung@ml6062-aleung.lan>
Date: Tue, 30 Nov 2021 08:41:37 -0500
Subject: [PATCH 10/22] Update to bcftools 1.14, set env variable in github
 workflow

---
 .github/workflows/tests.yml                       | 1 +
 scripts/install-bcftools.sh                       | 6 +++---
 src/test/java/htsjdk/utils/BCFToolsTestUtils.java | 2 +-
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 38ba6b23ab..74adfbcbf9 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -11,6 +11,7 @@ jobs:
   test:
     env:
       HTSJDK_SAMTOOLS_BIN: /usr/bin/samtools
+      HTSJDK_BCFTOOLS_BIN: /usr/bin/bcftools
     runs-on: ubuntu-latest
     strategy:
       matrix:
diff --git a/scripts/install-bcftools.sh b/scripts/install-bcftools.sh
index fca5a62134..1694c85912 100755
--- a/scripts/install-bcftools.sh
+++ b/scripts/install-bcftools.sh
@@ -1,5 +1,5 @@
 #!/bin/sh
 set -ex
-wget https://github.com/samtools/bcftools/releases/download/1.13/bcftools-1.13.tar.bz2
-tar -xjvf bcftools-1.13.tar.bz2
-cd bcftools-1.13 && ./configure --prefix=/usr && make && sudo make install
+wget https://github.com/samtools/bcftools/releases/download/1.14/bcftools-1.14.tar.bz2
+tar -xjvf bcftools-1.14.tar.bz2
+cd bcftools-1.14 && ./configure --prefix=/usr && make && sudo make install
diff --git a/src/test/java/htsjdk/utils/BCFToolsTestUtils.java b/src/test/java/htsjdk/utils/BCFToolsTestUtils.java
index 8193791e93..c6c4234f8e 100644
--- a/src/test/java/htsjdk/utils/BCFToolsTestUtils.java
+++ b/src/test/java/htsjdk/utils/BCFToolsTestUtils.java
@@ -12,7 +12,7 @@
 
 public class BCFToolsTestUtils {
     private static final String BCFTOOLS_BINARY_ENV_VARIABLE = "HTSJDK_BCFTOOLS_BIN";
-    public static final String expectedBCFtoolsVersion = "1.13";
+    public static final String expectedBCFtoolsVersion = "1.14";
 
     /**
      * @return true if bcftools is available, otherwise false

From f6fcac6ab8b285cab43e0f9d3075eb968fc7272a Mon Sep 17 00:00:00 2001
From: Anders Leung <aleung@ml6062-aleung.lan>
Date: Tue, 30 Nov 2021 14:21:48 -0500
Subject: [PATCH 11/22] Add tests for BCF2Dictionary, refactor
 BCF2WriterUnitTest

---
 .../htsjdk/variant/bcf2/BCF2Dictionary.java   |  15 +-
 .../variant/bcf2/BCF2DictionaryTest.java      | 164 ++++++++++++++----
 .../variant/bcf2/BCF2WriterUnitTest.java      |  34 ++--
 src/test/resources/htsjdk/variant/test2.vcf   |   2 +-
 4 files changed, 165 insertions(+), 50 deletions(-)

diff --git a/src/main/java/htsjdk/variant/bcf2/BCF2Dictionary.java b/src/main/java/htsjdk/variant/bcf2/BCF2Dictionary.java
index 7b30da8643..db2d342449 100644
--- a/src/main/java/htsjdk/variant/bcf2/BCF2Dictionary.java
+++ b/src/main/java/htsjdk/variant/bcf2/BCF2Dictionary.java
@@ -21,8 +21,7 @@
  * Dictionary of strings or contigs for use with a BCF file.
  * <p>
  * Provides an Integer -> String map interface, but determines during construction whether
- * mapping can be stored as an array (if it can be stored as a dense array) or
- * it must be stored using a map.
+ * the mapping can be stored as an array or it must be stored using a map.
  * <p>
  * This class validates that IDX fields are used as required by the BCF 2.2 spec, namely
  * that either all lines of a given dictionary type (contig or FORMAT/INFO/FILTER) have
@@ -126,11 +125,6 @@ private static BCF2Dictionary makeDictionary(
             for (final VCFSimpleHeaderLine line : headerLines) {
                 final String id = line.getID();
                 final int IDX = Integer.parseUnsignedInt(line.getGenericFieldValue(BCF2Codec.IDXField));
-                if (!seen.contains(id)) {
-                    seen.add(id);
-                    maxIDX = Math.max(maxIDX, IDX);
-                    strings.put(IDX, line.getID());
-                }
 
                 // Have we seen this IDX before with a different string?
                 if (strings.containsKey(IDX)) {
@@ -142,6 +136,13 @@ private static BCF2Dictionary makeDictionary(
                         ));
                     }
                 }
+
+                if (!seen.contains(id)) {
+                    seen.add(id);
+                    maxIDX = Math.max(maxIDX, IDX);
+                    strings.put(IDX, line.getID());
+                }
+
             }
             if (maxIDX == seen.size() - 1) {
                 // By the pigeonhole principle, if we have N unique non-negative IDXs numbered starting from 0
diff --git a/src/test/java/htsjdk/variant/bcf2/BCF2DictionaryTest.java b/src/test/java/htsjdk/variant/bcf2/BCF2DictionaryTest.java
index 9d5b09a0ec..9fbf27842e 100644
--- a/src/test/java/htsjdk/variant/bcf2/BCF2DictionaryTest.java
+++ b/src/test/java/htsjdk/variant/bcf2/BCF2DictionaryTest.java
@@ -9,9 +9,7 @@
 import htsjdk.variant.vcf.VCFHeaderLine;
 import htsjdk.variant.vcf.VCFHeaderLineCount;
 import htsjdk.variant.vcf.VCFHeaderLineType;
-import htsjdk.variant.vcf.VCFHeaderVersion;
 import htsjdk.variant.vcf.VCFInfoHeaderLine;
-import htsjdk.variant.vcf.VCFSimpleHeaderLine;
 import org.testng.Assert;
 import org.testng.annotations.DataProvider;
 import org.testng.annotations.Test;
@@ -57,45 +55,149 @@ public void testCreateDictionary(final BCF2Dictionary dict) {
         Assert.assertEquals(8, dict_size);
     }
 
-    /*
-    @DataProvider(name = "inconsistentIDXProvider")
-    public Object[][] inconsistentIDXProvider() {
-        final List<Object[]> cases = new ArrayList<>();
-
-        // TODO can't create FILTER/FORMAT/INFO lines with arbitrary attributes
-        //  should probably be addressed as part of refactoring, would be simpler and more consistent
-        for (final BCFVersion version : BCFVersion.SUPPORTED_VERSIONS) {
-            // String lines with inconsistent IDX
-            {
-                int counter = 0;
-                final List<VCFHeaderLine> inputLines = new ArrayList<>();
-                inputLines.add(new VCFFilterHeaderLine(String.valueOf(counter++)));
-                inputLines.add(new VCFFilterHeaderLine(String.valueOf(counter++)).getGenericFieldValue());
-
-                new VCFSimpleHeaderLine()
 
+    @DataProvider(name = "invalidIDXProvider")
+    public Object[][] invalidIDXProvider() {
+        final List<Object[]> cases = new ArrayList<>();
+        // String lines with inconsistent IDX
+        {
+            final LinkedHashSet<VCFHeaderLine> lines = new LinkedHashSet<>();
+            lines.add(VCFHeader.makeHeaderVersionLine(VCFHeader.DEFAULT_VCF_VERSION));
+            lines.add(new VCFInfoHeaderLine(
+                "<ID=FOO,Number=A,Type=Integer,Description=\"test\",IDX=1>",
+                VCFHeader.DEFAULT_VCF_VERSION
+            ));
+            lines.add(new VCFInfoHeaderLine(
+                "<ID=BAR,Number=A,Type=Integer,Description=\"test\">",
+                VCFHeader.DEFAULT_VCF_VERSION
+            ));
+
+            final VCFHeader header = new VCFHeader(lines);
+            cases.add(new Object[]{header, BCFVersion.BCF2_2Version, true});
+        }
+        {
+            final LinkedHashSet<VCFHeaderLine> lines = new LinkedHashSet<>();
+            lines.add(VCFHeader.makeHeaderVersionLine(VCFHeader.DEFAULT_VCF_VERSION));
+            lines.add(new VCFInfoHeaderLine(
+                "<ID=FOO,Number=A,Type=Integer,Description=\"test\">",
+                VCFHeader.DEFAULT_VCF_VERSION
+            ));
+            lines.add(new VCFInfoHeaderLine(
+                "<ID=BAR,Number=A,Type=Integer,Description=\"test\",IDX=2>",
+                VCFHeader.DEFAULT_VCF_VERSION
+            ));
+
+            final VCFHeader header = new VCFHeader(lines);
+            cases.add(new Object[]{header, BCFVersion.BCF2_2Version, true});
+        }
+        // Contig lines with inconsistent IDX
+        {
+            final LinkedHashSet<VCFHeaderLine> lines = new LinkedHashSet<>();
+            lines.add(VCFHeader.makeHeaderVersionLine(VCFHeader.DEFAULT_VCF_VERSION));
+            lines.add(new VCFContigHeaderLine(
+                "<ID=chr3,Number=A,Type=Integer,Description=\"test\",IDX=3>",
+                VCFHeader.DEFAULT_VCF_VERSION,
+                3
+            ));
+            lines.add(new VCFContigHeaderLine(
+                "<ID=chr4,Number=A,Type=Integer,Description=\"test\">",
+                VCFHeader.DEFAULT_VCF_VERSION,
+                4
+            ));
+
+            final VCFHeader header = new VCFHeader(lines);
+            cases.add(new Object[]{header, BCFVersion.BCF2_2Version, false});
+        }
+        {
+            final LinkedHashSet<VCFHeaderLine> lines = new LinkedHashSet<>();
+            lines.add(VCFHeader.makeHeaderVersionLine(VCFHeader.DEFAULT_VCF_VERSION));
+            lines.add(new VCFContigHeaderLine(
+                "<ID=chr3,Number=A,Type=Integer,Description=\"test\">",
+                VCFHeader.DEFAULT_VCF_VERSION,
+                3
+            ));
+            lines.add(new VCFContigHeaderLine(
+                "<ID=chr4,Number=A,Type=Integer,Description=\"test\",IDX=4>",
+                VCFHeader.DEFAULT_VCF_VERSION,
+                4
+            ));
+
+            final VCFHeader header = new VCFHeader(lines);
+            cases.add(new Object[]{header, BCFVersion.BCF2_2Version, false});
+        }
 
-                final VCFHeader header = new VCFHeader(new LinkedHashSet<>(inputLines));
-                final BCF2Dictionary dict = BCF2Dictionary.makeBCF2StringDictionary(header, version);
-                cases.add(new Object[]{dict});
-            }
-
-            // Contig lines with inconsistent IDX
-            {
-
-            }
+        // Headers with one IDX mapped to multiple strings/contigs
+        {
+            final LinkedHashSet<VCFHeaderLine> lines = new LinkedHashSet<>();
+            lines.add(VCFHeader.makeHeaderVersionLine(VCFHeader.DEFAULT_VCF_VERSION));
+            lines.add(new VCFInfoHeaderLine(
+                "<ID=FOO,Number=A,Type=Integer,Description=\"test\",IDX=2>",
+                VCFHeader.DEFAULT_VCF_VERSION
+            ));
+            lines.add(new VCFInfoHeaderLine(
+                "<ID=BAR,Number=A,Type=Integer,Description=\"test\",IDX=2>",
+                VCFHeader.DEFAULT_VCF_VERSION
+            ));
+
+            final VCFHeader header = new VCFHeader(lines);
+            cases.add(new Object[]{header, BCFVersion.BCF2_2Version, true});
         }
 
         return cases.toArray(new Object[0][]);
     }
 
-    @Test(expectedExceptions = {TribbleException.class})
-    public void inconsistentIDX(final VCFHeader header, final BCFVersion version, final boolean string) {
-        if (string) {
+    @Test(dataProvider = "invalidIDXProvider", expectedExceptions = TribbleException.class)
+    public void invalidIDXUsage(final VCFHeader header, final BCFVersion version, final boolean isString) {
+        if (isString) {
             BCF2Dictionary.makeBCF2StringDictionary(header, version);
         } else {
             BCF2Dictionary.makeBCF2ContigDictionary(header, version);
         }
     }
-     */
+
+    @Test
+    public void testOutOfOrderAndMissingIDX() {
+        final LinkedHashSet<VCFHeaderLine> lines = new LinkedHashSet<>();
+        lines.add(VCFHeader.makeHeaderVersionLine(VCFHeader.DEFAULT_VCF_VERSION));
+        lines.add(new VCFInfoHeaderLine(
+            "<ID=FOO,Number=A,Type=Integer,Description=\"test\",IDX=6>",
+            VCFHeader.DEFAULT_VCF_VERSION
+        ));
+        lines.add(new VCFInfoHeaderLine(
+            "<ID=BAR,Number=A,Type=Integer,Description=\"test\",IDX=4>",
+            VCFHeader.DEFAULT_VCF_VERSION
+        ));
+        lines.add(new VCFInfoHeaderLine(
+            "<ID=BAZ,Number=A,Type=Integer,Description=\"test\",IDX=2>",
+            VCFHeader.DEFAULT_VCF_VERSION
+        ));
+        final VCFHeader header = new VCFHeader(lines);
+
+        final BCF2Dictionary stringDict = BCF2Dictionary.makeBCF2StringDictionary(header, BCFVersion.BCF2_2Version);
+        Assert.assertEquals(stringDict.get(6), "FOO");
+        Assert.assertEquals(stringDict.get(4), "BAR");
+        Assert.assertEquals(stringDict.get(2), "BAZ");
+    }
+
+    @Test
+    public void testLinesWithDifferentKeySameIDShareIDX() {
+        final LinkedHashSet<VCFHeaderLine> lines = new LinkedHashSet<>();
+        lines.add(VCFHeader.makeHeaderVersionLine(VCFHeader.DEFAULT_VCF_VERSION));
+        lines.add(new VCFInfoHeaderLine(
+            "<ID=FOO,Number=A,Type=Integer,Description=\"test\",IDX=2>",
+            VCFHeader.DEFAULT_VCF_VERSION
+        ));
+        lines.add(new VCFFormatHeaderLine(
+            "<ID=FOO,Number=A,Type=Integer,Description=\"test\",IDX=2>",
+            VCFHeader.DEFAULT_VCF_VERSION
+        ));
+        lines.add(new VCFFilterHeaderLine(
+            "<ID=FOO,Description=\"test\",IDX=2>",
+            VCFHeader.DEFAULT_VCF_VERSION
+        ));
+        final VCFHeader header = new VCFHeader(lines);
+
+        final BCF2Dictionary stringDict = BCF2Dictionary.makeBCF2StringDictionary(header, BCFVersion.BCF2_2Version);
+        Assert.assertEquals(stringDict.get(2), "FOO");
+    }
 }
diff --git a/src/test/java/htsjdk/variant/bcf2/BCF2WriterUnitTest.java b/src/test/java/htsjdk/variant/bcf2/BCF2WriterUnitTest.java
index 0bc493bf15..83258c54ae 100644
--- a/src/test/java/htsjdk/variant/bcf2/BCF2WriterUnitTest.java
+++ b/src/test/java/htsjdk/variant/bcf2/BCF2WriterUnitTest.java
@@ -343,26 +343,22 @@ private static VariantContext createVC(final VCFHeader header) {
     }
 
     @DataProvider
-    public Object[][] bcftoolsRoundTripProvider() {
+    public Object[][] bcftoolsReadsHtsjdkOutputProvider() {
         return new Object[][]{
             {"phased.vcf"},
             {"test1.vcf"},
+            {"test2.vcf"},
             {"NA12891.vcf"},
             {"NA12891.fp.vcf"},
-            {"dbsnp_135.b37.1000.vcf"},
             {"structuralvariants.vcf"},
-            // TODO the test testBCFToolsReadsHtsjdkOutput fails for the following two files
-            //  due to what appears to be a bug in bcftools' VCF output where missing FORMAT
-            //  values are sometimes encoded as an empty string and not '.'
-            //  This seems to have something to do with the affected keys being in trailing
-            //  position in the original VCF (trailing missing values can be dropped), and
-            //  htsjdk reordering FORMAT keys by sorting them alphabetically
+            // These two tests appear to fail because of a bcftools bug
 //            {"ex2.vcf"},
 //            {"test.vcf.bgz"},
+            {"vcf43/all43Features.utf8.vcf"}
         };
     }
 
-    @Test(dataProvider = "bcftoolsRoundTripProvider")
+    @Test(dataProvider = "bcftoolsReadsHtsjdkOutputProvider")
     public void testBCFToolsReadsHtsjdkOutput(final String testFile) throws IOException {
         // Take an input VCF and read it into memory as our expected output
         // Take the same VCF and write it out as a BCF using htsjdk's BCF2Writer, use bcftools to convert from
@@ -382,7 +378,7 @@ public void testBCFToolsReadsHtsjdkOutput(final String testFile) throws IOExcept
         ) {
             writer.writeHeader(header);
             for (final VariantContext vc : expectedVariantContexts) {
-                writer.add(vc);
+                writer.add(vc.fullyDecode(header, false));
             }
         }
 
@@ -404,7 +400,23 @@ public void testBCFToolsReadsHtsjdkOutput(final String testFile) throws IOExcept
         }
     }
 
-    @Test(dataProvider = "bcftoolsRoundTripProvider")
+    @DataProvider
+    public Object[][] htsjdkReadsBCFToolsOutputProvider() {
+        return new Object[][]{
+            {"phased.vcf"},
+            {"test1.vcf"},
+            {"test2.vcf"},
+            {"NA12891.vcf"},
+            {"NA12891.fp.vcf"},
+            {"structuralvariants.vcf"},
+            {"ex2.vcf"},
+            {"test.vcf.bgz"},
+            // bcftools does not do decoding of percent-encoded VCFs, so its BCF output contains the literal characters
+//            {"vcf43/all43Features.utf8.vcf"}
+        };
+    }
+
+    @Test(dataProvider = "htsjdkReadsBCFToolsOutputProvider")
     public void testHtsjdkReadsBCFToolsOutput(final String testFile) {
         // Take an input VCF and read it into memory as our expected output
         // Take the same VCF and convert it to BCF using bcftools, then read the BCF into memory again as our actual output
diff --git a/src/test/resources/htsjdk/variant/test2.vcf b/src/test/resources/htsjdk/variant/test2.vcf
index 39bed22e75..55566f3365 100644
--- a/src/test/resources/htsjdk/variant/test2.vcf
+++ b/src/test/resources/htsjdk/variant/test2.vcf
@@ -48,6 +48,6 @@
 #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA12878	NA12891	NA12892
 1	8216712	rs11121115	A	G	1540.26	PASS	AC=3;AF=0.500;AN=6;BaseQRankSum=0.917;DB;DP=131;Dels=0.00;FS=11.67;HaplotypeScore=3.35;MLEAC=3;MLEAF=0.500;MQ=57.74;MQ0=1;MQRankSum=0.427;QD=11.76;ReadPosRankSum=-2.190e-01;SB=-9.390e+02;VQSLOD=5.53;culprit=HaplotypeScore	GT:AD:DP:GQ:PL:TP	0/1:23,28:51:99:681,0,668:127	0/1:16,18:34:99:338,0,244:127	0/1:24,22:46:99:560,0,323:127
 1	17032814	rs2773183	T	C	2828.26	PASS	AC=3;AF=0.500;AN=6;BaseQRankSum=-3.879e+00;DB;DP=322;Dels=0.00;FS=2.43;HaplotypeScore=15.45;MLEAC=3;MLEAF=0.500;MQ=56.86;MQ0=0;MQRankSum=2.92;QD=8.78;ReadPosRankSum=-1.245e+00;SB=-1.943e+03;VQSLOD=-1.421e+00;culprit=HaplotypeScore	GT:AD:DP:GQ:PL:TP	0/1:63,59:122:99:1434,0,1831:127	0/1:53,56:109:99:910,0,871:127	0/1:61,30:91:99:523,0,1257:127
-1	17032818	rs2773183	T	C	2828.26	FILTER	AC=3;AF=0.500;AN=6;BaseQRankSum=-3.879e+00;DB;DP=322;Dels=0.00;FS=2.43;HaplotypeScore=15.45;MLEAC=3;MLEAF=0.500;MQ=56.86;MQ0=0;MQRankSum=2.92;QD=8.78;ReadPosRankSum=-1.245e+00;SB=-1.943e+03;VQSLOD=-1.421e+00;culprit=HaplotypeScore	GT:AD:DP:GQ:PL:TP	0/1:63,59:122:99:1434,0,1831:127	0/1:53,56:109:99:910,0,871:127	0/1:61,30:91:99:523,0,1257:127
+1	17032818	rs2773183	T	C	2828.26	LowQual	AC=3;AF=0.500;AN=6;BaseQRankSum=-3.879e+00;DB;DP=322;Dels=0.00;FS=2.43;HaplotypeScore=15.45;MLEAC=3;MLEAF=0.500;MQ=56.86;MQ0=0;MQRankSum=2.92;QD=8.78;ReadPosRankSum=-1.245e+00;SB=-1.943e+03;VQSLOD=-1.421e+00;culprit=HaplotypeScore	GT:AD:DP:GQ:PL:TP	0/1:63,59:122:99:1434,0,1831:127	0/1:53,56:109:99:910,0,871:127	0/1:61,30:91:99:523,0,1257:127
 2	1143476	rs4998209	C	T	1483.26	PASS	AC=2;AF=0.333;AN=6;BaseQRankSum=-4.814e+00;DB;DP=189;Dels=0.00;FS=5.61;HaplotypeScore=0.324;MLEAC=2;MLEAF=0.333;MQ=58.36;MQ0=0;MQRankSum=1.58;QD=12.06;ReadPosRankSum=0.326;SB=-9.320e+02;VQSLOD=6.81;culprit=HaplotypeScore	GT:AD:DP:GQ:PL:TP	0|0:66,0:66:99:0,178,2264:127	0|1:33,38:71:99:844,0,1024:127	0|1:26,26:52:99:678,0,719:127
 2	9240279	rs56249990	A	G	3978.01	PASS	AC=3;AF=0.500;AN=6;BaseQRankSum=1.70;DB;DP=213;Dels=0.00;FS=7.83;HaplotypeScore=1.19;MLEAC=3;MLEAF=0.500;MQ=59.40;MQ0=0;MQRankSum=0.143;QD=27.25;ReadPosRankSum=-9.700e-02;SB=-1.991e+03;VQSLOD=9.14;culprit=FS	GT:AD:DP:GQ:PL:TP	0|1:33,42:75:99:1400,0,1031:127	0|0:67,0:67:99:0,178,2277:127	1|1:0,71:71:99:2578,199,0:127

From 9b2d77c1dd762dd4b71cbfa61adc21a8f508b54f Mon Sep 17 00:00:00 2001
From: Anders Leung <aleung@ml6062-aleung.lan>
Date: Mon, 6 Dec 2021 16:53:54 -0500
Subject: [PATCH 12/22] Begin removing BCF 2.1, update tests files

---
 .../java/htsjdk/samtools/util/IOUtil.java     |   3 ++-
 .../htsjdk/tribble/index/IndexFactory.java    |   8 ++++++--
 .../java/htsjdk/variant/bcf2/BCF2Codec.java   |   2 ++
 .../java/htsjdk/variant/bcf2/BCFVersion.java  |   3 ++-
 .../tribble/index/IndexFactoryTest.java       |   9 +++------
 .../htsjdk/variant/vcf/VCFIteratorTest.java   |   2 +-
 .../htsjdk/variant/VcfThatLacksAnIndex.bcf    | Bin 7490 -> 3158 bytes
 .../htsjdk/variant/serialization_test.bcf     | Bin 7490 -> 3158 bytes
 .../variant/serialization_test_unzipped.bcf   | Bin 0 -> 7477 bytes
 9 files changed, 16 insertions(+), 11 deletions(-)
 create mode 100644 src/test/resources/htsjdk/variant/serialization_test_unzipped.bcf

diff --git a/src/main/java/htsjdk/samtools/util/IOUtil.java b/src/main/java/htsjdk/samtools/util/IOUtil.java
index 81351e297a..2d97d2284c 100755
--- a/src/main/java/htsjdk/samtools/util/IOUtil.java
+++ b/src/main/java/htsjdk/samtools/util/IOUtil.java
@@ -1277,7 +1277,8 @@ public static List<Path> filesToPaths(Collection<File> files){
      */
     public static boolean isGZIPInputStream(final InputStream stream) {
         if (!stream.markSupported()) {
-            throw new IllegalArgumentException("isGZIPInputStream() : Cannot test a stream that doesn't support marking.");
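+            // Wrap in a BufferedInputStream, which supports mark. NOTE(review): bytes buffered by the wrapper are not restored to the caller's stream - confirm callers do not reuse it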
+            return isGZIPInputStream(new BufferedInputStream(stream));
         }
         stream.mark(GZIP_HEADER_READ_LENGTH);
 
diff --git a/src/main/java/htsjdk/tribble/index/IndexFactory.java b/src/main/java/htsjdk/tribble/index/IndexFactory.java
index 1e26c33300..be21977a2c 100644
--- a/src/main/java/htsjdk/tribble/index/IndexFactory.java
+++ b/src/main/java/htsjdk/tribble/index/IndexFactory.java
@@ -595,7 +595,9 @@ public FeatureIterator(final Path inputPath, final FeatureCodec<FEATURE_TYPE, SO
             try {
                 // Since we modified inputPath above, we MUST use this.inputPath for all checks and file creations
                 // for the rest of this method!
-                if (IOUtil.hasBlockCompressedExtension(this.inputPath)) {
+                if (IOUtil.hasBlockCompressedExtension(this.inputPath)
+                    || ((this.inputPath.toString().endsWith(FileExtensions.BCF)) && IOUtil.isGZIPInputStream(IOUtil.openFileForReading(this.inputPath)))
+                ) {
                     final BlockCompressedInputStream bcs = initIndexableBlockCompressedStream(this.inputPath);
                     source = (SOURCE) codec.makeIndexableSourceFromStream(bcs);
                 } else {
@@ -623,7 +625,9 @@ private static PositionalBufferedStream initIndexablePositionalStream(final Path
         private static BlockCompressedInputStream initIndexableBlockCompressedStream(final Path inputPath) {
             // test that this is in fact a valid block compressed file
             try {
-                if (!IOUtil.isBlockCompressed(inputPath, true)) {
+                if (!(IOUtil.isBlockCompressed(inputPath, true)
+                || (inputPath.toString().endsWith(FileExtensions.BCF)) && IOUtil.isGZIPInputStream(IOUtil.openFileForReading(inputPath)))
+                ) {
                     throw new TribbleException.MalformedFeatureFile("Input file is not in valid block compressed format.",
                             inputPath.toString());
                 }
diff --git a/src/main/java/htsjdk/variant/bcf2/BCF2Codec.java b/src/main/java/htsjdk/variant/bcf2/BCF2Codec.java
index ce999574aa..62fcd3ede4 100644
--- a/src/main/java/htsjdk/variant/bcf2/BCF2Codec.java
+++ b/src/main/java/htsjdk/variant/bcf2/BCF2Codec.java
@@ -193,6 +193,8 @@ public FeatureCodecHeader readHeader(final PositionalBufferedStream inputStream)
             bcfVersion = BCFVersion.readBCFVersion(inputStream);
             if (bcfVersion == null) {
                 error("Input stream does not contain a BCF encoded file; BCF magic header info not found");
+            } else if (!BCFVersion.SUPPORTED_VERSIONS.contains(bcfVersion)) {
+                error(bcfVersion + " is not supported by htsjdk");
             }
 
             decoder = BCF2Decoder.getDecoder(bcfVersion);
diff --git a/src/main/java/htsjdk/variant/bcf2/BCFVersion.java b/src/main/java/htsjdk/variant/bcf2/BCFVersion.java
index 7bec9ef192..788cb60e88 100644
--- a/src/main/java/htsjdk/variant/bcf2/BCFVersion.java
+++ b/src/main/java/htsjdk/variant/bcf2/BCFVersion.java
@@ -29,6 +29,7 @@
 import java.io.InputStream;
 import java.io.OutputStream;
 import java.util.Arrays;
+import java.util.Collections;
 import java.util.HashSet;
 import java.util.Set;
 
@@ -49,7 +50,7 @@ public final class BCFVersion {
     public static final BCFVersion BCF2_1Version = new BCFVersion(2, 1);
     public static final BCFVersion BCF2_2Version = new BCFVersion(2, 2);
 
-    public static final Set<BCFVersion> SUPPORTED_VERSIONS = new HashSet<>(Arrays.asList(BCF2_1Version, BCF2_2Version));
+    public static final Set<BCFVersion> SUPPORTED_VERSIONS = new HashSet<>(Collections.singletonList(BCF2_2Version));
 
 
     final int majorVersion;
diff --git a/src/test/java/htsjdk/tribble/index/IndexFactoryTest.java b/src/test/java/htsjdk/tribble/index/IndexFactoryTest.java
index e127fd4b2f..648f7080cd 100644
--- a/src/test/java/htsjdk/tribble/index/IndexFactoryTest.java
+++ b/src/test/java/htsjdk/tribble/index/IndexFactoryTest.java
@@ -196,12 +196,9 @@ public void testCreateTabixIndexFromVCF(
     @DataProvider(name = "bcfDataFactory")
     public Object[][] getBCFData(){
         return new Object[][] {
-                //TODO: this needs more test cases, including block compressed and indexed, but bcftools can't
-                // generate indices for BCF2.1 files, which is all HTSJDK can read, and htsjdk also can't read/write
-                // block compressed BCFs (https://github.com/samtools/htsjdk/issues/946)
-                new Object[] {
-                        new File("src/test/resources/htsjdk/variant/serialization_test.bcf")
-                }
+            {new File("src/test/resources/htsjdk/variant/serialization_test_unzipped.bcf")},
+            // TODO: this needs more test cases, including block compressed and indexed
+//            {new File("src/test/resources/htsjdk/variant/serialization_test.bcf")},
         };
     }
 
diff --git a/src/test/java/htsjdk/variant/vcf/VCFIteratorTest.java b/src/test/java/htsjdk/variant/vcf/VCFIteratorTest.java
index 6664501df6..4030c180e2 100644
--- a/src/test/java/htsjdk/variant/vcf/VCFIteratorTest.java
+++ b/src/test/java/htsjdk/variant/vcf/VCFIteratorTest.java
@@ -91,7 +91,7 @@ private void testUsingZippedInput(final String filepath, final int nVariants,
          * https://github.com/samtools/htsjdk/pull/837#discussion_r139490218
          * https://github.com/samtools/htsjdk/issues/946
          */
-        if(!tmp.getName().endsWith(FileExtensions.COMPRESSED_VCF)) {
+        if(!(tmp.getName().endsWith(FileExtensions.COMPRESSED_VCF) || tmp.getName().endsWith(FileExtensions.BCF))) {
             tmp = File.createTempFile("tmp",FileExtensions.COMPRESSED_VCF);
             tmp.deleteOnExit();
             try(    FileInputStream in = new FileInputStream(filepath);
diff --git a/src/test/resources/htsjdk/variant/VcfThatLacksAnIndex.bcf b/src/test/resources/htsjdk/variant/VcfThatLacksAnIndex.bcf
index 8c84efb3993976d32a69eee8fee04b2379e86cc2..8f8630d8fb2cd790950cf1a9d76f8064f10ab4f2 100644
GIT binary patch
literal 3158
zcmV-c45{-UiwFb&00000{{{d;LjnLf46T?AY#Zen$KQOlu^Sj?lDZT|-z4pll*HKQ
z_^Yks_#;j-@@bnQLB}}T=lF_!=iZ(3)j=yk#n6C)CIQVDFpZC~P1Q66V;hu?!ZtCg
zGEIenm_`JHF_?z7wVTjDd)_;reZFRmyOJvLCHLO*JiqsOp8xY+L*ZzB{Rs;~ot+sj
z$7TdE&&d9r;pl9?&1vrJjE;^ahGPC5qY?j@FgIOfa=j5&N{L)S<^<l~0e{4M;IEv#
z&@oWGe&=*NmJk^}m1RdKB12<7pUrNU1^DpUtBoYYqMXGN%S)Wh&4MW`tIy#aJIm%U
zM1>U-I4`8(cSgow9FGa<ZMd)B=I}VYc!wX);~luqZu7bwjv8X|K`ait9b&rT^NNP}
z_s||2?Qpu!DT2={fNd53<qc8y=PcpGC#KFXHDF++NNAU>-{p2_fjDh+zt^{F9Ospm
zl_DW~;PkpZw9R98xX!`<=i`2x-g`MIrM*$3nT(AG6G}9}Nbf{3pJYXUtT(YxVEv=K
z%+9c4btu7Hj?Hl?oMsDhR#H665QoTSFvF)Ylat{amar1$c;%KLrdbi^I7w#HMxT)>
z&8OpRJ_`j=nCCzoh$AMgI>+7QTEcT2T#rxtX_pIpslO&+5m_!_Mr1LFi_D5_y2t2#
zc)Aw%!z?eza2HSOwMJ(pVXzXHMUJ0Y*@*GssWI*0;j@G=L8LLqEwbreJe38MJ}JfI
z*febGjgXG4Af(k?%#_>!xCGmj;doe7N(2ZimI`c&%Wx@%*fqwIs3{wYe=Ma0*jXW?
zSk}Dal#owyykcA_heDQ-;5IkQk`SOU^{dt7fE|%d$x1Av6VXYv7Q<R3!PRP!SSa!`
zfh-&DRrZ2qQdt}vOOy(-SK9}P$!k>!o1`nuiu<U+eKaR9@`}!*BD<%^@~MUM_Bm0@
z<|hQ1$zfhuNOoV>YbkP_u53cJ`4A*xI>zuf#*2A_qZQz;?Njrc5rsUyhRdaddB`Ye
z&W%_q<}oyuOuPr7+jmMf9E0vpl1!`9m@_($gfy3S%_U>#k93j<*f>j8Nm;I7Fi=xv
zuB;Q=u_WZxKS&@73pmTnvN*{?NovAia28)R+H2q`DDVZX0U*5vl7_S}$E(-rfWau6
zlUCpNsK}(0<^j7gCE&2Y%M8bZp5S<a;gt)p#gbKqT^^c<)Ur!)St#IwKt_q`hyZY~
zQk-RS)fS`i)kCh5qe36pD6+UHkvOBAlx4*(3A6KnrB;nBKvraEcr1|_mc~hrF>s79
zg`85taZ-b;M*v$Sk%odMEa!uR5>O@qXii=re&+-jpfe&n1L_2^Wi=-T=c9a5WZ5(<
z2@7l{v%2@n10Y231efAikR#}Fz*TB7t+r8>Q0jHNz>0lp(crl(R3(ULfgu{p%s}QP
zRWwH!F}=`t4a@Nm&J64*2)SHQ@pAm*V?(ux=QsnM0=-;`L~j&#gQ1@CcxQA<E8%W{
zz&x8`C5aKa+(Jd+fSOzf!#(FKhlHaB75;xHGMZ%901`ca5aStR1&Y{s9FH@F0x2T>
zAwy@xUQ27M)4^Z~U(1SuswPIG=R6OhtIvbk(mYTEIA03of6RqxqhPJzdM9Ltfrf%E
z&v0o-8+)Y&mRtZS1ck3Q8k-7gR&8fHQk7>YbSX%;3}VFQA?yMRno;CAI8sQZU&*nn
zTm_{n>gM=VuBh}q9DZRQKx93JNUpBoV3;`5Z#S8xlF=CK=ae9wbINKM;Z-RsPlhvv
zIUZWwZCq9L(3uY>Vyli-E!VK|43@HjC|9SY+9JH<j7`>1L1V%U7SbTJA`t}won%Sc
z7<h|<Sv6a7ItvGnqExz<VRCSsDe|PTNuL9ia+IE7ijoA&ah{u(i?F@QD_3<~sba1m
za&j%XC!)lI2&rl{m*t?o=1ST#Cy0<D7!KtBMMz8~$r)ZTgkDtw7fNqqjHK98f7J+y
zW$nm=g`#0QCAOB3<3dqPsqYe+tma`Lfk8OMgAkG^!^&F#Tgs+d2Prtma5IV{M{kbh
zXXLD(?)T9yyPJ0OGLi%*zuba9>GT*@)0NduhtKEqIM24)sjYVSymlHMv7fP8QV9H0
z<kcj!K;FNlJO?`}@aE3&NNjT4G&LDFjYdqdp{NNY-!y%7aLlB>wwuUqnbcP%(?rlg
zdp%zD)8{CC(q{A+Ldb{IuiI|83Dv=8?CVD(7o&RAfY6%9MakiC(D3cD2Cc&sqAsKw
zs4uo1dFagn>e9Ajr!pbx3vKXFfU>q_GCK#UOW=Mh`3s_`wp)(w4p1A_`|WMF2X8n+
znJl*;YJFSmnQcMpGOCWch+0E+w4tNBU!=CYUwxjUE^j;Cy(36%20uR<q%4S9*IwU9
z{!z{C*2^xkY*<g3C})R_GE=EkifW;ss7L65`WR6B6WQT9Tt{6%?c2BSQ5|Ofy@oo1
z+4G`(NW;8OlxUB~>F_$O32T^YDp8IsZyN;4M+zw&rPDbVtfss@IDfQ4*|Y1R=YcX*
z2b7x*TsjDpNTYoF?<-O+FSZcM#5+$tZ=eipcX|Dn1Lb1=x-#Wb6GB~U?}ET*8rA{j
z2aPJ_!#c{3f4i@)o?N`@i?mge>2i5#+UIh24JUNjJURb?pW4)Zv4z6c1OOlS@GCmt
z)8AVPDZms2ni#B*{%$-O0Mge;J`WF5Riyh59iuuCwWVu10)Ts4uO9@!OlFNSjXPRA
zYqzukU~Fgy0EV|(0B|$pCyY*ko2}?G_z90(Kv^u7AHuc01o-I%&U%u@!?&5jIzSI?
zw|jkVw>8`q4weCb^PQmp0Is(JU=RS0zY@^{b~k8%*?Wq*RPJq^I0}H@Ab?7)FM~xY
z;DHafzMue}naxKEHw6H;XV>2X2H4KEc7$61w)e`d0Nc}QqFNkL)YFQ3zKTr1Xfm0!
zQYCCpij>-s*`G#qrP}ZFd1<@P8ir&M!g?Uw<9|7zlqyHSKDqh(0|1tI=g|QGtFM&m
zms^$ypK`ywZQ+B*27vE5q?GEDRS87gilut&)F3I<10TLo%49=BLsLT&z=crzmo{z$
zxX^Haxl|GQG5US;7zB<QAA@>4QLmQjl8*AV&+bq@dmf%2(&>)N4WjGw*{gKte%?ZK
zXX#L0m%opu?+;XW<kF!xbh`8L<G*|kFi#T9T?ap@?#O=wW>+*0Xb8PY(DwK(!4g^%
zuo1NOtzm?Q0qv^ra2ahLx^QhH0sK?5#r!Mc_zbj<-nXb)^Y*MB%|Uw{c6YxI&=O^|
zd;a<#g0|~mPKWl)<xK`jox0QdTm@};v7fZyCj@Pe-&~E>^pb{l{FSe%X!SuuN!qq|
zZQ0NZXxvz&jP}wRgqEAW4bk4w*aDdH3R!!4@+S(~t`lzkv~c!&V9?lUtEMk?fOh2{
z-W^b6Wrm=&)L%i0bJxKaNY?5qXea0IB*m%RZ-)`{7owwoCTL5CE~!?RcaCY&a{QIe
z1Z{coEqy^Y0NOO#+&PH=ZF+c0SCD^1{&k;0DLx4iA4gMW746m%8qv(=yEiC^t#7|P
zbdhdCxcj~RE?>DG*C>T3sM*?2vgJ45N08QNNE_7~Cd<!t3Q~r!UscY}$g(*E5D0lo
z`Ds$V00Ozt02%=Z*T6^_;=Z-_f~^Oei5_;Do+_7~rX2)gxo5u$fllhjoZI8EJ3J2S
zuvJyZCP>e<=}U=rv>sNp<EFsXgvI>&cL_^VB|Upv?<L}|+;4At;6+5z^C=+{ES&*8
z+1)W9d%yZM04O&dc%7u`bT`sdl2oDHXk=?UQ0^YNs!aLo3&|Xio1XypX$XI@H0JKo
zQ665mPN_n_*%h*sD6KZ7_?k+}ICo^z5un@Kn$;<z`SoLgYGu4Pd_t#;jXQ59%Kw0p
zxBUl0Iy&?IJ<n=+JKg=(fHK#TztBtJ70v56Z0I0o6Wexz(?KhhC!%*7JHXpVb+qym
zaN67359x4zko=Z{^T^%TRSAASP1A0B`D{Y4uK(G|qky%%*rmgQcnny_PBrVWp8m(D
z&jQw41Z!{WT>;%dL+CxyQ{&Ts<u^Z1#`V;lo`6B*M-jGfA?K0Nsj#l6UPWp1zX0Hy
w4G)6bNGVbO1$pnwEj1kg03VA81ONa4009360763o02=@U00000000000FjjmrT_o{

literal 7490
zcmcJT4Qw0b8OPrwO&h0yaVBZX$LOVLm!zb@A8{PhI;msFNk)<;jwup!jEjAauh>5K
z?wqd<sDvVh1{5?2XkoCA4#qZB(-4epP&x|R#HhMyDh$N5MlcvF)6lkd6B=m$=g#M^
zCTY5Et&-UBoA-Tx&;NP;&tv!agXLw%t&BA`#-xN8la-{Pd3X4OlkIKx>c+<4z+kvH
z<lQ<D@D9pTqZuL58W7cpBBeD+PI=ekm;7%0R?=qHcNMSSF*+6sD?%y~7YBv|y@MW4
zo6V-l_~Ee?ZxU8ATAWwKlqzY`B(~x;<L~%{I4LH0+=|z?;FEF`&jpRg=KP=>y_VmM
zd%EqN{8leN&2Qy5+uAxi?Dl2c#e=)pZ8qHL>i^Yk=zO=s-R7`6JI?C{|Emii9On7C
zfEe*v;yPo)BNrwOHdsg)hpVmK)nPX;;%sxYcY4lwj|-Azp_`B>&dv_Eqs?uzyUyeM
z^A}~I;~`1@bTby4v#~L5vpqEA3+pB33$zYrlH;P{ZEFqBq($#QN)snUrT8koL_$nR
z5k4xWwYaKRhZYxkMU3%6D#{B94PRIys=Soq=_Of-iVB~QR85SQo*5W1&$KUfW;(6N
z(-O>q4iciq;e0baR{1Fj^*c1`b+}wO%Xm7DMYLSy1x4gxE}E!_(U#KV`$m^@e4m(-
zH3Wnw2g-R(K5%{El3m6$MM_N+$IaZN^a>+`<{$Sh5rJW{#)LF0MqBxjI8ZjL5g{Q)
z5!PEFM_iT>2y&{4>|Ft_B1|zUg+*B*V642F79&zjiU{POrFRK0OAY;eUX93ztQ^xN
zHc#ava&lZs>6`26kd6zg$WKU<BIN?ClM=#QZHR+?6fvUdcNrKCZZ~q#Z$`zpL{#)#
z6me)Kr4eLq<5oQuq7aGmzQJ%dAzRHjs6x`rN(8AOG4sa<OO6jFWI-zyG<$eZ5qD?A
zRAlCY&m1<-T&fhpvL+<>lwPM4{eoIULQqEJf>0dM9tbfS5>mI0Ws)TaEdX~T@{9D1
zDRPp(K}tmAX*tECFW$<lnIw<G(&%&_s(pvr#)nY-<D|6FjU`bnK0MH49%(b=3?_t<
z`iPE`fenddKx$k{mu#@C%=9hj6I*yyhTw)-B!wA1E=-F2xQLWAjiKbQv2)(`2G|J_
ze8wyQq*R(LFDg%^jK|ULl1;^gYHphqc!9VF6(K^EN&AL$ck|0BO^{OX6Mif$r1S>?
z*ek6niE{67&cT;Tp)pNP^J$sJ9pfOXNeN!BObUtOO9yk2Sp3RGc9h<XJu)JnQR%`#
zNsWt2lgf+Jz%sH%lVM953Z7SK&WiHml2F3YFQgN?;f_%bo;zp=kxJbKMVQk=LaWdl
z13V`wBU`m88AINLB2K`aaJRU5NXg*?sc}UVqgdjX#aQgzr|16wMufkRB9aI@g0rAV
z^qv(p%Bax#ihMgQDw~a@;iuxrN|<OGZ41qG0?O0fAyB`dL}xbNAWA6|$OIw^BiHgS
zp0lo@!Cv38%yURUM@22?J4oTCd^0v`$+dUGW(=oQ`DXM(J|#v(RTUH|fr%Lx)Lj+S
z^w{TbxllVKj%B#;e?t*7EqfUt!3zf|oiP?D`_RxBKP05nl$eD>MqbrXTS+D^IGh>X
zp=2liMp2OsHz}^}q4Vzt!E^V6$!tH+2eo7tYLUJ$T54De#AXLlOh7@Q%41R#MJ`^G
z4G}*+!v|;#EyfCs_{>->HOLyXu5K7o64tH3jKn0`L|TMrWKt4F3N`zb#x9z!^ob%{
z3(Zn0lE{!zU{MVH05>xR%$#bW>&=T~?Q@~QFnsp8&D2ZP(#_BgMr8H>(ZY))%XNmc
zjd?8B#usk~KqOc1Im^|@!XYyniw*&%)(E`7t8rO@_Z5bxSZKTrAKbo-3>uUtcsYsz
zSs}k9#m8Zz6F5Rs(5oi12{wtrBco<tiwOygGa00WqB|O>^ig_3$fzpvkxxp~S_VUa
z{-Hqey&{=JT9Gu9b}8-QmFVf1>GWPa6_+rvPGwDWN><?gXb$wb2;Db3#`i6Fg;vAJ
z4KGIRsp#!fapS2;NIRP&4=-m*W^Pqn&XvdHj1m!fdM`m0L2jQ!LqfO06py!hbW6Yf
zE<ntxX*Adf<``*0AMWj~2{ARH#l4PpkHcl_aM)V~RmJ3&nDLG~-SkOL!D>f-wbSnL
zINi>rRy)nrc2B3x;b?c-&RVVN1l|#PYtl0lM!Kp=38KoW>PCNmX#0?5Wc!$9AYcjg
z1}%8|U>UvMH)t_Fi?UE{TY~y~Kg+Ps?&x%P8b3Yu>`zBEdy+Bc!596z>z3PC8GeSo
zd?auwD`ypqt$b2Z?RL8ZU#@yzeILiT4RuFP|6m)()!lhyS2tH*7mMxK#$8o+rg^K6
zvvOtJWn2Y!1=hrRxXbIXTQ_$xezT@7^xU;R?z44=AAhThtECqh*HCx2@0P=y#d;^>
zK4YxC5<6^szj&<TF5y;k>+9H&T`zO18_HMEf3B*b{>n?NYu9iV&bhvgtL7q+2v_5H
zs+_S$%R|8U2eaX^zl^(xyW@^Kp1^q~XY##@GU9A`+16`vKBTA)x7%s&bk>LK{aht5
z!|y!vA~5gvO&<ZK(>dh>=KO3;A?Cq!qy%%_+!fn^**|~nwmfFb&c|N_W=~ljlP!vQ
z@^3|$-CJCp-m8E)o4hH9d7zT9rd9Vrp^1vs!2Do^f%(k>%#VI^M;TsX?AM+^U0zO?
ztJC4|xH_8p!n&S~i1-GmxRNxTpAByV?3wQz=mG4W+MBl(XgYW53nj3J`Z$6;G5vv;
z<Lko^^tVIf-5`CPaqAi`wHDzvjRs65_wGMhsN~if_o|I`Kn(S61)_hG6-sV^M1FP}
z_us@m#dj+MqSb0WgvT~~(Z5eFa+Z^lhwifY3lQB7o2}E+(NXVj^81jo24c(3zjgyL
z{y>KGJkwl3hzHZRbr&EWdo56cNXG#2SKsLE2BHs$duoS|0P!n?xTZdjSh1L*bEh`F
zREXQS%EtH_;I>|~3Ain_7Out~WG%I<<%`S$N{hu}<}1-Xt#C$N#($ivOXO?2$J6Ps
zdFuTLJeB3#sW(Z&&(|CvzVJKW?gHP@)7#8^Ek{`%OiRS~@$KL3LRprhEZ5CFO%m?*
zzS^D7*O?EV=mO#kRF?YMD{;bNWyvJU*NTdY%8E*qWe;ok+~t>puBWd(m#>Wdko~S|
z5c$ehJb|P<UT);;BlrcCC1Jk)={@?-mak9e$`bbD>VWrpJhq(u?DkrH;5@MZO|qX4
zM<4D6Cu47!oMb--_9shp+_~>#;(Rz|EwG=Dp8WX>;5<=aKj3T%j+u#i@?SbzCE$py
zVUwS+KCoTq@5`}GqZU`KAi_UZS*w2mlC#-9eBWwV&dJF_HoL=Zw{^68z-DN;+gna-
zBlp(70Ji6@sw8VY_S%;U*>>(rl(6mgRukI;fBAO-+x%=hRpQ5svsJ!Qn3Zi?nl`R&
z1)DS&$g#b$lCk;9uR*taR@8t~TR^vGxBp0I+j+brm!8ng+3rTiu{q$UM%$R5eVg+0
z#Pq$CpF8)xM7kY&ZG*{H2HoaPO%Pj6`PG!4WysG1`!6H5YkvQ30o&on|4e@RCt}mr
z>iJn{Fz=AT=s8&d-A37l#_bGjqkSVeHpc$IysJM&Ha-o$V{D|_&}}Z8ljNOM_pjA+
zvi9Vwy_aM?lv+hcduO|gWHZnh>-2%iTm29*_0L!LWIbi2o{O@f*IsIcE5U?~nOSTW
z>rV}Z@Tx(P&pz9Aq)?^)4;0MBx>!FbTwVP+iicJ`h}|Bm`aZrlS)QStiy9tL%(v_{
zDA<Wy_JN|qjZgaB_WC~98X3r$W=6@kr}jbW0o8AOi(uBxy<rMe37FbLx`Eu*eLcZc
z0!Fqrc0-^WqgNCd$fGYa66g~`)7KgXLZ1`m-x!!hc7G^`Nn){GtbbDjFn9G|m&5$!
z#ncb9s;7Vxg<P}QhIb#HlYtQCq1CJPMD$j>daPNo>f7}EqkbNXjsFaI5BiT&{#Czm
zl<>|pzukqAg}iZ3ZJcx}@J6<GUk}ix4zD{5-(Rug_9J=K9woHAH|9N~dhhP%&D<l;
zxVmc1+O_L}<80ePIO`o~Zj34H-4*L`=?}~B1mB@rLqo#>JT`H@H~xr@bMXF~a&<+v
z-|lcYI&8TyW#nErwY8(Cs|X7`gypS%(d^uWHb2``g0`pjK2q)GpPa}sHPk)(_fMXO
xYHt^yp{X?ujhafa_i|-LXakJ5ZKTm;V8owO3iAewR{s-%e6`{+-~_Tt{RhExM=SsU

diff --git a/src/test/resources/htsjdk/variant/serialization_test.bcf b/src/test/resources/htsjdk/variant/serialization_test.bcf
index 8c84efb3993976d32a69eee8fee04b2379e86cc2..8f8630d8fb2cd790950cf1a9d76f8064f10ab4f2 100644
GIT binary patch
literal 3158
zcmV-c45{-UiwFb&00000{{{d;LjnLf46T?AY#Zen$KQOlu^Sj?lDZT|-z4pll*HKQ
z_^Yks_#;j-@@bnQLB}}T=lF_!=iZ(3)j=yk#n6C)CIQVDFpZC~P1Q66V;hu?!ZtCg
zGEIenm_`JHF_?z7wVTjDd)_;reZFRmyOJvLCHLO*JiqsOp8xY+L*ZzB{Rs;~ot+sj
z$7TdE&&d9r;pl9?&1vrJjE;^ahGPC5qY?j@FgIOfa=j5&N{L)S<^<l~0e{4M;IEv#
z&@oWGe&=*NmJk^}m1RdKB12<7pUrNU1^DpUtBoYYqMXGN%S)Wh&4MW`tIy#aJIm%U
zM1>U-I4`8(cSgow9FGa<ZMd)B=I}VYc!wX);~luqZu7bwjv8X|K`ait9b&rT^NNP}
z_s||2?Qpu!DT2={fNd53<qc8y=PcpGC#KFXHDF++NNAU>-{p2_fjDh+zt^{F9Ospm
zl_DW~;PkpZw9R98xX!`<=i`2x-g`MIrM*$3nT(AG6G}9}Nbf{3pJYXUtT(YxVEv=K
z%+9c4btu7Hj?Hl?oMsDhR#H665QoTSFvF)Ylat{amar1$c;%KLrdbi^I7w#HMxT)>
z&8OpRJ_`j=nCCzoh$AMgI>+7QTEcT2T#rxtX_pIpslO&+5m_!_Mr1LFi_D5_y2t2#
zc)Aw%!z?eza2HSOwMJ(pVXzXHMUJ0Y*@*GssWI*0;j@G=L8LLqEwbreJe38MJ}JfI
z*febGjgXG4Af(k?%#_>!xCGmj;doe7N(2ZimI`c&%Wx@%*fqwIs3{wYe=Ma0*jXW?
zSk}Dal#owyykcA_heDQ-;5IkQk`SOU^{dt7fE|%d$x1Av6VXYv7Q<R3!PRP!SSa!`
zfh-&DRrZ2qQdt}vOOy(-SK9}P$!k>!o1`nuiu<U+eKaR9@`}!*BD<%^@~MUM_Bm0@
z<|hQ1$zfhuNOoV>YbkP_u53cJ`4A*xI>zuf#*2A_qZQz;?Njrc5rsUyhRdaddB`Ye
z&W%_q<}oyuOuPr7+jmMf9E0vpl1!`9m@_($gfy3S%_U>#k93j<*f>j8Nm;I7Fi=xv
zuB;Q=u_WZxKS&@73pmTnvN*{?NovAia28)R+H2q`DDVZX0U*5vl7_S}$E(-rfWau6
zlUCpNsK}(0<^j7gCE&2Y%M8bZp5S<a;gt)p#gbKqT^^c<)Ur!)St#IwKt_q`hyZY~
zQk-RS)fS`i)kCh5qe36pD6+UHkvOBAlx4*(3A6KnrB;nBKvraEcr1|_mc~hrF>s79
zg`85taZ-b;M*v$Sk%odMEa!uR5>O@qXii=re&+-jpfe&n1L_2^Wi=-T=c9a5WZ5(<
z2@7l{v%2@n10Y231efAikR#}Fz*TB7t+r8>Q0jHNz>0lp(crl(R3(ULfgu{p%s}QP
zRWwH!F}=`t4a@Nm&J64*2)SHQ@pAm*V?(ux=QsnM0=-;`L~j&#gQ1@CcxQA<E8%W{
zz&x8`C5aKa+(Jd+fSOzf!#(FKhlHaB75;xHGMZ%901`ca5aStR1&Y{s9FH@F0x2T>
zAwy@xUQ27M)4^Z~U(1SuswPIG=R6OhtIvbk(mYTEIA03of6RqxqhPJzdM9Ltfrf%E
z&v0o-8+)Y&mRtZS1ck3Q8k-7gR&8fHQk7>YbSX%;3}VFQA?yMRno;CAI8sQZU&*nn
zTm_{n>gM=VuBh}q9DZRQKx93JNUpBoV3;`5Z#S8xlF=CK=ae9wbINKM;Z-RsPlhvv
zIUZWwZCq9L(3uY>Vyli-E!VK|43@HjC|9SY+9JH<j7`>1L1V%U7SbTJA`t}won%Sc
z7<h|<Sv6a7ItvGnqExz<VRCSsDe|PTNuL9ia+IE7ijoA&ah{u(i?F@QD_3<~sba1m
za&j%XC!)lI2&rl{m*t?o=1ST#Cy0<D7!KtBMMz8~$r)ZTgkDtw7fNqqjHK98f7J+y
zW$nm=g`#0QCAOB3<3dqPsqYe+tma`Lfk8OMgAkG^!^&F#Tgs+d2Prtma5IV{M{kbh
zXXLD(?)T9yyPJ0OGLi%*zuba9>GT*@)0NduhtKEqIM24)sjYVSymlHMv7fP8QV9H0
z<kcj!K;FNlJO?`}@aE3&NNjT4G&LDFjYdqdp{NNY-!y%7aLlB>wwuUqnbcP%(?rlg
zdp%zD)8{CC(q{A+Ldb{IuiI|83Dv=8?CVD(7o&RAfY6%9MakiC(D3cD2Cc&sqAsKw
zs4uo1dFagn>e9Ajr!pbx3vKXFfU>q_GCK#UOW=Mh`3s_`wp)(w4p1A_`|WMF2X8n+
znJl*;YJFSmnQcMpGOCWch+0E+w4tNBU!=CYUwxjUE^j;Cy(36%20uR<q%4S9*IwU9
z{!z{C*2^xkY*<g3C})R_GE=EkifW;ss7L65`WR6B6WQT9Tt{6%?c2BSQ5|Ofy@oo1
z+4G`(NW;8OlxUB~>F_$O32T^YDp8IsZyN;4M+zw&rPDbVtfss@IDfQ4*|Y1R=YcX*
z2b7x*TsjDpNTYoF?<-O+FSZcM#5+$tZ=eipcX|Dn1Lb1=x-#Wb6GB~U?}ET*8rA{j
z2aPJ_!#c{3f4i@)o?N`@i?mge>2i5#+UIh24JUNjJURb?pW4)Zv4z6c1OOlS@GCmt
z)8AVPDZms2ni#B*{%$-O0Mge;J`WF5Riyh59iuuCwWVu10)Ts4uO9@!OlFNSjXPRA
zYqzukU~Fgy0EV|(0B|$pCyY*ko2}?G_z90(Kv^u7AHuc01o-I%&U%u@!?&5jIzSI?
zw|jkVw>8`q4weCb^PQmp0Is(JU=RS0zY@^{b~k8%*?Wq*RPJq^I0}H@Ab?7)FM~xY
z;DHafzMue}naxKEHw6H;XV>2X2H4KEc7$61w)e`d0Nc}QqFNkL)YFQ3zKTr1Xfm0!
zQYCCpij>-s*`G#qrP}ZFd1<@P8ir&M!g?Uw<9|7zlqyHSKDqh(0|1tI=g|QGtFM&m
zms^$ypK`ywZQ+B*27vE5q?GEDRS87gilut&)F3I<10TLo%49=BLsLT&z=crzmo{z$
zxX^Haxl|GQG5US;7zB<QAA@>4QLmQjl8*AV&+bq@dmf%2(&>)N4WjGw*{gKte%?ZK
zXX#L0m%opu?+;XW<kF!xbh`8L<G*|kFi#T9T?ap@?#O=wW>+*0Xb8PY(DwK(!4g^%
zuo1NOtzm?Q0qv^ra2ahLx^QhH0sK?5#r!Mc_zbj<-nXb)^Y*MB%|Uw{c6YxI&=O^|
zd;a<#g0|~mPKWl)<xK`jox0QdTm@};v7fZyCj@Pe-&~E>^pb{l{FSe%X!SuuN!qq|
zZQ0NZXxvz&jP}wRgqEAW4bk4w*aDdH3R!!4@+S(~t`lzkv~c!&V9?lUtEMk?fOh2{
z-W^b6Wrm=&)L%i0bJxKaNY?5qXea0IB*m%RZ-)`{7owwoCTL5CE~!?RcaCY&a{QIe
z1Z{coEqy^Y0NOO#+&PH=ZF+c0SCD^1{&k;0DLx4iA4gMW746m%8qv(=yEiC^t#7|P
zbdhdCxcj~RE?>DG*C>T3sM*?2vgJ45N08QNNE_7~Cd<!t3Q~r!UscY}$g(*E5D0lo
z`Ds$V00Ozt02%=Z*T6^_;=Z-_f~^Oei5_;Do+_7~rX2)gxo5u$fllhjoZI8EJ3J2S
zuvJyZCP>e<=}U=rv>sNp<EFsXgvI>&cL_^VB|Upv?<L}|+;4At;6+5z^C=+{ES&*8
z+1)W9d%yZM04O&dc%7u`bT`sdl2oDHXk=?UQ0^YNs!aLo3&|Xio1XypX$XI@H0JKo
zQ665mPN_n_*%h*sD6KZ7_?k+}ICo^z5un@Kn$;<z`SoLgYGu4Pd_t#;jXQ59%Kw0p
zxBUl0Iy&?IJ<n=+JKg=(fHK#TztBtJ70v56Z0I0o6Wexz(?KhhC!%*7JHXpVb+qym
zaN67359x4zko=Z{^T^%TRSAASP1A0B`D{Y4uK(G|qky%%*rmgQcnny_PBrVWp8m(D
z&jQw41Z!{WT>;%dL+CxyQ{&Ts<u^Z1#`V;lo`6B*M-jGfA?K0Nsj#l6UPWp1zX0Hy
w4G)6bNGVbO1$pnwEj1kg03VA81ONa4009360763o02=@U00000000000FjjmrT_o{

literal 7490
zcmcJT4Qw0b8OPrwO&h0yaVBZX$LOVLm!zb@A8{PhI;msFNk)<;jwup!jEjAauh>5K
z?wqd<sDvVh1{5?2XkoCA4#qZB(-4epP&x|R#HhMyDh$N5MlcvF)6lkd6B=m$=g#M^
zCTY5Et&-UBoA-Tx&;NP;&tv!agXLw%t&BA`#-xN8la-{Pd3X4OlkIKx>c+<4z+kvH
z<lQ<D@D9pTqZuL58W7cpBBeD+PI=ekm;7%0R?=qHcNMSSF*+6sD?%y~7YBv|y@MW4
zo6V-l_~Ee?ZxU8ATAWwKlqzY`B(~x;<L~%{I4LH0+=|z?;FEF`&jpRg=KP=>y_VmM
zd%EqN{8leN&2Qy5+uAxi?Dl2c#e=)pZ8qHL>i^Yk=zO=s-R7`6JI?C{|Emii9On7C
zfEe*v;yPo)BNrwOHdsg)hpVmK)nPX;;%sxYcY4lwj|-Azp_`B>&dv_Eqs?uzyUyeM
z^A}~I;~`1@bTby4v#~L5vpqEA3+pB33$zYrlH;P{ZEFqBq($#QN)snUrT8koL_$nR
z5k4xWwYaKRhZYxkMU3%6D#{B94PRIys=Soq=_Of-iVB~QR85SQo*5W1&$KUfW;(6N
z(-O>q4iciq;e0baR{1Fj^*c1`b+}wO%Xm7DMYLSy1x4gxE}E!_(U#KV`$m^@e4m(-
zH3Wnw2g-R(K5%{El3m6$MM_N+$IaZN^a>+`<{$Sh5rJW{#)LF0MqBxjI8ZjL5g{Q)
z5!PEFM_iT>2y&{4>|Ft_B1|zUg+*B*V642F79&zjiU{POrFRK0OAY;eUX93ztQ^xN
zHc#ava&lZs>6`26kd6zg$WKU<BIN?ClM=#QZHR+?6fvUdcNrKCZZ~q#Z$`zpL{#)#
z6me)Kr4eLq<5oQuq7aGmzQJ%dAzRHjs6x`rN(8AOG4sa<OO6jFWI-zyG<$eZ5qD?A
zRAlCY&m1<-T&fhpvL+<>lwPM4{eoIULQqEJf>0dM9tbfS5>mI0Ws)TaEdX~T@{9D1
zDRPp(K}tmAX*tECFW$<lnIw<G(&%&_s(pvr#)nY-<D|6FjU`bnK0MH49%(b=3?_t<
z`iPE`fenddKx$k{mu#@C%=9hj6I*yyhTw)-B!wA1E=-F2xQLWAjiKbQv2)(`2G|J_
ze8wyQq*R(LFDg%^jK|ULl1;^gYHphqc!9VF6(K^EN&AL$ck|0BO^{OX6Mif$r1S>?
z*ek6niE{67&cT;Tp)pNP^J$sJ9pfOXNeN!BObUtOO9yk2Sp3RGc9h<XJu)JnQR%`#
zNsWt2lgf+Jz%sH%lVM953Z7SK&WiHml2F3YFQgN?;f_%bo;zp=kxJbKMVQk=LaWdl
z13V`wBU`m88AINLB2K`aaJRU5NXg*?sc}UVqgdjX#aQgzr|16wMufkRB9aI@g0rAV
z^qv(p%Bax#ihMgQDw~a@;iuxrN|<OGZ41qG0?O0fAyB`dL}xbNAWA6|$OIw^BiHgS
zp0lo@!Cv38%yURUM@22?J4oTCd^0v`$+dUGW(=oQ`DXM(J|#v(RTUH|fr%Lx)Lj+S
z^w{TbxllVKj%B#;e?t*7EqfUt!3zf|oiP?D`_RxBKP05nl$eD>MqbrXTS+D^IGh>X
zp=2liMp2OsHz}^}q4Vzt!E^V6$!tH+2eo7tYLUJ$T54De#AXLlOh7@Q%41R#MJ`^G
z4G}*+!v|;#EyfCs_{>->HOLyXu5K7o64tH3jKn0`L|TMrWKt4F3N`zb#x9z!^ob%{
z3(Zn0lE{!zU{MVH05>xR%$#bW>&=T~?Q@~QFnsp8&D2ZP(#_BgMr8H>(ZY))%XNmc
zjd?8B#usk~KqOc1Im^|@!XYyniw*&%)(E`7t8rO@_Z5bxSZKTrAKbo-3>uUtcsYsz
zSs}k9#m8Zz6F5Rs(5oi12{wtrBco<tiwOygGa00WqB|O>^ig_3$fzpvkxxp~S_VUa
z{-Hqey&{=JT9Gu9b}8-QmFVf1>GWPa6_+rvPGwDWN><?gXb$wb2;Db3#`i6Fg;vAJ
z4KGIRsp#!fapS2;NIRP&4=-m*W^Pqn&XvdHj1m!fdM`m0L2jQ!LqfO06py!hbW6Yf
zE<ntxX*Adf<``*0AMWj~2{ARH#l4PpkHcl_aM)V~RmJ3&nDLG~-SkOL!D>f-wbSnL
zINi>rRy)nrc2B3x;b?c-&RVVN1l|#PYtl0lM!Kp=38KoW>PCNmX#0?5Wc!$9AYcjg
z1}%8|U>UvMH)t_Fi?UE{TY~y~Kg+Ps?&x%P8b3Yu>`zBEdy+Bc!596z>z3PC8GeSo
zd?auwD`ypqt$b2Z?RL8ZU#@yzeILiT4RuFP|6m)()!lhyS2tH*7mMxK#$8o+rg^K6
zvvOtJWn2Y!1=hrRxXbIXTQ_$xezT@7^xU;R?z44=AAhThtECqh*HCx2@0P=y#d;^>
zK4YxC5<6^szj&<TF5y;k>+9H&T`zO18_HMEf3B*b{>n?NYu9iV&bhvgtL7q+2v_5H
zs+_S$%R|8U2eaX^zl^(xyW@^Kp1^q~XY##@GU9A`+16`vKBTA)x7%s&bk>LK{aht5
z!|y!vA~5gvO&<ZK(>dh>=KO3;A?Cq!qy%%_+!fn^**|~nwmfFb&c|N_W=~ljlP!vQ
z@^3|$-CJCp-m8E)o4hH9d7zT9rd9Vrp^1vs!2Do^f%(k>%#VI^M;TsX?AM+^U0zO?
ztJC4|xH_8p!n&S~i1-GmxRNxTpAByV?3wQz=mG4W+MBl(XgYW53nj3J`Z$6;G5vv;
z<Lko^^tVIf-5`CPaqAi`wHDzvjRs65_wGMhsN~if_o|I`Kn(S61)_hG6-sV^M1FP}
z_us@m#dj+MqSb0WgvT~~(Z5eFa+Z^lhwifY3lQB7o2}E+(NXVj^81jo24c(3zjgyL
z{y>KGJkwl3hzHZRbr&EWdo56cNXG#2SKsLE2BHs$duoS|0P!n?xTZdjSh1L*bEh`F
zREXQS%EtH_;I>|~3Ain_7Out~WG%I<<%`S$N{hu}<}1-Xt#C$N#($ivOXO?2$J6Ps
zdFuTLJeB3#sW(Z&&(|CvzVJKW?gHP@)7#8^Ek{`%OiRS~@$KL3LRprhEZ5CFO%m?*
zzS^D7*O?EV=mO#kRF?YMD{;bNWyvJU*NTdY%8E*qWe;ok+~t>puBWd(m#>Wdko~S|
z5c$ehJb|P<UT);;BlrcCC1Jk)={@?-mak9e$`bbD>VWrpJhq(u?DkrH;5@MZO|qX4
zM<4D6Cu47!oMb--_9shp+_~>#;(Rz|EwG=Dp8WX>;5<=aKj3T%j+u#i@?SbzCE$py
zVUwS+KCoTq@5`}GqZU`KAi_UZS*w2mlC#-9eBWwV&dJF_HoL=Zw{^68z-DN;+gna-
zBlp(70Ji6@sw8VY_S%;U*>>(rl(6mgRukI;fBAO-+x%=hRpQ5svsJ!Qn3Zi?nl`R&
z1)DS&$g#b$lCk;9uR*taR@8t~TR^vGxBp0I+j+brm!8ng+3rTiu{q$UM%$R5eVg+0
z#Pq$CpF8)xM7kY&ZG*{H2HoaPO%Pj6`PG!4WysG1`!6H5YkvQ30o&on|4e@RCt}mr
z>iJn{Fz=AT=s8&d-A37l#_bGjqkSVeHpc$IysJM&Ha-o$V{D|_&}}Z8ljNOM_pjA+
zvi9Vwy_aM?lv+hcduO|gWHZnh>-2%iTm29*_0L!LWIbi2o{O@f*IsIcE5U?~nOSTW
z>rV}Z@Tx(P&pz9Aq)?^)4;0MBx>!FbTwVP+iicJ`h}|Bm`aZrlS)QStiy9tL%(v_{
zDA<Wy_JN|qjZgaB_WC~98X3r$W=6@kr}jbW0o8AOi(uBxy<rMe37FbLx`Eu*eLcZc
z0!Fqrc0-^WqgNCd$fGYa66g~`)7KgXLZ1`m-x!!hc7G^`Nn){GtbbDjFn9G|m&5$!
z#ncb9s;7Vxg<P}QhIb#HlYtQCq1CJPMD$j>daPNo>f7}EqkbNXjsFaI5BiT&{#Czm
zl<>|pzukqAg}iZ3ZJcx}@J6<GUk}ix4zD{5-(Rug_9J=K9woHAH|9N~dhhP%&D<l;
zxVmc1+O_L}<80ePIO`o~Zj34H-4*L`=?}~B1mB@rLqo#>JT`H@H~xr@bMXF~a&<+v
z-|lcYI&8TyW#nErwY8(Cs|X7`gypS%(d^uWHb2``g0`pjK2q)GpPa}sHPk)(_fMXO
xYHt^yp{X?ujhafa_i|-LXakJ5ZKTm;V8owO3iAewR{s-%e6`{+-~_Tt{RhExM=SsU

diff --git a/src/test/resources/htsjdk/variant/serialization_test_unzipped.bcf b/src/test/resources/htsjdk/variant/serialization_test_unzipped.bcf
new file mode 100644
index 0000000000000000000000000000000000000000..a19ea048f148aa1e0b7f7c26e02b2ec8727a33dd
GIT binary patch
literal 7477
zcmcJT3v65E70184+Sm<@Ym%lsM!zKOl9a^w8OO9v;@EMLk*8yd1RdkXzP4|%ecgMn
z^Xj0LSj5mM1x*5)F<=^xu}#%91Y;YNj>0xEs&1MJ12L@;493bdw5{EQ2HH9I<wxB#
zU0JHscJB4}o!|FA=YNjf9|%=e9<vbA(h}!WTwD;-tnAw!2+g>xPIF63Xm}(t5cX{w
z4*Euf*~uK6>IiaDROGWVFJyep@JG4}{>sbq%{|5UZ=akBM?^LgO>)Cy!GRI4*J`uL
z0z7za#fwD5oSdW*myvjxpMj;QtiF$?xEU@*Au6gYLDNDEUb8ZV#p#F;yOwsktagvR
zn{M;bIl7H@+N|AOcKa$~@j@(in+;;R`m>4#_xCtFR)^i$bzTvCRsq0q==&Q7qMpwR
z;Y^K9T$nVlKsjL?ZmY}PW!D37S{<%#?>XbRAX&;qf}%LPyF3o7$7Xk*$NuLpO1bSp
zlKk%aS#)Y=qee3x9`#3*67vT;#&YRtPV|L4BJ)|!H=L2V1Sb}U;!mZx6d$EAE-NP`
zr8?v!M@249*-VVGDH(p@7EYpkhT<hbjBz4O@siBNO7{#-=zH2%+B2IKg*hI~0XJe}
z>gKc!pG$O>hx#3z^f}ya*h_sm4L9NK5@kh>g1N|?$i>=Aw;!Bb&Gv&_Mv&n^C_0eg
zHTi=ZjFhZ0C5wC}QG9OtBBdcrjOcG3T;T-9&>B<x0vGF`6G=enl%i~ki@~wp3UVX`
z0S*G4Dxt*+;1V2DoX@~bnh;>DRLXKuKF&v3bkNdRBCAqE*`G>L0ZvwkD-!Fw(x{N0
z<}=FTN;+hdti;g-Kf_@zfOTR*=y$8)z&auqm6ceA$3o+3E(Y{d@vm?yN-lD+X)Yrp
zq_J>^auysLO;Z0zL`%pH{Tw7Vt!E`1QbA(ow-1$UA4&<VTr8-zc}V1T<+x0A{(|=$
z)AwAd6e5Dmrf5c~Q#}2GT7!hZj4%$N_(c0bh{-UUxpgX+F4?FI+>MZ5sBc^p()0#C
z6&2=$428aUE0uC-3WX)(?tZBD?UI#-q57v$X|)^koK(DduwUQOrpg&gu_g5pn??g0
z<<NknB%du=U{#svFY6OqsU(2ls#&Djd75NrI6BQiO6tZ?vf0!*qrCy_1QL8+F91lX
zELvVnn9ZoqW4$Gdaw$n)Ruj0)xranHij|4$h81@U2pO5>GvFt5D$8b+Cqb}RysPAt
z2gVEsUnzyAWFbql0**WCMp%<Pc)d8oriz0O87Fb}kc;dn+X-vrIGU3%;1Dk*Ik8ou
z+#F!3StAQzOEMHZm2l3A(P^G7;TU4GDaCN7FbB_lXmCUlb{8l@Lk}KWh0++nbJ7B2
zt2`^fke3j-1h^B#mee;X*?c%NEpl88ZV3onJbv!o^KSqnf_Lyyo&!4qXMrM7dR9y=
zqeAN|^6e}qcB)B3XOobXV4_)QThL4spghJ70u8ZZY`*gbj?X}WB;Z8B$mP6?8`d>C
zGT>j8d5*HsQK6Re9i(tk+6D`?8|_`S8P#be+6MiQX1FLPNvz1HU}A;<6;}mneC`jl
zU#J}-$0}U-f1!x$mc0s)(1nAP&KPCNK0G=_N7-x^6SHv0$g4VOE6K#N!<pJ0N>-vb
za-yKRNpW?Lo_{|Gox2~*X#GGL)Y2N%S^C0csbQ5lo9&QdEEE(}d7O_yk#pB*M?|OR
zX%NTIVyy6lUq37B3d4gs>neu9r@^{qFe5GvZ6eEoXXG+Gj1-dg62~r_u9S&FtA#c`
z6HVpNDBz|z^aF@Y9WV{m!q@A8X!codFpQkJZyWYfsdO=D2NQzy|7hVwk{O-hOk<uh
z+W6Vq0U)BQ_n+bFQ<1QK8fP5>bgkjwSt=z35xlSPc#4IFkKrTZtH_`cAwh*049Ft-
zB`Q7*HkyDfWD$DRjFw<CFnHu7?OL2o!8ntHln@n111e>dPOv#if_$WDeooH85TJZ1
zP#jk@m&%H~tkbTfJ$NPbbeQRsUOb!RVPc)tOmtQd!TX^(;P)aJuQtXHmJOjpHFDL9
zq4p%`?XyYsrA~+|>mv^pawRjj#H}XeDIq6DIf|bpup-dy)6kHhTfr0$AN4TGfbuDT
z)6`TOY!v1gKA{Zv_Kp;nNytf`!{v3jZCws~2P;W1`K9K4(@qb5lT&cFBY(Hk?)5r7
z&Xw+V>UZ0{-8P5A<+Pn~x1<pGCh((4|9k|}RZ8<9s*o|a1ct)nqo#@RDbsM!6dnkf
z;Nyd7@_PS>N&PO$gtcu7DWCmJV}84%+taN+dhOby!%Utagm~c>f9|~HHc|nP;jbMI
zUP>xS6(MV$6eYXe?tou+qrY*GA<V^074!LqLyx@G!(7pD<W#($`CJ3Msh4SNh{w10
zF_*#hdi)Dvn1(wJ@9bqRSFblU+~vRJ5M#33L74Ro;peXPGgmSd%q7ekrn!L}-uW`K
z`TgQ~in*%cblWyRvkCV3VIN~5%(|w^YW&C4HZ@*(iDkoj#>6<At&Ew8Mx#uf<EctQ
z9;yrj#XpG+o(C$Ji<muo_B;;Z=$PsEswxn({bk#Lj`@HnIXoVxz1!IsX$&wm8s*UP
zwS7SOST<Tf>2%Keiz)B&&mGQFw(of4MWF1j0LqPvSM&iT(J4>-y)5POLLE{@-g)N5
z63X7K?rz^zK)H~<$)H@SA*6NfJ)lpbY8_C1P_0rvT0r^HZ}(KdB|?7V4L0P(bi2D9
z4zIhbbudz(=JB}?e9XqCODzm-i~#WB$uAWEpZ@MrzXHrOEH6a*@}$2To$dwF*HO=d
zgG>?W-UCON7Q$?9oeYAEyX$Z61HgEEO{t9A>O5;VHvnLGU>g7iwpc*MO`uPJoPy}L
zkWb-%Q2rvuVzK-Hp4;FTe?Ga$S&1?pywemY0Q5L)wr+1%S7V?x;5X9s*WVuK1;F)<
z0O$w6qpt-E0o$r{z~p_o0x5Ubj~xcUuOlGt2~t*RQZAm{@{$60X3jpAy{#8u+jsn}
zw?xgBwKhWQ0Jh_rEdbkIZ({1~A<|w?+P_Rpz-Tg=^i)N*r$t7s$mCCqiiA{kdA;2Z
zo3}9lVqrm6LP75Gz1pj!Dvw|v-~PQGD9Fe=kN1FbmHC4FV%-w*Dc74C=0A9%2l!q<
zzKx4d7YRuCvZ;FHR3E15;>kBPC9A5cYN~3WAp1$v7cRdX;Q9w$Mye9>Bl7#&5lB^1
z{RHIWu}U>nm*5XrkVyIZr*|ul?T^kG1qr_6?gG>GdTm9%^I*n;zO!^7U7+8GlMnV5
zSLD)xHw%2{qbGmy0$?6T%pLnbF2X$VZ@_E~O+k_p@)n})@>%?<ZU9zQgJ?}#0)z|#
z+I4|J18okfaBVdL{;Aet{w08%iT2_97S(G`%oL*89Ui-_%jE?$RYA1W`~LbLMBA}H
zRe<)~RW&7+I<c?ug?tGvFSxJ-KSs1&K65cz%_}<E(bvABqE%{WhK*Xcwr<|g0ciY4
z&_H`-4I#@l-vZU{uC4=2xr}Phj{jIe+i|SR$W2hq>GD9Mu{pp;)oxJ%XxIGVT|`?v
znLxC<%BwLsckF)&RjbG+=kd9Hn4HS>CTKB#K_C4yqAeY`tO#xIJ4bYDIr`crL|a~X
zyD%ZEK($G-sb!o1+T`Gbfkwz5iErJfkcv+O-cd4PR#p3|mXPR~wfAmN60-ips{@y4
z-iaNe%hm02qi8DG8YK}0Rjs|)0Da~M5NVB$gq;sAm@Gd-2w0gEHUryrhCV~f=6--6
zq+8RcM)?5<;wC+02q4@&Lk7eHYww4(9<D_{Y&AWD>lU>+gjjCht3r_DM(P1Xmj}My
z_t+Z;!3WVhYCxVFV^^Se)E`v5<F?-Gk;VMRcaWt<XJNp^??>}jt~WJ2^fEztK0z|U
z($ZT<c5fKS-p6l+@*e}Bz&0+vfl{4rBZZVG71>FKwlo3d&Y|lJ%3ocKeL${#3TR>=
z)`HgN?t$0n9Y}d_-8v-;eP(yRMU$%0sw5vaadgJnLmLkP-R}A%Dy2E2`Hdq;SKy5I
z2aXjuWA*mi(fJ=zblbbXzktrXch~cJU7c=wy9bEk@#&w#Y+_j9)wSz4Y-k1?r*$jh
zG&`WF5h9XztD7O{4=dmW{12ivH8mZ8=Q_^!r@yJ-Ja+F*Mp2>RyBrQjm(7?>5bNfj
z9zP6N%L}aqSP)MM){#@S1z6Ai<CEtB>uto^U4IXXrZ+S~-ou(2odltL<`=PDPwew(
j9n2sbCe*eW=aJ!wfFTqN86;-@7wGYgs)vCls0sC7=qN^9

literal 0
HcmV?d00001


From b6559a9c283fb862f9f79aa8be997403cd95c8f4 Mon Sep 17 00:00:00 2001
From: Anders Leung <aleung@ml6062-aleung.lan>
Date: Tue, 7 Dec 2021 14:49:32 -0500
Subject: [PATCH 13/22] Cleanup various TODOs, remove/fix failing tests

---
 .../java/htsjdk/variant/bcf2/BCF2Codec.java    | 18 +++++++++++++-----
 .../bcf2/BCF2FieldWriter/BCF2FieldEncoder.java | 11 +++++++++++
 .../bcf2/BCF2FieldWriter/BCF2FieldWriter.java  |  3 ++-
 .../BCF2FieldWriterManager.java                | 14 +++++++++++---
 .../variantcontext/writer/BCF2Writer.java      |  1 -
 .../htsjdk/variant/vcf/VCFHeaderVersion.java   | 14 --------------
 .../variant/vcf/VCFStandardHeaderLines.java    |  4 ++++
 .../codecs/variants/vcf/HtsVCFCodecTest.java   |  9 ---------
 .../htsjdk/tribble/index/IndexFactoryTest.java |  1 +
 .../variant/bcf2/BCF2DictionaryTest.java       |  2 +-
 .../BCF2FieldWriter/BCF2FieldEncoderTest.java  |  3 +--
 .../htsjdk/variant/bcf2/BCF2UtilsUnitTest.java |  2 +-
 .../VariantContextTestProvider.java            |  2 --
 .../writer/VCFWriterUnitTest.java              |  2 +-
 .../variant/vcf/VCFHeaderLineUnitTest.java     |  6 ++----
 .../vcf/VCFStandardHeaderLinesUnitTest.java    |  2 +-
 16 files changed, 49 insertions(+), 45 deletions(-)

diff --git a/src/main/java/htsjdk/variant/bcf2/BCF2Codec.java b/src/main/java/htsjdk/variant/bcf2/BCF2Codec.java
index 62fcd3ede4..af18454db7 100644
--- a/src/main/java/htsjdk/variant/bcf2/BCF2Codec.java
+++ b/src/main/java/htsjdk/variant/bcf2/BCF2Codec.java
@@ -48,6 +48,7 @@
 import htsjdk.variant.vcf.VCFConstants;
 import htsjdk.variant.vcf.VCFHeader;
 import htsjdk.variant.vcf.VCFHeaderLineType;
+import htsjdk.variant.vcf.VCFInfoHeaderLine;
 
 import java.io.BufferedInputStream;
 import java.io.ByteArrayInputStream;
@@ -219,9 +220,8 @@ public FeatureCodecHeader readHeader(final PositionalBufferedStream inputStream)
         }
 
         // TODO should follow up on hts-specs and clarify the relationship between ##dictionary and IDX fields
-        // Error on ##dictionary lines, we don't know what to do with them
         if (this.header.getMetaDataInInputOrder().stream().anyMatch(line -> line.getKey().equals("dictionary"))) {
-            throw new TribbleException("Use of the ##dictionary line is not supported");
+            log.warn("Use of the ##dictionary line is not supported");
         }
 
         // create the contig dictionary
@@ -448,7 +448,7 @@ private void decodeInfo(final VariantContextBuilder builder, final int numInfoFi
         for (int i = 0; i < numInfoFields; i++) {
             final String key = getDictionaryString();
             Object value = decoder.decodeTypedValue();
-            final VCFCompoundHeaderLine metaData = VariantContextUtils.getMetaDataForField(header, key);
+            final VCFInfoHeaderLine metaData = header.getInfoHeaderLine(key);
             if (metaData.getType() == VCFHeaderLineType.Flag) {
                 // Despite contradictory language in the spec, bcftools/htslib encode the "payload" of
                 // FLAG as 0x00 (MISSING type) which we would normally decode as MISSING/null,
@@ -509,7 +509,11 @@ private String getDictionaryString() throws IOException {
     }
 
     protected final String getDictionaryString(final int offset) {
-        return stringDictionary.get(offset);
+        final String s = stringDictionary.get(offset);
+        if (s == null) {
+            error("No entry in the string dictionary matching key: " + offset + " was found");
+        }
+        return s;
     }
 
     private BCF2Dictionary makeStringDictionary(final BCFVersion bcfVersion) {
@@ -530,7 +534,11 @@ private BCF2Dictionary makeStringDictionary(final BCFVersion bcfVersion) {
      * @return
      */
     private String lookupContigName(final int contigOffset) {
-        return contigDictionary.get(contigOffset);
+        final String s = contigDictionary.get(contigOffset);
+        if (s == null) {
+            error("No entry in the contig dictionary matching key: " + contigOffset + " was found");
+        }
+        return s;
     }
 
     private BCF2Dictionary makeContigDictionary(final BCFVersion bcfVersion) {
diff --git a/src/main/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldEncoder.java b/src/main/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldEncoder.java
index 3a6aeae2cb..3d95f4ae5e 100644
--- a/src/main/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldEncoder.java
+++ b/src/main/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldEncoder.java
@@ -237,6 +237,17 @@ void load(final Object o) {
         void encode() throws IOException {
             for (final Object o : vs) {
                 if (o == null) {
+                    // TODO we encode an entirely missing vector as all EOV, or essentially a 0-length vector
+                    //  padded to the appropriate length with EOV, this encoding is allowed but not required
+                    //  by the spec[1], but bcftools currently does not appear to handle it properly[2],
+                    //  printing such empty vectors in VCF as an empty string and not '.' or '.,.'
+                    //  bcfools encodes empty vectors uniformly as [MISSING, EOV*] which we handle appropriately,
+                    //  and the distinction between partially missing [MISSING, EOV] and fully missing [EOV, EOV]
+                    //  vectors is apparently not required to be preserved by implementations
+                    //  We could either match our output to bcftools' codec or keep it as is, and wait for
+                    //  bcftools to resolve this issue
+                    //  [1] https://github.com/samtools/hts-specs/issues/593#issuecomment-910266633
+                    //  [2] https://github.com/samtools/bcftools/issues/1622
                     encoder.encodePaddingValues(nValues, type);
                 } else if (o instanceof List) {
                     final List<Integer> v = (List<Integer>) o;
diff --git a/src/main/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldWriter.java b/src/main/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldWriter.java
index 3b645bf981..dd140b3b3a 100644
--- a/src/main/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldWriter.java
+++ b/src/main/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldWriter.java
@@ -455,8 +455,9 @@ void encode(final VariantContext vc, final List<String> sampleNames) throws IOEx
                         encoder.encodePaddingValues(padding, type);
                     }
                 } else {
-                    // TODO read the spec more closely, look at htslib, this may not be correct
                     // Entirely missing genotype, which we encode as vector of no call
+                    // These cannot be encoded as MISSING values, because the BCF 2.2 spec explicitly forbids
+                    // any negative values in the GT array and MISSING values are negative
                     for (int i = 0; i < nValues; i++) {
                         encoder.encodeRawInt(0, type);
                     }
diff --git a/src/main/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldWriterManager.java b/src/main/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldWriterManager.java
index b73a88036d..02fb3e4cdd 100644
--- a/src/main/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldWriterManager.java
+++ b/src/main/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldWriterManager.java
@@ -5,6 +5,7 @@
 import htsjdk.variant.bcf2.BCF2Encoder;
 import htsjdk.variant.variantcontext.VariantContext;
 import htsjdk.variant.vcf.VCFCompoundHeaderLine;
+import htsjdk.variant.vcf.VCFConstants;
 import htsjdk.variant.vcf.VCFFormatHeaderLine;
 import htsjdk.variant.vcf.VCFHeader;
 import htsjdk.variant.vcf.VCFHeaderLineCount;
@@ -37,7 +38,14 @@ public BCF2FieldWriterManager(final VCFHeader header, final Map<String, Integer>
         formatWriters = new HashMap<>(header.getFormatHeaderLines().size());
         for (final VCFFormatHeaderLine line : header.getFormatHeaderLines()) {
             final String field = line.getID();
-            validateStandardHeader(line, VCFStandardHeaderLines.getFormatLine(field, false));
+            // We skip validation for the FT key because its line count changed between VCF versions 4.2 and 4.3
+            // from UNBOUNDED to 1, while VCFStandardHeaderLines keeps the 4.2 definition.
+            // This does not matter for our BCF writing code because the concrete BCF count encoded in the typing
+            // bytes for strings always has to be determined by inspecting the strings themselves, so this validation
+            // would only produce noisy but harmless warnings.
+            if (!field.equals(VCFConstants.GENOTYPE_FILTER_KEY)) {
+                validateStandardHeader(line, VCFStandardHeaderLines.getFormatLine(field, false));
+            }
             final int offset = dict.get(field);
             final BCF2FieldWriter.GenotypeWriter writer = BCF2FieldWriter.createGenotypeWriter(line, offset, encoder);
             formatWriters.put(field, writer);
@@ -72,7 +80,7 @@ private static <T extends VCFCompoundHeaderLine> void validateStandardHeader(
         final VCFHeaderLineType actualType = actualLine.getType();
         final VCFHeaderLineType expectedType = expectedLine.getType();
         if (actualType != expectedType) {
-            log.error(String.format(
+            log.warn(String.format(
                 "Header with standard key: `%s` has type: %s which does not match standard type: %s",
                 actualLine.getID(),
                 actualType,
@@ -83,7 +91,7 @@ private static <T extends VCFCompoundHeaderLine> void validateStandardHeader(
         final VCFHeaderLineCount actualCountType = actualLine.getCountType();
         final VCFHeaderLineCount expectedCountType = expectedLine.getCountType();
         if (actualCountType != expectedCountType || actualLine.isFixedCount() && actualLine.getCount() != expectedLine.getCount()) {
-            log.error(String.format(
+            log.warn(String.format(
                 "Header with standard key: `%s` has count: %s which does not match standard count: %s",
                 actualLine.getID(),
                 actualLine.isFixedCount() ? actualLine.getCount() : actualCountType,
diff --git a/src/main/java/htsjdk/variant/variantcontext/writer/BCF2Writer.java b/src/main/java/htsjdk/variant/variantcontext/writer/BCF2Writer.java
index fd95161be2..ac095d6f83 100644
--- a/src/main/java/htsjdk/variant/variantcontext/writer/BCF2Writer.java
+++ b/src/main/java/htsjdk/variant/variantcontext/writer/BCF2Writer.java
@@ -236,7 +236,6 @@ public void setHeader(final VCFHeader header) {
             throw new IllegalStateException("The header cannot be modified after the header or variants have been written to the output stream.");
         }
 
-        // TODO we default to 2.2 here, is this alright?
         encoder = BCF2Encoder.getEncoder(BCF2Codec.ALLOWED_BCF_VERSION);
 
         // make sure the header is sorted correctly
diff --git a/src/main/java/htsjdk/variant/vcf/VCFHeaderVersion.java b/src/main/java/htsjdk/variant/vcf/VCFHeaderVersion.java
index ce5ed1920a..454d567300 100644
--- a/src/main/java/htsjdk/variant/vcf/VCFHeaderVersion.java
+++ b/src/main/java/htsjdk/variant/vcf/VCFHeaderVersion.java
@@ -136,20 +136,6 @@ public boolean isAtLeastAsRecentAs(final VCFHeaderVersion target) {
         return this.ordinal() >= target.ordinal();
     }
 
-    /**
-     * Determine if two header versions are compatible (header lines from these versions are interchangeable).
-     * For now, the only incompatibility is between V4.3 and any other version. All other version combinations
-     * are compatible.
-     * @param v1 first version to compare
-     * @param v2 scond version to compare
-     * @return true if the versions are compatible
-     */
-    //TODO: this method can be removed once this is rebased on the vcf4.3 writing branch
-    public static boolean versionsAreCompatible(final VCFHeaderVersion v1, final VCFHeaderVersion v2) {
-        return v1.equals(v2) ||
-                (!v1.isAtLeastAsRecentAs(VCF4_3) && !v2.isAtLeastAsRecentAs(VCF4_3));
-    }
-
     public String getVersionString() {
         return versionString;
     }
diff --git a/src/main/java/htsjdk/variant/vcf/VCFStandardHeaderLines.java b/src/main/java/htsjdk/variant/vcf/VCFStandardHeaderLines.java
index 6dd5f3906f..1032762f0d 100644
--- a/src/main/java/htsjdk/variant/vcf/VCFStandardHeaderLines.java
+++ b/src/main/java/htsjdk/variant/vcf/VCFStandardHeaderLines.java
@@ -170,6 +170,10 @@ private static void registerStandard(final VCFFormatHeaderLine line) {
         registerStandard(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY,              1,                     VCFHeaderLineType.Integer, "Approximate read depth (reads with MQ=255 or with bad mates are filtered)"));
         registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_PL_KEY,        VCFHeaderLineCount.G,         VCFHeaderLineType.Integer, "Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"));
         registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_ALLELE_DEPTHS, VCFHeaderLineCount.R,         VCFHeaderLineType.Integer, "Allelic depths for the ref and alt alleles in the order listed"));
+        // This line's count changed from UNBOUNDED in VCF 4.2 to 1 in VCF 4.3, but we keep it at UNBOUNDED
+        // because VCFStandardHeaderLines is now mainly a facility for upgrading headers from pre-4.2 versions
+        // to conform to the 4.2 spec.
+        // Version upgrading for other versions is more difficult, so we do not rely on VCFStandardHeaderLines
         registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_FILTER_KEY,    VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String,  "Genotype-level filter"));
         registerStandard(new VCFFormatHeaderLine(VCFConstants.PHASE_SET_KEY,          1,                            VCFHeaderLineType.Integer, "Phasing set (typically the position of the first variant in the set)"));
         registerStandard(new VCFFormatHeaderLine(VCFConstants.PHASE_QUALITY_KEY,      1,                            VCFHeaderLineType.Float,   "Read-backed phasing quality"));
diff --git a/src/test/java/htsjdk/beta/codecs/variants/vcf/HtsVCFCodecTest.java b/src/test/java/htsjdk/beta/codecs/variants/vcf/HtsVCFCodecTest.java
index 2ea873212e..fa5201b754 100644
--- a/src/test/java/htsjdk/beta/codecs/variants/vcf/HtsVCFCodecTest.java
+++ b/src/test/java/htsjdk/beta/codecs/variants/vcf/HtsVCFCodecTest.java
@@ -105,15 +105,6 @@ public void testRoundTripVCFThroughStream(final IOPath inputPath, final HtsVersi
         }
     }
 
-    @Test(expectedExceptions = IllegalArgumentException.class)
-    public void testRejectWritingV43HeaderAsV42()  {
-        // read vcf v4.3 and try to write it to a vcf v4.2 (header is rejected)
-        final IOPath outputPath = IOUtils.createTempPath("rejectWrite43HeaderVCF", ".vcf");
-        readWriteVCFToPath(new HtsPath(VARIANTS_TEST_DIR + "variant/vcf43/all43Features.vcf"),
-                outputPath,
-                VCFCodecV4_3.VCF_V43_VERSION);
-    }
-
     @DataProvider(name="gzipSuffixTests")
     private Object[][] gzipSuffixTests() {
         return new Object[][] {
diff --git a/src/test/java/htsjdk/tribble/index/IndexFactoryTest.java b/src/test/java/htsjdk/tribble/index/IndexFactoryTest.java
index 648f7080cd..77a5902ea0 100644
--- a/src/test/java/htsjdk/tribble/index/IndexFactoryTest.java
+++ b/src/test/java/htsjdk/tribble/index/IndexFactoryTest.java
@@ -198,6 +198,7 @@ public Object[][] getBCFData(){
         return new Object[][] {
             {new File("src/test/resources/htsjdk/variant/serialization_test_unzipped.bcf")},
             // TODO: this needs more test cases, including block compressed and indexed
+            //  The test below, with a bgzipped BCF, fails
 //            {new File("src/test/resources/htsjdk/variant/serialization_test.bcf")},
         };
     }
diff --git a/src/test/java/htsjdk/variant/bcf2/BCF2DictionaryTest.java b/src/test/java/htsjdk/variant/bcf2/BCF2DictionaryTest.java
index 9fbf27842e..43b8329993 100644
--- a/src/test/java/htsjdk/variant/bcf2/BCF2DictionaryTest.java
+++ b/src/test/java/htsjdk/variant/bcf2/BCF2DictionaryTest.java
@@ -27,7 +27,7 @@ public Object[][] dictionaryProvider() {
 
         final List<VCFHeaderLine> inputLines = new ArrayList<>();
         int counter = 0;
-        inputLines.add(new VCFHeaderLine(VCFHeader.DEFAULT_VCF_VERSION.getFormatString(), VCFHeader.DEFAULT_VCF_VERSION.getVersionString()));
+        inputLines.add(VCFHeader.makeHeaderVersionLine(VCFHeader.DEFAULT_VCF_VERSION));
         inputLines.add(new VCFFilterHeaderLine("l" + counter++));
         inputLines.add(new VCFFilterHeaderLine("l" + counter++));
         inputLines.add(new VCFContigHeaderLine(Collections.singletonMap("ID", String.valueOf(counter++)), counter));
diff --git a/src/test/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldEncoderTest.java b/src/test/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldEncoderTest.java
index afb198286e..5946caa8a4 100644
--- a/src/test/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldEncoderTest.java
+++ b/src/test/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldEncoderTest.java
@@ -356,7 +356,6 @@ public static Object[][] genotypeWriterCases() {
                 });
             }
 
-            // TODO revisit this test once the correct behavior is determined
             // Test encoding for a VC entirely missing genotype data
             {
                 final VariantContext vcMissingGenotypes = new VariantContextBuilder()
@@ -371,7 +370,7 @@ public static Object[][] genotypeWriterCases() {
                     .make();
                 final byte[] bytes = new byte[]{
                     0x21, // 2 8-bit ints
-                    (byte) BCF2Type.INT8.getMissingBytes(), (byte) BCF2Type.INT8.getMissingBytes(),
+                    (byte) 0, (byte) 0
                 };
                 cases.add(new Object[]{
                     writer, vcMissingGenotypes,
diff --git a/src/test/java/htsjdk/variant/bcf2/BCF2UtilsUnitTest.java b/src/test/java/htsjdk/variant/bcf2/BCF2UtilsUnitTest.java
index 5f658bd69b..e07d23cfd9 100644
--- a/src/test/java/htsjdk/variant/bcf2/BCF2UtilsUnitTest.java
+++ b/src/test/java/htsjdk/variant/bcf2/BCF2UtilsUnitTest.java
@@ -105,7 +105,7 @@ public Object[][] makeHeaderOrderTestProvider() {
                     if (i >= 0)
                         allLines.remove(i);
                     allLines.addAll(permutation);
-                    allLines.add(new VCFHeaderLine(VCFHeader.DEFAULT_VCF_VERSION.getFormatString(), VCFHeader.DEFAULT_VCF_VERSION.getVersionString()));
+                    allLines.add(VCFHeader.makeHeaderVersionLine(VCFHeader.DEFAULT_VCF_VERSION));
                     final VCFHeader testHeader = new VCFHeader(new LinkedHashSet<>(allLines));
                     final boolean expectedConsistent = expectedConsistent(testHeader, inputLineCounter);
                     tests.add(new Object[]{new HeaderOrderTestCase(inputHeader, testHeader, expectedConsistent)});
diff --git a/src/test/java/htsjdk/variant/variantcontext/VariantContextTestProvider.java b/src/test/java/htsjdk/variant/variantcontext/VariantContextTestProvider.java
index 34adb5a4b0..91efd8bcf0 100644
--- a/src/test/java/htsjdk/variant/variantcontext/VariantContextTestProvider.java
+++ b/src/test/java/htsjdk/variant/variantcontext/VariantContextTestProvider.java
@@ -233,7 +233,6 @@ private static void createSyntheticHeader() {
         addHeaderLine(metaData, "PL", VCFHeaderLineCount.G, VCFHeaderLineType.Integer);
         addHeaderLine(metaData, "GS", 2, VCFHeaderLineType.String);
         addHeaderLine(metaData, "GV", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String);
-        // TODO changed count type from UNBOUNDED to 1 to match VCF 4.3 spec, but might conflict with existing htsjdk code
         addHeaderLine(metaData, "FT", 1, VCFHeaderLineType.String);
 
         // prep the header
@@ -674,7 +673,6 @@ public static void testReaderWriterWithMissingGenotypes(final VariantContextIOTe
                         assertEquals(g, expected.getGenotype(g.getSampleName()));
                     } else {
                         // missing
-                        // TODO this may not be correct
                         Assert.assertTrue(g.isNoCall());
                     }
                 }
diff --git a/src/test/java/htsjdk/variant/variantcontext/writer/VCFWriterUnitTest.java b/src/test/java/htsjdk/variant/variantcontext/writer/VCFWriterUnitTest.java
index 4931fd8b09..83376588c3 100644
--- a/src/test/java/htsjdk/variant/variantcontext/writer/VCFWriterUnitTest.java
+++ b/src/test/java/htsjdk/variant/variantcontext/writer/VCFWriterUnitTest.java
@@ -219,7 +219,7 @@ public void testChangeHeaderAfterWritingBody() {
      */
     private static VCFHeader createFakeHeader(final Set<VCFHeaderLine> metaData, final Set<String> additionalColumns,
                                              final SAMSequenceDictionary sequenceDict) {
-        metaData.add(new VCFHeaderLine(VCFHeaderVersion.VCF4_2.getFormatString(), VCFHeaderVersion.VCF4_2.getVersionString()));
+        metaData.add(VCFHeader.makeHeaderVersionLine(VCFHeader.DEFAULT_VCF_VERSION));
         metaData.add(new VCFHeaderLine("two", "2"));
         // Explicitly add GT, AD, and BB keys because the .bcf tests that use this fake header require that the header
         // contain INFO/FORMAT lines for all the attributes written
diff --git a/src/test/java/htsjdk/variant/vcf/VCFHeaderLineUnitTest.java b/src/test/java/htsjdk/variant/vcf/VCFHeaderLineUnitTest.java
index 2cd81e7ef9..f02ccd0585 100644
--- a/src/test/java/htsjdk/variant/vcf/VCFHeaderLineUnitTest.java
+++ b/src/test/java/htsjdk/variant/vcf/VCFHeaderLineUnitTest.java
@@ -114,8 +114,7 @@ public Object[][] vcfVersions() {
 
     @Test(dataProvider = "vcfVersions")
     public void testValidateForVersion(final VCFHeaderVersion vcfVersion) {
-        VCFHeaderLine headerLine = new VCFHeaderLine(vcfVersion.getFormatString(), vcfVersion.getVersionString());
-        headerLine.validateForVersion(vcfVersion);
+        VCFHeader.makeHeaderVersionLine(vcfVersion).validateForVersion(vcfVersion);
     }
 
     @DataProvider(name = "incompatibleVersions")
@@ -133,8 +132,7 @@ public Object[][] incompatibleVersionPairs() {
 
     @Test(dataProvider="incompatibleVersions", expectedExceptions= TribbleException.VersionValidationFailure.class)
     public void testValidateForVersionFails(final VCFHeaderVersion vcfVersion, final VCFHeaderVersion incompatibleVersion) {
-        VCFHeaderLine headerLine = new VCFHeaderLine(vcfVersion.getFormatString(), vcfVersion.getVersionString());
-        headerLine.validateForVersion(incompatibleVersion);
+        VCFHeader.makeHeaderVersionLine(vcfVersion).validateForVersion(incompatibleVersion);
     }
 
     @Test(expectedExceptions = { TribbleException.InvalidHeader.class }, expectedExceptionsMessageRegExp = ".*For fixed count, the count number must be 1 or higher.")
diff --git a/src/test/java/htsjdk/variant/vcf/VCFStandardHeaderLinesUnitTest.java b/src/test/java/htsjdk/variant/vcf/VCFStandardHeaderLinesUnitTest.java
index 38a8c983f7..c17360a770 100644
--- a/src/test/java/htsjdk/variant/vcf/VCFStandardHeaderLinesUnitTest.java
+++ b/src/test/java/htsjdk/variant/vcf/VCFStandardHeaderLinesUnitTest.java
@@ -192,7 +192,7 @@ public void testRepairHeaderTest(final RepairHeaderTest cfg) {
         final Set<VCFHeaderLine> headerLines = new LinkedHashSet<>();
         // The standard header line repair facility is not sufficiently powerful to fix broken lines
         // starting from version 4.3, so it is only used for versions <= 4.2, and we use version 4.2 for this test
-        headerLines.add(new VCFHeaderLine(VCFHeaderVersion.VCF4_2.getFormatString(), VCFHeaderVersion.VCF4_2.getVersionString()));
+        headerLines.add(VCFHeader.makeHeaderVersionLine(VCFHeaderVersion.VCF4_2));
         headerLines.add(cfg.original);
 
         final VCFHeader toRepair = new VCFHeader(headerLines);

From 44d3f34eeb87ac8e2269b2eceb541fef45c74146 Mon Sep 17 00:00:00 2001
From: Anders Leung <aleung@ml6062-aleung.lan>
Date: Tue, 7 Dec 2021 14:54:35 -0500
Subject: [PATCH 14/22] Fully remove BCF 2.1 encoder and decoder

---
 .../java/htsjdk/variant/bcf2/BCF2Decoder.java | 10 ------
 .../java/htsjdk/variant/bcf2/BCF2Encoder.java | 32 -------------------
 2 files changed, 42 deletions(-)

diff --git a/src/main/java/htsjdk/variant/bcf2/BCF2Decoder.java b/src/main/java/htsjdk/variant/bcf2/BCF2Decoder.java
index 1544d9ed6c..db0814839c 100644
--- a/src/main/java/htsjdk/variant/bcf2/BCF2Decoder.java
+++ b/src/main/java/htsjdk/variant/bcf2/BCF2Decoder.java
@@ -47,8 +47,6 @@ private BCF2Decoder() {
 
     public static BCF2Decoder getDecoder(final BCFVersion version) {
         switch (version.getMinorVersion()) {
-            case 1:
-                return new BCF2Decoder.BCF2_1Decoder();
             case 2:
                 return new BCF2Decoder.BCF2_2Decoder();
             default:
@@ -466,14 +464,6 @@ public final byte readTypeDescriptor() throws IOException {
 
     public abstract int getPaddingValue(final BCF2Type type);
 
-    public static class BCF2_1Decoder extends BCF2Decoder {
-
-        @Override
-        public int getPaddingValue(final BCF2Type type) {
-            return type.getMissingBytes();
-        }
-    }
-
     public static class BCF2_2Decoder extends BCF2Decoder {
 
         @Override
diff --git a/src/main/java/htsjdk/variant/bcf2/BCF2Encoder.java b/src/main/java/htsjdk/variant/bcf2/BCF2Encoder.java
index ae99f8e7e1..55fe4a7a7e 100644
--- a/src/main/java/htsjdk/variant/bcf2/BCF2Encoder.java
+++ b/src/main/java/htsjdk/variant/bcf2/BCF2Encoder.java
@@ -49,8 +49,6 @@ public abstract class BCF2Encoder {
 
     public static BCF2Encoder getEncoder(final BCFVersion version) {
         switch (version.getMinorVersion()) {
-            case 1:
-                return new BCF2_1Encoder();
             case 2:
                 return new BCF2_2Encoder();
             default:
@@ -323,36 +321,6 @@ public final byte[] compactStrings(final String[] strings) {
     //
     // --------------------------------------------------------------------------------
 
-    public static class BCF2_1Encoder extends BCF2Encoder {
-
-        @Override
-        public void encodePaddingValue(final BCF2Type type) throws IOException {
-            type.write(type.getMissingBytes(), encodeStream);
-        }
-
-        @Override
-        public byte[] compactStrings(final List<String> strings) {
-            if (strings.isEmpty()) return new byte[0];
-
-            // 1 comma for each string, then add on individual string lengths
-            int size = strings.size();
-            final byte[][] bytes = new byte[strings.size()][];
-            int i = 0;
-            for (final String s : strings) {
-                final byte[] b = s.getBytes(StandardCharsets.UTF_8);
-                size += b.length;
-                bytes[i++] = b;
-            }
-            final ByteBuffer buff = ByteBuffer.allocate(size);
-            for (final byte[] bs : bytes) {
-                buff.put((byte) ',');
-                buff.put(bs);
-            }
-
-            return buff.array();
-        }
-    }
-
     public static class BCF2_2Encoder extends BCF2Encoder {
 
         @Override

From f79513396e1d7bb344c3957e40e8f5da2d65d5c4 Mon Sep 17 00:00:00 2001
From: Anders Leung <aleung@ml6062-aleung.lan>
Date: Tue, 7 Dec 2021 15:10:32 -0500
Subject: [PATCH 15/22] Tag BCF lazy data with version, only use lazy data in
 BCF2Writer if versions match

---
 .../java/htsjdk/variant/bcf2/BCF2Codec.java   | 12 +++++----
 .../variantcontext/writer/BCF2Writer.java     | 26 +++++++++++++++----
 2 files changed, 28 insertions(+), 10 deletions(-)

diff --git a/src/main/java/htsjdk/variant/bcf2/BCF2Codec.java b/src/main/java/htsjdk/variant/bcf2/BCF2Codec.java
index af18454db7..acd5cc3090 100644
--- a/src/main/java/htsjdk/variant/bcf2/BCF2Codec.java
+++ b/src/main/java/htsjdk/variant/bcf2/BCF2Codec.java
@@ -481,7 +481,7 @@ private void createLazyGenotypesDecoder(final SitesInfoForDecoding siteInfo,
             final LazyGenotypesContext.LazyParser lazyParser =
                 new BCF2LazyGenotypesDecoder(this, siteInfo.alleles, siteInfo.nSamples, siteInfo.nFormatFields, builders);
 
-            final LazyData lazyData = new LazyData(header, siteInfo.nFormatFields, decoder.getRecordBytes());
+            final LazyData lazyData = new LazyData(header, siteInfo.nFormatFields, decoder.getRecordBytes(), bcfVersion);
             final LazyGenotypesContext lazy = new LazyGenotypesContext(lazyParser, lazyData, header.getNGenotypeSamples());
 
             // did we resort the sample names?  If so, we need to load the genotype data
@@ -493,14 +493,16 @@ private void createLazyGenotypesDecoder(final SitesInfoForDecoding siteInfo,
     }
 
     public static class LazyData {
-        final public VCFHeader header;
-        final public int nGenotypeFields;
-        final public byte[] bytes;
+        public final VCFHeader header;
+        public final int nGenotypeFields;
+        public final byte[] bytes;
+        public final BCFVersion version;
 
-        public LazyData(final VCFHeader header, final int nGenotypeFields, final byte[] bytes) {
+        public LazyData(final VCFHeader header, final int nGenotypeFields, final byte[] bytes, final BCFVersion version) {
             this.header = header;
             this.nGenotypeFields = nGenotypeFields;
             this.bytes = bytes;
+            this.version = version;
         }
     }
 
diff --git a/src/main/java/htsjdk/variant/variantcontext/writer/BCF2Writer.java b/src/main/java/htsjdk/variant/variantcontext/writer/BCF2Writer.java
index ac095d6f83..c81e9898e1 100644
--- a/src/main/java/htsjdk/variant/variantcontext/writer/BCF2Writer.java
+++ b/src/main/java/htsjdk/variant/variantcontext/writer/BCF2Writer.java
@@ -110,7 +110,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
     private final Map<VariantContext, List<String>> genotypeKeys = new HashMap<>();
 
     private BCF2Encoder encoder; // initialized after the header arrives
-
+    private BCFVersion version;
     private BCF2FieldWriterManager fieldWriterManager;
 
     /**
@@ -194,8 +194,12 @@ public void add(VariantContext vc) {
             // Genotypes data
             final int genotypesLength;
             final BCF2Codec.LazyData lazyData = getLazyData(vc);  // has critical side effects
-            if (lazyData != null) {
-                // we never decoded any data from this BCF file so we don't need to re-encode the samples data
+            final boolean lazyDataUsable = lazyData != null && lazyData.version == this.version;
+            if (lazyDataUsable) {
+                // We never decoded any data from this BCF file, and its contents were already encoded in the same BCF
+                // version as we are currently writing, so we don't need to re-encode the samples data.
+                // Note that the version check is necessary so that we do not write contents encoded using an old
+                // version of BCF as if it were a newer version, as this can cause problems with e.g. MISSING values
                 genotypesLength = lazyData.bytes.length;
             } else {
                 // we have to do work to convert the VC into a BCF2 byte stream
@@ -210,7 +214,7 @@ public void add(VariantContext vc) {
             // Write the encoder's buffer into the output stream
             // If there was no lazy data, this also contains the genotypes data
             encoder.write(outputStream);
-            if (lazyData != null) {
+            if (lazyDataUsable) {
                 // The encoder only contained sites data, so we need to write the lazy data
                 outputStream.write(lazyData.bytes);
             }
@@ -236,7 +240,8 @@ public void setHeader(final VCFHeader header) {
             throw new IllegalStateException("The header cannot be modified after the header or variants have been written to the output stream.");
         }
 
-        encoder = BCF2Encoder.getEncoder(BCF2Codec.ALLOWED_BCF_VERSION);
+        version = getBCFVersionFromHeader(header);
+        encoder = BCF2Encoder.getEncoder(version);
 
         // make sure the header is sorted correctly
         this.header = doNotWriteGenotypes
@@ -273,6 +278,17 @@ public void setHeader(final VCFHeader header) {
         fieldWriterManager = new BCF2FieldWriterManager(header, stringDictionaryMap, encoder);
     }
 
+    /**
+     * Determine the appropriate BCF version to use to encode a VCF with based on the version of its VCF header
+     * Note: currently htsjdk only supports one version of BCF (2.2), but this method is here for if/when
+     * new BCF versions are added.
+     * @param header
+     * @return
+     */
+    private static BCFVersion getBCFVersionFromHeader(final VCFHeader header) {
+        return BCF2Codec.ALLOWED_BCF_VERSION;
+    }
+
     // --------------------------------------------------------------------------------
     //
     // implicit block

From b5b264995d18fc4e794c7c3cb4fba1cda44f37b9 Mon Sep 17 00:00:00 2001
From: Anders Leung <aleung@ml6062-aleung.lan>
Date: Wed, 8 Dec 2021 10:21:37 -0500
Subject: [PATCH 16/22] Fix BCF lazy data version checking and genotype key
 computation

---
 .../java/htsjdk/variant/variantcontext/writer/BCF2Writer.java | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/main/java/htsjdk/variant/variantcontext/writer/BCF2Writer.java b/src/main/java/htsjdk/variant/variantcontext/writer/BCF2Writer.java
index c81e9898e1..38e012fb2a 100644
--- a/src/main/java/htsjdk/variant/variantcontext/writer/BCF2Writer.java
+++ b/src/main/java/htsjdk/variant/variantcontext/writer/BCF2Writer.java
@@ -194,7 +194,7 @@ public void add(VariantContext vc) {
             // Genotypes data
             final int genotypesLength;
             final BCF2Codec.LazyData lazyData = getLazyData(vc);  // has critical side effects
-            final boolean lazyDataUsable = lazyData != null && lazyData.version == this.version;
+            final boolean lazyDataUsable = lazyData != null && lazyData.version.equals(this.version);
             if (lazyDataUsable) {
                 // We never decoded any data from this BCF file, and its contents were already encoded in the same BCF
                 // version as we are currently writing, so we don't need to re-encode the samples data.
@@ -423,7 +423,7 @@ private void buildInfo(final VariantContext vc) throws IOException {
     }
 
     private void buildSamplesData(final VariantContext vc) throws IOException {
-        fieldWriterManager.writeFormat(vc, genotypeKeys.get(vc));
+        fieldWriterManager.writeFormat(vc, genotypeKeys.computeIfAbsent(vc, v -> v.calcVCFGenotypeKeys(header)));
     }
 
     // --------------------------------------------------------------------------------

From 65cd7f77f81252b2a959f1e41a33e250014967fe Mon Sep 17 00:00:00 2001
From: Anders Leung <aleung@ml6062-aleung.lan>
Date: Thu, 9 Dec 2021 17:11:19 -0500
Subject: [PATCH 17/22] Match bcftools behavior when writing empty vectors

---
 .../java/htsjdk/variant/bcf2/BCF2Encoder.java | 19 ++--
 .../BCF2FieldWriter/BCF2FieldEncoder.java     | 92 +++++++++++--------
 .../java/htsjdk/variant/VariantBaseTest.java  |  2 +-
 .../BCF2FieldWriter/BCF2FieldEncoderTest.java |  8 +-
 .../variant/bcf2/BCF2WriterUnitTest.java      |  5 +-
 5 files changed, 68 insertions(+), 58 deletions(-)

diff --git a/src/main/java/htsjdk/variant/bcf2/BCF2Encoder.java b/src/main/java/htsjdk/variant/bcf2/BCF2Encoder.java
index 55fe4a7a7e..0eccbf3711 100644
--- a/src/main/java/htsjdk/variant/bcf2/BCF2Encoder.java
+++ b/src/main/java/htsjdk/variant/bcf2/BCF2Encoder.java
@@ -251,10 +251,14 @@ public final void encodeRawString(final byte[] s, final int paddedSize) {
     }
 
     public final void encodeRawVecInt(final int[] vs, final int paddedSize, final BCF2Type type) throws IOException {
+        encodeRawVecInt(vs, type);
+        encodePaddingValues(paddedSize - vs.length, type);
+    }
+
+    public final void encodeRawVecInt(final int[] vs, final BCF2Type type) throws IOException {
         for (final int v : vs) {
             type.write(v, encodeStream);
         }
-        encodePaddingValues(paddedSize - vs.length, type);
     }
 
     public final void encodeRawVecInt(final List<Integer> vs, final BCF2Type type) throws IOException {
@@ -267,16 +271,10 @@ public final void encodeRawVecInt(final List<Integer> vs, final BCF2Type type) t
         }
     }
 
-    public final void encodeRawVecInt(final List<Integer> vs, final int paddedSize, final BCF2Type type) throws IOException {
-        encodeRawVecInt(vs, type);
-        encodePaddingValues(paddedSize - vs.size(), type);
-    }
-
-    public final void encodeRawVecFloat(final double[] vs, final int paddedSize) throws IOException {
+    public final void encodeRawVecFloat(final double[] vs) throws IOException {
         for (final double v : vs) {
             encodeRawFloat(v);
         }
-        encodePaddingValues(paddedSize - vs.length, BCF2Type.FLOAT);
     }
 
     public final void encodeRawVecFloat(final List<Double> vs) throws IOException {
@@ -289,11 +287,6 @@ public final void encodeRawVecFloat(final List<Double> vs) throws IOException {
         }
     }
 
-    public final void encodeRawVecFloat(final List<Double> vs, final int paddedSize) throws IOException {
-        encodeRawVecFloat(vs);
-        encodePaddingValues(paddedSize - vs.size(), BCF2Type.FLOAT);
-    }
-
     public final void encodePaddingValues(final int size, final BCF2Type type) throws IOException {
         for (int i = 0; i < size; i++) {
             encodePaddingValue(type);
diff --git a/src/main/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldEncoder.java b/src/main/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldEncoder.java
index 3d95f4ae5e..1c252d80ec 100644
--- a/src/main/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldEncoder.java
+++ b/src/main/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldEncoder.java
@@ -235,30 +235,33 @@ void load(final Object o) {
 
         @Override
         void encode() throws IOException {
-            for (final Object o : vs) {
-                if (o == null) {
-                    // TODO we encode an entirely missing vector as all EOV, or essentially a 0-length vector
-                    //  padded to the appropriate length with EOV, this encoding is allowed but not required
-                    //  by the spec[1], but bcftools currently does not appear to handle it properly[2],
-                    //  printing such empty vectors in VCF as an empty string and not '.' or '.,.'
-                    //  bcfools encodes empty vectors uniformly as [MISSING, EOV*] which we handle appropriately,
-                    //  and the distinction between partially missing [MISSING, EOV] and fully missing [EOV, EOV]
-                    //  vectors is apparently not required to be preserved by implementations
-                    //  We could either match our output to bcftools' codec or keep it as is, and wait for
-                    //  bcftools to resolve this issue
-                    //  [1] https://github.com/samtools/hts-specs/issues/593#issuecomment-910266633
-                    //  [2] https://github.com/samtools/bcftools/issues/1622
-                    encoder.encodePaddingValues(nValues, type);
-                } else if (o instanceof List) {
-                    final List<Integer> v = (List<Integer>) o;
-                    encoder.encodeRawVecInt(v, nValues, type);
-                } else if (o instanceof Integer) {
-                    final Integer v = (Integer) o;
-                    encoder.encodeRawInt(v, type);
-                    encoder.encodePaddingValues(nValues - 1, type);
-                } else if (o instanceof int[]) {
-                    final int[] v = (int[]) o;
-                    encoder.encodeRawVecInt(v, nValues, type);
+            if (nValues > 0) {
+                for (final Object o : vs) {
+                    final int valuesWritten;
+                    if (o == null) {
+                        valuesWritten = 0;
+                    } else if (o instanceof List) {
+                        final List<Integer> v = (List<Integer>) o;
+                        encoder.encodeRawVecInt(v, type);
+                        valuesWritten = v.size();
+                    } else if (o instanceof Integer) {
+                        final Integer v = (Integer) o;
+                        encoder.encodeRawInt(v, type);
+                        valuesWritten = 1;
+                    } else if (o instanceof int[]) {
+                        final int[] v = (int[]) o;
+                        encoder.encodeRawVecInt(v, type);
+                        valuesWritten = v.length;
+                    } else {
+                        throw new TribbleException("");
+                    }
+                    // In order to produce output that bcftools can interpret, we always write one MISSING
+                    // value even if the input is entirely absent, which we would otherwise write as a vector of
+                    // all EOV values
+                    if (valuesWritten == 0) {
+                        encoder.encodeRawMissingValue(type);
+                    }
+                    encoder.encodePaddingValues(nValues - Math.max(valuesWritten, 1), type);
                 }
             }
             vs.clear();
@@ -298,19 +301,34 @@ void load(final Object o) {
 
         @Override
         void encode() throws IOException {
-            for (final Object o : vs) {
-                if (o == null) {
-                    encoder.encodePaddingValues(nValues, type);
-                } else if (o instanceof List) {
-                    final List<Double> v = (List<Double>) o;
-                    encoder.encodeRawVecFloat(v, nValues);
-                } else if (o instanceof Double) {
-                    final Double v = (Double) o;
-                    encoder.encodeRawFloat(v);
-                    encoder.encodePaddingValues(nValues - 1, BCF2Type.FLOAT);
-                } else if (o instanceof double[]) {
-                    final double[] v = (double[]) o;
-                    encoder.encodeRawVecFloat(v, nValues);
+            if (nValues > 0) {
+                for (final Object o : vs) {
+                    final int valuesWritten;
+                    if (o == null) {
+                        valuesWritten = 0;
+                    } else if (o instanceof List) {
+                        final List<Double> v = (List<Double>) o;
+                        encoder.encodeRawVecFloat(v);
+                        valuesWritten = v.size();
+                    } else if (o instanceof Double) {
+                        final Double v = (Double) o;
+                        encoder.encodeRawFloat(v);
+                        valuesWritten = 1;
+                    } else if (o instanceof double[]) {
+                        final double[] v = (double[]) o;
+                        encoder.encodeRawVecFloat(v);
+                        valuesWritten = v.length;
+                    } else {
+                        throw new TribbleException("");
+                    }
+
+                    // In order to produce output that bcftools can interpret, we always write one MISSING
+                    // value even if the input is entirely absent, which we would otherwise write as a vector of
+                    // all EOV values
+                    if (valuesWritten == 0) {
+                        encoder.encodeRawMissingValue(type);
+                    }
+                    encoder.encodePaddingValues(nValues - Math.max(valuesWritten, 1), BCF2Type.FLOAT);
                 }
             }
             vs.clear();
diff --git a/src/test/java/htsjdk/variant/VariantBaseTest.java b/src/test/java/htsjdk/variant/VariantBaseTest.java
index 749ffe69e9..58e6cef658 100644
--- a/src/test/java/htsjdk/variant/VariantBaseTest.java
+++ b/src/test/java/htsjdk/variant/VariantBaseTest.java
@@ -254,7 +254,7 @@ private static void assertAttributesEquals(final Map<String, Object> actual, Map
             }
             else {
                 // it's ok to have a binding in x -> null that's absent in y
-                Assert.assertNull(actualValue, act.getKey() + " present in one but not in the other");
+                Assert.assertTrue(isMissingAttribute(actualValue), act.getKey() + " present in one but not in the other");
             }
             expectedKeys.remove(act.getKey());
         }
diff --git a/src/test/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldEncoderTest.java b/src/test/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldEncoderTest.java
index 5946caa8a4..2903fc4a63 100644
--- a/src/test/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldEncoderTest.java
+++ b/src/test/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldEncoderTest.java
@@ -122,7 +122,7 @@ public static Object[][] fieldEncoderCases() {
                 final List<Object> vecsToEncode = Arrays.asList(
                     Arrays.asList(null, 1),  // Internal null should be missing bytes, not EOV
                     new int[]{1},            // Short vector should be EOV padded
-                    null,                    // Entirely missing vector should be all EOV
+                    null,                    // Entirely missing vector should start with one MISSING, then be EOV padded
                     1 << (byteWidth * 8 - 2) // Atomic value should be treated as vector of size 1
                 );
                 final int nValues = 2;
@@ -130,7 +130,7 @@ public static Object[][] fieldEncoderCases() {
                 final int[] ints = new int[]{
                     intType.getMissingBytes(), 1,
                     1, intType.getEOVBytes(),
-                    intType.getEOVBytes(), intType.getEOVBytes(),
+                    intType.getMissingBytes(), intType.getEOVBytes(),
                     1 << (byteWidth * 8 - 2), intType.getEOVBytes(),
                 };
                 for (final int i : ints) {
@@ -152,7 +152,7 @@ public static Object[][] fieldEncoderCases() {
             final List<Object> vecsToEncode = Arrays.asList(
                 Arrays.asList(null, 1.0), // Internal null should be missing bytes, not EOV
                 new double[]{1.0},        // Short vector should be EOV padded
-                null,                     // Entirely missing vector should be all EOV
+                null,                     // Entirely missing vector should start with one MISSING, then be EOV padded
                 Double.NaN                // Atomic value should be treated as vector of size 1
             );
             final int nValues = 2;
@@ -160,7 +160,7 @@ public static Object[][] fieldEncoderCases() {
             final int[] ints = new int[]{
                 BCF2Type.FLOAT.getMissingBytes(), Float.floatToRawIntBits(1.0f),
                 Float.floatToRawIntBits(1.0f), BCF2Type.FLOAT.getEOVBytes(),
-                BCF2Type.FLOAT.getEOVBytes(), BCF2Type.FLOAT.getEOVBytes(),
+                BCF2Type.FLOAT.getMissingBytes(), BCF2Type.FLOAT.getEOVBytes(),
                 Float.floatToRawIntBits((float) Double.NaN), BCF2Type.FLOAT.getEOVBytes(),
             };
             for (final int i : ints) {
diff --git a/src/test/java/htsjdk/variant/bcf2/BCF2WriterUnitTest.java b/src/test/java/htsjdk/variant/bcf2/BCF2WriterUnitTest.java
index 83258c54ae..95e5ce65dd 100644
--- a/src/test/java/htsjdk/variant/bcf2/BCF2WriterUnitTest.java
+++ b/src/test/java/htsjdk/variant/bcf2/BCF2WriterUnitTest.java
@@ -351,9 +351,8 @@ public Object[][] bcftoolsReadsHtsjdkOutputProvider() {
             {"NA12891.vcf"},
             {"NA12891.fp.vcf"},
             {"structuralvariants.vcf"},
-            // These two tests appear to fail because of a bcftools bug
-//            {"ex2.vcf"},
-//            {"test.vcf.bgz"},
+            {"ex2.vcf"},
+            {"test.vcf.bgz"},
             {"vcf43/all43Features.utf8.vcf"}
         };
     }

From 56a07db63112df343e5377aaddede74a9f9b0ab3 Mon Sep 17 00:00:00 2001
From: Anders Leung <aleung@ml6062-aleung.lan>
Date: Fri, 10 Dec 2021 13:29:49 -0500
Subject: [PATCH 18/22] Fix spotbugs warning

---
 src/main/java/htsjdk/variant/bcf2/BCF2Codec.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/main/java/htsjdk/variant/bcf2/BCF2Codec.java b/src/main/java/htsjdk/variant/bcf2/BCF2Codec.java
index acd5cc3090..7409a48e77 100644
--- a/src/main/java/htsjdk/variant/bcf2/BCF2Codec.java
+++ b/src/main/java/htsjdk/variant/bcf2/BCF2Codec.java
@@ -69,7 +69,7 @@
 public class BCF2Codec extends BinaryFeatureCodec<VariantContext> {
     private static final Log log = Log.getInstance(BCF2Codec.class);
 
-    public static String IDXField = "IDX"; // BCF2.2 IDX field name
+    public static final String IDXField = "IDX"; // BCF2.2 IDX field name
 
     protected final static int ALLOWED_MAJOR_VERSION = 2;
     protected final static int ALLOWED_MINOR_VERSION = 2;

From e917a3e841d04a7f695b4ba2b7c3493c25978f43 Mon Sep 17 00:00:00 2001
From: Anders Leung <aleung@ml6062-aleung.lan>
Date: Fri, 10 Dec 2021 15:16:16 -0500
Subject: [PATCH 19/22] Clean up BCF2Encoder, better error in BCF2FieldEncoder

---
 src/main/java/htsjdk/variant/bcf2/BCF2Encoder.java     | 10 ----------
 .../variant/bcf2/BCF2FieldWriter/BCF2FieldEncoder.java |  4 ++--
 .../variant/bcf2/BCF2EncoderDecoderUnitTest.java       |  9 +++++++--
 3 files changed, 9 insertions(+), 14 deletions(-)

diff --git a/src/main/java/htsjdk/variant/bcf2/BCF2Encoder.java b/src/main/java/htsjdk/variant/bcf2/BCF2Encoder.java
index 0eccbf3711..335fc2e7f2 100644
--- a/src/main/java/htsjdk/variant/bcf2/BCF2Encoder.java
+++ b/src/main/java/htsjdk/variant/bcf2/BCF2Encoder.java
@@ -153,16 +153,6 @@ public final void encodeTypedVecInt(final int[] vs, final int paddedSize) throws
         encodeRawVecInt(vs, paddedSize, type);
     }
 
-    // TODO only used in testing, should remove and update tests
-    public final void encodeTyped(final List<?> v, final BCF2Type type) throws IOException {
-        if (type == BCF2Type.CHAR && !v.isEmpty()) {
-            encodeTypedString(compactStrings((List<String>) v));
-        } else {
-            encodeType(v.size(), type);
-            encodeRawValues(v, type);
-        }
-    }
-
 
     // --------------------------------------------------------------------------------
     //
diff --git a/src/main/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldEncoder.java b/src/main/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldEncoder.java
index 1c252d80ec..c7bf8bbdba 100644
--- a/src/main/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldEncoder.java
+++ b/src/main/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldEncoder.java
@@ -253,7 +253,7 @@ void encode() throws IOException {
                         encoder.encodeRawVecInt(v, type);
                         valuesWritten = v.length;
                     } else {
-                        throw new TribbleException("");
+                        throw BCF2FieldEncoder.incompatibleType(o, type);
                     }
                     // In order to produce output that bcftools can interpret, we always write one MISSING
                     // value even if the input is entirely absent, which we would otherwise write as a vector of
@@ -319,7 +319,7 @@ void encode() throws IOException {
                         encoder.encodeRawVecFloat(v);
                         valuesWritten = v.length;
                     } else {
-                        throw new TribbleException("");
+                        throw BCF2FieldEncoder.incompatibleType(o, type);
                     }
 
                     // In order to produce output that bcftools can interpret, we always write one MISSING
diff --git a/src/test/java/htsjdk/variant/bcf2/BCF2EncoderDecoderUnitTest.java b/src/test/java/htsjdk/variant/bcf2/BCF2EncoderDecoderUnitTest.java
index 050931444b..5d888f76fd 100644
--- a/src/test/java/htsjdk/variant/bcf2/BCF2EncoderDecoderUnitTest.java
+++ b/src/test/java/htsjdk/variant/bcf2/BCF2EncoderDecoderUnitTest.java
@@ -244,8 +244,13 @@ public void testBCF2EncodingVectors(final List<BCF2TypedValue> toEncode, final B
         for (final BCF2TypedValue tv : toEncode) {
             for (final int length : Arrays.asList(2, 5, 10, 15, 20, 25)) {
                 final BCF2Encoder encoder = BCF2Encoder.getEncoder(version);
-                final List<Object> expected = Collections.nCopies(length, tv.value);
-                encoder.encodeTyped(expected, tv.type);
+                final List<?> expected = Collections.nCopies(length, tv.value);
+                if (tv.type == BCF2Type.CHAR && !expected.isEmpty()) {
+                    encoder.encodeTypedString(encoder.compactStrings((List<String>) expected));
+                } else {
+                    encoder.encodeType(expected.size(), tv.type);
+                    encoder.encodeRawValues(expected, tv.type);
+                }
 
                 final BCF2Decoder decoder = BCF2Decoder.getDecoder(version, encoder.getRecordBytes());
                 final Object decoded = decoder.decodeTypedValue();

From 4af39399433b25fedc4d02f11e806a9273c48307 Mon Sep 17 00:00:00 2001
From: Anders Leung <aleung@ml6062-aleung.lan>
Date: Tue, 14 Dec 2021 14:39:14 -0500
Subject: [PATCH 20/22] Cleanup in BCF code

---
 .../java/htsjdk/variant/bcf2/BCF2Codec.java    | 11 ++---------
 .../java/htsjdk/variant/bcf2/BCF2Decoder.java  | 18 ++++++++----------
 .../bcf2/BCF2FieldWriter/BCF2FieldWriter.java  |  1 -
 .../variantcontext/writer/BCF2Writer.java      | 10 +++++-----
 .../BCF2FieldWriter/BCF2FieldEncoderTest.java  |  2 +-
 5 files changed, 16 insertions(+), 26 deletions(-)

diff --git a/src/main/java/htsjdk/variant/bcf2/BCF2Codec.java b/src/main/java/htsjdk/variant/bcf2/BCF2Codec.java
index 7409a48e77..04adb8cbf1 100644
--- a/src/main/java/htsjdk/variant/bcf2/BCF2Codec.java
+++ b/src/main/java/htsjdk/variant/bcf2/BCF2Codec.java
@@ -25,7 +25,6 @@
 
 package htsjdk.variant.bcf2;
 
-import htsjdk.samtools.BAMIndexer;
 import htsjdk.samtools.util.IOUtil;
 import htsjdk.samtools.util.Log;
 import htsjdk.tribble.BinaryFeatureCodec;
@@ -36,16 +35,12 @@
 import htsjdk.tribble.readers.LineIteratorImpl;
 import htsjdk.tribble.readers.PositionalBufferedStream;
 import htsjdk.tribble.readers.SynchronousLineReader;
-import htsjdk.variant.utils.GeneralUtils;
 import htsjdk.variant.variantcontext.Allele;
 import htsjdk.variant.variantcontext.GenotypeBuilder;
 import htsjdk.variant.variantcontext.LazyGenotypesContext;
 import htsjdk.variant.variantcontext.VariantContext;
 import htsjdk.variant.variantcontext.VariantContextBuilder;
-import htsjdk.variant.variantcontext.VariantContextUtils;
 import htsjdk.variant.vcf.VCFCodec;
-import htsjdk.variant.vcf.VCFCompoundHeaderLine;
-import htsjdk.variant.vcf.VCFConstants;
 import htsjdk.variant.vcf.VCFHeader;
 import htsjdk.variant.vcf.VCFHeaderLineType;
 import htsjdk.variant.vcf.VCFInfoHeaderLine;
@@ -54,10 +49,8 @@
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.InputStream;
-import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -141,8 +134,8 @@ public VariantContext decode(final PositionalBufferedStream inputStream) {
             recordNo++;
             final VariantContextBuilder builder = new VariantContextBuilder();
 
-            final int sitesBlockSize = decoder.readBlockSize(inputStream);
-            final int genotypeBlockSize = decoder.readBlockSize(inputStream);
+            final int sitesBlockSize = BCF2Decoder.readBlockSize(inputStream);
+            final int genotypeBlockSize = BCF2Decoder.readBlockSize(inputStream);
 
             decoder.readNextBlock(sitesBlockSize, inputStream);
             decodeSiteLoc(builder);
diff --git a/src/main/java/htsjdk/variant/bcf2/BCF2Decoder.java b/src/main/java/htsjdk/variant/bcf2/BCF2Decoder.java
index db0814839c..e88db1e115 100644
--- a/src/main/java/htsjdk/variant/bcf2/BCF2Decoder.java
+++ b/src/main/java/htsjdk/variant/bcf2/BCF2Decoder.java
@@ -375,7 +375,7 @@ public final int[] decodeIntArray(final byte typeDescriptor, final int size) thr
         return decodeIntArray(size, type, null);
     }
 
-    private double rawFloatToFloat(final int rawFloat) {
+    private static double rawFloatToFloat(final int rawFloat) {
         return Float.intBitsToFloat(rawFloat);
     }
 
@@ -391,7 +391,7 @@ private double rawFloatToFloat(final int rawFloat) {
      * @param inputStream
      * @return
      */
-    public final int readBlockSize(final InputStream inputStream) throws IOException {
+    public static int readBlockSize(final InputStream inputStream) throws IOException {
         return BCF2Type.INT32.read(inputStream);
     }
 
@@ -410,21 +410,19 @@ private static byte[] readRecordBytes(final int blockSizeInBytes, final InputStr
         final byte[] record = new byte[blockSizeInBytes];
         try {
             int bytesRead = 0;
-            final int nReadAttempts = 0; // keep track of how many times we've read
+            int nReadAttempts = 0; // keep track of how many times we've read
 
             // because we might not read enough bytes from the file in a single go, do it in a loop until we get EOF
             while (bytesRead < blockSizeInBytes) {
                 final int read1 = inputStream.read(record, bytesRead, blockSizeInBytes - bytesRead);
-                if (read1 == -1)
+                nReadAttempts++;
+                if (read1 == -1) {
                     validateReadBytes(bytesRead, nReadAttempts, blockSizeInBytes);
+                    break;
+                }
                 else
                     bytesRead += read1;
             }
-
-            if (GeneralUtils.DEBUG_MODE_ENABLED && nReadAttempts > 1) { // TODO -- remove me
-                System.err.println("Required multiple read attempts to actually get the entire BCF2 block, unexpected behavior");
-            }
-
             validateReadBytes(bytesRead, nReadAttempts, blockSizeInBytes);
         } catch (final IOException e) {
             throw new TribbleException("I/O error while reading BCF2 file", e);
@@ -445,7 +443,7 @@ private static void validateReadBytes(final int actuallyRead, final int nReadAtt
 
         if (actuallyRead < expected) {
             throw new TribbleException(
-                String.format("Failed to read next complete record: expected %d bytes but read only %d after %d iterations",
+                String.format("Failed to read next complete record: expected %d bytes but read only %d after %d read attempts",
                     expected, actuallyRead, nReadAttempts));
         }
     }
diff --git a/src/main/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldWriter.java b/src/main/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldWriter.java
index dd140b3b3a..d91b706681 100644
--- a/src/main/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldWriter.java
+++ b/src/main/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldWriter.java
@@ -179,7 +179,6 @@ void encode(final VariantContext vc) throws IOException {
     // TODO in the genotype writers, a missing genotype (one where variantContext.getGenotype(sampleName) == null)
     //  is treated like one where all its attributes/inline fields are missing, this matches the behavior
     //  of the old writer, which previously created a new empty Genotype object for each missing genotypes, is this right?
-    //  For example, should the FT string of a missing genotype be PASS or a padded empty string
 
     /**
      * Class that writes one field specified by a {@link VCFFormatHeaderLine}
diff --git a/src/main/java/htsjdk/variant/variantcontext/writer/BCF2Writer.java b/src/main/java/htsjdk/variant/variantcontext/writer/BCF2Writer.java
index 38e012fb2a..517d5eeb3d 100644
--- a/src/main/java/htsjdk/variant/variantcontext/writer/BCF2Writer.java
+++ b/src/main/java/htsjdk/variant/variantcontext/writer/BCF2Writer.java
@@ -27,8 +27,8 @@
 
 import htsjdk.samtools.SAMSequenceDictionary;
 import htsjdk.samtools.util.IOUtil;
+import htsjdk.samtools.util.Log;
 import htsjdk.samtools.util.RuntimeIOException;
-import htsjdk.tribble.TribbleException;
 import htsjdk.tribble.index.IndexCreator;
 import htsjdk.variant.bcf2.BCF2Codec;
 import htsjdk.variant.bcf2.BCF2Dictionary;
@@ -95,6 +95,8 @@
  * @since 06/12
  */
 class BCF2Writer extends IndexingVariantContextWriter {
+    private static final Log log = Log.getInstance(BCF2Writer.class);
+
     public static final int MAJOR_VERSION = 2;
     public static final int MINOR_VERSION = 2;
 
@@ -251,15 +253,13 @@ public void setHeader(final VCFHeader header) {
         // TODO should follow up on hts-specs and clarify the relationship between ##dictionary and IDX fields
         // Error on ##dictionary lines, we don't know what to do with them
         if (this.header.getMetaDataInInputOrder().stream().anyMatch(line -> line.getKey().equals("dictionary"))) {
-            throw new TribbleException("Use of the ##dictionary line is not supported");
+            log.warn("Use of the ##dictionary line is not supported");
         }
 
         // create the config offsets map
         if (this.header.getContigLines().isEmpty()) {
             if (ALLOW_MISSING_CONTIG_LINES) {
-                if (GeneralUtils.DEBUG_MODE_ENABLED) {
-                    System.err.println("No contig dictionary found in header, falling back to reference sequence dictionary");
-                }
+                log.debug("No contig dictionary found in header, falling back to reference sequence dictionary");
                 // The reference sequence dictionary should never contain IDX fields
                 createContigDictionary(VCFUtils.makeContigHeaderLines(getRefDict(), null));
             } else {
diff --git a/src/test/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldEncoderTest.java b/src/test/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldEncoderTest.java
index 2903fc4a63..6b72308c54 100644
--- a/src/test/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldEncoderTest.java
+++ b/src/test/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldEncoderTest.java
@@ -250,7 +250,7 @@ public void testSiteWriters(
         final VariantContext vc,
         final byte[] expectedBytes
     ) throws IOException {
-        // Skip starting so we don't get key in output
+        // Skip writing key so that we don't get key in output
         writer.encode(vc);
         Assert.assertEquals(expectedBytes, ENCODER.getRecordBytes());
     }

From a991f7e77f54f79e1aca9dfe9528aaa21f76f2a9 Mon Sep 17 00:00:00 2001
From: Anders Leung <aleung@ml6062-aleung.lan>
Date: Wed, 22 Dec 2021 12:37:06 -0500
Subject: [PATCH 21/22] Add disabled test for missing Character and String VCF
 types

---
 .../BCF2FieldWriter/BCF2FieldEncoder.java     | 21 +++++++++++++++++--
 .../bcf2/BCF2GenotypeFieldDecoders.java       |  8 +++++++
 .../java/htsjdk/variant/bcf2/BCF2Type.java    |  8 +++----
 .../BCF2FieldWriter/BCF2FieldEncoderTest.java |  2 ++
 .../variant/bcf2/BCF2WriterUnitTest.java      |  8 ++++++-
 .../variant/missingStringAndCharacterTest.vcf | 17 +++++++++++++++
 6 files changed, 56 insertions(+), 8 deletions(-)
 create mode 100644 src/test/resources/htsjdk/variant/missingStringAndCharacterTest.vcf

diff --git a/src/main/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldEncoder.java b/src/main/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldEncoder.java
index c7bf8bbdba..546fd3eff0 100644
--- a/src/main/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldEncoder.java
+++ b/src/main/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldEncoder.java
@@ -8,6 +8,7 @@
 import htsjdk.variant.vcf.VCFCompoundHeaderLine;
 
 import java.io.IOException;
+import java.nio.ByteBuffer;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.List;
@@ -109,6 +110,8 @@ void encode() throws IOException {
 
     static class CharFieldEncoder extends BCF2FieldEncoder {
 
+        // TODO see https://github.com/samtools/hts-specs/issues/618
+        // private static final byte[] MISSING = new byte[] {(byte) BCF2Type.CHAR.getMissingBytes()};
         private static final byte[] EMPTY = new byte[0];
 
         private final List<byte[]> vs = new ArrayList<>();
@@ -127,6 +130,20 @@ void load(final Object o) {
                 final byte[] b = ((String) o).getBytes(StandardCharsets.UTF_8);
                 nValues = Math.max(nValues, b.length);
                 vs.add(b);
+            } else if (o instanceof List) {
+                final List<String> strings = (List<String>) o;
+                nValues = Math.max(nValues, strings.size());
+                final ByteBuffer buff = ByteBuffer.allocate(strings.size());
+                for (final String s : strings) {
+                    if (s == null) {
+                        buff.put((byte) type.getMissingBytes());
+                    } else if (s.length() > 1) {
+                        throw new TribbleException("Value of VCF type Character is a string with more than 1 character: " + s);
+                    } else {
+                        buff.put(s.getBytes(StandardCharsets.UTF_8)[0]);
+                    }
+                }
+                vs.add(buff.array());
             } else {
                 throw BCF2FieldEncoder.incompatibleType(o, type);
             }
@@ -337,7 +354,7 @@ void encode() throws IOException {
     }
 
     static TribbleException incompatibleType(final Object o, final BCF2Type type) {
-        final String error = "Could not write object: %s whose type is incompatible with declared header of type: %s";
-        return new TribbleException(String.format(error, o, type));
+        final String error = "Could not write object: %s whose type %s is incompatible with declared header of type: %s";
+        return new TribbleException(String.format(error, o, o == null ? "null" : o.getClass().getSimpleName(), type));
     }
 }
diff --git a/src/main/java/htsjdk/variant/bcf2/BCF2GenotypeFieldDecoders.java b/src/main/java/htsjdk/variant/bcf2/BCF2GenotypeFieldDecoders.java
index 173e095687..34d49546c1 100644
--- a/src/main/java/htsjdk/variant/bcf2/BCF2GenotypeFieldDecoders.java
+++ b/src/main/java/htsjdk/variant/bcf2/BCF2GenotypeFieldDecoders.java
@@ -257,6 +257,14 @@ public void decode(final List<Allele> siteAlleles, final String field, final BCF
             for (final GenotypeBuilder gb : gbs) {
                 final Object value = decoder.decodeTypedValue(typeDescriptor, numElements);
                 if (value == null) continue;
+                // TODO see https://github.com/samtools/hts-specs/issues/618
+                //  Although it seems like a very rare corner case, this decoder cannot distinguish between
+                //  a vector of Character and a String. These are different VCF types with identical BCF
+                //  encodings, yet they should be decoded into different Java objects:
+                //  List<String> chars = Arrays.asList("a", "b", "c") vs. String str = "abc".
+                //  To tell them apart we would need the header line for each key so we can inspect its VCF type,
+                //  as the BCF writer does. That would require a rewrite of this class, which is desirable either
+                //  way so we can validate the number and type of deserialized attributes more strictly.
                 if (value instanceof List && ((List) value).size() == 1) {
                     // TODO not sure what this refers to, htsjdk itself doesn't make any assumptions about
                     //  the concrete type of the data contained in the attributes map.
diff --git a/src/main/java/htsjdk/variant/bcf2/BCF2Type.java b/src/main/java/htsjdk/variant/bcf2/BCF2Type.java
index 89610c7569..ae6f6ed90f 100644
--- a/src/main/java/htsjdk/variant/bcf2/BCF2Type.java
+++ b/src/main/java/htsjdk/variant/bcf2/BCF2Type.java
@@ -80,7 +80,6 @@ public int read(final InputStream in) throws IOException {
 
         @Override
         public void write(final int value, final OutputStream out) throws IOException {
-            // TODO -- optimization -- should we put this in a local buffer?
             out.write(value);
             out.write(value >> 8);
         }
@@ -117,10 +116,9 @@ public void write(final int value, final OutputStream out) throws IOException {
         }
     },
 
-    // CHAR isn't given a MISSING or EOV value in the spec, but for the purposes of
-    // padding strings (i.e. variable length vectors of chars), it is treated as if
-    // '\0' or NULL is both the MISSING and EOV value of CHAR
-    CHAR(7, 1, 0x00000000) {
+    // TODO uncertain as to the correct MISSING and EOV representations of Character/String
+    //  see https://github.com/samtools/hts-specs/issues/618
+    CHAR(7, 1, 0x07, 0x00, 0, 0) {
         @Override
         public int read(final InputStream in) throws IOException {
             return INT8.read(in);
diff --git a/src/test/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldEncoderTest.java b/src/test/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldEncoderTest.java
index 6b72308c54..7c5583c99f 100644
--- a/src/test/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldEncoderTest.java
+++ b/src/test/java/htsjdk/variant/bcf2/BCF2FieldWriter/BCF2FieldEncoderTest.java
@@ -80,6 +80,7 @@ public static Object[][] fieldEncoderCases() {
         }
 
         // Char encoding
+        // TODO see https://github.com/samtools/hts-specs/issues/618
         {
             final List<Object> stringsToEncode = Arrays.asList("str", null, "\0a\0");
             final int maxByteWidth = stringsToEncode
@@ -100,6 +101,7 @@ public static Object[][] fieldEncoderCases() {
         }
 
         // String encoding
+        // TODO see https://github.com/samtools/hts-specs/issues/618
         {
             final List<Object> stringsToEncode = Arrays.asList("st", null, Arrays.asList("a", "b"), new String[]{"a", "b"});
             final byte[] bytes = new byte[]{
diff --git a/src/test/java/htsjdk/variant/bcf2/BCF2WriterUnitTest.java b/src/test/java/htsjdk/variant/bcf2/BCF2WriterUnitTest.java
index 95e5ce65dd..7256c9c967 100644
--- a/src/test/java/htsjdk/variant/bcf2/BCF2WriterUnitTest.java
+++ b/src/test/java/htsjdk/variant/bcf2/BCF2WriterUnitTest.java
@@ -353,7 +353,9 @@ public Object[][] bcftoolsReadsHtsjdkOutputProvider() {
             {"structuralvariants.vcf"},
             {"ex2.vcf"},
             {"test.vcf.bgz"},
-            {"vcf43/all43Features.utf8.vcf"}
+            {"vcf43/all43Features.utf8.vcf"},
+            // This test fails because the BCF decoder cannot distinguish between a vector of Characters and a String
+//            {"missingStringAndCharacterTest.vcf"},
         };
     }
 
@@ -410,6 +412,10 @@ public Object[][] htsjdkReadsBCFToolsOutputProvider() {
             {"structuralvariants.vcf"},
             {"ex2.vcf"},
             {"test.vcf.bgz"},
+            // TODO bcftools does not convert '.' into the MISSING value for Character (0x07),
+            //  but writes it out as literal '.' which causes this test to fail when we compare '.' against null,
+            //  see https://github.com/samtools/hts-specs/issues/618
+//            {"missingStringAndCharacterTest.vcf"},
             // bcftools does not to decoding of percent encoded VCFs, so its BCF output contains the literal characters
 //            {"vcf43/all43Features.utf8.vcf"}
         };
diff --git a/src/test/resources/htsjdk/variant/missingStringAndCharacterTest.vcf b/src/test/resources/htsjdk/variant/missingStringAndCharacterTest.vcf
new file mode 100644
index 0000000000..eb9b8d0b7e
--- /dev/null
+++ b/src/test/resources/htsjdk/variant/missingStringAndCharacterTest.vcf
@@ -0,0 +1,17 @@
+##fileformat=VCFv4.3
+##contig=<ID=20,length=62435964,assembly=B36,md5=f126cdf8a6e0c7f379d618ff66beb2da,species="Homo sapiens">
+##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency">
+##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele">
+##INFO=<ID=CHAR,Number=3,Type=Character,Description="Test Character INFO key">
+##INFO=<ID=STR,Number=1,Type=String,Description="Test String INFO key">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">
+##FORMAT=<ID=CHAR,Number=3,Type=Character,Description="Test Character FORMAT key">
+##FORMAT=<ID=STR,Number=1,Type=String,Description="Test String FORMAT key">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA00001	NA00002	NA00003
+20	17330	.	T	A	.	PASS	NS=3;DP=11;AF=0.017;CHAR=.,b,c;STR=.	GT:GQ:DP:HQ:CHAR:STR	0|0:49:3:58,50:a,b,c:abc	0|1:3:5:65,3:.,.,c:c	0/0:41:3:4,5:.:.
+20	1110696	rs6040355	A	G,T	67	PASS	NS=2;DP=10;AF=0.333,0.667;AA=T;CHAR=.;STR=.	GT:GQ:DP:HQ:CHAR:STR	1|2:21:6:23,27:.:a	2|1:2:0:18,2:.:ab	2/2:35:4:10,20:.:abc

From b54ae828328c7e6e31c4f133b92d986722525a02 Mon Sep 17 00:00:00 2001
From: Anders Leung <aleung@ml6062-aleung.lan>
Date: Thu, 23 Dec 2021 15:47:05 -0500
Subject: [PATCH 22/22] Change BCF2Dictionary interface to be immutable

---
 .../java/htsjdk/variant/bcf2/BCF2Codec.java   |  8 +-
 .../htsjdk/variant/bcf2/BCF2Dictionary.java   | 74 ++++++++-----------
 2 files changed, 33 insertions(+), 49 deletions(-)

diff --git a/src/main/java/htsjdk/variant/bcf2/BCF2Codec.java b/src/main/java/htsjdk/variant/bcf2/BCF2Codec.java
index 04adb8cbf1..b987ee9cdf 100644
--- a/src/main/java/htsjdk/variant/bcf2/BCF2Codec.java
+++ b/src/main/java/htsjdk/variant/bcf2/BCF2Codec.java
@@ -512,13 +512,7 @@ protected final String getDictionaryString(final int offset) {
     }
 
     private BCF2Dictionary makeStringDictionary(final BCFVersion bcfVersion) {
-        final BCF2Dictionary dict = BCF2Dictionary.makeBCF2StringDictionary(header, bcfVersion);
-
-        // if we got here we never found a dictionary, or there are no elements in the dictionary
-        if (dict.isEmpty())
-            error("Dictionary header element was absent or empty");
-
-        return dict;
+        return BCF2Dictionary.makeBCF2StringDictionary(header, bcfVersion);
     }
 
     /**
diff --git a/src/main/java/htsjdk/variant/bcf2/BCF2Dictionary.java b/src/main/java/htsjdk/variant/bcf2/BCF2Dictionary.java
index db2d342449..5a1d0ffd94 100644
--- a/src/main/java/htsjdk/variant/bcf2/BCF2Dictionary.java
+++ b/src/main/java/htsjdk/variant/bcf2/BCF2Dictionary.java
@@ -6,7 +6,6 @@
 import htsjdk.variant.vcf.VCFHeaderLine;
 import htsjdk.variant.vcf.VCFSimpleHeaderLine;
 
-import java.util.AbstractMap;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
@@ -32,7 +31,7 @@
  * n-to-1 IDX-to-string mapping might result from tools that do not deduplicate IDXs, so
  * we accept them.
  */
-public abstract class BCF2Dictionary extends AbstractMap<Integer, String> {
+public abstract class BCF2Dictionary {
 
     /**
      * Create and return a BCF string dictionary
@@ -182,6 +181,23 @@ private static BCF2Dictionary makeDictionary(
      */
     public abstract String get(final int i);
 
+    /**
+     * Performs the given action for each entry in the dictionary.
+     * @param action the action to be performed
+     */
+    public abstract void forEach(final BiConsumer<? super Integer, ? super String> action);
+
+    /**
+     * @return the number of elements in the dictionary
+     */
+    public abstract int size();
+
+    /**
+     * @param i the BCF index to search for
+     * @return true if there is a string or contig mapped to the given index
+     */
+    public abstract boolean containsIndex(final int i);
+
     /**
      * BCF 2.2 dense sequence dictionary. Strings are assigned an index corresponding to its position in a 0-indexed
      * array. This dictionary is used if no IDX fields are present in the header, or they are present, but they
@@ -196,44 +212,28 @@ private BCF2DenseDictionary(final List<String> dictionary) {
             this.dictionary = dictionary;
         }
 
-        @Override
-        public Set<Entry<Integer, String>> entrySet() {
-            final Set<Entry<Integer, String>> set = new HashSet<>(dictionary.size());
-            int i = 0;
-            for (final String s : dictionary) {
-                set.add(new AbstractMap.SimpleEntry<>(i, s));
-                i++;
-            }
-            return set;
-        }
-
         @Override
         public String get(final int i) {
             return i < 0 || i >= dictionary.size() ? null : dictionary.get(i);
         }
 
         @Override
-        public String get(final Object key) {
-            return dictionary.get((Integer) key);
+        public void forEach(final BiConsumer<? super Integer, ? super String> action) {
+            int i = 0;
+            for (final String s : dictionary) {
+                action.accept(i, s);
+                i++;
+            }
         }
 
         @Override
         public int size() {
-            return dictionary.size();
+            return this.dictionary.size();
         }
 
         @Override
-        public boolean isEmpty() {
-            return dictionary.isEmpty();
-        }
-
-        @Override
-        public void forEach(final BiConsumer<? super Integer, ? super String> action) {
-            int i = 0;
-            for (final String s : dictionary) {
-                action.accept(i, s);
-                i++;
-            }
+        public boolean containsIndex(final int i) {
+            return i >= 0 && i < this.dictionary.size(); // negative indices are never mapped, consistent with get(int)
         }
     }
 
@@ -251,34 +251,24 @@ private BCF2SparseDictionary(final Map<Integer, String> dictionary) {
             this.dictionary = dictionary;
         }
 
-        @Override
-        public Set<Entry<Integer, String>> entrySet() {
-            return dictionary.entrySet();
-        }
-
         @Override
         public String get(final int i) {
             return dictionary.get(i);
         }
 
         @Override
-        public String get(final Object key) {
-            return dictionary.get(key);
+        public void forEach(final BiConsumer<? super Integer, ? super String> action) {
+            this.dictionary.forEach(action);
         }
 
         @Override
         public int size() {
-            return dictionary.size();
+            return this.dictionary.size();
         }
 
         @Override
-        public boolean isEmpty() {
-            return dictionary.isEmpty();
-        }
-
-        @Override
-        public void forEach(final BiConsumer<? super Integer, ? super String> action) {
-            dictionary.forEach(action);
+        public boolean containsIndex(final int i) {
+            return this.dictionary.containsKey(i);
         }
     }
 }