Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
ce7cdcf
Commit with raw GATK SequenceDictionaryUtils and SequenceDictionaryUt…
cmnbroad Nov 8, 2021
4d08d5d
VCFHeader and VCFHeaderLine refactoring to enable support for VCF4.3/…
cmnbroad Nov 8, 2021
210adb2
Eliminate redundant modeling of VCFHeaderVersion in VCFHeader.
cmnbroad Nov 15, 2021
f3b9001
Eliminate redundant modeling of file format lines in VCFMetaDataLines.
cmnbroad Nov 15, 2021
2fe930c
More code review comments.
cmnbroad Nov 15, 2021
88bdf78
Changes needed to port GATK over to htsjdk SAMSequenceDictionaryUtils.
cmnbroad Nov 23, 2021
d93546f
One more code review comment fix.
cmnbroad Nov 30, 2021
f9a0c08
Make VCFMetaDataLines public/@InternalAPI to allow consumers access f…
cmnbroad Dec 6, 2021
29c854f
Versioned header line validation framework.
cmnbroad Feb 28, 2022
5700958
Remove obsolete variable.
cmnbroad Feb 28, 2022
5e09eb3
Properly handle info fields with embedded spaces by VCF version.
cmnbroad Jun 1, 2023
860cab6
Remove unnecessary null test.
cmnbroad Jun 5, 2023
1f76c28
Properly handle info fields with embedded spaces by VCF version.
cmnbroad Jun 1, 2023
c30f4fd
Add VCF 4.3 writing
Apr 15, 2021
96b4fda
Refactor VariantContext decoding, decode VCs with different version f…
Dec 3, 2021
63aeae2
Move version upgrading from codec to VCFWriter
Dec 9, 2021
a72e8cc
Fix PrintVariantsExampleTest
Dec 10, 2021
6aa25b6
Add javadoc
Dec 15, 2021
ecd0eae
Address some review comments
Dec 23, 2021
4c9904f
Add an encoder and write support for the VCF4.3 HtsCodec.
cmnbroad Dec 14, 2021
f51153a
Remove obsolete VCFHeaderVersion method.
cmnbroad Jan 4, 2022
158829d
Unify validation failure framework.
cmnbroad Feb 25, 2022
73fc4d4
Fix spotbugs issue.
cmnbroad Mar 1, 2022
e54f282
Preserve more context for header mergers that fail due to version val…
cmnbroad Mar 1, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions src/main/java/htsjdk/beta/codecs/variants/vcf/VCFEncoder.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@
import htsjdk.beta.io.bundle.Bundle;
import htsjdk.beta.io.bundle.BundleResource;
import htsjdk.beta.io.bundle.BundleResourceType;
import htsjdk.beta.plugin.HtsVersion;
import htsjdk.beta.plugin.variants.VariantsEncoder;
import htsjdk.beta.plugin.variants.VariantsEncoderOptions;
import htsjdk.beta.plugin.variants.VariantsFormats;
import htsjdk.io.IOPath;
import htsjdk.annotations.InternalAPI;
import htsjdk.samtools.util.Log;
import htsjdk.utils.ValidationUtils;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.writer.Options;
Expand All @@ -28,6 +30,7 @@
*/
@InternalAPI
public abstract class VCFEncoder implements VariantsEncoder {
private final static Log LOG = Log.getInstance(VCFEncoder.class);
private final Bundle outputBundle;
private final VariantsEncoderOptions variantsEncoderOptions;
private final String displayName;
Expand Down Expand Up @@ -64,6 +67,15 @@ public VCFEncoder(final Bundle outputBundle, final VariantsEncoderOptions varian
public void setHeader(final VCFHeader vcfHeader) {
    ValidationUtils.nonNull(vcfHeader, "vcfHeader");

    // Only the major and minor components of the two versions are compared. A mismatch is
    // reported as a warning; the header is still written.
    final HtsVersion encoderVersion = getVersion();
    final HtsVersion headerVersion = vcfHeader.getVCFHeaderVersion().toHtsVersion();
    final boolean versionsAgree =
            headerVersion.getMajorVersion() == encoderVersion.getMajorVersion()
                    && headerVersion.getMinorVersion() == encoderVersion.getMinorVersion();
    if (!versionsAgree) {
        final String warning = String.format(
                "Using a version %s VCF header on a version %s encoder (this can happen when an in-place upgrade fails).",
                vcfHeader.getVCFHeaderVersion(),
                getVersion());
        LOG.warn(warning);
    }

    // Obtain the writer for this encoder's bundle and options, then write the header through it.
    vcfWriter = getVCFWriter(getOutputBundle(), getVariantsEncoderOptions());
    vcfWriter.writeHeader(vcfHeader);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,10 @@ public VCFDecoder getDecoder(final Bundle inputBundle, final VariantsDecoderOpti

@Override
public VCFEncoder getEncoder(final Bundle outputBundle, final VariantsEncoderOptions encoderOptions) {
throw new HtsjdkUnsupportedOperationException("Version v4.3 VCF encoder not yet implemented");
ValidationUtils.nonNull(outputBundle, "outputBundle");
ValidationUtils.nonNull(encoderOptions, "encoderOptions");

return new VCFEncoderV4_3(outputBundle, encoderOptions);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import htsjdk.beta.plugin.HtsVersion;
import htsjdk.beta.io.bundle.Bundle;
import htsjdk.beta.plugin.variants.VariantsDecoderOptions;
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFHeaderVersion;

/**
* VCF V4.3 decoder.
Expand All @@ -25,4 +27,25 @@ public HtsVersion getVersion() {
return VCFCodecV4_3.VCF_V43_VERSION;
}

/**
 * Get the header obtained from the superclass, after checking that it is a VCF v4.3 header.
 *
 * @return the {@link VCFHeader} returned by the superclass implementation
 * @throws RuntimeException if the header's version is not {@link VCFHeaderVersion#VCF4_3}
 */
@Override
public VCFHeader getHeader() {
    final VCFHeader vcfHeader = super.getHeader();

    // This decoder uses the multi-version legacy codec htsjdk.variant.vcf.VCFCodec as its underlying
    // implementation, which in turn uses VCFStandardHeaderLines#repairStandardHeaderLines(VCFHeader)
    // to in-place upgrade any pre-v4.2 input to v4.2. Most of the other codecs in codecs.variants.vcf
    // that wrap the same legacy codec have to tolerate the in-place upgrades and accept the "upgraded"
    // inputs that do not match the nominal version supported by the codec (i.e., VCFDecoderV4_0 will
    // see vcf4.2 inputs because the underlying implementation upgrades them to 4.2 in-place). Those
    // upgrades are not done by VCFStandardHeaderLines#repairStandardHeaderLines(VCFHeader) for
    // VCF4.3+, so this codec can include a sanity check and require that it only ever sees v4.3 inputs.
    final VCFHeaderVersion headerVersion = vcfHeader.getVCFHeaderVersion();
    if (!headerVersion.equals(VCFHeaderVersion.VCF4_3)) {
        // This is a decoder, so the failure is about reading (not writing) the header.
        throw new RuntimeException(
                String.format(
                        "The VCF %s version decoder cannot be used to read a version %s VCF header",
                        VCFCodecV4_3.VCF_V43_VERSION,
                        headerVersion));
    }
    return vcfHeader;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
package htsjdk.beta.codecs.variants.vcf.vcfv4_3;

import htsjdk.beta.codecs.variants.vcf.VCFEncoder;
import htsjdk.beta.io.bundle.Bundle;
import htsjdk.beta.plugin.HtsVersion;
import htsjdk.beta.plugin.variants.VariantsEncoderOptions;
import htsjdk.samtools.util.Log;
import htsjdk.utils.ValidationUtils;
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFHeaderVersion;


/**
 * Encoder for VCF version 4.3.
 */
public class VCFEncoderV4_3 extends VCFEncoder {
    protected static final Log LOG = Log.getInstance(VCFEncoderV4_3.class);

    /**
     * Construct a VCF V4.3 encoder.
     *
     * @param outputBundle the output {@link Bundle} to encode to
     * @param variantsEncoderOptions the {@link VariantsEncoderOptions} to use
     */
    public VCFEncoderV4_3(final Bundle outputBundle, final VariantsEncoderOptions variantsEncoderOptions) {
        super(outputBundle, variantsEncoderOptions);
    }

    /**
     * @return the version constant declared by {@link VCFCodecV4_3}
     */
    @Override
    public HtsVersion getVersion() {
        return VCFCodecV4_3.VCF_V43_VERSION;
    }

    /**
     * Set the header for this encoder. A header whose version is not {@link VCFHeaderVersion#VCF4_3}
     * is logged as a warning and then passed on to the superclass like any other header.
     *
     * @param vcfHeader the header to set; must not be null
     */
    @Override
    public void setHeader(final VCFHeader vcfHeader) {
        ValidationUtils.nonNull(vcfHeader, "vcfHeader");

        final VCFHeaderVersion headerVersion = vcfHeader.getVCFHeaderVersion();
        final boolean isV43Header = headerVersion.equals(VCFHeaderVersion.VCF4_3);
        if (!isV43Header) {
            final String warning = String.format(
                    "Attempting to set a version %s VCF header on a version %s VCF encoder",
                    headerVersion,
                    VCFCodecV4_3.VCF_V43_VERSION);
            LOG.warn(warning);
        }
        super.setHeader(vcfHeader);
    }

}
4 changes: 3 additions & 1 deletion src/main/java/htsjdk/beta/plugin/HtsVersion.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,10 @@
* versions.
*/
public class HtsVersion implements Comparable<HtsVersion> {
/** Sentinel constant to match any version */
public static final int ANY_VERSION = -1;
/** Sentinel constant used to indicate the newest version available */
public static final HtsVersion NEWEST_VERSION = new HtsVersion(-1, -1, -1);
public static final HtsVersion NEWEST_VERSION = new HtsVersion(ANY_VERSION, ANY_VERSION, ANY_VERSION);

private static final String FORMAT_STRING = "%d.%d.%d";

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package htsjdk.beta.plugin.registry;

import htsjdk.beta.codecs.variants.vcf.vcfv4_2.VCFCodecV4_2;
import htsjdk.beta.exception.HtsjdkException;
import htsjdk.beta.exception.HtsjdkPluginException;
import htsjdk.beta.plugin.HtsVersion;
Expand Down Expand Up @@ -164,9 +163,7 @@ public VariantsEncoder getVariantsEncoder(
ValidationUtils.nonNull(outputBundle, "Output bundle");
ValidationUtils.nonNull(variantsEncoderOptions, "Encoder options");

//NOTE: we can't allow the resolver to choose the newest registered codec when writing a
// VCF, since the newest codec is v4.3, which has no encoder, so for now explicitly select v4.2
final VariantsCodec variantsCodec = resolveForEncoding(outputBundle, VCFCodecV4_2.VCF_V42_VERSION);
final VariantsCodec variantsCodec = resolveForEncoding(outputBundle);
return (VariantsEncoder) variantsCodec.getEncoder(outputBundle, variantsEncoderOptions);
}

Expand Down
18 changes: 18 additions & 0 deletions src/main/java/htsjdk/samtools/Defaults.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package htsjdk.samtools;

import htsjdk.samtools.util.Log;
import htsjdk.variant.variantcontext.writer.VCFVersionUpgradePolicy;

import java.io.File;
import java.util.Collections;
Expand Down Expand Up @@ -110,6 +111,17 @@ public class Defaults {
*/
public static final boolean DISABLE_SNAPPY_COMPRESSOR;

/**
* Strict VCF version validation. Default = true.
*/
public static final boolean STRICT_VCF_VERSION_VALIDATION;

/**
* How to treat files from VCF versions older than the current version.
* Default = {@link VCFVersionUpgradePolicy#UPGRADE_OR_FALLBACK}
*/
public static final VCFVersionUpgradePolicy VCF_VERSION_TRANSITION_POLICY;


public static final String SAMJDK_PREFIX = "samjdk.";
static {
Expand All @@ -134,6 +146,11 @@ public class Defaults {
SAM_FLAG_FIELD_FORMAT = SamFlagField.valueOf(getStringProperty("sam_flag_field_format", SamFlagField.DECIMAL.name()));
SRA_LIBRARIES_DOWNLOAD = getBooleanProperty("sra_libraries_download", false);
DISABLE_SNAPPY_COMPRESSOR = getBooleanProperty(DISABLE_SNAPPY_PROPERTY_NAME, false);
STRICT_VCF_VERSION_VALIDATION = getBooleanProperty("strict_version_validation", true);
VCF_VERSION_TRANSITION_POLICY = VCFVersionUpgradePolicy.valueOf(getStringProperty(
"vcf_version_upgrade_policy",
VCFVersionUpgradePolicy.UPGRADE_OR_FALLBACK.name()
));
}

/**
Expand All @@ -157,6 +174,7 @@ public static SortedMap<String, Object> allDefaults(){
result.put("CUSTOM_READER_FACTORY", CUSTOM_READER_FACTORY);
result.put("SAM_FLAG_FIELD_FORMAT", SAM_FLAG_FIELD_FORMAT);
result.put("DISABLE_SNAPPY_COMPRESSOR", DISABLE_SNAPPY_COMPRESSOR);
result.put("VCF_VERSION_UPGRADE_POLICY", VCF_VERSION_TRANSITION_POLICY);
return Collections.unmodifiableSortedMap(result);
}

Expand Down
15 changes: 15 additions & 0 deletions src/main/java/htsjdk/samtools/SAMSequenceDictionary.java
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,13 @@ public SAMSequenceDictionary(final List<SAMSequenceRecord> list) {
setSequences(list);
}

/**
* Get a list of sequences for this dictionary.
* @return an unmodifiable view of the list of sequences for this dictionary, in internal order (the
* order in which the sequences were added to this dictionary)
*/
//TODO: this returns sequences in the internal list order instead of
// honoring each sequence's contigIndex
public List<SAMSequenceRecord> getSequences() {
return Collections.unmodifiableList(mSequences);
}
Expand All @@ -75,6 +82,14 @@ public void setSequences(final List<SAMSequenceRecord> list) {
list.forEach(this::addSequence);
}

/**
* Add a sequence to the dictionary.
* @param sequenceRecord the sequence record to add - note that this method mutates the contig
* index of the sequenceRecord to match the newly added record's relative
* order in the list
*/
//TODO: this method ignores (and actually mutates) the sequenceRecord's contig index to make it match
// the record's relative placement in the dictionary's internal list
public void addSequence(final SAMSequenceRecord sequenceRecord) {
if (mSequenceMap.containsKey(sequenceRecord.getSequenceName())) {
throw new IllegalArgumentException("Cannot add sequence that already exists in SAMSequenceDictionary: " +
Expand Down
Loading