diff --git a/src/main/java/htsjdk/beta/codecs/reads/cram/cramV3_1/CRAMEncoderV3_1.java b/src/main/java/htsjdk/beta/codecs/reads/cram/cramV3_1/CRAMEncoderV3_1.java
index fdcc53a8cc..4ae82e91e1 100644
--- a/src/main/java/htsjdk/beta/codecs/reads/cram/cramV3_1/CRAMEncoderV3_1.java
+++ b/src/main/java/htsjdk/beta/codecs/reads/cram/cramV3_1/CRAMEncoderV3_1.java
@@ -22,7 +22,6 @@ public class CRAMEncoderV3_1 extends CRAMEncoder {
*/
public CRAMEncoderV3_1(final Bundle outputBundle, final ReadsEncoderOptions readsEncoderOptions) {
super(outputBundle, readsEncoderOptions);
- throw new CRAMException("CRAM v3.1 encoding is not yet supported");
}
@Override
diff --git a/src/main/java/htsjdk/beta/plugin/registry/ReadsResolver.java b/src/main/java/htsjdk/beta/plugin/registry/ReadsResolver.java
index 4e67a4dffd..b89ffc4a10 100644
--- a/src/main/java/htsjdk/beta/plugin/registry/ReadsResolver.java
+++ b/src/main/java/htsjdk/beta/plugin/registry/ReadsResolver.java
@@ -1,6 +1,5 @@
package htsjdk.beta.plugin.registry;
-import htsjdk.beta.codecs.reads.cram.cramV3_1.CRAMCodecV3_1;
import htsjdk.beta.exception.HtsjdkException;
import htsjdk.beta.exception.HtsjdkPluginException;
import htsjdk.beta.plugin.HtsVersion;
@@ -12,13 +11,9 @@
import htsjdk.beta.plugin.reads.ReadsDecoderOptions;
import htsjdk.beta.plugin.reads.ReadsEncoder;
import htsjdk.beta.plugin.reads.ReadsEncoderOptions;
-import htsjdk.beta.plugin.reads.ReadsFormats;
import htsjdk.io.IOPath;
import htsjdk.utils.ValidationUtils;
-import java.util.List;
-import java.util.stream.Collectors;
-
/**
* Class with methods for resolving inputs and outputs to reads encoders and decoders.
*
@@ -209,30 +204,4 @@ public ReadsEncoder getReadsEncoder(
.getEncoder(outputBundle, readsEncoderOptions);
}
- /**
- * Temporarily override to remove the CRAM 3.1 codec from the list of candidate codecs when the request is for
- * the newest version, since it has no write implementation yet.
- */
- @Override
- protected List filterByVersion(final List candidateCodecs, final HtsVersion htsVersion) {
- final List preFilteredCodecs;
- if (htsVersion.equals(HtsVersion.NEWEST_VERSION)) {
- // if the request is for the newest version, then pre-filter out the CRAM 3.1 codec since it has no
- // write implementation yet, and then delegate to the superclass to let it find the newest version among
- // the remaining codecs
- preFilteredCodecs = candidateCodecs.stream().filter(
- c -> !(c.getFileFormat().equals(ReadsFormats.CRAM)
- && c.getVersion().equals(CRAMCodecV3_1.VERSION_3_1)))
- .collect(Collectors.toList());
- final HtsVersion newestVersion = preFilteredCodecs.stream()
- .map(c -> c.getVersion())
- .reduce(candidateCodecs.get(0).getVersion(),
- (HtsVersion a, HtsVersion b) -> a.compareTo(b) > 0 ? a : b);
- return candidateCodecs.stream().filter(
- c -> c.getVersion().equals(newestVersion)).collect(Collectors.toList());
- } else {
- preFilteredCodecs = candidateCodecs;
- }
- return super.filterByVersion(preFilteredCodecs, htsVersion);
- }
}
diff --git a/src/main/java/htsjdk/samtools/cram/common/CramVersions.java b/src/main/java/htsjdk/samtools/cram/common/CramVersions.java
index 852a135563..be0c86f909 100644
--- a/src/main/java/htsjdk/samtools/cram/common/CramVersions.java
+++ b/src/main/java/htsjdk/samtools/cram/common/CramVersions.java
@@ -17,7 +17,7 @@ public final class CramVersions {
/**
* The default CRAM version when creating a new CRAM output file or stream.
*/
- public static final CRAMVersion DEFAULT_CRAM_VERSION = CRAM_v3;
+ public static final CRAMVersion DEFAULT_CRAM_VERSION = CRAM_v3_1;
/**
* Return true if {@code candidateVersion} is a supported CRAM version.
diff --git a/src/main/java/htsjdk/samtools/cram/compression/CompressionUtils.java b/src/main/java/htsjdk/samtools/cram/compression/CompressionUtils.java
index c8a5bf04d9..e96dfdc08f 100644
--- a/src/main/java/htsjdk/samtools/cram/compression/CompressionUtils.java
+++ b/src/main/java/htsjdk/samtools/cram/compression/CompressionUtils.java
@@ -156,7 +156,7 @@ public static ByteBuffer allocateOutputBuffer(final int inSize) {
final int compressedSize = (int) (inSize + 257 * 257 * 3 + 9);
final ByteBuffer outputBuffer = allocateByteBuffer(compressedSize);
if (outputBuffer.remaining() < compressedSize) {
- throw new CRAMException("Failed to allocate sufficient buffer size for RANS coder.");
+ throw new CRAMException("Failed to allocate sufficient buffer size for CRAM codec.");
}
return outputBuffer;
}
diff --git a/src/main/java/htsjdk/samtools/cram/compression/nametokenisation/NameTokenisationEncode.java b/src/main/java/htsjdk/samtools/cram/compression/nametokenisation/NameTokenisationEncode.java
index d17220d1fc..3322ae6407 100644
--- a/src/main/java/htsjdk/samtools/cram/compression/nametokenisation/NameTokenisationEncode.java
+++ b/src/main/java/htsjdk/samtools/cram/compression/nametokenisation/NameTokenisationEncode.java
@@ -342,15 +342,20 @@ private static ByteBuffer tryCompress(final ByteBuffer nameTokenStream, final bo
ByteBuffer compressedByteBuffer = null;
if (useArith == true) { // use the range encoder
+ // this code path is never executed by the default write profile, since we don't turn on the
+ // range coder, but it is used by the test suite
final int[] rangeEncoderFlagsSets = {
- 0,
- RangeParams.ORDER_FLAG_MASK,
+ // based on a few observations (using ransNx16, not range), RLE, PACK, and ORDER/PACK seem to
+ // yield the best results; this to be validated for the range encoder but for now use the same
+ // flags as for ransNx16
+ 0, // no flags, just use arith
RangeParams.RLE_FLAG_MASK, //64
- RangeParams.RLE_FLAG_MASK | RangeParams.ORDER_FLAG_MASK, //65
RangeParams.PACK_FLAG_MASK, //128,
RangeParams.PACK_FLAG_MASK | RangeParams.ORDER_FLAG_MASK, //129
+ //RangeParams.RLE_FLAG_MASK | RangeParams.ORDER_FLAG_MASK, //65
+ //RangeParams.ORDER_FLAG_MASK,
// we don't include stripe here since it's not implemented for write
- RangeParams.PACK_FLAG_MASK | RangeParams.RLE_FLAG_MASK | RangeParams.ORDER_FLAG_MASK // 193+8
+ //RangeParams.PACK_FLAG_MASK | RangeParams.RLE_FLAG_MASK | RangeParams.ORDER_FLAG_MASK // 193+8
};
for (int rangeEncoderFlagSet : rangeEncoderFlagsSets) {
if ((rangeEncoderFlagSet & RangeParams.ORDER_FLAG_MASK) != 0 && nameTokenStream.remaining() < 100) {
@@ -368,16 +373,24 @@ private static ByteBuffer tryCompress(final ByteBuffer nameTokenStream, final bo
compressedByteBuffer = tmpByteBuffer;
}
}
+ if (bestCompressedLength > nameTokenStream.limit()) {
+ // compression doesn't buy us anything; just use CAT
+ final RangeEncode rangeEncode = new RangeEncode();
+ nameTokenStream.rewind();
+ compressedByteBuffer = rangeEncode.compress(nameTokenStream, new RangeParams(RANSNx16Params.CAT_FLAG_MASK));
+ }
} else {
final int[] ransNx16FlagsSets = {
- 0,
- RANSNx16Params.ORDER_FLAG_MASK,
+ 0, // no flags, just use RANSNx16
RANSNx16Params.RLE_FLAG_MASK, //64
- RANSNx16Params.RLE_FLAG_MASK | RANSNx16Params.ORDER_FLAG_MASK, //65
RANSNx16Params.PACK_FLAG_MASK, //128,
RANSNx16Params.PACK_FLAG_MASK | RANSNx16Params.ORDER_FLAG_MASK, //129
+ // based on a few observations using ransNx16, RLE, PACK, and ORDER/PACK seem to yield the
+ // best results; this needs more validation but for now don't try the remaining combinations
+ //RANSNx16Params.ORDER_FLAG_MASK,
+ //RANSNx16Params.RLE_FLAG_MASK | RANSNx16Params.ORDER_FLAG_MASK, //65
// we don't include stripe here since it's not implemented for write
- RANSNx16Params.PACK_FLAG_MASK | RANSNx16Params.RLE_FLAG_MASK | RANSNx16Params.ORDER_FLAG_MASK // 193+8
+ //RANSNx16Params.PACK_FLAG_MASK | RANSNx16Params.RLE_FLAG_MASK | RANSNx16Params.ORDER_FLAG_MASK // 193+8
};
for (int ransNx16FlagSet : ransNx16FlagsSets) {
if ((ransNx16FlagSet & RANSNx16Params.ORDER_FLAG_MASK) != 0 && nameTokenStream.remaining() < 100) {
@@ -395,6 +408,12 @@ private static ByteBuffer tryCompress(final ByteBuffer nameTokenStream, final bo
compressedByteBuffer = tmpByteBuffer;
}
}
+ if (bestCompressedLength > nameTokenStream.limit()) {
+ // compression doesn't buy us anything; just use CAT
+ final RANSNx16Encode ransEncode = new RANSNx16Encode();
+ nameTokenStream.rewind();
+ compressedByteBuffer = ransEncode.compress(nameTokenStream, new RANSNx16Params(RANSNx16Params.CAT_FLAG_MASK));
+ }
}
return compressedByteBuffer;
}
diff --git a/src/main/java/htsjdk/samtools/cram/structure/CompressionHeaderEncodingMap.java b/src/main/java/htsjdk/samtools/cram/structure/CompressionHeaderEncodingMap.java
index 4394b49eab..a7be0d938f 100644
--- a/src/main/java/htsjdk/samtools/cram/structure/CompressionHeaderEncodingMap.java
+++ b/src/main/java/htsjdk/samtools/cram/structure/CompressionHeaderEncodingMap.java
@@ -26,7 +26,8 @@
import htsjdk.samtools.cram.CRAMException;
import htsjdk.samtools.cram.compression.ExternalCompressor;
-import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Params;
+import htsjdk.samtools.cram.compression.nametokenisation.NameTokenisationDecode;
+import htsjdk.samtools.cram.compression.rans.ransnx16.RANSNx16Params;
import htsjdk.samtools.cram.encoding.CRAMEncoding;
import htsjdk.samtools.cram.encoding.external.ByteArrayStopEncoding;
import htsjdk.samtools.cram.encoding.external.ExternalByteEncoding;
@@ -140,7 +141,7 @@ public CompressionHeaderEncodingMap(final CRAMEncodingStrategy encodingStrategy)
putExternalRansOrderOneEncoding(DataSeries.RG_ReadGroup);
putExternalRansOrderZeroEncoding(DataSeries.RI_RefId);
putExternalRansOrderOneEncoding(DataSeries.RL_ReadLength);
- putExternalByteArrayStopTabGzipEncoding(encodingStrategy, DataSeries.RN_ReadName);
+ putByteArrayStopNameTokEncoding(encodingStrategy, DataSeries.RN_ReadName);
putExternalGzipEncoding(encodingStrategy, DataSeries.RS_RefSkip);
putExternalByteArrayStopTabGzipEncoding(encodingStrategy, DataSeries.SC_SoftClip);
// the TC data series is obsolete
@@ -287,13 +288,13 @@ public ExternalCompressor getBestExternalCompressor(final byte[] data, final CRA
final int gzipLen = gzip.compress(data, null).length;
final ExternalCompressor rans0 = compressorCache.getCompressorForMethod(
- BlockCompressionMethod.RANS,
- RANS4x8Params.ORDER.ZERO.ordinal());
+ BlockCompressionMethod.RANSNx16,
+ RANSNx16Params.ORDER.ZERO.ordinal());
final int rans0Len = rans0.compress(data,null).length;
final ExternalCompressor rans1 = compressorCache.getCompressorForMethod(
- BlockCompressionMethod.RANS,
- RANS4x8Params.ORDER.ONE.ordinal());
+ BlockCompressionMethod.RANSNx16,
+ RANSNx16Params.ORDER.ONE.ordinal());
final int rans1Len = rans1.compress(data, null).length;
// find the best of general purpose codecs:
@@ -386,6 +387,15 @@ private void putExternalByteArrayStopTabGzipEncoding(final CRAMEncodingStrategy
compressorCache.getCompressorForMethod(BlockCompressionMethod.GZIP, encodingStrategy.getGZIPCompressionLevel()));
}
+ private void putByteArrayStopNameTokEncoding(final CRAMEncodingStrategy encodingStrategy, final DataSeries dataSeries) {
+ // ByteArrayStopEncoding is paired with name tokenisation since using it with the
+ // NameTokenisationDecode.NAME_SEPARATOR conveniently writes the read name data in the NAME_SEPARATOR
+ // delimited/terminated format that is expected by the downstream tokenisation compressor code
+ putExternalEncoding(dataSeries,
+ new ByteArrayStopEncoding(NameTokenisationDecode.NAME_SEPARATOR, dataSeries.getExternalBlockContentId()).toEncodingDescriptor(),
+ compressorCache.getCompressorForMethod(BlockCompressionMethod.NAME_TOKENISER, 0));
+ }
+
// add an external encoding appropriate for the dataSeries value type, with a GZIP compressor
private void putExternalGzipEncoding(final CRAMEncodingStrategy encodingStrategy, final DataSeries dataSeries) {
putExternalEncoding(
@@ -397,14 +407,14 @@ private void putExternalGzipEncoding(final CRAMEncodingStrategy encodingStrategy
private void putExternalRansOrderOneEncoding(final DataSeries dataSeries) {
putExternalEncoding(
dataSeries,
- compressorCache.getCompressorForMethod(BlockCompressionMethod.RANS, RANS4x8Params.ORDER.ONE.ordinal()));
+ compressorCache.getCompressorForMethod(BlockCompressionMethod.RANSNx16, RANSNx16Params.ORDER.ONE.ordinal()));
}
// add an external encoding appropriate for the dataSeries value type, with a RANS order 0 compressor
private void putExternalRansOrderZeroEncoding(final DataSeries dataSeries) {
putExternalEncoding(
dataSeries,
- compressorCache.getCompressorForMethod(BlockCompressionMethod.RANS, RANS4x8Params.ORDER.ZERO.ordinal()));
+ compressorCache.getCompressorForMethod(BlockCompressionMethod.RANSNx16, RANSNx16Params.ORDER.ZERO.ordinal()));
}
@Override
diff --git a/src/main/java/htsjdk/samtools/cram/structure/CompressorCache.java b/src/main/java/htsjdk/samtools/cram/structure/CompressorCache.java
index a7e28511d8..a0087452c9 100644
--- a/src/main/java/htsjdk/samtools/cram/structure/CompressorCache.java
+++ b/src/main/java/htsjdk/samtools/cram/structure/CompressorCache.java
@@ -79,8 +79,8 @@ public ExternalCompressor getCompressorForMethod(
return getCachedCompressorForMethod(compressionMethod, compressorSpecificArg);
case RANS: {
- // for efficiency, we want to share the same underlying RANS object with both order-0 and
- // order-1 ExternalCompressors
+ // in previous implementations, we would cache separate order-0 and order-1 compressors for performance
+ // reasons; we no longer NEED to do so but retain this structure for now
final int ransArg = compressorSpecificArg == ExternalCompressor.NO_COMPRESSION_ARG ?
RANS4x8Params.ORDER.ZERO.ordinal() :
compressorSpecificArg;
@@ -103,8 +103,8 @@ public ExternalCompressor getCompressorForMethod(
}
case RANSNx16: {
- // for efficiency, we want to share the same underlying RANSNx16 object with both order-0 and
- // order-1 ExternalCompressors
+ // in previous implementations, we would cache separate order-0 and order-1 compressors for performance
+ // reasons; we no longer NEED to do so but retain this structure for now
final int ransArg = compressorSpecificArg == ExternalCompressor.NO_COMPRESSION_ARG ?
RANSNx16Params.ORDER.ZERO.ordinal() :
compressorSpecificArg;
diff --git a/src/test/java/htsjdk/beta/codecs/reads/cram/HtsCRAMCodec31Test.java b/src/test/java/htsjdk/beta/codecs/reads/cram/HtsCRAMCodec31Test.java
index a5ddeeffd9..e1ff709b5c 100644
--- a/src/test/java/htsjdk/beta/codecs/reads/cram/HtsCRAMCodec31Test.java
+++ b/src/test/java/htsjdk/beta/codecs/reads/cram/HtsCRAMCodec31Test.java
@@ -2,18 +2,28 @@
import htsjdk.HtsjdkTest;
import htsjdk.beta.codecs.reads.cram.cramV3_1.CRAMCodecV3_1;
+import htsjdk.beta.plugin.IOUtils;
import htsjdk.beta.plugin.reads.ReadsDecoderOptions;
+import htsjdk.beta.plugin.reads.ReadsEncoderOptions;
import htsjdk.beta.plugin.reads.ReadsFormats;
import htsjdk.beta.plugin.registry.HtsDefaultRegistry;
import htsjdk.io.HtsPath;
import htsjdk.io.IOPath;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.cram.CRAM31Tests;
+import htsjdk.samtools.cram.common.CramVersions;
import htsjdk.samtools.util.CloseableIterator;
+import htsjdk.samtools.util.FileExtensions;
import htsjdk.utils.SamtoolsTestUtils;
import org.testng.Assert;
import org.testng.annotations.Test;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
public class HtsCRAMCodec31Test extends HtsjdkTest {
final IOPath TEST_DIR = new HtsPath("src/test/resources/htsjdk/samtools/");
@@ -49,4 +59,59 @@ public void testCRAMDecoder() {
}
}
}
+
+ @Test
+ public void testRoundTripCRAM31() throws IOException {
+ final IOPath sourceCRAMPath = new HtsPath(TEST_DIR + "cram/CEUTrio.HiSeq.WGS.b37.NA12878.20.21.v3.0.samtools.cram");
+ final IOPath referencePath = new HtsPath(TEST_DIR + "reference/human_g1k_v37.20.21.fasta.gz");
+ final IOPath tempCRAM31Path = IOUtils.createTempPath("htsCRAMCodecTemporary", FileExtensions.CRAM);
+
+ final ReadsDecoderOptions readsDecoderOptions =
+ new ReadsDecoderOptions().setCRAMDecoderOptions(
+ new CRAMDecoderOptions().setReferencePath(referencePath));
+ final ReadsEncoderOptions readsEncoderOptions =
+ new ReadsEncoderOptions()
+ .setPreSorted(true)
+ .setCRAMEncoderOptions(new CRAMEncoderOptions().setReferencePath(referencePath));
+
+ try (final CRAMDecoder cramDecoder = (CRAMDecoder)
+ HtsDefaultRegistry.getReadsResolver().getReadsDecoder(sourceCRAMPath, readsDecoderOptions);
+ final CRAMEncoder cram31Encoder = (CRAMEncoder)
+ HtsDefaultRegistry.getReadsResolver().getReadsEncoder(tempCRAM31Path, readsEncoderOptions)) {
+
+ Assert.assertNotNull(cramDecoder);
+ Assert.assertEquals(cramDecoder.getFileFormat(), ReadsFormats.CRAM);
+ Assert.assertTrue(cramDecoder.getDisplayName().contains(sourceCRAMPath.toString()));
+
+ Assert.assertNotNull(cram31Encoder);
+ Assert.assertEquals(cram31Encoder.getFileFormat(), ReadsFormats.CRAM);
+ Assert.assertTrue(cram31Encoder.getDisplayName().contains(tempCRAM31Path.toString()));
+
+ final SAMFileHeader samFileHeader = cramDecoder.getHeader();
+ cram31Encoder.setHeader(samFileHeader);
+ for (final SAMRecord samRec : cramDecoder) {
+ cram31Encoder.write(samRec);
+ }
+ }
+
+ // make sure we got a CRAM 3.1 file
+ Assert.assertEquals(CRAM31Tests.getCRAMVersion(tempCRAM31Path), CramVersions.CRAM_v3_1);
+
+ final List recs30 = new ArrayList<>();
+ final List recs31 = new ArrayList<>();
+
+ try (final CRAMDecoder cram30Decoder = (CRAMDecoder)
+ HtsDefaultRegistry.getReadsResolver().getReadsDecoder(sourceCRAMPath, readsDecoderOptions);
+ final CRAMDecoder cram31Decoder = (CRAMDecoder)
+ HtsDefaultRegistry.getReadsResolver().getReadsDecoder(tempCRAM31Path, readsDecoderOptions)) {
+ final Iterator it31 = cram31Decoder.iterator();
+ for (final SAMRecord sam30Rec : cram30Decoder) {
+ final SAMRecord sam31Rec = it31.next();
+ recs30.add(sam30Rec);
+ recs31.add(sam31Rec);
+ Assert.assertEquals(sam30Rec, sam31Rec);
+ }
+ }
+ }
+
}
diff --git a/src/test/java/htsjdk/samtools/cram/CRAM31Tests.java b/src/test/java/htsjdk/samtools/cram/CRAM31Tests.java
index b193e41a4b..5275bb758c 100644
--- a/src/test/java/htsjdk/samtools/cram/CRAM31Tests.java
+++ b/src/test/java/htsjdk/samtools/cram/CRAM31Tests.java
@@ -24,7 +24,7 @@
public class CRAM31Tests extends HtsjdkTest {
private static final String TEST_DATA_DIR = "src/test/resources/htsjdk/samtools/cram/";
- // use samtools to convert to CRAM3.1 using samtools profiles fast, normanl, small, and archive, then
+ // use samtools to convert to CRAM3.1 using samtools profiles fast, normal, small, and archive, then
// read with htsjdk and compare the results with the original
@DataProvider(name="cram31FidelityTests")
@@ -98,24 +98,42 @@ public Object[][] cram31FidelityTests() {
};
}
+ //because conversions are expensive, this tests both htsjdk cram 31.1 reading and writing together
@Test(dataProvider = "cram31FidelityTests")
- public void testCRAM31RoundTrip(
+ public void testCRAM31SamtoolsFidelity(
final IOPath testInput,
final IOPath testReference,
- final String samtoolsCommandLineArgs) {
+ final String samtoolsCommandLineArgs) throws IOException {
// for testing CRAM 3.1, we use CRAM 3.0 as our ground-truth for comparison, so check to make
// sure that our input test file is CRAM 3.0 and not 3.1
Assert.assertEquals(getCRAMVersion(testInput), CramVersions.CRAM_v3);
// use samtools to convert the input to CRAM 3.1, then compare the result of that with the original
- final IOPath cram31Path = IOUtils.createTempPath("cram31Test", "cram");
+ // test htsjdk cram 3.1 reading by using samtools to convert the input to CRAM 3.1, and then consuming it
+ // with htsjdk and comparing the results to the original
+ final IOPath cramSamtools31Path = IOUtils.createTempPath("cram31SamtoolsWriteTest", ".cram");
SamtoolsTestUtils.convertToCRAM(
testInput,
- cram31Path,
+ cramSamtools31Path,
testReference,
samtoolsCommandLineArgs);
- doCRAMCompare(testInput.toPath(), cram31Path.toPath(), testReference.toPath());
+ Assert.assertEquals(getCRAMVersion(cramSamtools31Path), CramVersions.CRAM_v3_1);
+
+ // compare the original CRAM 3.0 test input with the samtools-generated 3.1 output
+ doCRAMCompare(testInput.toPath(), cramSamtools31Path.toPath(), testReference.toPath());
+
+ // now test htsjdk CRAM 3.1 writing by using htsjdk to write CRAM 3.1 and use samtools to consume it and write
+ // it back to another CRAM (3.1), and then read that result back in and compare it to the original
+ final IOPath cramHtsjdk31Path = IOUtils.createTempPath("cram31HtsjdkWriteTest", ".cram");
+ doHTSJDKWriteCRAM(testInput, cramHtsjdk31Path, testReference);
+ Assert.assertEquals(getCRAMVersion(cramHtsjdk31Path), CramVersions.CRAM_v3_1);
+
+ // compare the original test input with the htsjdk-generated 3.1 output
+ doCRAMCompare(testInput.toPath(), cramHtsjdk31Path.toPath(), testReference.toPath());
+
+ // for completeness, compare the htsjdk-generated cram 3.1 output with the samtools-generated 3.1 output
+ doCRAMCompare(cramHtsjdk31Path.toPath(), cramSamtools31Path.toPath(), testReference.toPath());
}
public static void doCRAMCompare(
@@ -148,7 +166,21 @@ public static void doCRAMCompare(
Assert.assertEquals(diffCount, 0);
}
- private static CRAMVersion getCRAMVersion(final IOPath cramPath) {
+ public static void doHTSJDKWriteCRAM(final IOPath inputPath, final IOPath outputPath, final IOPath referencePath) throws IOException{
+ final SamReaderFactory samReaderFactory =
+ SamReaderFactory.makeDefault()
+ .referenceSequence(referencePath.toPath())
+ .validationStringency(ValidationStringency.LENIENT);
+ try (final SamReader reader = samReaderFactory.open(inputPath.toPath());
+ final SAMFileWriter writer = new SAMFileWriterFactory()
+ .makeWriter(reader.getFileHeader().clone(), true, outputPath.toPath(), referencePath.toPath())) {
+ for (final SAMRecord rec : reader) {
+ writer.addAlignment(rec);
+ }
+ }
+ }
+
+ public static CRAMVersion getCRAMVersion(final IOPath cramPath) {
try (final InputStream fis = Files.newInputStream(cramPath.toPath())) {
final CramHeader cramHeader = CramIO.readCramHeader(fis);
return cramHeader.getCRAMVersion();
diff --git a/src/test/java/htsjdk/samtools/cram/CRAMVersionTest.java b/src/test/java/htsjdk/samtools/cram/CRAMVersionTest.java
index e785ce4e6e..3c74b7cc10 100644
--- a/src/test/java/htsjdk/samtools/cram/CRAMVersionTest.java
+++ b/src/test/java/htsjdk/samtools/cram/CRAMVersionTest.java
@@ -37,11 +37,11 @@ public class CRAMVersionTest extends HtsjdkTest {
* @throws IOException
*/
@Test
- public void test_V3() throws IOException {
+ public void test_V3_1() throws IOException {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
ReferenceSource source = new ReferenceSource((File) null);
SAMFileHeader samFileHeader = new SAMFileHeader();
- CRAMVersion cramVersion = CramVersions.CRAM_v3;
+ CRAMVersion cramVersion = CramVersions.CRAM_v3_1;
CRAMFileWriter w = new CRAMFileWriter(baos, source, samFileHeader, null);
SAMRecord record = new SAMRecord(samFileHeader);
record.setReadName("name");
diff --git a/src/test/java/htsjdk/samtools/cram/structure/SliceBlockWriteStreamTest.java b/src/test/java/htsjdk/samtools/cram/structure/SliceBlockWriteStreamTest.java
index 2c0cd213cc..69dfd6b124 100644
--- a/src/test/java/htsjdk/samtools/cram/structure/SliceBlockWriteStreamTest.java
+++ b/src/test/java/htsjdk/samtools/cram/structure/SliceBlockWriteStreamTest.java
@@ -1,6 +1,7 @@
package htsjdk.samtools.cram.structure;
import htsjdk.HtsjdkTest;
+import htsjdk.samtools.cram.compression.nametokenisation.NameTokenisationTest;
import htsjdk.samtools.cram.io.BitOutputStream;
import org.testng.Assert;
import org.testng.annotations.Test;
@@ -32,7 +33,12 @@ public void testSliceBlocksWriteStreamsRoundTrip() throws IOException {
// and write the name of each data series to the corresponding external stream
for (final DataSeries dataSeries : DataSeries.values()) {
if (!StructureTestUtils.DATASERIES_NOT_WRITTEN_BY_HTSJDK.contains(dataSeries)) {
- final String uncompressedContent = dataSeries.getCanonicalName();
+ // artificially prepare read name data since the name tokenizer requires structured data and cannot
+ // handle the raw,unstructured data which this test uses
+ final String uncompressedContent =
+ dataSeries == DataSeries.RN_ReadName ?
+ dataSeries.getCanonicalName() + NameTokenisationTest.LOCAL_NAME_SEPARATOR_CHARSEQUENCE :
+ dataSeries.getCanonicalName();
expectedExternalContent.put(dataSeries.getExternalBlockContentId(), uncompressedContent);
sliceBlocksWriteStreams
.getExternalOutputStream(dataSeries.getExternalBlockContentId())