diff --git a/src/main/java/htsjdk/samtools/util/SequenceUtil.java b/src/main/java/htsjdk/samtools/util/SequenceUtil.java index 18e524ae35..65f56eeed8 100644 --- a/src/main/java/htsjdk/samtools/util/SequenceUtil.java +++ b/src/main/java/htsjdk/samtools/util/SequenceUtil.java @@ -110,6 +110,14 @@ public class SequenceUtil { bases['.'] = A_MASK | C_MASK | G_MASK | T_MASK; }; + /** + * Modes for comparing a read base with a reference base: {@code MatchExact} compares the bases for equality, {@code MatchAmbiguity} also accepts a read base that is a subset of the possible IUPAC reference bases, and {@code NMTagMode} applies the SAM NM tag spec, which treats matching ambiguity codes (such as N vs. N) as mismatches. Public because it is a parameter type of a public countMismatches overload. + */ + public enum BaseComparisonMode { + MatchExact, + MatchAmbiguity, + NMTagMode + } /** * Calculate the reverse complement of the specified sequence @@ -435,20 +443,43 @@ public static String makeSoftClipCigar(final int clipLength) { * @param refBase the reference base to match * @param negativeStrand set to true if the base to test is on the negative strand and should be reverse complemented (only applies if bisulfiteSequence is true) * @param bisulfiteSequence set to true if the base to match is a bisulfite sequence and needs to be converted - * @param matchAmbiguousRef causes the match to return true when the read base is a subset of the possible IUPAC reference bases, but not the other way around + * @param comparisonMode determines how matches should be counted, e.g. 
if ambiguity codes should count as matches, or if the SAM NM tag spec should be applied * @return true if the bases match, false otherwise */ private static boolean basesMatch(final byte readBase, final byte refBase, final boolean negativeStrand, - final boolean bisulfiteSequence, final boolean matchAmbiguousRef) { - if (bisulfiteSequence) { - if (matchAmbiguousRef) return bisulfiteBasesMatchWithAmbiguity(negativeStrand, readBase, refBase); - else return bisulfiteBasesEqual(negativeStrand, readBase, refBase); - } else { - if (matchAmbiguousRef) return readBaseMatchesRefBaseWithAmbiguity(readBase, refBase); - else return basesEqual(readBase, refBase); + final boolean bisulfiteSequence, final BaseComparisonMode comparisonMode) { + switch (comparisonMode) { + case MatchExact: + if (bisulfiteSequence) { + return bisulfiteBasesEqual(negativeStrand, readBase, refBase); + } else { + return basesEqual(readBase, refBase); + } + case MatchAmbiguity: + if (bisulfiteSequence) { + return bisulfiteBasesMatchWithAmbiguity(negativeStrand, readBase, refBase); + } else { + return readBaseMatchesRefBaseWithAmbiguity(readBase, refBase); + } + case NMTagMode: + // Under the NM rules only a read '=' or two valid bases can match; bisulfite reads are still compared with bisulfiteBasesEqual, as calculateSamNmTag did before this mode was introduced. + return bisulfiteSequence ? (readBase == '=' || (isValidBase(readBase) && isValidBase(refBase) && bisulfiteBasesEqual(negativeStrand, readBase, refBase))) : readBaseMatchesRefBaseForNM(readBase, refBase); + default: + throw new IllegalStateException("Invalid BaseComparisonMode: Not implemented."); } } + /** + * Determine if the bases match according to the SAM NM tag spec, which defines matching ambiguity codes (such as N - N) as mismatches. + * + * @param readBase the read base to match + * @param refBase the reference base to match + * @return true if the read base is '=' or if both bases are from [ACGTacgt] and equal, false otherwise + */ + public static boolean readBaseMatchesRefBaseForNM(final byte readBase, final byte refBase) { + return readBase == '=' || (isValidBase(readBase) && isValidBase(refBase) && basesEqual(readBase, refBase)); + } + /** Calculates the number of mismatches between the read and the reference sequence provided. 
*/ public static int countMismatches(final SAMRecord read, final byte[] referenceBases) { return countMismatches(read, referenceBases, 0, false); @@ -460,7 +491,7 @@ public static int countMismatches(final SAMRecord read, final byte[] referenceBa } /** - * Calculates the number of mismatches between the read and the reference sequence provided. + * Calculates the number of mismatches between the read and the reference sequence provided. Bases have to match exactly and no ambiguity codes will be considered. * * @param referenceBases Array of ASCII bytes that covers at least the the portion of the reference sequence * to which read is aligned from getReferenceStart to getReferenceEnd. @@ -471,11 +502,22 @@ public static int countMismatches(final SAMRecord read, final byte[] referenceBa * as mismatches. */ public static int countMismatches(final SAMRecord read, final byte[] referenceBases, final int referenceOffset, final boolean bisulfiteSequence) { - return countMismatches(read, referenceBases, referenceOffset, bisulfiteSequence, false); + return countMismatches(read, referenceBases, referenceOffset, bisulfiteSequence, BaseComparisonMode.MatchExact); } + + /** + * Calculates the number of mismatches between the read and the reference sequence provided. Kept so that existing callers of the boolean overload continue to compile; delegates to the BaseComparisonMode overload. + * + * @param matchAmbiguousRef if true, a read base that is a subset of the possible IUPAC reference bases counts as a match (MatchAmbiguity); if false, bases are compared exactly (MatchExact) + */ + public static int countMismatches(final SAMRecord read, final byte[] referenceBases, final int referenceOffset, + final boolean bisulfiteSequence, final boolean matchAmbiguousRef) { + return countMismatches(read, referenceBases, referenceOffset, bisulfiteSequence, + matchAmbiguousRef ? BaseComparisonMode.MatchAmbiguity : BaseComparisonMode.MatchExact); + } public static int countMismatches(final SAMRecord read, final byte[] referenceBases, final int referenceOffset, - final boolean bisulfiteSequence, final boolean matchAmbiguousRef) { + final boolean bisulfiteSequence, final BaseComparisonMode baseComparisonMode) { try { int mismatches = 0; @@ -488,7 +530,7 @@ public static int countMismatches(final SAMRecord read, final byte[] referenceBa for (int i = 0; i < length; ++i) { if (!basesMatch(readBases[readBlockStart + i], referenceBases[referenceBlockStart + i], - read.getReadNegativeStrandFlag(), bisulfiteSequence, matchAmbiguousRef)) { + read.getReadNegativeStrandFlag(), bisulfiteSequence, baseComparisonMode)) { ++mismatches; } } @@ -640,7 +682,7 @@ public static int calculateSamNmTag(final SAMRecord read, final byte[] reference */ public static int 
calculateSamNmTag(final SAMRecord read, final byte[] referenceBases, final int referenceOffset, final boolean bisulfiteSequence) { - int samNm = countMismatches(read, referenceBases, referenceOffset, bisulfiteSequence, false); + int samNm = countMismatches(read, referenceBases, referenceOffset, bisulfiteSequence, BaseComparisonMode.NMTagMode); for (final CigarElement el : read.getCigar().getCigarElements()) { if (el.getOperator() == CigarOperator.INSERTION || el.getOperator() == CigarOperator.DELETION) { samNm += el.getLength(); @@ -971,7 +1013,6 @@ public static void calculateMdAndNmTags(final SAMRecord record, final byte[] ref final byte[] seq = record.getReadBases(); final int alignmentStart = record.getAlignmentStart() - 1; int cigarIndex, blockRefPos, blockReadStart, matchCount = 0; - int nmCount = 0; final StringBuilder mdString = new StringBuilder(); final int nElements = cigarElements.size(); @@ -997,7 +1038,6 @@ public static void calculateMdAndNmTags(final SAMRecord record, final byte[] ref mdString.append(matchCount); mdString.appendCodePoint(refBase); matchCount = 0; - ++nmCount; } } if (inBlockOffset < blockLength) break; @@ -1013,11 +1053,9 @@ public static void calculateMdAndNmTags(final SAMRecord record, final byte[] ref matchCount = 0; if (inBlockOffset < blockLength) break; blockRefPos += blockLength; - nmCount += blockLength; } else if (op == CigarOperator.INSERTION || op == CigarOperator.SOFT_CLIP) { blockReadStart += blockLength; - if (op == CigarOperator.INSERTION) nmCount += blockLength; } else if (op == CigarOperator.SKIPPED_REGION) { blockRefPos += blockLength; } @@ -1025,7 +1063,7 @@ public static void calculateMdAndNmTags(final SAMRecord record, final byte[] ref mdString.append(matchCount); if (calcMD) record.setAttribute(SAMTag.MD, mdString.toString()); - if (calcNM) record.setAttribute(SAMTag.NM, calculateSamNmTag(record, ref)); + if (calcNM) record.setAttribute(SAMTag.NM, calculateSamNmTag(record, ref)); } public static byte upperCase(final byte base) { diff 
--git a/src/test/java/htsjdk/samtools/util/SequenceUtilTest.java b/src/test/java/htsjdk/samtools/util/SequenceUtilTest.java index 9db60bab75..893eb92357 100644 --- a/src/test/java/htsjdk/samtools/util/SequenceUtilTest.java +++ b/src/test/java/htsjdk/samtools/util/SequenceUtilTest.java @@ -138,7 +138,7 @@ public Object[][] testMakeReferenceFromAlignmentDataProvider() { @Test(dataProvider = "mismatchCountsDataProvider") public void testCountMismatches(final String readString, final String cigar, final String reference, - final int expectedMismatchesExact, final int expectedMismatchesAmbiguous) { + final int expectedMismatchesExact, final int expectedMismatchesAmbiguous, final int expectedMismatchesNMTagMode) { final SAMRecord rec = new SAMRecord(null); rec.setReadName("test"); rec.setReadString(readString); @@ -151,33 +151,37 @@ public void testCountMismatches(final String readString, final String cigar, fin final byte[] refBases = StringUtil.stringToBytes(reference); - final int nExact = SequenceUtil.countMismatches(rec, refBases, -1, false, false); + final int nExact = SequenceUtil.countMismatches(rec, refBases, -1, false, SequenceUtil.BaseComparisonMode.MatchExact); Assert.assertEquals(nExact, expectedMismatchesExact); final int sumMismatchesQualityExact = SequenceUtil.sumQualitiesOfMismatches(rec, refBases, -1, false); Assert.assertEquals(sumMismatchesQualityExact, expectedMismatchesExact * 33); - final int nAmbiguous = SequenceUtil.countMismatches(rec, refBases, -1, false, true); + final int nAmbiguous = SequenceUtil.countMismatches(rec, refBases, -1, false, SequenceUtil.BaseComparisonMode.MatchAmbiguity); Assert.assertEquals(nAmbiguous, expectedMismatchesAmbiguous); + + final int nNMTagMode = SequenceUtil.countMismatches(rec, refBases, -1, false, SequenceUtil.BaseComparisonMode.NMTagMode); + Assert.assertEquals(nNMTagMode, expectedMismatchesNMTagMode); } @DataProvider(name = "mismatchCountsDataProvider") public Object[][] testMakeMismatchCountsDataProvider() { 
// note: R=A|G return new Object[][]{ - {"A", "1M", "A", 0, 0}, - {"A", "1M", "R", 1, 0}, - {"G", "1M", "R", 1, 0}, - {"C", "1M", "R", 1, 1}, - {"T", "1M", "R", 1, 1}, - {"N", "1M", "R", 1, 1}, - {"R", "1M", "A", 1, 1}, - {"R", "1M", "C", 1, 1}, - {"R", "1M", "G", 1, 1}, - {"R", "1M", "T", 1, 1}, - {"R", "1M", "N", 1, 0}, - {"R", "1M", "R", 0, 0}, - {"N", "1M", "N", 0, 0} + {"A", "1M", "A", 0, 0, 0}, + {"A", "1M", "R", 1, 0, 1}, + {"G", "1M", "R", 1, 0, 1}, + {"C", "1M", "R", 1, 1, 1}, + {"T", "1M", "R", 1, 1, 1}, + {"N", "1M", "R", 1, 1, 1}, + {"R", "1M", "A", 1, 1, 1}, + {"R", "1M", "C", 1, 1, 1}, + {"R", "1M", "G", 1, 1, 1}, + {"R", "1M", "T", 1, 1, 1}, + {"R", "1M", "N", 1, 0, 1}, + {"R", "1M", "R", 0, 0, 1}, + {"N", "1M", "N", 0, 0, 1}, + {"=", "1M", "A", 1, 0, 0} }; } @@ -224,7 +228,7 @@ public void testMismatchBisulfiteCounts(final String readString, final String ci final byte[] refBases = StringUtil.stringToBytes(reference); - final int nExact = SequenceUtil.countMismatches(rec, refBases, -1, true, false); + final int nExact = SequenceUtil.countMismatches(rec, refBases, -1, true, SequenceUtil.BaseComparisonMode.MatchExact); Assert.assertEquals(nExact, expectedMismatches); final int sumMismatchesQualityExact = SequenceUtil.sumQualitiesOfMismatches(rec, refBases, -1, true); @@ -296,7 +300,8 @@ public void testBisulfiteConversion(final char readBase, final char refBase, fin public void testBasesEqual(final char base1, final char base2, final boolean expectedB1EqualsB2, final boolean expectedB1ReadMatchesB2Ref, - final boolean expectedB2ReadMatchesB1Ref) { + final boolean expectedB2ReadMatchesB1Ref, + final boolean expectedB1MatchesB2InNMMode) { final char[] base1UcLc = new char[]{toUpperCase(base1), toLowerCase(base1)}; final char[] base2UcLc = new char[]{toUpperCase(base2), toLowerCase(base2)}; @@ -314,6 +319,12 @@ public void testBasesEqual(final char base1, final char base2, Assert.assertEquals(b1ReadMatchesB2Ref, expectedB1ReadMatchesB2Ref, 
"readBaseMatchesRefBaseWithAmbiguity test failed for '" + theBase1 + "' vs. '" + theBase2 + "'"); final boolean b2ReadMatchesB1Ref = SequenceUtil.readBaseMatchesRefBaseWithAmbiguity((byte) theBase2, (byte) theBase1); Assert.assertEquals(b2ReadMatchesB1Ref, expectedB2ReadMatchesB1Ref, "readBaseMatchesRefBaseWithAmbiguity test failed for '" + theBase1 + "' vs. '" + theBase2 + "'"); + + // for NM mode, the order should not matter + final boolean b1MatchesB2InNMMode = SequenceUtil.readBaseMatchesRefBaseForNM((byte) theBase1, (byte) theBase2); + Assert.assertEquals(b1MatchesB2InNMMode, expectedB1MatchesB2InNMMode, "readBaseMatchesRefBaseForNM test failed for '" + theBase1 + "' vs. '" + theBase2 + "'"); + final boolean b2MatchesB1InNMMode = SequenceUtil.readBaseMatchesRefBaseForNM((byte) theBase2, (byte) theBase1); + Assert.assertEquals(b2MatchesB1InNMMode, expectedB1MatchesB2InNMMode, "readBaseMatchesRefBaseForNM test failed for '" + theBase1 + "' vs. '" + theBase2 + "'"); } } } @@ -335,126 +346,126 @@ public void testBasesEqual(final char base1, final char base2, @DataProvider(name = "basesEqualDataProvider") public Object[][] testBasesEqualDataProvider() { return new Object[][]{ - {'A', 'A', true, true, true}, - {'A', 'C', false, false, false}, - {'A', 'G', false, false, false}, - {'A', 'T', false, false, false}, - {'A', 'M', false, true, false}, - {'A', 'R', false, true, false}, - {'A', 'W', false, true, false}, - {'A', 'S', false, false, false}, - {'A', 'Y', false, false, false}, - {'A', 'K', false, false, false}, - {'A', 'V', false, true, false}, - {'A', 'H', false, true, false}, - {'A', 'D', false, true, false}, - {'A', 'B', false, false, false}, - {'A', 'N', false, true, false}, - {'C', 'C', true, true, true}, - {'C', 'G', false, false, false}, - {'C', 'T', false, false, false}, - {'C', 'M', false, true, false}, - {'C', 'R', false, false, false}, - {'C', 'W', false, false, false}, - {'C', 'S', false, true, false}, - {'C', 'Y', false, true, false}, - {'C', 'K', 
false, false, false}, - {'C', 'V', false, true, false}, - {'C', 'H', false, true, false}, - {'C', 'D', false, false, false}, - {'C', 'N', false, true, false}, - {'G', 'G', true, true, true}, - {'G', 'T', false, false, false}, - {'G', 'M', false, false, false}, - {'G', 'R', false, true, false}, - {'G', 'W', false, false, false}, - {'G', 'S', false, true, false}, - {'G', 'Y', false, false, false}, - {'G', 'K', false, true, false}, - {'G', 'V', false, true, false}, - {'G', 'H', false, false, false}, - {'G', 'N', false, true, false}, - {'T', 'T', true, true, true}, - {'T', 'W', false, true, false}, - {'T', 'Y', false, true, false}, - {'T', 'V', false, false, false}, - {'M', 'T', false, false, false}, - {'M', 'M', true, true, true}, - {'M', 'R', false, false, false}, - {'M', 'W', false, false, false}, - {'M', 'S', false, false, false}, - {'M', 'Y', false, false, false}, - {'M', 'V', false, true, false}, - {'M', 'N', false, true, false}, - {'R', 'T', false, false, false}, - {'R', 'R', true, true, true}, - {'R', 'W', false, false, false}, - {'R', 'S', false, false, false}, - {'R', 'Y', false, false, false}, - {'R', 'V', false, true, false}, - {'W', 'W', true, true, true}, - {'W', 'Y', false, false, false}, - {'S', 'T', false, false, false}, - {'S', 'W', false, false, false}, - {'S', 'S', true, true, true}, - {'S', 'Y', false, false, false}, - {'S', 'V', false, true, false}, - {'Y', 'Y', true, true, true}, - {'K', 'T', false, false, true}, - {'K', 'M', false, false, false}, - {'K', 'R', false, false, false}, - {'K', 'W', false, false, false}, - {'K', 'S', false, false, false}, - {'K', 'Y', false, false, false}, - {'K', 'K', true, true, true}, - {'K', 'V', false, false, false}, - {'K', 'N', false, true, false}, - {'V', 'W', false, false, false}, - {'V', 'Y', false, false, false}, - {'V', 'V', true, true, true}, - {'H', 'T', false, false, true}, - {'H', 'M', false, false, true}, - {'H', 'R', false, false, false}, - {'H', 'W', false, false, true}, - {'H', 'S', false, false, 
false}, - {'H', 'Y', false, false, true}, - {'H', 'K', false, false, false}, - {'H', 'V', false, false, false}, - {'H', 'H', true, true, true}, - {'H', 'N', false, true, false}, - {'D', 'G', false, false, true}, - {'D', 'T', false, false, true}, - {'D', 'M', false, false, false}, - {'D', 'R', false, false, true}, - {'D', 'W', false, false, true}, - {'D', 'S', false, false, false}, - {'D', 'Y', false, false, false}, - {'D', 'K', false, false, true}, - {'D', 'V', false, false, false}, - {'D', 'H', false, false, false}, - {'D', 'D', true, true, true}, - {'D', 'N', false, true, false}, - {'B', 'C', false, false, true}, - {'B', 'G', false, false, true}, - {'B', 'T', false, false, true}, - {'B', 'M', false, false, false}, - {'B', 'R', false, false, false}, - {'B', 'W', false, false, false}, - {'B', 'S', false, false, true}, - {'B', 'Y', false, false, true}, - {'B', 'K', false, false, true}, - {'B', 'V', false, false, false}, - {'B', 'H', false, false, false}, - {'B', 'D', false, false, false}, - {'B', 'B', true, true, true}, - {'B', 'N', false, true, false}, - {'N', 'T', false, false, true}, - {'N', 'R', false, false, true}, - {'N', 'W', false, false, true}, - {'N', 'S', false, false, true}, - {'N', 'Y', false, false, true}, - {'N', 'V', false, false, true}, - {'N', 'N', true, true, true} + {'A', 'A', true, true, true, true}, + {'A', 'C', false, false, false, false}, + {'A', 'G', false, false, false, false}, + {'A', 'T', false, false, false, false}, + {'A', 'M', false, true, false, false}, + {'A', 'R', false, true, false, false}, + {'A', 'W', false, true, false, false}, + {'A', 'S', false, false, false, false}, + {'A', 'Y', false, false, false, false}, + {'A', 'K', false, false, false, false}, + {'A', 'V', false, true, false, false}, + {'A', 'H', false, true, false, false}, + {'A', 'D', false, true, false, false}, + {'A', 'B', false, false, false, false}, + {'A', 'N', false, true, false, false}, + {'C', 'C', true, true, true, true}, + {'C', 'G', false, false, false, 
false}, + {'C', 'T', false, false, false, false}, + {'C', 'M', false, true, false, false}, + {'C', 'R', false, false, false, false}, + {'C', 'W', false, false, false, false}, + {'C', 'S', false, true, false, false}, + {'C', 'Y', false, true, false, false}, + {'C', 'K', false, false, false, false}, + {'C', 'V', false, true, false, false}, + {'C', 'H', false, true, false, false}, + {'C', 'D', false, false, false, false}, + {'C', 'N', false, true, false, false}, + {'G', 'G', true, true, true, true}, + {'G', 'T', false, false, false, false}, + {'G', 'M', false, false, false, false}, + {'G', 'R', false, true, false, false}, + {'G', 'W', false, false, false, false}, + {'G', 'S', false, true, false, false}, + {'G', 'Y', false, false, false, false}, + {'G', 'K', false, true, false, false}, + {'G', 'V', false, true, false, false}, + {'G', 'H', false, false, false, false}, + {'G', 'N', false, true, false, false}, + {'T', 'T', true, true, true, true}, + {'T', 'W', false, true, false, false}, + {'T', 'Y', false, true, false, false}, + {'T', 'V', false, false, false, false}, + {'M', 'T', false, false, false, false}, + {'M', 'M', true, true, true, false}, + {'M', 'R', false, false, false, false}, + {'M', 'W', false, false, false, false}, + {'M', 'S', false, false, false, false}, + {'M', 'Y', false, false, false, false}, + {'M', 'V', false, true, false, false}, + {'M', 'N', false, true, false, false}, + {'R', 'T', false, false, false, false}, + {'R', 'R', true, true, true, false}, + {'R', 'W', false, false, false, false}, + {'R', 'S', false, false, false, false}, + {'R', 'Y', false, false, false, false}, + {'R', 'V', false, true, false, false}, + {'W', 'W', true, true, true, false}, + {'W', 'Y', false, false, false, false}, + {'S', 'T', false, false, false, false}, + {'S', 'W', false, false, false, false}, + {'S', 'S', true, true, true, false}, + {'S', 'Y', false, false, false, false}, + {'S', 'V', false, true, false, false}, + {'Y', 'Y', true, true, true, false}, + {'K', 'T', 
false, false, true, false}, + {'K', 'M', false, false, false, false}, + {'K', 'R', false, false, false, false}, + {'K', 'W', false, false, false, false}, + {'K', 'S', false, false, false, false}, + {'K', 'Y', false, false, false, false}, + {'K', 'K', true, true, true, false}, + {'K', 'V', false, false, false, false}, + {'K', 'N', false, true, false, false}, + {'V', 'W', false, false, false, false}, + {'V', 'Y', false, false, false, false}, + {'V', 'V', true, true, true, false}, + {'H', 'T', false, false, true, false}, + {'H', 'M', false, false, true, false}, + {'H', 'R', false, false, false, false}, + {'H', 'W', false, false, true, false}, + {'H', 'S', false, false, false, false}, + {'H', 'Y', false, false, true, false}, + {'H', 'K', false, false, false, false}, + {'H', 'V', false, false, false, false}, + {'H', 'H', true, true, true, false}, + {'H', 'N', false, true, false, false}, + {'D', 'G', false, false, true, false}, + {'D', 'T', false, false, true, false}, + {'D', 'M', false, false, false, false}, + {'D', 'R', false, false, true, false}, + {'D', 'W', false, false, true, false}, + {'D', 'S', false, false, false, false}, + {'D', 'Y', false, false, false, false}, + {'D', 'K', false, false, true, false}, + {'D', 'V', false, false, false, false}, + {'D', 'H', false, false, false, false}, + {'D', 'D', true, true, true, false}, + {'D', 'N', false, true, false, false}, + {'B', 'C', false, false, true, false}, + {'B', 'G', false, false, true, false}, + {'B', 'T', false, false, true, false}, + {'B', 'M', false, false, false, false}, + {'B', 'R', false, false, false, false}, + {'B', 'W', false, false, false, false}, + {'B', 'S', false, false, true, false}, + {'B', 'Y', false, false, true, false}, + {'B', 'K', false, false, true, false}, + {'B', 'V', false, false, false, false}, + {'B', 'H', false, false, false, false}, + {'B', 'D', false, false, false, false}, + {'B', 'B', true, true, true, false}, + {'B', 'N', false, true, false, false}, + {'N', 'T', false, false, 
true, false}, + {'N', 'R', false, false, true, false}, + {'N', 'W', false, false, true, false}, + {'N', 'S', false, false, true, false}, + {'N', 'Y', false, false, true, false}, + {'N', 'V', false, false, true, false}, + {'N', 'N', true, true, true, false} }; } diff --git a/src/test/resources/htsjdk/samtools/SequenceUtil/reference_with_lower_and_uppercase.dict b/src/test/resources/htsjdk/samtools/SequenceUtil/reference_with_lower_and_uppercase.dict index db5b251d0b..0e4df20881 100644 --- a/src/test/resources/htsjdk/samtools/SequenceUtil/reference_with_lower_and_uppercase.dict +++ b/src/test/resources/htsjdk/samtools/SequenceUtil/reference_with_lower_and_uppercase.dict @@ -1,3 +1,4 @@ @HD VN:1.5 SO:unsorted @SQ SN:chr1 LN:16 M5:56b74a652b3ed2f610263b8bb423167c UR:file:src/test/resources/htsjdk/samtools/SequenceUtil/reference_with_lower_and_uppercase.fasta @SQ SN:chr2 LN:16 M5:b835d2c026aa66c52a05838dcc0b59d4 UR:file:src/test/resources/htsjdk/samtools/SequenceUtil/reference_with_lower_and_uppercase.fasta +@SQ SN:chr3 LN:16 M5:5dc87213005ebcb4ec69610ef5e7fee8 UR:file:src/test/resources/htsjdk/samtools/SequenceUtil/reference_with_lower_and_uppercase.fasta diff --git a/src/test/resources/htsjdk/samtools/SequenceUtil/reference_with_lower_and_uppercase.fasta b/src/test/resources/htsjdk/samtools/SequenceUtil/reference_with_lower_and_uppercase.fasta index 0b446caa8e..7e3524e67c 100644 --- a/src/test/resources/htsjdk/samtools/SequenceUtil/reference_with_lower_and_uppercase.fasta +++ b/src/test/resources/htsjdk/samtools/SequenceUtil/reference_with_lower_and_uppercase.fasta @@ -1,4 +1,6 @@ >chr1 ACGTACGTacgtacgt >chr2 -TCGATCGAtcgatcga \ No newline at end of file +TCGATCGAtcgatcga +>chr3 +NARCGTNNnarcgtnn \ No newline at end of file diff --git a/src/test/resources/htsjdk/samtools/SequenceUtil/reference_with_lower_and_uppercase.fasta.fai b/src/test/resources/htsjdk/samtools/SequenceUtil/reference_with_lower_and_uppercase.fasta.fai index 9314c8fe55..7158153f04 100644 --- 
a/src/test/resources/htsjdk/samtools/SequenceUtil/reference_with_lower_and_uppercase.fasta.fai +++ b/src/test/resources/htsjdk/samtools/SequenceUtil/reference_with_lower_and_uppercase.fasta.fai @@ -1,2 +1,3 @@ chr1 16 6 16 17 -chr2 16 29 16 16 +chr2 16 29 16 17 +chr3 16 52 16 17 diff --git a/src/test/resources/htsjdk/samtools/SequenceUtil/upper_and_lowercase_read.sam b/src/test/resources/htsjdk/samtools/SequenceUtil/upper_and_lowercase_read.sam index 335d8159c8..4d8e71048e 100644 --- a/src/test/resources/htsjdk/samtools/SequenceUtil/upper_and_lowercase_read.sam +++ b/src/test/resources/htsjdk/samtools/SequenceUtil/upper_and_lowercase_read.sam @@ -1,10 +1,15 @@ @HD VN:1.5 SO:coordinate @SQ SN:chr1 LN:16 M5:56b74a652b3ed2f610263b8bb423167c UR:file:src/test/resources/htsjdk/samtools/SequenceUtil/reference_with_lower_and_uppercase.fasta @SQ SN:chr2 LN:16 M5:b835d2c026aa66c52a05838dcc0b59d4 UR:file:src/test/resources/htsjdk/samtools/SequenceUtil/reference_with_lower_and_uppercase.fasta +@SQ SN:chr3 LN:16 M5:5dc87213005ebcb4ec69610ef5e7fee8 UR:file:src/test/resources/htsjdk/samtools/SequenceUtil/reference_with_lower_and_uppercase.fasta @CO chr1 value is ACGTACGTacgtacgt @CO chr2 value is TCGATCGAtcgatcga +@CO chr3 value is NARCGTNNnarcgtnn read1 0 chr1 1 0 16M * 0 0 AcGtAcGTaCGtAcGt AAAAAAAAAAAAAAAA NM:i:0 read2 0 chr1 1 0 16M * 0 0 AcGtAcGTaCGtAcGt AAAAAAAAAAAAAAAA NM:i:0 read3 0 chr2 1 0 16M * 0 0 AcGtAcGTaCGtAcGt AAAAAAAAAAAAAAAA NM:i:8 MD:Z:0T2A0T2A0t2a0t2a0 read4 0 chr2 1 0 8M * 0 0 TCGATCGA AAAAAAAA NM:i:0 read5 0 chr2 1 0 4M1D2M2S * 0 0 TCGACGAA AAAAAAAA NM:i:1 MD:Z:4^T2 +read6 0 chr3 1 0 16M * 0 0 NARCGTNNnarcgtnn AAAAAAAAAAAAAAAA NM:i:8 +read7 0 chr3 1 0 16M * 0 0 AAACGTNNaaacgtnn AAAAAAAAAAAAAAAA NM:i:8 +read8 0 chr3 1 0 16M * 0 0 naRcgtnNNArCGTN= AAAAAAAAAAAAAAAA NM:i:7