Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 16 additions & 13 deletions src/main/java/htsjdk/samtools/SAMSequenceRecord.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
package htsjdk.samtools;


import htsjdk.variant.variantcontext.VariantContext;

import java.math.BigInteger;
import java.net.URI;
import java.net.URISyntaxException;
Expand Down Expand Up @@ -57,23 +59,27 @@ public class SAMSequenceRecord extends AbstractSAMHeaderRecord implements Clonea


/**
* This is not a valid sequence name, because it is reserved in the MRNM field of SAM text format
* This is not a valid sequence name, because it is reserved in the RNEXT field of SAM text format
* to mean "same reference as RNAME field."
*/
public static final String RESERVED_MRNM_SEQUENCE_NAME = "=";

public static final String RESERVED_RNEXT_SEQUENCE_NAME = "=";
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

deprecate this instead of just renaming it


/* use RESERVED_RNEXT_SEQUENCE_NAME instead. */
@Deprecated
public static final String RESERVED_MRNM_SEQUENCE_NAME = RESERVED_RNEXT_SEQUENCE_NAME;

/**
* The standard tags are stored in text header without type information, because the type of these tags is known.
*/
public static final Set<String> STANDARD_TAGS =
new HashSet<String>(Arrays.asList(SEQUENCE_NAME_TAG, SEQUENCE_LENGTH_TAG, ASSEMBLY_TAG, MD5_TAG, URI_TAG,
SPECIES_TAG));
new HashSet<>(Arrays.asList(SEQUENCE_NAME_TAG, SEQUENCE_LENGTH_TAG, ASSEMBLY_TAG, MD5_TAG, URI_TAG, SPECIES_TAG));

// Split on any whitespace
private static final Pattern SEQUENCE_NAME_SPLITTER = Pattern.compile("\\s");
// These are the chars matched by \\s.
private static final char[] WHITESPACE_CHARS = {' ', '\t', '\n', '\013', '\f', '\r'}; // \013 is vertical tab

private static final Pattern LEGAL_RNAME_PATTERN = Pattern.compile("[0-9A-Za-z!#$%&+./:;?@^_|~-][0-9A-Za-z!#$%&*+./:;=?@^_|~-]*");

/**
* @deprecated Use {@link #SAMSequenceRecord(String, int)} instead.
* sequenceLength is required for the object to be considered valid.
Expand All @@ -85,9 +91,6 @@ public SAMSequenceRecord(final String name) {

public SAMSequenceRecord(final String name, final int sequenceLength) {
if (name != null) {
if (SEQUENCE_NAME_SPLITTER.matcher(name).find()) {
throw new SAMException("Sequence name contains invalid character: " + name);
}
validateSequenceName(name);
mSequenceName = name.intern();
} else {
Expand Down Expand Up @@ -188,8 +191,8 @@ public final SAMSequenceRecord clone() {
public static String truncateSequenceName(final String sequenceName) {
/*
* Instead of using regex split, do it manually for better performance.
return SEQUENCE_NAME_SPLITTER.split(sequenceName, 2)[0];
*/
*/

int truncateAt = sequenceName.length();
for (final char c : WHITESPACE_CHARS) {
int index = sequenceName.indexOf(c);
Expand All @@ -204,8 +207,8 @@ public static String truncateSequenceName(final String sequenceName) {
* Throw an exception if the sequence name is not valid.
*/
public static void validateSequenceName(final String name) {
if (RESERVED_MRNM_SEQUENCE_NAME.equals(name)) {
throw new SAMException("'" + RESERVED_MRNM_SEQUENCE_NAME + "' is not a valid sequence name");
if (!LEGAL_RNAME_PATTERN.matcher(name).useAnchoringBounds(true).matches()) {
throw new SAMException(String.format("Sequence name '%s' doesn't match regex: '%s' ", name, LEGAL_RNAME_PATTERN));
}
}

Expand Down
23 changes: 22 additions & 1 deletion src/test/java/htsjdk/samtools/SAMSequenceRecordTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,32 @@ public void testIsSameSequence(final SAMSequenceRecord rec1 , final SAMSequenceR
}

@Test
public void testSetAndCheckDescription(){
public void testSetAndCheckDescription() {
final SAMSequenceRecord record = new SAMSequenceRecord("Test", 1000);
Assert.assertNull(record.getDescription());
final String description = "A description.";
record.setDescription(description);
Assert.assertEquals(record.getDescription(), description);
}

@DataProvider
public Object[][] illegalSequenceNames(){
return new Object[][]{
{"space "},
{"comma,"},
{"lbrace["},
{"rbrace]"},
{"slash\\"},
{"smaller<"},
{"bigger<"},
{"lparen("},
{"rparen)"},
{"lbracket{"},
{"rbracket}"}};
}

@Test(dataProvider = "illegalSequenceNames", expectedExceptions = SAMException.class)
public void testIllegalSequenceNames(final String sequenceName){
new SAMSequenceRecord(sequenceName,100);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ public class SequenceNameTruncationAndValidationTest extends HtsjdkTest {
@Test(expectedExceptions = {SAMException.class}, dataProvider = "badSequenceNames")
public void testSequenceRecordThrowsWhenInvalid(final String sequenceName) {
new SAMSequenceRecord(sequenceName, 123);
Assert.fail("Should not reach here.");
Assert.fail("Should not reach here. Sequence " + sequenceName + " should have failed.");
}

@DataProvider(name = "badSequenceNames")
Expand All @@ -53,7 +53,13 @@ public Object[][] badSequenceNames() {
{"\t"},
{"\n"},
{"="},
{"Hi, Mom!"}
{"Hi: Mom!"},
{"=Hi:Mom!"},
{"Hi:'Mom!"},
{"Hi:\"Mom!"},
{"Hi:)Mom!"},
{"Hi:(Mom!"},
{"Hi,@Mom!"}
};
}

Expand All @@ -65,7 +71,8 @@ public void testSequenceRecordPositiveTest(final String sequenceName) {
@DataProvider(name = "goodSequenceNames")
public Object[][] goodSequenceNames() {
return new Object[][]{
{"Hi,@Mom!"}
{"Hi:@Mom!"},
{"Hi:=Mom!"}
};
}

Expand Down