Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 9 additions & 7 deletions src/main/java/htsjdk/samtools/SAMSequenceRecord.java
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,10 @@ public class SAMSequenceRecord extends AbstractSAMHeaderRecord implements Clonea


/**
* This is not a valid sequence name, because it is reserved in the MRNM field of SAM text format
* This is not a valid sequence name, because it is reserved in the RNEXT field of SAM text format
* to mean "same reference as RNAME field."
*/
public static final String RESERVED_MRNM_SEQUENCE_NAME = "=";
public static final String RESERVED_RNEXT_SEQUENCE_NAME = "=";
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

deprecate this instead of just renaming it


/**
* The standard tags are stored in text header without type information, because the type of these tags is known.
Expand All @@ -70,10 +70,12 @@ public class SAMSequenceRecord extends AbstractSAMHeaderRecord implements Clonea
SPECIES_TAG));

// Split on any whitespace
private static final Pattern SEQUENCE_NAME_SPLITTER = Pattern.compile("\\s");
private static final Pattern SEQUENCE_NAME_SPLITTER = Pattern.compile("[\\s]");
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

delete this if unused

// These are the chars matched by \\s.
private static final char[] WHITESPACE_CHARS = {' ', '\t', '\n', '\013', '\f', '\r'}; // \013 is vertical tab

private static Pattern LEGAL_RNAME_PATTERN = Pattern.compile("[0-9A-Za-z!#$%&+./:;?@^_|~-][0-9A-Za-z!#$%&*+./:;=?@^_|~-]*");
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

make this final


/**
* @deprecated Use {@link #SAMSequenceRecord(String, int)} instead.
* sequenceLength is required for the object to be considered valid.
Expand All @@ -85,8 +87,8 @@ public SAMSequenceRecord(final String name) {

public SAMSequenceRecord(final String name, final int sequenceLength) {
if (name != null) {
if (SEQUENCE_NAME_SPLITTER.matcher(name).find()) {
throw new SAMException("Sequence name contains invalid character: " + name);
if (!LEGAL_RNAME_PATTERN.matcher(name).useAnchoringBounds(true).matches()) {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

move this into the validate method

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SEQUENCE_NAME_SPLITTER in both code and comments can be removed correct? I still see it elsewhere.

throw new SAMException(String.format("Sequence name '%s' doesn't match regex: '%s' ", name, LEGAL_RNAME_PATTERN));
}
validateSequenceName(name);
mSequenceName = name.intern();
Expand Down Expand Up @@ -204,8 +206,8 @@ public static String truncateSequenceName(final String sequenceName) {
* Throw an exception if the sequence name is not valid.
*/
public static void validateSequenceName(final String name) {
if (RESERVED_MRNM_SEQUENCE_NAME.equals(name)) {
throw new SAMException("'" + RESERVED_MRNM_SEQUENCE_NAME + "' is not a valid sequence name");
if (RESERVED_RNEXT_SEQUENCE_NAME.equals(name)) {
throw new SAMException("'" + RESERVED_RNEXT_SEQUENCE_NAME + "' is not a valid sequence name");
}
}

Expand Down
23 changes: 22 additions & 1 deletion src/test/java/htsjdk/samtools/SAMSequenceRecordTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,32 @@ public void testIsSameSequence(final SAMSequenceRecord rec1 , final SAMSequenceR
}

@Test
public void testSetAndCheckDescription(){
public void testSetAndCheckDescription() {
final SAMSequenceRecord record = new SAMSequenceRecord("Test", 1000);
Assert.assertNull(record.getDescription());
final String description = "A description.";
record.setDescription(description);
Assert.assertEquals(record.getDescription(), description);
}

@DataProvider
public Object[][] illegalSequenceNames(){
return new Object[][]{
{"space "},
{"comma,"},
{"lbrace["},
{"rbrace]"},
{"slash\\"},
{"smaller<"},
{"bigger<"},
{"lparen("},
{"rparen)"},
{"lbracket{"},
{"rbracket}"}};
}

@Test(dataProvider = "illegalSequenceNames", expectedExceptions = SAMException.class)
public void testIllegalSequenceNames(final String sequenceName){
new SAMSequenceRecord(sequenceName,100);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ public class SequenceNameTruncationAndValidationTest extends HtsjdkTest {
@Test(expectedExceptions = {SAMException.class}, dataProvider = "badSequenceNames")
public void testSequenceRecordThrowsWhenInvalid(final String sequenceName) {
new SAMSequenceRecord(sequenceName, 123);
Assert.fail("Should not reach here.");
Assert.fail("Should not reach here. Sequence " + sequenceName + " should have failed.");
}

@DataProvider(name = "badSequenceNames")
Expand All @@ -53,7 +53,13 @@ public Object[][] badSequenceNames() {
{"\t"},
{"\n"},
{"="},
{"Hi, Mom!"}
{"Hi: Mom!"},
{"=Hi:Mom!"},
{"Hi:'Mom!"},
{"Hi:\"Mom!"},
{"Hi:)Mom!"},
{"Hi:(Mom!"},
{"Hi,@Mom!"}
};
}

Expand All @@ -65,7 +71,8 @@ public void testSequenceRecordPositiveTest(final String sequenceName) {
@DataProvider(name = "goodSequenceNames")
public Object[][] goodSequenceNames() {
return new Object[][]{
{"Hi,@Mom!"}
{"Hi:@Mom!"},
{"Hi:=Mom!"}
};
}

Expand Down