From ea536c63e748f156547df19dcff9ee934ea36d73 Mon Sep 17 00:00:00 2001 From: lindenb Date: Thu, 1 Mar 2018 14:26:39 +0100 Subject: [PATCH 1/5] CreateSequenceDictionary support for alternative names (@SEQ:AN) --- .../picard/sam/CreateSequenceDictionary.java | 69 +++++++++++++++++++ .../sam/CreateSequenceDictionaryTest.java | 57 +++++++++++++++ 2 files changed, 126 insertions(+) diff --git a/src/main/java/picard/sam/CreateSequenceDictionary.java b/src/main/java/picard/sam/CreateSequenceDictionary.java index 0c071b2b47..baee3e4ab7 100644 --- a/src/main/java/picard/sam/CreateSequenceDictionary.java +++ b/src/main/java/picard/sam/CreateSequenceDictionary.java @@ -48,12 +48,17 @@ import java.io.*; import java.math.BigInteger; +import java.nio.file.Files; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.util.ArrayList; +import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Set; +import java.util.regex.Pattern; +import java.util.stream.Collectors; /** * Create a SAM/BAM file from a fasta containing reference sequence. The output SAM file contains a header but no @@ -106,6 +111,12 @@ public class CreateSequenceDictionary extends CommandLineProgram { @Argument(doc = "Stop after writing this many sequences. For testing.") public int NUM_SEQUENCES = Integer.MAX_VALUE; + @Argument(shortName = "AN", doc = "Optional file containing the alternative names for the contigs. " + + "First column is the original name, the second column is an alternative name. " + + "One contig may have more than one alternative name. " , + optional=true) + public File ALT_NAMES = null; + private final MessageDigest md5; public CreateSequenceDictionary() { @@ -185,6 +196,58 @@ protected int doWork() { " already exists. Delete this file and try again, or specify a different output file."); } + // map for aliases mapping a contig to its' aliases + final Map> contig2aliases = new HashMap<>(); + + // fill the alias map + if(this.ALT_NAMES != null) { + try { + // regex defined in the sam spec + final Pattern altNameRegex = Pattern.compile("[0-9A-Za-z][0-9A-Za-z\\*\\+@\\|\\-]*"); + for(final String line :IOUtil.slurpLines(this.ALT_NAMES)) { + if(StringUtil.isBlank(line)) continue; + final int tab = line.indexOf('\t'); + if(tab == -1 ) throw new IOException("tabulation missing in " + line); + final String contigName = line.substring(0,tab); + final String altName = line.substring(tab + 1); + //check for empty values + if(StringUtil.isBlank(contigName)) { + throw new IOException("empty contig in " + line); + } + if(StringUtil.isBlank(altName)) { + throw new IOException("empty alternative name in " + line); + } + if(altName.equals(contigName)) continue; + if(!altNameRegex.matcher(altName).matches()) { + throw new IOException("alternative name in " + line + + " doesn't match the regular expression : " + + altNameRegex.pattern()); + } + //check alias not previously defined as contig + if(contig2aliases.containsKey(altName)) { + throw new IOException("alternate name " + altName + + "previously defined as a contig in " + line); + } + //check contig not previously defined as alias + if(contig2aliases.keySet().stream(). + filter(K->!K.equals(contigName)). // not an error is defined twice for same contig + flatMap(K->contig2aliases.get(K).stream()). + anyMatch(S->S.contains(contigName))) { + throw new IOException("contig " + contigName + + "previously defined as an alternate name in " + line); + } + //add alias + if(!contig2aliases.containsKey(contigName)) { + contig2aliases.put(contigName, new HashSet<>()); + } + contig2aliases.get(contigName).add(altName); + } + } + catch (final IOException e) { + throw new PicardException("Can't read alias file " + ALT_NAMES, e); + } + } + // SortingCollection is used to check uniqueness of sequence names final SortingCollection sequenceNames = makeSortingCollection(); try (BufferedWriter writer = makeWriter()) { @@ -196,6 +259,12 @@ protected int doWork() { // read reference sequence one by one and write its metadata for (ReferenceSequence refSeq = refSeqFile.nextSequence(); refSeq != null; refSeq = refSeqFile.nextSequence()) { final SAMSequenceRecord samSequenceRecord = makeSequenceRecord(refSeq); + //add aliases + if(contig2aliases.containsKey(samSequenceRecord.getSequenceName())) { + final Set aliases = contig2aliases.get(samSequenceRecord.getSequenceName()); + // "Alternative names is a comma separated list of alternative names" + samSequenceRecord.setAttribute("AN",String.join(",",aliases)); //TODO replace "AN" with constants/methods: https://github.com/samtools/htsjdk/pull/956/files + } samDictCodec.encodeSequenceRecord(samSequenceRecord); sequenceNames.add(refSeq.getName()); } diff --git a/src/test/java/picard/sam/CreateSequenceDictionaryTest.java b/src/test/java/picard/sam/CreateSequenceDictionaryTest.java index e371434f45..b863b5e181 100644 --- a/src/test/java/picard/sam/CreateSequenceDictionaryTest.java +++ b/src/test/java/picard/sam/CreateSequenceDictionaryTest.java @@ -25,13 +25,21 @@ import org.testng.Assert; import org.testng.annotations.Test; + +import htsjdk.samtools.SAMSequenceDictionary; +import htsjdk.samtools.SAMSequenceRecord; +import htsjdk.variant.utils.SAMSequenceDictionaryExtractor; import picard.cmdline.CommandLineProgramTest; import picard.PicardException; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; +import java.io.PrintWriter; +import java.util.Arrays; +import java.util.HashSet; import java.util.List; +import java.util.Set; import java.util.stream.Collectors; /** @@ -114,4 +122,53 @@ public void testNonUniqueSequenceName() throws Exception { Assert.assertEquals(runPicardCommandLine(argv), 0); Assert.fail("Exception should have been thrown."); } + + + @Test + public void testAltNames() throws Exception { + final File altFile = File.createTempFile("CreateSequenceDictionaryTest.", ".alt"); + final PrintWriter pw = new PrintWriter(altFile); + pw.println("chr1\t1"); + pw.println("chr1\t01"); + pw.println("chr1\tk1"); + pw.println("chrMT\tM"); + pw.flush(); + pw.close(); + altFile.deleteOnExit(); + + final File outputDict = File.createTempFile("CreateSequenceDictionaryTest.", ".dict"); + outputDict.delete(); + outputDict.deleteOnExit(); + final String[] argv = { + "REFERENCE=" + BASIC_FASTA, + "AN=" + altFile, + "OUTPUT=" + outputDict, + "TRUNCATE_NAMES_AT_WHITESPACE=true" + }; + Assert.assertEquals(runPicardCommandLine(argv), 0); + final SAMSequenceDictionary dict = SAMSequenceDictionaryExtractor.extractDictionary(outputDict); + Assert.assertNotNull(dict, "dictionary is null"); + + // check chr1 + SAMSequenceRecord ssr = dict.getSequence("chr1"); + Assert.assertNotNull(ssr, "chr1 missing in dictionary"); + String an = ssr.getAttribute("AN"); + Assert.assertNotNull(ssr, "AN Missing"); + Set anSet = new HashSet<>(Arrays.asList(an.split("[,]"))); + Assert.assertTrue(anSet.contains("1")); + Assert.assertTrue(anSet.contains("01")); + Assert.assertTrue(anSet.contains("k1")); + Assert.assertFalse(anSet.contains("M")); + + // check chr2 + ssr = dict.getSequence("chr2"); + Assert.assertNotNull(ssr, "chr2 missing in dictionary"); + an = ssr.getAttribute("AN"); + Assert.assertNull(an, "AN Present"); + + // check chrM + ssr = dict.getSequence("chrM"); + Assert.assertNull(ssr, "chrM presnt in dictionary"); + } + } From 6e93f07c9a8f5361f0e0d2de11756084963007f7 Mon Sep 17 00:00:00 2001 From: lindenb Date: Thu, 8 Mar 2018 11:38:42 +0100 Subject: [PATCH 2/5] answers to https://github.com/broadinstitute/picard/pull/1127 --- .../picard/sam/CreateSequenceDictionary.java | 129 ++++++++++-------- .../sam/CreateSequenceDictionaryTest.java | 4 +- 2 files changed, 77 insertions(+), 56 deletions(-) diff --git a/src/main/java/picard/sam/CreateSequenceDictionary.java b/src/main/java/picard/sam/CreateSequenceDictionary.java index baee3e4ab7..f661aa7072 100644 --- a/src/main/java/picard/sam/CreateSequenceDictionary.java +++ b/src/main/java/picard/sam/CreateSequenceDictionary.java @@ -52,6 +52,7 @@ import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -118,6 +119,18 @@ public class CreateSequenceDictionary extends CommandLineProgram { public File ALT_NAMES = null; private final MessageDigest md5; + + /** + * Regular expression defined in the sam spec. Any alternative contig should match this regular expression + * TODO: replace the pattern with a constant : see https://github.com/samtools/htsjdk/pull/956/files + */ + private static final Pattern ALTERNATIVE_CONTIG_NAME_PATTERN = Pattern.compile("[0-9A-Za-z][0-9A-Za-z\\*\\+@\\|\\-]*"); + + /** + * 'AN' attribute in the dictionary + * TODO: replace "AN" with a constant : see https://github.com/samtools/htsjdk/pull/956/files + */ + private static final String AN_ATTRIBUTE = "AN"; public CreateSequenceDictionary() { try { @@ -196,57 +209,8 @@ protected int doWork() { " already exists. Delete this file and try again, or specify a different output file."); } - // map for aliases mapping a contig to its' aliases - final Map> contig2aliases = new HashMap<>(); - - // fill the alias map - if(this.ALT_NAMES != null) { - try { - // regex defined in the sam spec - final Pattern altNameRegex = Pattern.compile("[0-9A-Za-z][0-9A-Za-z\\*\\+@\\|\\-]*"); - for(final String line :IOUtil.slurpLines(this.ALT_NAMES)) { - if(StringUtil.isBlank(line)) continue; - final int tab = line.indexOf('\t'); - if(tab == -1 ) throw new IOException("tabulation missing in " + line); - final String contigName = line.substring(0,tab); - final String altName = line.substring(tab + 1); - //check for empty values - if(StringUtil.isBlank(contigName)) { - throw new IOException("empty contig in " + line); - } - if(StringUtil.isBlank(altName)) { - throw new IOException("empty alternative name in " + line); - } - if(altName.equals(contigName)) continue; - if(!altNameRegex.matcher(altName).matches()) { - throw new IOException("alternative name in " + line + - " doesn't match the regular expression : " + - altNameRegex.pattern()); - } - //check alias not previously defined as contig - if(contig2aliases.containsKey(altName)) { - throw new IOException("alternate name " + altName + - "previously defined as a contig in " + line); - } - //check contig not previously defined as alias - if(contig2aliases.keySet().stream(). - filter(K->!K.equals(contigName)). // not an error is defined twice for same contig - flatMap(K->contig2aliases.get(K).stream()). - anyMatch(S->S.contains(contigName))) { - throw new IOException("contig " + contigName + - "previously defined as an alternate name in " + line); - } - //add alias - if(!contig2aliases.containsKey(contigName)) { - contig2aliases.put(contigName, new HashSet<>()); - } - contig2aliases.get(contigName).add(altName); - } - } - catch (final IOException e) { - throw new PicardException("Can't read alias file " + ALT_NAMES, e); - } - } + // map for aliases mapping a contig to its aliases + final Map> aliasesByContig = loadContigAliasesMap(); // SortingCollection is used to check uniqueness of sequence names final SortingCollection sequenceNames = makeSortingCollection(); @@ -260,10 +224,10 @@ protected int doWork() { for (ReferenceSequence refSeq = refSeqFile.nextSequence(); refSeq != null; refSeq = refSeqFile.nextSequence()) { final SAMSequenceRecord samSequenceRecord = makeSequenceRecord(refSeq); //add aliases - if(contig2aliases.containsKey(samSequenceRecord.getSequenceName())) { - final Set aliases = contig2aliases.get(samSequenceRecord.getSequenceName()); + if (aliasesByContig.containsKey(samSequenceRecord.getSequenceName())) { + final Set aliases = aliasesByContig.get(samSequenceRecord.getSequenceName()); // "Alternative names is a comma separated list of alternative names" - samSequenceRecord.setAttribute("AN",String.join(",",aliases)); //TODO replace "AN" with constants/methods: https://github.com/samtools/htsjdk/pull/956/files + samSequenceRecord.setAttribute(AN_ATTRIBUTE, String.join(",", aliases)); } samDictCodec.encodeSequenceRecord(samSequenceRecord); sequenceNames.add(refSeq.getName()); @@ -353,6 +317,63 @@ private SortingCollection makeSortingCollection() { ); } + /** + * Load the file ALT_NAMES containing the alternative contig names + * @author Pierre Lindenbaum + * @return a Map<src_contig,Set<new_names>>. Never null. May be empty if ALT_NAMES is null. + * @throws PicardException if there is any error in the file ALT_NAMES + */ + private Map> loadContigAliasesMap() throws PicardException { + // return an empty map if no mapping file was provided + if (this.ALT_NAMES == null) return Collections.emptyMap(); + // the map returned by the function + final Map> aliasesByContig = new HashMap<>(); + try { + for(final String line :IOUtil.slurpLines(this.ALT_NAMES)) { + if (StringUtil.isBlank(line)) continue; + final int tab = line.indexOf('\t'); + if (tab == -1 ) throw new IOException("tabulation missing in " + line); + final String contigName = line.substring(0,tab); + final String altName = line.substring(tab + 1); + //check for empty values + if (StringUtil.isBlank(contigName)) { + throw new IOException("empty contig in " + line); + } + if (StringUtil.isBlank(altName)) { + throw new IOException("empty alternative name in " + line); + } + if (altName.equals(contigName)) continue; + if (!ALTERNATIVE_CONTIG_NAME_PATTERN.matcher(altName).matches()) { + throw new IOException("alternative name in " + line + + " doesn't match the regular expression : " + + ALTERNATIVE_CONTIG_NAME_PATTERN.pattern()); + } + //check alias not previously defined as contig + if (aliasesByContig.containsKey(altName)) { + throw new IOException("alternate name " + altName + + "previously defined as a contig in " + line); + } + //check contig not previously defined as alias + if (aliasesByContig.keySet().stream(). + filter(K->!K.equals(contigName)). // not an error is defined twice for same contig + anyMatch(K->aliasesByContig.get(K).contains(contigName))) { + throw new IOException("contig " + contigName + + "previously defined as an alternate name in " + line); + } + //add alias + if (!aliasesByContig.containsKey(contigName)) { + aliasesByContig.put(contigName, new HashSet<>()); + } + aliasesByContig.get(contigName).add(altName); + } + return aliasesByContig; + } + catch (final IOException e) { + throw new PicardException("Can't read alias file " + ALT_NAMES, e); + } + + } + private static class StringCodec implements SortingCollection.Codec { private DataInputStream dis; private DataOutputStream dos; diff --git a/src/test/java/picard/sam/CreateSequenceDictionaryTest.java b/src/test/java/picard/sam/CreateSequenceDictionaryTest.java index b863b5e181..bef74f2337 100644 --- a/src/test/java/picard/sam/CreateSequenceDictionaryTest.java +++ b/src/test/java/picard/sam/CreateSequenceDictionaryTest.java @@ -126,7 +126,7 @@ public void testNonUniqueSequenceName() throws Exception { @Test public void testAltNames() throws Exception { - final File altFile = File.createTempFile("CreateSequenceDictionaryTest.", ".alt"); + final File altFile = File.createTempFile("CreateSequenceDictionaryTest", ".alt"); final PrintWriter pw = new PrintWriter(altFile); pw.println("chr1\t1"); pw.println("chr1\t01"); @@ -168,7 +168,7 @@ public void testAltNames() throws Exception { // check chrM ssr = dict.getSequence("chrM"); - Assert.assertNull(ssr, "chrM presnt in dictionary"); + Assert.assertNull(ssr, "chrM present in dictionary"); } } From bfce5f7826b0fdda5a809471f0728874d1966cc6 Mon Sep 17 00:00:00 2001 From: lindenb Date: Fri, 9 Mar 2018 09:58:30 +0100 Subject: [PATCH 3/5] fix https://github.com/broadinstitute/picard/pull/1127#pullrequestreview-102315839, remove tabs from the code --- .../picard/sam/CreateSequenceDictionary.java | 104 ++++++++++-------- 1 file changed, 56 insertions(+), 48 deletions(-) diff --git a/src/main/java/picard/sam/CreateSequenceDictionary.java b/src/main/java/picard/sam/CreateSequenceDictionary.java index f661aa7072..f64e6a6c03 100644 --- a/src/main/java/picard/sam/CreateSequenceDictionary.java +++ b/src/main/java/picard/sam/CreateSequenceDictionary.java @@ -223,9 +223,9 @@ protected int doWork() { // read reference sequence one by one and write its metadata for (ReferenceSequence refSeq = refSeqFile.nextSequence(); refSeq != null; refSeq = refSeqFile.nextSequence()) { final SAMSequenceRecord samSequenceRecord = makeSequenceRecord(refSeq); - //add aliases - if (aliasesByContig.containsKey(samSequenceRecord.getSequenceName())) { - final Set aliases = aliasesByContig.get(samSequenceRecord.getSequenceName()); + // retrieve aliases, if any + final Set aliases = aliasesByContig.get(samSequenceRecord.getSequenceName()); + if (aliases != null) { // "Alternative names is a comma separated list of alternative names" samSequenceRecord.setAttribute(AN_ATTRIBUTE, String.join(",", aliases)); } @@ -241,7 +241,7 @@ protected int doWork() { // check uniqueness of sequences names final CloseableIterator iterator = sequenceNames.iterator(); - if(!iterator.hasNext()) return 0; + if (!iterator.hasNext()) return 0; String current = iterator.next(); while (iterator.hasNext()) { @@ -324,51 +324,59 @@ private SortingCollection makeSortingCollection() { * @throws PicardException if there is any error in the file ALT_NAMES */ private Map> loadContigAliasesMap() throws PicardException { - // return an empty map if no mapping file was provided - if (this.ALT_NAMES == null) return Collections.emptyMap(); - // the map returned by the function - final Map> aliasesByContig = new HashMap<>(); - try { - for(final String line :IOUtil.slurpLines(this.ALT_NAMES)) { - if (StringUtil.isBlank(line)) continue; - final int tab = line.indexOf('\t'); - if (tab == -1 ) throw new IOException("tabulation missing in " + line); - final String contigName = line.substring(0,tab); - final String altName = line.substring(tab + 1); - //check for empty values - if (StringUtil.isBlank(contigName)) { - throw new IOException("empty contig in " + line); - } - if (StringUtil.isBlank(altName)) { - throw new IOException("empty alternative name in " + line); - } - if (altName.equals(contigName)) continue; - if (!ALTERNATIVE_CONTIG_NAME_PATTERN.matcher(altName).matches()) { - throw new IOException("alternative name in " + line + - " doesn't match the regular expression : " + - ALTERNATIVE_CONTIG_NAME_PATTERN.pattern()); - } - //check alias not previously defined as contig - if (aliasesByContig.containsKey(altName)) { - throw new IOException("alternate name " + altName + - "previously defined as a contig in " + line); - } - //check contig not previously defined as alias - if (aliasesByContig.keySet().stream(). - filter(K->!K.equals(contigName)). // not an error is defined twice for same contig - anyMatch(K->aliasesByContig.get(K).contains(contigName))) { - throw new IOException("contig " + contigName + - "previously defined as an alternate name in " + line); - } - //add alias - if (!aliasesByContig.containsKey(contigName)) { - aliasesByContig.put(contigName, new HashSet<>()); - } - aliasesByContig.get(contigName).add(altName); - } - return aliasesByContig; + // return an empty map if no mapping file was provided + if (this.ALT_NAMES == null) { + return Collections.emptyMap(); } - catch (final IOException e) { + // the map returned by the function + final Map> aliasesByContig = new HashMap<>(); + try { + for (final String line :IOUtil.slurpLines(this.ALT_NAMES)) { + if (StringUtil.isBlank(line)) { + continue; + } + final int tab = line.indexOf('\t'); + if (tab == -1 ) { + throw new IOException("tabulation missing in " + line); + } + final String contigName = line.substring(0,tab); + final String altName = line.substring(tab + 1); + // check for empty values + if (StringUtil.isBlank(contigName)) { + throw new IOException("empty contig in " + line); + } + if (StringUtil.isBlank(altName)) { + throw new IOException("empty alternative name in " + line); + } + if (altName.equals(contigName)) { + continue; + } + if (!ALTERNATIVE_CONTIG_NAME_PATTERN.matcher(altName).matches()) { + throw new IOException("alternative name in " + line + + " doesn't match the regular expression : " + + ALTERNATIVE_CONTIG_NAME_PATTERN.pattern()); + } + // check alias not previously defined as contig + if (aliasesByContig.containsKey(altName)) { + throw new IOException("alternate name " + altName + + "previously defined as a contig in " + line); + } + // check contig not previously defined as alias + if (aliasesByContig.keySet().stream(). + // not an error if defined twice for same contig + filter(K->!K.equals(contigName)). + anyMatch(K->aliasesByContig.get(K).contains(contigName))) { + throw new IOException("contig " + contigName + + "previously defined as an alternate name in " + line); + } + // add alias + if (!aliasesByContig.containsKey(contigName)) { + aliasesByContig.put(contigName, new HashSet<>()); + } + aliasesByContig.get(contigName).add(altName); + } + return aliasesByContig; + } catch (final IOException e) { throw new PicardException("Can't read alias file " + ALT_NAMES, e); } From 0f450664d5b42a05badb1675745612182c398976 Mon Sep 17 00:00:00 2001 From: lindenb Date: Mon, 28 May 2018 11:31:22 +0200 Subject: [PATCH 4/5] answers to https://github.com/broadinstitute/picard/pull/1127 --- .../picard/sam/CreateSequenceDictionary.java | 27 +++++++++---------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/src/main/java/picard/sam/CreateSequenceDictionary.java b/src/main/java/picard/sam/CreateSequenceDictionary.java index f64e6a6c03..7ca8d7d327 100644 --- a/src/main/java/picard/sam/CreateSequenceDictionary.java +++ b/src/main/java/picard/sam/CreateSequenceDictionary.java @@ -48,7 +48,6 @@ import java.io.*; import java.math.BigInteger; -import java.nio.file.Files; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.util.ArrayList; @@ -59,7 +58,6 @@ import java.util.Map; import java.util.Set; import java.util.regex.Pattern; -import java.util.stream.Collectors; /** * Create a SAM/BAM file from a fasta containing reference sequence. The output SAM file contains a header but no @@ -113,8 +111,9 @@ public class CreateSequenceDictionary extends CommandLineProgram { public int NUM_SEQUENCES = Integer.MAX_VALUE; @Argument(shortName = "AN", doc = "Optional file containing the alternative names for the contigs. " + + "Tools may use this information to consider different contig notations as identical (e.g: 'chr1' and '1'). " + "First column is the original name, the second column is an alternative name. " - + "One contig may have more than one alternative name. " , + + "One contig may have more than one alternative name." , optional=true) public File ALT_NAMES = null; @@ -331,15 +330,15 @@ private Map> loadContigAliasesMap() throws PicardException { // the map returned by the function final Map> aliasesByContig = new HashMap<>(); try { - for (final String line :IOUtil.slurpLines(this.ALT_NAMES)) { - if (StringUtil.isBlank(line)) { - continue; - } + for (final String line : IOUtil.slurpLines(this.ALT_NAMES)) { + if (StringUtil.isBlank(line)) { + continue; + } final int tab = line.indexOf('\t'); - if (tab == -1 ) { + if (tab == -1) { throw new IOException("tabulation missing in " + line); } - final String contigName = line.substring(0,tab); + final String contigName = line.substring(0, tab); final String altName = line.substring(tab + 1); // check for empty values if (StringUtil.isBlank(contigName)) { @@ -359,15 +358,15 @@ private Map> loadContigAliasesMap() throws PicardException { // check alias not previously defined as contig if (aliasesByContig.containsKey(altName)) { throw new IOException("alternate name " + altName + - "previously defined as a contig in " + line); + " previously defined as a contig in " + line); } // check contig not previously defined as alias if (aliasesByContig.keySet().stream(). // not an error if defined twice for same contig - filter(K->!K.equals(contigName)). - anyMatch(K->aliasesByContig.get(K).contains(contigName))) { - throw new IOException("contig " + contigName + - "previously defined as an alternate name in " + line); + filter(K -> !K.equals(contigName)). + anyMatch(K -> aliasesByContig.get(K).contains(contigName))) { + throw new IOException("contig " + contigName + + " previously defined as an alternate name in " + line); } // add alias if (!aliasesByContig.containsKey(contigName)) { From 82fcb0b02b60834db4b625504c8269c3ce20af43 Mon Sep 17 00:00:00 2001 From: lindenb Date: Tue, 29 May 2018 10:03:47 +0200 Subject: [PATCH 5/5] answer to https://github.com/broadinstitute/picard/pull/1127#discussion_r191289677 --- src/main/java/picard/sam/CreateSequenceDictionary.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/main/java/picard/sam/CreateSequenceDictionary.java b/src/main/java/picard/sam/CreateSequenceDictionary.java index 7ca8d7d327..d3cfcf400b 100644 --- a/src/main/java/picard/sam/CreateSequenceDictionary.java +++ b/src/main/java/picard/sam/CreateSequenceDictionary.java @@ -112,7 +112,9 @@ public class CreateSequenceDictionary extends CommandLineProgram { @Argument(shortName = "AN", doc = "Optional file containing the alternative names for the contigs. " + "Tools may use this information to consider different contig notations as identical (e.g: 'chr1' and '1'). " - + "First column is the original name, the second column is an alternative name. " + + "The alternative names will be put into the appropriate @AN annotation for each contig. " + + "No header. " + + "First column is the original name, the second column is an alternative name. " + "One contig may have more than one alternative name." , optional=true) public File ALT_NAMES = null;