Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions lirical-cli/src/assemble/distribution.xml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,13 @@
<include>legal/**</include>
</includes>
</fileSet>
<!-- Copy the example inputs found under src/examples/ into the root output directory of the assembly. -->
<fileSet>
<directory>${project.basedir}/src</directory>
<outputDirectory>./</outputDirectory>
<includes>
<include>examples/</include>
</includes>
</fileSet>
<fileSet>
<directory>${project.build.directory}</directory>
<outputDirectory>./</outputDirectory>
Expand Down
78 changes: 78 additions & 0 deletions lirical-cli/src/examples/marfan.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
##fileformat=VCFv4.2
##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Allelic depths (counting only informative reads out of the total reads) for the ref and alt alleles in the order listed">
##FORMAT=<ID=AF,Number=A,Type=Float,Description="Allele fractions for alt alleles in the order listed">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">
##FORMAT=<ID=F1R2,Number=R,Type=Integer,Description="Count of reads in F1R2 pair orientation supporting each allele">
##FORMAT=<ID=F2R1,Number=R,Type=Integer,Description="Count of reads in F2R1 pair orientation supporting each allele">
##FORMAT=<ID=GP,Number=G,Type=Float,Description="Phred-scaled posterior probabilities for genotypes as defined in the VCF specification">
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=MB,Number=4,Type=Integer,Description="Per-sample component statistics to detect mate bias">
##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">
##FORMAT=<ID=PRI,Number=G,Type=Float,Description="Phred-scaled prior probabilities for genotypes">
##FORMAT=<ID=PS,Number=1,Type=Integer,Description="Physical phasing ID information, where each unique ID within a given sample (but not across samples) connects records within a phasing group">
##FORMAT=<ID=SB,Number=4,Type=Integer,Description="Per-sample component statistics which comprise the Fisher's Exact Test to detect strand bias">
##FORMAT=<ID=SQ,Number=1,Type=Float,Description="Somatic quality">
##DRAGENCommandLine=<ID=HashTableBuild,Version="SW: 01.003.044.3.5.3-38-gefdaeaff, HashTableVersion: 8",CommandLineOptions="/opt/edico/bin/dragen --lic-instance-id-location /root/.edico --build-hash-table true --ht-reference /data/input/appresults/217816899/hg38.fa --ht-alt-liftover /opt/edico/liftover/bwa-kit_hs38DH_liftover.sam --ht-build-rna-hashtable true --enable-cnv true --ht-alt-aware-validate true --output-directory /data/scratch/hg38_altaware">
##DRAGENCommandLine=<ID=dragen,Version="SW: 05.021.595.3.7.5, HW: 05.021.595",Date="Sun Jan 01 00:00:00 UTC 2022",CommandLineOptions="--lic-server https://XXXXXXXXXXXX:YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY@license.edicogenome.com --lic-instance-id-location /root/.edico --output_status_file /data/scratch/progress.log --enable-duplicate-marking true --enable-map-align true --enable-map-align-output true --output-format BAM --auto-detect-sample-sex true --enable-bam-indexing true --enable-metrics-json true --json-dataset-type /staging/files/dataset-types/dragen_complete_v02.json --enable-variant-caller true --enable-vcf-compression true --vc-emit-ref-confidence GVCF --vc-enable-vcf-output true --vc-enable-bqd true --output-directory /data/output/appresults/292294008/EXAMPLE_SAMPLE --intermediate-results-dir /data/scratch/intermediate --output-file-prefix EXAMPLE_SAMPLE --fastq-list /data/scratch/fastq_sheet.csv --ref-dir /data/scratch/hg38-altaware-cnv-anchor.v8 --qc-cross-cont-vcf /opt/edico/config/sample_cross_contamination_resource_hg38.vcf.gz">
##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP Membership">
##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (informative and non-informative); some reads may have been filtered based on mapq etc.">
##INFO=<ID=END,Number=1,Type=Integer,Description="Stop position of the interval">
##INFO=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher's exact test to detect strand bias">
##INFO=<ID=FractionInformativeReads,Number=1,Type=Float,Description="The fraction of informative reads out of the total reads">
##INFO=<ID=LOD,Number=1,Type=Float,Description="Variant LOD score">
##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality">
##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities">
##INFO=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth">
##INFO=<ID=R2_5P_bias,Number=1,Type=Float,Description="Score based on mate bias and distance from 5 prime end">
##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt vs. Ref read position bias">
##INFO=<ID=SOR,Number=1,Type=Float,Description="Symmetric Odds Ratio of 2x2 contingency table to detect strand bias">
##INFO=<ID=GENE,Number=1,Type=String,Description="Causal gene symbol. The field is present in true positive variants">
##INFO=<ID=INHERITANCE,Number=1,Type=String,Description="Mode of inheritance of the disease. The field is present in true positive variants">
##INFO=<ID=MIM,Number=1,Type=String,Description="OMIM number of the disease associated with the variant presence. The field is present in true positive variants">
##INFO=<ID=PMID,Number=1,Type=String,Description="Publication describing the variant. The field is present in true positive variants">
##FILTER=<ID=DRAGENSnpHardQUAL,Description="Set if true:QUAL < 10.41">
##FILTER=<ID=DRAGENIndelHardQUAL,Description="Set if true:QUAL < 7.83">
##FILTER=<ID=LowDepth,Description="Set if true:DP <= 1">
##FILTER=<ID=LowGQ,Description="Set if true:GQ = 0">
##FILTER=<ID=PloidyConflict,Description="Genotype call from variant caller not consistent with chromosome ploidy">
##FILTER=<ID=RMxNRepeatRegion,Description="Site filtered because all or part of the variant allele is a repeat of the reference">
##FILTER=<ID=base_quality,Description="Site filtered because median base quality of alt reads at this locus does not meet threshold">
##FILTER=<ID=lod_fstar,Description="Variant does not meet likelihood threshold (default threshold is 6.3)">
##contig=<ID=chr1,length=248956422>
##contig=<ID=chr2,length=242193529>
##contig=<ID=chr3,length=198295559>
##contig=<ID=chr4,length=190214555>
##contig=<ID=chr5,length=181538259>
##contig=<ID=chr6,length=170805979>
##contig=<ID=chr7,length=159345973>
##contig=<ID=chr8,length=145138636>
##contig=<ID=chr9,length=138394717>
##contig=<ID=chr10,length=133797422>
##contig=<ID=chr11,length=135086622>
##contig=<ID=chr12,length=133275309>
##contig=<ID=chr13,length=114364328>
##contig=<ID=chr14,length=107043718>
##contig=<ID=chr15,length=101991189>
##contig=<ID=chr16,length=90338345>
##contig=<ID=chr17,length=83257441>
##contig=<ID=chr18,length=80373285>
##contig=<ID=chr19,length=58617616>
##contig=<ID=chr20,length=64444167>
##contig=<ID=chr21,length=46709983>
##contig=<ID=chr22,length=50818468>
##contig=<ID=chrX,length=156040895>
##contig=<ID=chrY,length=57227415>
##contig=<ID=chrM,length=16569>
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT MARFAN_EXAMPLE
chr1 168044638 rs113151068 G A 284.62 PASS AC=1;AF=0.500;AN=2;DP=43 GT:AD:DP:GQ:PL 0/1:23,20:43:41:77,0,45
chr3 126006425 rs111477552 TAACA T 258.17 PASS AC=1;AF=0.500;AN=2;DP=43 GT:AD:DP:GQ:PL 0/1:23,20:43:41:77,0,45
chr15 48411070 MARFAN_PATHOGENIC C A 100 PASS GENE=FBN1;INHERITANCE=AD;MIM=154700;AC=1;AF=0.500;AN=2;DP=43 GT:AD:DP:GQ:PL 0/1:23,20:43:41:77,0,45
chrX 101539008 rs6523510 G C 739.17 PASS AC=2;AF=1.00;AN=2;DP=31 GT:AD:DP:GQ:PL 1/1:1,30:31:22.40:772,22,0
chrX 154462369 rs72227759 CTGG C 196.20 PASS AC=1;AF=0.500;AN=2;DP=9 GT:AD:DP:GQ:PL 1/1:0,9:9:23:93,26,0
chr17_KI270907v1_alt 88449 . C A 21.05 PASS AC=1;AF=0.500;AN=2;DP=122 GT:AD:DP:GQ:PL 0/1:5,2:7:21:56,0,44
chr3_KI270937v1_alt 81596 . A G 7.80 DRAGENSnpHardQUAL AC=2;AF=1.000;AN=2;DP=59 GT:AD:DP:GQ:PL 1/1:0,1:1:2:42,3,0
HLA-DRB1*09:21 5012 . T C 25.79 PASS AC=2;AF=1.000;AN=2;DP=2 GT:AD:DP:GQ:PL 1/1:0,2:2:5:62,6,0
18 changes: 18 additions & 0 deletions lirical-cli/src/examples/phenotype-and-genotype.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
## LIRICAL Analysis Template.
# Use this as a template for your own set-up.
---
# Sample ID
sampleId: MARFAN_EXAMPLE
# List of observed HPO terms
hpoIds: ['HP:0001519', 'HP:0001083', 'HP:0001653']
# List of excluded HPO terms
negatedHpoIds: ['HP:0000275']

# Age as an ISO 8601 duration (e.g. 'P12Y6M5D' for 12 years, 6 months, and 5 days).
age: P20Y6M
# Sex of the individual. Choose from {MALE, FEMALE}.
sex: FEMALE
# Path to a VCF file (optional). Uncomment the `vcf` line below and set it to the path
# of your own VCF file, or point it to the example VCF with a deleterious FBN1 variant
# stored at `lirical-cli/src/examples/marfan.vcf`.
#vcf: /path/to/marfan.vcf
13 changes: 13 additions & 0 deletions lirical-cli/src/examples/phenotype-only.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
## LIRICAL Analysis Template.
# Use this as a template for your own set-up.
---
# Sample ID
sampleId: Sample ID
# List of observed HPO terms
hpoIds: ['HP:0002352', 'HP:0002490', 'HP:0001290']
# List of excluded HPO terms
negatedHpoIds: ['HP:0000486']

# Age as an ISO 8601 duration (e.g. 'P12Y6M5D' for 12 years, 6 months, and 5 days).
# Uncomment to set.
#age:
# Sex of the individual. Choose from {MALE, FEMALE}. Uncomment to set.
#sex:
# Path to a VCF file (optional)
#vcf:
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ public AnalysisData parse(InputStream is) throws LiricalParseException {


private static Sex parseSex(String sex) {
if (sex == null)
return Sex.UNKNOWN;

return switch (sex.toLowerCase()) {
case "male" -> Sex.MALE;
case "female" -> Sex.FEMALE;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@
*/
public class YamlConfig {

private String sampleId;
private List<String> hpoIds;
private List<String> negatedHpoIds;
private String sampleId = "Sample ID";
private List<String> hpoIds = List.of();
private List<String> negatedHpoIds = List.of();
private String age;
private String sex;
private String sex = "UNKNOWN";
private String vcf;

public void setSampleId(String sampleId) {
Expand Down Expand Up @@ -66,7 +66,7 @@ public String vcf() {
}

public Optional<Path> vcfPath() {
return vcf == null ?
return vcf == null || vcf.isBlank() ?
Optional.empty()
: Optional.of(Path.of(vcf));
}
Expand Down