diff --git a/modules/nf-core/simpleaf/multiplexquant/environment.yml b/modules/nf-core/simpleaf/multiplexquant/environment.yml
new file mode 100644
index 00000000000..3befe917f1c
--- /dev/null
+++ b/modules/nf-core/simpleaf/multiplexquant/environment.yml
@@ -0,0 +1,10 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - bioconda
+  - conda-forge
+
+dependencies:
+  - bioconda::alevin-fry=0.15.0
+  - bioconda::piscem=0.20.0
+  - bioconda::simpleaf=0.25.0
diff --git a/modules/nf-core/simpleaf/multiplexquant/main.nf b/modules/nf-core/simpleaf/multiplexquant/main.nf
new file mode 100644
index 00000000000..8ef178b132e
--- /dev/null
+++ b/modules/nf-core/simpleaf/multiplexquant/main.nf
@@ -0,0 +1,123 @@
+// Quantify a sample-multiplexed single-cell library with `simpleaf multiplex-quant`.
+// Emits the mapping (af_map) and quantification (af_quant) directories and, when they
+// exist, the AnnData matrix, the probe t2g map and the auto-built probe index.
+process SIMPLEAF_MULTIPLEXQUANT {
+    tag "${meta.id}"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ?
+        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/aa/aaba033a0179fd6ccc20c677f9df1fac5d8eac2dbd1bed73c4fa9f7adb65d963/data':
+        'community.wave.seqera.io/library/simpleaf:0.25.0--b9f96d8b71a01864' }"
+
+    input:
+    //
+    // Input reads are expected as: [ meta, chemistry_preset, [ pair1_read1, pair1_read2, pair2_read1, pair2_read2 ] ]
+    // Reads are split into R1/R2 pairs and joined with commas before being passed to simpleaf.
+    //
+    tuple val(meta), val(chemistry), path(reads) // chemistry preset and reads
+    tuple val(meta2), path(index, stageAs: 'index/*'), path(t2g_map) // optional pre-built piscem probe index and t2g map
+    tuple val(meta3), path(probe_set), path(sample_bc_list), path(cell_bc_list) // optional probe set / sample-BC TSV / cell-BC whitelist overrides
+    val resolution // UMI resolution (cr-like, cr-like-em, parsimony, ...)
+
+    output:
+    tuple val(meta), path("${prefix}/af_map")                      , emit: map
+    tuple val(meta), path("${prefix}/af_quant")                    , emit: quant
+    tuple val(meta), path("${prefix}/af_quant/alevin/quants.h5ad") , emit: h5ad, optional: true
+    tuple val(meta), path("${prefix}/probe_t2g.tsv")               , emit: t2g, optional: true
+    tuple val(meta), path("${prefix}/probe_index/index")           , emit: probe_index, optional: true
+    tuple val("${task.process}"), val('alevin-fry'), eval("alevin-fry --version | sed 's/alevin-fry //'"), topic: versions, emit: versions_alevin_fry
+    tuple val("${task.process}"), val('piscem'), eval("piscem --version | sed 's/piscem //'"), topic: versions, emit: versions_piscem
+    tuple val("${task.process}"), val('simpleaf'), eval("ALEVIN_FRY_HOME=. simpleaf --version | sed 's/simpleaf //'"), topic: versions, emit: versions_simpleaf
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    // `prefix` is deliberately not declared with `def`: the output path patterns read it.
+    prefix = task.ext.prefix ?: "${meta.id}"
+
+    def mapping_args = mappingArgs(chemistry, reads)
+    def reference_args = referenceArgs(index, probe_set, sample_bc_list, cell_bc_list, t2g_map)
+
+    // Fold index/override metadata into the emitted meta; keys from `meta` win on clashes.
+    meta = meta2 + meta3 + meta
+
+    """
+    export ALEVIN_FRY_HOME=.
+    simpleaf set-paths
+
+    # run simpleaf multiplex-quant
+    simpleaf multiplex-quant \\
+        ${mapping_args} \\
+        ${reference_args} \\
+        --resolution ${resolution} \\
+        --output ${prefix} \\
+        --threads ${task.cpus} \\
+        --anndata-out \\
+        ${args}
+    """
+
+    stub:
+    prefix = task.ext.prefix ?: "${meta.id}"
+    // Merge metadata exactly as the script block does, so stub and real runs emit the same meta.
+    meta = meta2 + meta3 + meta
+
+    """
+    export ALEVIN_FRY_HOME=.
+
+    mkdir -p ${prefix}/af_map
+    mkdir -p ${prefix}/af_quant/alevin
+
+    touch ${prefix}/af_map/map.rad
+    touch ${prefix}/af_map/map_info.json
+    touch ${prefix}/af_quant/quant.json
+    touch ${prefix}/af_quant/generate_permit_list.json
+    touch ${prefix}/af_quant/alevin/quants_mat.mtx
+    touch ${prefix}/af_quant/alevin/quants_mat_rows.txt
+    touch ${prefix}/af_quant/alevin/quants_mat_cols.txt
+    touch ${prefix}/af_quant/alevin/quants.h5ad
+    touch ${prefix}/probe_t2g.tsv
+    """
+}
+
+// Build the required mapping flags for `simpleaf multiplex-quant`.
+//
+// `simpleaf multiplex-quant` requires both reads and a chemistry preset (or, with extra
+// ext.args, a --geometry override + --cell-bc-list). Only the mainstream case is enforced
+// here; non-default geometries can still be set via ext.args.
+//
+// chemistry : chemistry preset name, passed to --chemistry
+// reads     : staged read files ordered as consecutive R1/R2 pairs
+// returns   : the --chemistry/--reads1/--reads2 flags joined by shell line continuations
+// fails     : when reads or chemistry are missing, or the reads do not form complete pairs
+def mappingArgs(chemistry, reads) {
+    if (!reads) error "Missing read files; could not proceed."
+    if (!chemistry) error "Missing chemistry; could not proceed."
+
+    // A single staged file is handed over as a bare path rather than a list.
+    def read_list = reads instanceof List ? reads : [reads]
+    // An odd count cannot be split into R1/R2 pairs and would otherwise fail obscurely below.
+    if (read_list.size() % 2 != 0) {
+        error "Expected R1/R2 read pairs but received ${read_list.size()} file(s); could not proceed."
+    }
+
+    def (forward, reverse) = read_list.collate(2).transpose()
+    return """--chemistry ${chemistry} \\
+        --reads1 ${forward.join(',')} \\
+        --reads2 ${reverse.join(',')}"""
+}
+
+// Build optional reference-override flags. With none of these set, simpleaf auto-downloads
+// a probe set + sample BC TSV based on the chemistry preset and (if also provided in ext.args)
+// `--organism`. Any combination of overrides is allowed.
+def referenceArgs(index, probe_set, sample_bc_list, cell_bc_list, t2g_map) {
+    return [ // flag -> value, listed in the order the flags are emitted; unset values are dropped
+        '--index'         : index,
+        '--probe-set'     : probe_set,
+        '--sample-bc-list': sample_bc_list,
+        '--cell-bc-list'  : cell_bc_list,
+        '--t2g-map'       : t2g_map,
+    ].findAll { flag, value -> value }.collect { flag, value -> "${flag} ${value}" }.join(' \\\n ')
+}
diff --git a/modules/nf-core/simpleaf/multiplexquant/meta.yml b/modules/nf-core/simpleaf/multiplexquant/meta.yml
new file mode 100644
index 00000000000..b6ffa0fade9
--- /dev/null
+++ b/modules/nf-core/simpleaf/multiplexquant/meta.yml
@@ -0,0 +1,202 @@
+name: simpleaf_multiplexquant
+description: |
+  Quantify a sample-multiplexed single-cell library (e.g. 10x Chromium Fixed RNA
+  Profiling / Flex) end-to-end with simpleaf — auto-resolves probe set and sample
+  barcode rotation map from the chemistry preset, builds a piscem probe index if one
+  is not supplied, maps reads with `piscem map-sc`, performs hierarchical cell-barcode
+  + sample-barcode correction, collates, and quantifies. Barcodes in the resulting
+  count matrix are prefixed with the demultiplexed sample name.
+keywords:
+  - quantification
+  - gene expression
+  - multiplexed
+  - flex
+  - SimpleAF
+tools:
+  - simpleaf:
+      description: |
+        SimpleAF is a program to simplify and customize the running and configuration of single-cell processing with alevin-fry.
+      homepage: https://github.com/COMBINE-lab/simpleaf
+      documentation: https://simpleaf.readthedocs.io/en/latest/flex-quant-command.html
+      tool_dev_url: https://github.com/COMBINE-lab/simpleaf
+      licence:
+        - "BSD-3-Clause"
+      identifier: ""
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. [ id:'test', single_end:false ]
+    - chemistry:
+        type: string
+        description: |
+          Chemistry preset name (required). Typical values: `10x-flexv1-gex-3p`,
+          `10x-flexv2-gex-3p`. 
The preset drives auto-resolution of the probe set, + sample-BC TSV, and cell-BC whitelist (each overridable via the inputs + below). Custom geometry / orientation can be passed through ext.args + (`--geometry '...'`, `--expected-ori fw`) without changing the preset. + - reads: + type: file + description: | + Flat list of input FastQ files for paired-end data, ordered as consecutive R1/R2 pairs. + Example: [ R1_1.fastq.gz, R2_1.fastq.gz, R1_2.fastq.gz, R2_2.fastq.gz ] + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map describing the (optional) pre-built piscem probe index input. + e.g. [ tool:'piscem' ] + - index: + type: directory + description: | + Folder containing a pre-built piscem probe index (the directory holding `simpleaf_index.json` + or its parent). When set, simpleaf will skip auto-building the probe index. Pass `[]` + to let simpleaf auto-build from `probe_set` (or the chemistry default). + - t2g_map: + type: file + description: | + Transcript-to-gene map. Use this when running against a transcriptome reference + instead of a probe set. Pass `[]` for probe-set-based runs. + ontologies: [] + - - meta3: + type: map + description: | + Groovy Map describing (optional) probe-set / barcode-list overrides. + e.g. [ probe_set:'custom_10xFlex_v1' ] + - probe_set: + type: file + description: | + Probe set CSV or FASTA. Overrides the chemistry-preset default probe set. Pass `[]` + to use whatever the chemistry preset (and `--organism` in ext.args) auto-resolves. + ontologies: [] + - sample_bc_list: + type: file + description: | + Three-column, tab-separated file listing the sample barcodes used by the experiment: + `observed_seq`, `canonical_seq`, `sample_name`. Required when overriding the + chemistry default. Pass `[]` to use the chemistry-preset default. + ontologies: [] + - cell_bc_list: + type: file + description: | + Cell barcode whitelist (one barcode per line). Overrides the chemistry-preset + default. Pass `[]` to use the chemistry-preset default. 
+ ontologies: [] + - resolution: + type: string + description: | + UMI resolution mode (https://alevin-fry.readthedocs.io/en/latest/quant.html). + Possible values: 'cr-like', 'cr-like-em', 'parsimony', 'parsimony-em', + 'parsimony-gene', 'parsimony-gene-em'. +output: + map: + - - meta: + type: map + description: Groovy Map containing sample information + - ${prefix}/af_map: + type: directory + description: | + piscem `map-sc` output directory. Contains `map.rad`, `map_info.json` and + `unmapped_bc_count.bin`. + quant: + - - meta: + type: map + description: Groovy Map containing sample information + - ${prefix}/af_quant: + type: directory + description: | + alevin-fry quantification output directory. Contains `quant.json`, + `generate_permit_list.json`, `alevin/quants_mat.mtx`, and barcode/feature TSVs. + Barcodes in `quants_mat_rows.txt` are prefixed with the demultiplexed sample name. + h5ad: + - - meta: + type: map + description: Groovy Map containing sample information + - ${prefix}/af_quant/alevin/quants.h5ad: + type: file + description: | + AnnData representation of the count matrix, emitted because `--anndata-out` + is set by default in this module. + ontologies: [] + t2g: + - - meta: + type: map + description: Groovy Map containing sample information + - ${prefix}/probe_t2g.tsv: + type: file + description: | + Gene-level transcript-to-gene map resolved by multiplex-quant from the probe set. + ontologies: [] + probe_index: + - - meta: + type: map + description: Groovy Map containing sample information + - ${prefix}/probe_index/index: + type: directory + description: | + Auto-built piscem probe index, emitted only when no pre-built `index` was provided + on input. Useful for caching the index between runs. 
+ versions_alevin_fry: + - - ${task.process}: + type: string + description: The name of the process + - alevin-fry: + type: string + description: The name of the tool + - alevin-fry --version | sed 's/alevin-fry //': + type: eval + description: The expression to obtain the version of the tool + versions_piscem: + - - ${task.process}: + type: string + description: The name of the process + - piscem: + type: string + description: The name of the tool + - piscem --version | sed 's/piscem //': + type: eval + description: The expression to obtain the version of the tool + versions_simpleaf: + - - ${task.process}: + type: string + description: The name of the process + - simpleaf: + type: string + description: The name of the tool + - ALEVIN_FRY_HOME=. simpleaf --version | sed 's/simpleaf //': + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - alevin-fry: + type: string + description: The name of the tool + - alevin-fry --version | sed 's/alevin-fry //': + type: eval + description: The expression to obtain the version of the tool + - - ${task.process}: + type: string + description: The name of the process + - piscem: + type: string + description: The name of the tool + - piscem --version | sed 's/piscem //': + type: eval + description: The expression to obtain the version of the tool + - - ${task.process}: + type: string + description: The name of the process + - simpleaf: + type: string + description: The name of the tool + - ALEVIN_FRY_HOME=. 
simpleaf --version | sed 's/simpleaf //':
+          type: eval
+          description: The expression to obtain the version of the tool
+authors:
+  - "@an-altosian"
+maintainers:
+  - "@an-altosian"
diff --git a/modules/nf-core/simpleaf/multiplexquant/tests/main.nf.test b/modules/nf-core/simpleaf/multiplexquant/tests/main.nf.test
new file mode 100644
index 00000000000..47a69468276
--- /dev/null
+++ b/modules/nf-core/simpleaf/multiplexquant/tests/main.nf.test
@@ -0,0 +1,79 @@
+nextflow_process { // nf-test suite for SIMPLEAF_MULTIPLEXQUANT: one real run and one stub run
+
+    name "Test Process SIMPLEAF_MULTIPLEXQUANT"
+    script "../main.nf"
+    process "SIMPLEAF_MULTIPLEXQUANT"
+    config "./nextflow.config" // test-only process configuration (ext.args)
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "simpleaf"
+    tag "simpleaf/multiplexquant"
+
+    test("test_simpleaf_multiplexquant - flex - auto") { // no index / probe-set / barcode overrides supplied
+        when {
+            process {
+                """
+                meta = [id:'test_flex', single_end:false]
+                files = [
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/10xgenomics/cellranger/singleplex_flex/Human_Kidney_GEM-X_Flex_S1_L001_R1_001.subsampled.fastq.gz', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/10xgenomics/cellranger/singleplex_flex/Human_Kidney_GEM-X_Flex_S1_L001_R2_001.subsampled.fastq.gz', checkIfExists: true)
+                ]
+                input[0] = Channel.of([ meta, '10x-flexv1-gex-3p', files ])
+                input[1] = Channel.of([ [:], [], [] ])
+                input[2] = Channel.of([ [:], [], [], [] ])
+                input[3] = Channel.of('cr-like')
+                """
+            }
+        }
+
+        then {
+            assertAll( // output files are checked for existence only; just the versions are snapshotted
+                { assert process.success },
+                { assert file("${process.out.map.get(0).get(1)}/map.rad").exists() },
+                { assert file("${process.out.map.get(0).get(1)}/map_info.json").exists() },
+                { assert file("${process.out.map.get(0).get(1)}/unmapped_bc_count.bin").exists() },
+                { assert file("${process.out.quant.get(0).get(1)}/collate.json").exists() },
+                { assert file("${process.out.quant.get(0).get(1)}/generate_permit_list.json").exists() },
+                { assert file("${process.out.quant.get(0).get(1)}/quant.json").exists() },
+                { assert file("${process.out.quant.get(0).get(1)}/featureDump.txt").exists() },
+                { assert file("${process.out.quant.get(0).get(1)}/sample_info.json").exists() },
+                { assert file("${process.out.quant.get(0).get(1)}/sample_permit_map.bin").exists() },
+                { assert file("${process.out.quant.get(0).get(1)}/simpleaf_multiplex_quant_info.json").exists() },
+                { assert file("${process.out.quant.get(0).get(1)}/map.collated.rad").exists() },
+                { assert file("${process.out.quant.get(0).get(1)}/alevin/quants_mat.mtx").exists() },
+                { assert file("${process.out.quant.get(0).get(1)}/alevin/quants_mat_rows.txt").exists() },
+                { assert file("${process.out.quant.get(0).get(1)}/alevin/quants_mat_cols.txt").exists() },
+                { assert file("${process.out.quant.get(0).get(1)}/alevin/quants.h5ad").exists() },
+                { assert snapshot(process.out.findAll { key, val -> key.startsWith("versions")}).match() } // keep only the versions_* channels
+            )
+        }
+    }
+
+    test("test_simpleaf_multiplexquant - flex - auto - stub") { // same inputs as above, run with -stub-run
+        options "-stub-run"
+
+        when {
+            process {
+                """
+                meta = [id:'test_flex', single_end:false]
+                files = [
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/10xgenomics/cellranger/singleplex_flex/Human_Kidney_GEM-X_Flex_S1_L001_R1_001.subsampled.fastq.gz', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/10xgenomics/cellranger/singleplex_flex/Human_Kidney_GEM-X_Flex_S1_L001_R2_001.subsampled.fastq.gz', checkIfExists: true)
+                ]
+                input[0] = Channel.of([ meta, '10x-flexv1-gex-3p', files ])
+                input[1] = Channel.of([ [:], [], [] ])
+                input[2] = Channel.of([ [:], [], [], [] ])
+                input[3] = Channel.of('cr-like')
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() } // the whole output set is snapshotted for the stub run
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/simpleaf/multiplexquant/tests/main.nf.test.snap b/modules/nf-core/simpleaf/multiplexquant/tests/main.nf.test.snap
new file mode 100644
index 00000000000..a6f1a501a92
--- /dev/null
+++ 
b/modules/nf-core/simpleaf/multiplexquant/tests/main.nf.test.snap @@ -0,0 +1,189 @@ +{ + "test_simpleaf_multiplexquant - flex - auto": { + "content": [ + { + "versions_alevin_fry": [ + [ + "SIMPLEAF_MULTIPLEXQUANT", + "alevin-fry", + "0.15.0" + ] + ], + "versions_piscem": [ + [ + "SIMPLEAF_MULTIPLEXQUANT", + "piscem", + "0.20.0" + ] + ], + "versions_simpleaf": [ + [ + "SIMPLEAF_MULTIPLEXQUANT", + "simpleaf", + "0.25.0" + ] + ] + } + ], + "timestamp": "2026-05-20T15:38:41.653140292", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } + }, + "test_simpleaf_multiplexquant - flex - auto - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test_flex", + "single_end": false + }, + [ + "map.rad:md5,d41d8cd98f00b204e9800998ecf8427e", + "map_info.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + [ + { + "id": "test_flex", + "single_end": false + }, + [ + [ + "quants.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e", + "quants_mat.mtx:md5,d41d8cd98f00b204e9800998ecf8427e", + "quants_mat_cols.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "quants_mat_rows.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "generate_permit_list.json:md5,d41d8cd98f00b204e9800998ecf8427e", + "quant.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "2": [ + [ + { + "id": "test_flex", + "single_end": false + }, + "quants.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test_flex", + "single_end": false + }, + "probe_t2g.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + [ + "SIMPLEAF_MULTIPLEXQUANT", + "alevin-fry", + "0.15.0" + ] + ], + "6": [ + [ + "SIMPLEAF_MULTIPLEXQUANT", + "piscem", + "0.20.0" + ] + ], + "7": [ + [ + "SIMPLEAF_MULTIPLEXQUANT", + "simpleaf", + "0.25.0" + ] + ], + "h5ad": [ + [ + { + "id": "test_flex", + "single_end": false + }, + "quants.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "map": [ + [ + { + "id": "test_flex", + "single_end": false + }, + [ + 
"map.rad:md5,d41d8cd98f00b204e9800998ecf8427e", + "map_info.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "probe_index": [ + + ], + "quant": [ + [ + { + "id": "test_flex", + "single_end": false + }, + [ + [ + "quants.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e", + "quants_mat.mtx:md5,d41d8cd98f00b204e9800998ecf8427e", + "quants_mat_cols.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "quants_mat_rows.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "generate_permit_list.json:md5,d41d8cd98f00b204e9800998ecf8427e", + "quant.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "t2g": [ + [ + { + "id": "test_flex", + "single_end": false + }, + "probe_t2g.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_alevin_fry": [ + [ + "SIMPLEAF_MULTIPLEXQUANT", + "alevin-fry", + "0.15.0" + ] + ], + "versions_piscem": [ + [ + "SIMPLEAF_MULTIPLEXQUANT", + "piscem", + "0.20.0" + ] + ], + "versions_simpleaf": [ + [ + "SIMPLEAF_MULTIPLEXQUANT", + "simpleaf", + "0.25.0" + ] + ] + } + ], + "timestamp": "2026-05-20T15:38:48.085984091", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } + } +} \ No newline at end of file diff --git a/modules/nf-core/simpleaf/multiplexquant/tests/nextflow.config b/modules/nf-core/simpleaf/multiplexquant/tests/nextflow.config new file mode 100644 index 00000000000..0e8853ed131 --- /dev/null +++ b/modules/nf-core/simpleaf/multiplexquant/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 'SIMPLEAF_MULTIPLEXQUANT' { + ext.args = '--organism human' + } +}