From 8fa87d4d6979ebe999636174de2411375c4b7146 Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Sat, 21 Mar 2026 21:24:50 +0800 Subject: [PATCH 01/17] Add regenie/step1 module --- modules/nf-core/regenie/step1/environment.yml | 7 + modules/nf-core/regenie/step1/main.nf | 55 +++++ modules/nf-core/regenie/step1/meta.yml | 141 +++++++++++ .../nf-core/regenie/step1/tests/main.nf.test | 222 ++++++++++++++++++ .../regenie/step1/tests/main.nf.test.snap | 153 ++++++++++++ .../regenie/step1/tests/nextflow.config | 3 + 6 files changed, 581 insertions(+) create mode 100644 modules/nf-core/regenie/step1/environment.yml create mode 100644 modules/nf-core/regenie/step1/main.nf create mode 100644 modules/nf-core/regenie/step1/meta.yml create mode 100644 modules/nf-core/regenie/step1/tests/main.nf.test create mode 100644 modules/nf-core/regenie/step1/tests/main.nf.test.snap create mode 100644 modules/nf-core/regenie/step1/tests/nextflow.config diff --git a/modules/nf-core/regenie/step1/environment.yml b/modules/nf-core/regenie/step1/environment.yml new file mode 100644 index 000000000000..98fe8277cc05 --- /dev/null +++ b/modules/nf-core/regenie/step1/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::regenie=4.1.2" diff --git a/modules/nf-core/regenie/step1/main.nf b/modules/nf-core/regenie/step1/main.nf new file mode 100644 index 000000000000..1786071b248b --- /dev/null +++ b/modules/nf-core/regenie/step1/main.nf @@ -0,0 +1,55 @@ +process REGENIE_STEP1 { + tag "${meta.id}:${meta2.pheno_col}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'oras://community.wave.seqera.io/library/regenie:4.1.2--7c121fb4ecd57890' + : 'community.wave.seqera.io/library/regenie:4.1.2--5d361f9fcb2f85cf'}" + + input: + tuple val(meta), path(plink_genotype_file), path(plink_variant_file), path(plink_sample_file) + tuple val(meta2), path(pheno) + tuple val(meta3), path(covar) + val(bsize) + + output: + tuple val(meta2), path("*_pred.list"), path("*.loco.gz"), emit: predictions + tuple val(meta2), path("*.log"), emit: log + tuple val("${task.process}"), val('regenie'), eval("regenie --version 2>&1 | head -n 1"), topic: versions, emit: versions_regenie + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def binary_arg = meta2.is_binary ? '--bt' : '' + def covar_arg = covar ? "--covarFile ${covar}" : '' + def pheno_col = meta2.pheno_col + def genotype_flag = plink_genotype_file.name.endsWith('.pgen') ? '--pgen' : '--bed' + def prefix = task.ext.prefix ?: "${meta.id}" + def bsize_arg = bsize ?: 1000 + + """ + regenie \\ + --step 1 \\ + ${genotype_flag} ${prefix} \\ + --phenoFile ${pheno} \\ + --phenoColList ${pheno_col} \\ + ${covar_arg} \\ + ${binary_arg} \\ + --bsize ${bsize_arg} \\ + --gz \\ + --threads ${task.cpus} \\ + ${args} \\ + --out ${prefix} + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}_pred.list + printf '' | gzip > ${prefix}_1.loco.gz + touch ${prefix}.log + """ +} diff --git a/modules/nf-core/regenie/step1/meta.yml b/modules/nf-core/regenie/step1/meta.yml new file mode 100644 index 000000000000..e189e1f67e40 --- /dev/null +++ b/modules/nf-core/regenie/step1/meta.yml @@ -0,0 +1,141 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "regenie_step1" +description: Run REGENIE step 1 to fit whole-genome regression models and emit LOCO predictions +keywords: + - regenie + - gwas + - association + - burden test + - genomics +tools: + - "regenie": + description: "Regenie is a C++ program for whole genome regression modelling of large genome-wide association studies (GWAS)." + homepage: "https://rgcgithub.github.io/regenie/" + documentation: "https://rgcgithub.github.io/regenie/options/" + tool_dev_url: "https://github.com/rgcgithub/regenie" + doi: "10.1038/s41588-021-00870-7" + licence: ["MIT"] + identifier: "biotools:regenie" + +input: + - - meta: + type: map + description: | + Groovy Map containing genotype information + Keep only the genotype analysis identifier in this map + The PLINK bundle must already be staged with basename `meta.id` + e.g. `[ id:'cohort' ]` + - plink_genotype_file: + type: file + description: PLINK primary genotype file in BED or PGEN format + pattern: "*.{bed,pgen}" + ontologies: + - edam: "http://edamontology.org/format_3003" # BED + - plink_variant_file: + type: file + description: PLINK variant metadata file in BIM or PVAR format + pattern: "*.{bim,pvar,zst}" + ontologies: [] + - plink_sample_file: + type: file + description: PLINK sample metadata file in FAM or PSAM format + pattern: "*.{fam,psam}" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing phenotype file information and phenotype selector + Keep `id` for the phenotype file identity and use `pheno_col` for the phenotype column passed to `--phenoColList` + e.g. `[ id:'plink_simulated_quantitative_phenoname', pheno_col:'QuantitativeTrait', is_binary:true ]` + - pheno: + type: file + description: Phenotype file passed to `--phenoFile` + pattern: "*.{phe,pheno,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" # TSV + - - meta3: + type: map + description: | + Groovy Map containing covariate file information + e.g. `[ id:'covariates' ]` + - covar: + type: file + optional: true + description: Optional covariate file passed to `--covarFile`; provide `[]` when absent + pattern: "*.{covar,cov,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" # TSV + - bsize: + type: integer + description: Optional block size passed to `--bsize`; pass `[]` to use the module default of `1000` + +output: + predictions: + - - meta2: + type: map + description: | + Groovy Map containing phenotype file information and phenotype selector + e.g. `[ id:'plink_simulated_quantitative_phenoname', pheno_col:'QuantitativeTrait', is_binary:true ]` + - "*_pred.list": + type: file + description: REGENIE prediction list file + pattern: "*_pred.list" + ontologies: [] + - "*.loco.gz": + type: file + description: REGENIE LOCO prediction files + pattern: "*.loco.gz" + ontologies: + - edam: "http://edamontology.org/format_3987" # GZIP + log: + - - meta2: + type: map + description: | + Groovy Map containing phenotype file information and phenotype selector + e.g. `[ id:'plink_simulated_quantitative_phenoname', pheno_col:'QuantitativeTrait', is_binary:true ]` + - "*.log": + type: file + description: REGENIE step 1 log file + pattern: "*.log" + ontologies: + - edam: "http://edamontology.org/format_2330" # Text + versions_regenie: + - - "${task.process}": + type: string + description: The process the versions were collected from + - "regenie": + type: string + description: The tool name + - "regenie --version 2>&1 | head -n 1": + type: eval + description: The command used to generate the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - regenie: + type: string + description: The tool name + - regenie --version 2>&1 | head -n 1: + type: eval + description: The command used to generate the version of the tool +authors: + - "@andongni" +maintainers: + - "@andongni" +containers: + conda: + linux_amd64: + lock_file: "https://wave.seqera.io/v1alpha1/builds/bd-5d361f9fcb2f85cf_1/condalock" + docker: + linux_amd64: + build_id: "bd-5d361f9fcb2f85cf_1" + name: "community.wave.seqera.io/library/regenie:4.1.2--5d361f9fcb2f85cf" + scanId: "sc-cc9eb5ed5eb381dd_2" + singularity: + linux_amd64: + build_id: "bd-7c121fb4ecd57890_1" + name: "oras://community.wave.seqera.io/library/regenie:4.1.2--7c121fb4ecd57890" + https: "https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/7a/7a05bf71ea09adc5ebf9f0c656c9b326c0f16ba8e4966914972e58313469a466/data" diff --git a/modules/nf-core/regenie/step1/tests/main.nf.test b/modules/nf-core/regenie/step1/tests/main.nf.test new file mode 100644 index 000000000000..04f42d375c93 --- /dev/null +++ b/modules/nf-core/regenie/step1/tests/main.nf.test @@ -0,0 +1,222 @@ +nextflow_process { + + name "Test Process REGENIE_STEP1" + script "../main.nf" + process "REGENIE_STEP1" + + tag "modules" + tag "modules_nfcore" + tag "regenie" + tag "regenie/step1" + + test("homo_sapiens popgen - quantitative plink1 with covariates") { + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + + input[1] = [ + [ id:'plink_simulated_quantitative_phenoname', pheno_col:'QuantitativeTrait', is_binary:false ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + ] + + input[2] = [ + [ id:'covariates' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) + ] + + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.predictions.size() == 1 }, + { assert process.out.log.size() == 1 }, + { assert process.out.predictions.get(0).get(0).id == 'plink_simulated_quantitative_phenoname' }, + { assert process.out.predictions.get(0).get(0).pheno_col == 'QuantitativeTrait' }, + { assert path(process.out.log.get(0).get(1)).exists() }, + { + def predList = path(process.out.predictions.get(0).get(1)) + def predListFields = predList.text.trim().split(/\s+/, 2) + assert predList.exists() + assert predListFields.size() == 2 + assert predListFields[0] == 'QuantitativeTrait' + assert predListFields[1].endsWith('/plink_simulated_1.loco.gz') + }, + { + def stablePredictions = process.out.predictions.collect { prediction -> + [prediction[0], prediction[2]] + } + assert snapshot( + stablePredictions, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + + } + + test("homo_sapiens popgen - binary plink1 with covariates") { + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + + input[1] = [ + [ id:'plink_simulated_binary_phenoname', pheno_col:'BinaryTrait', is_binary:true ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_binary_phenoname.phe', checkIfExists: true) + ] + + input[2] = [ + [ id:'covariates' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) + ] + + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.predictions.size() == 1 }, + { assert process.out.log.size() == 1 }, + { assert process.out.predictions.get(0).get(0).id == 'plink_simulated_binary_phenoname' }, + { assert process.out.predictions.get(0).get(0).pheno_col == 'BinaryTrait' }, + { assert path(process.out.log.get(0).get(1)).exists() }, + { + def predList = path(process.out.predictions.get(0).get(1)) + def predListFields = predList.text.trim().split(/\s+/, 2) + assert predList.exists() + assert predListFields.size() == 2 + assert predListFields[0] == 'BinaryTrait' + assert predListFields[1].endsWith('/plink_simulated_1.loco.gz') + }, + { + def stablePredictions = process.out.predictions.collect { prediction -> + [prediction[0], prediction[2]] + } + assert snapshot( + stablePredictions, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + + } + + test("homo_sapiens popgen - quantitative plink1 without covariates") { + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + + input[1] = [ + [ id:'plink_simulated_quantitative_phenoname', pheno_col:'QuantitativeTrait', is_binary:false ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + ] + + input[2] = [[:], []] + + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.predictions.size() == 1 }, + { assert process.out.log.size() == 1 }, + { assert process.out.predictions.get(0).get(0).id == 'plink_simulated_quantitative_phenoname' }, + { assert process.out.predictions.get(0).get(0).pheno_col == 'QuantitativeTrait' }, + { assert path(process.out.log.get(0).get(1)).exists() }, + { + def predList = path(process.out.predictions.get(0).get(1)) + def predListFields = predList.text.trim().split(/\s+/, 2) + assert predList.exists() + assert predListFields.size() == 2 + assert predListFields[0] == 'QuantitativeTrait' + assert predListFields[1].endsWith('/plink_simulated_1.loco.gz') + }, + { + def stablePredictions = process.out.predictions.collect { prediction -> + [prediction[0], prediction[2]] + } + assert snapshot( + stablePredictions, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + + } + + test("homo_sapiens popgen - plink2 - stub") { + + options "-stub" + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.pgen', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.pvar', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.psam', checkIfExists: true) + ] + + input[1] = [ + [ id:'plink_simulated_quantitative_phenoname', pheno_col:'QuantitativeTrait', is_binary:false ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + ] + + input[2] = [ + [ id:'covariates' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) + ] + + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/regenie/step1/tests/main.nf.test.snap b/modules/nf-core/regenie/step1/tests/main.nf.test.snap new file mode 100644 index 000000000000..88816b6fc9b1 --- /dev/null +++ b/modules/nf-core/regenie/step1/tests/main.nf.test.snap @@ -0,0 +1,153 @@ +{ + "homo_sapiens popgen - quantitative plink1 with covariates": { + "content": [ + [ + [ + { + "id": "plink_simulated_quantitative_phenoname", + "pheno_col": "QuantitativeTrait", + "is_binary": false + }, + "plink_simulated_1.loco.gz:md5,93aa1d97d8f164e57cd8fb2551a482f3" + ] + ], + { + "versions_regenie": [ + [ + "REGENIE_STEP1", + "regenie", + "v4.1.2.gz" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-18T19:00:00.400329317" + }, + "homo_sapiens popgen - quantitative plink1 without covariates": { + "content": [ + [ + [ + { + "id": "plink_simulated_quantitative_phenoname", + "pheno_col": "QuantitativeTrait", + "is_binary": false + }, + "plink_simulated_1.loco.gz:md5,72bbd85ce5e9b6ce1a9aa8237521ba22" + ] + ], + { + "versions_regenie": [ + [ + "REGENIE_STEP1", + "regenie", + "v4.1.2.gz" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-18T19:00:08.647419408" + }, + "homo_sapiens popgen - plink2 - stub": { + "content": [ + { + "0": [ + [ + { + "id": "plink_simulated_quantitative_phenoname", + "pheno_col": "QuantitativeTrait", + "is_binary": false + }, + "plink_simulated_pred.list:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_1.loco.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "plink_simulated_quantitative_phenoname", + "pheno_col": "QuantitativeTrait", + "is_binary": false + }, + "plink_simulated.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + "REGENIE_STEP1", + "regenie", + "v4.1.2.gz" + ] + ], + "log": [ + [ + { + "id": "plink_simulated_quantitative_phenoname", + "pheno_col": "QuantitativeTrait", + "is_binary": false + }, + "plink_simulated.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "predictions": [ + [ + { + "id": "plink_simulated_quantitative_phenoname", + "pheno_col": "QuantitativeTrait", + "is_binary": false + }, + "plink_simulated_pred.list:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_1.loco.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_regenie": [ + [ + "REGENIE_STEP1", + "regenie", + "v4.1.2.gz" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-16T13:47:26.824816889" + }, + "homo_sapiens popgen - binary plink1 with covariates": { + "content": [ + [ + [ + { + "id": "plink_simulated_binary_phenoname", + "pheno_col": "BinaryTrait", + "is_binary": true + }, + "plink_simulated_1.loco.gz:md5,ec5520f07607f2989a6d4f5b80a2b16b" + ] + ], + { + "versions_regenie": [ + [ + "REGENIE_STEP1", + "regenie", + "v4.1.2.gz" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-16T13:47:12.636697182" + } +} \ No newline at end of file diff --git a/modules/nf-core/regenie/step1/tests/nextflow.config b/modules/nf-core/regenie/step1/tests/nextflow.config new file mode 100644 index 000000000000..76b9ab148074 --- /dev/null +++ b/modules/nf-core/regenie/step1/tests/nextflow.config @@ -0,0 +1,3 @@ +params { + modules_testdata_base_path = System.getenv('NF_MODULES_TESTDATA_BASE_PATH') ?: 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' +} From 5958522cb961d8d768a9152a9efd4e294173e0ce Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Tue, 14 Apr 2026 16:04:21 +0800 Subject: [PATCH 02/17] Refine REGENIE step1 inputs and remove PR test config --- modules/nf-core/regenie/step1/main.nf | 17 +++++------ modules/nf-core/regenie/step1/meta.yml | 28 +++++++++++-------- .../regenie/step1/tests/nextflow.config | 3 -- 3 files changed, 26 insertions(+), 22 deletions(-) delete mode 100644 modules/nf-core/regenie/step1/tests/nextflow.config diff --git a/modules/nf-core/regenie/step1/main.nf b/modules/nf-core/regenie/step1/main.nf index 1786071b248b..dcb750ee9a4a 100644 --- a/modules/nf-core/regenie/step1/main.nf +++ b/modules/nf-core/regenie/step1/main.nf @@ -1,5 +1,5 @@ process REGENIE_STEP1 { - tag "${meta.id}:${meta2.pheno_col}" + tag "${meta.id}" label 'process_medium' conda "${moduleDir}/environment.yml" @@ -11,23 +11,24 @@ process REGENIE_STEP1 { tuple val(meta), path(plink_genotype_file), path(plink_variant_file), path(plink_sample_file) tuple val(meta2), path(pheno) tuple val(meta3), path(covar) - val(bsize) + val pheno_col + val is_binary + val bsize output: tuple val(meta2), path("*_pred.list"), path("*.loco.gz"), emit: predictions tuple val(meta2), path("*.log"), emit: log - tuple val("${task.process}"), val('regenie'), eval("regenie --version 2>&1 | head -n 1"), topic: versions, emit: versions_regenie + tuple val("${task.process}"), val('regenie'), eval('regenie --version 2>&1 | sed -n "1{s/^v//;s/\\.gz$//;p}"'), topic: versions, emit: versions_regenie when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' - def binary_arg = meta2.is_binary ? '--bt' : '' + def binary_arg = is_binary ? '--bt' : '' def covar_arg = covar ? "--covarFile ${covar}" : '' - def pheno_col = meta2.pheno_col def genotype_flag = plink_genotype_file.name.endsWith('.pgen') ? '--pgen' : '--bed' - def prefix = task.ext.prefix ?: "${meta.id}" + def prefix = plink_genotype_file.baseName def bsize_arg = bsize ?: 1000 """ @@ -46,10 +47,10 @@ process REGENIE_STEP1 { """ stub: - def prefix = task.ext.prefix ?: "${meta.id}" + def prefix = plink_genotype_file.baseName """ touch ${prefix}_pred.list - printf '' | gzip > ${prefix}_1.loco.gz + echo | gzip > ${prefix}_1.loco.gz touch ${prefix}.log """ } diff --git a/modules/nf-core/regenie/step1/meta.yml b/modules/nf-core/regenie/step1/meta.yml index e189e1f67e40..4e8d16f18916 100644 --- a/modules/nf-core/regenie/step1/meta.yml +++ b/modules/nf-core/regenie/step1/meta.yml @@ -23,7 +23,7 @@ input: description: | Groovy Map containing genotype information Keep only the genotype analysis identifier in this map - The PLINK bundle must already be staged with basename `meta.id` + REGENIE consumes the staged basename of `plink_genotype_file` as the `--bed` or `--pgen` prefix, so the `.bed/.bim/.fam` or `.pgen/.pvar/.psam` files must share one basename e.g. `[ id:'cohort' ]` - plink_genotype_file: type: file @@ -44,9 +44,9 @@ input: - - meta2: type: map description: | - Groovy Map containing phenotype file information and phenotype selector - Keep `id` for the phenotype file identity and use `pheno_col` for the phenotype column passed to `--phenoColList` - e.g. `[ id:'plink_simulated_quantitative_phenoname', pheno_col:'QuantitativeTrait', is_binary:true ]` + Groovy Map containing phenotype file information + Keep only phenotype provenance in this map + e.g. `[ id:'plink_simulated_quantitative_phenoname' ]` - pheno: type: file description: Phenotype file passed to `--phenoFile` @@ -65,6 +65,12 @@ input: pattern: "*.{covar,cov,txt,tsv}" ontologies: - edam: "http://edamontology.org/format_3475" # TSV + - pheno_col: + type: string + description: Phenotype column passed to `--phenoColList` + - is_binary: + type: boolean + description: Whether to add `--bt` for a binary trait - bsize: type: integer description: Optional block size passed to `--bsize`; pass `[]` to use the module default of `1000` @@ -74,8 +80,8 @@ output: - - meta2: type: map description: | - Groovy Map containing phenotype file information and phenotype selector - e.g. `[ id:'plink_simulated_quantitative_phenoname', pheno_col:'QuantitativeTrait', is_binary:true ]` + Groovy Map containing phenotype file information + e.g. `[ id:'plink_simulated_quantitative_phenoname' ]` - "*_pred.list": type: file description: REGENIE prediction list file @@ -91,8 +97,8 @@ output: - - meta2: type: map description: | - Groovy Map containing phenotype file information and phenotype selector - e.g. `[ id:'plink_simulated_quantitative_phenoname', pheno_col:'QuantitativeTrait', is_binary:true ]` + Groovy Map containing phenotype file information + e.g. `[ id:'plink_simulated_quantitative_phenoname' ]` - "*.log": type: file description: REGENIE step 1 log file @@ -106,7 +112,7 @@ output: - "regenie": type: string description: The tool name - - "regenie --version 2>&1 | head -n 1": + - 'regenie --version 2>&1 | sed -n "1{s/^v//;s/\.gz$//;p}"': type: eval description: The command used to generate the version of the tool @@ -118,7 +124,7 @@ topics: - regenie: type: string description: The tool name - - regenie --version 2>&1 | head -n 1: + - 'regenie --version 2>&1 | sed -n "1{s/^v//;s/\.gz$//;p}"': type: eval description: The command used to generate the version of the tool authors: @@ -128,7 +134,7 @@ maintainers: containers: conda: linux_amd64: - lock_file: "https://wave.seqera.io/v1alpha1/builds/bd-5d361f9fcb2f85cf_1/condalock" + lock_file: "modules/nf-core/regenie/step1/.conda-lock/linux_amd64-bd-5d361f9fcb2f85cf_1.txt" docker: linux_amd64: build_id: "bd-5d361f9fcb2f85cf_1" diff --git a/modules/nf-core/regenie/step1/tests/nextflow.config b/modules/nf-core/regenie/step1/tests/nextflow.config deleted file mode 100644 index 76b9ab148074..000000000000 --- a/modules/nf-core/regenie/step1/tests/nextflow.config +++ /dev/null @@ -1,3 +0,0 @@ -params { - modules_testdata_base_path = System.getenv('NF_MODULES_TESTDATA_BASE_PATH') ?: 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' -} From 2372f6dd16dd39132948410cc0514e4586aa604f Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Tue, 14 Apr 2026 16:05:04 +0800 Subject: [PATCH 03/17] Expand REGENIE step1 tests for explicit phenotype inputs --- .../nf-core/regenie/step1/tests/main.nf.test | 132 ++++++++++++++---- .../regenie/step1/tests/main.nf.test.snap | 94 ++++--------- 2 files changed, 134 insertions(+), 92 deletions(-) diff --git a/modules/nf-core/regenie/step1/tests/main.nf.test b/modules/nf-core/regenie/step1/tests/main.nf.test index 04f42d375c93..0657e726a8c3 100644 --- a/modules/nf-core/regenie/step1/tests/main.nf.test +++ b/modules/nf-core/regenie/step1/tests/main.nf.test @@ -10,8 +10,6 @@ nextflow_process { tag "regenie/step1" test("homo_sapiens popgen - quantitative plink1 with covariates") { - config "./nextflow.config" - when { process { """ @@ -23,7 +21,7 @@ nextflow_process { ] input[1] = [ - [ id:'plink_simulated_quantitative_phenoname', pheno_col:'QuantitativeTrait', is_binary:false ], + [ id:'plink_simulated_quantitative_phenoname' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) ] @@ -32,7 +30,9 @@ nextflow_process { file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) ] - input[3] = [] + input[3] = 'QuantitativeTrait' + input[4] = false + input[5] = [] """ } } @@ -43,15 +43,18 @@ nextflow_process { { assert process.out.predictions.size() == 1 }, { assert process.out.log.size() == 1 }, { assert process.out.predictions.get(0).get(0).id == 'plink_simulated_quantitative_phenoname' }, - { assert process.out.predictions.get(0).get(0).pheno_col == 'QuantitativeTrait' }, + { assert process.out.log.get(0).get(0).id == 'plink_simulated_quantitative_phenoname' }, { assert path(process.out.log.get(0).get(1)).exists() }, { def predList = path(process.out.predictions.get(0).get(1)) - def predListFields = predList.text.trim().split(/\s+/, 2) + def locoFile = path(process.out.predictions.get(0).get(2)) + def predListLines = predList.text.readLines().findAll { it } assert predList.exists() + assert predListLines.size() == 1 + def predListFields = predListLines[0].split(/\s+/) assert predListFields.size() == 2 assert predListFields[0] == 'QuantitativeTrait' - assert predListFields[1].endsWith('/plink_simulated_1.loco.gz') + assert predListFields[1] == locoFile.toString() }, { def stablePredictions = process.out.predictions.collect { prediction -> @@ -68,8 +71,6 @@ nextflow_process { } test("homo_sapiens popgen - binary plink1 with covariates") { - config "./nextflow.config" - when { process { """ @@ -81,7 +82,7 @@ nextflow_process { ] input[1] = [ - [ id:'plink_simulated_binary_phenoname', pheno_col:'BinaryTrait', is_binary:true ], + [ id:'plink_simulated_binary_phenoname' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_binary_phenoname.phe', checkIfExists: true) ] @@ -90,7 +91,9 @@ nextflow_process { file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) ] - input[3] = [] + input[3] = 'BinaryTrait' + input[4] = true + input[5] = [] """ } } @@ -101,15 +104,18 @@ nextflow_process { { assert process.out.predictions.size() == 1 }, { assert process.out.log.size() == 1 }, { assert process.out.predictions.get(0).get(0).id == 'plink_simulated_binary_phenoname' }, - { assert process.out.predictions.get(0).get(0).pheno_col == 'BinaryTrait' }, + { assert process.out.log.get(0).get(0).id == 'plink_simulated_binary_phenoname' }, { assert path(process.out.log.get(0).get(1)).exists() }, { def predList = path(process.out.predictions.get(0).get(1)) - def predListFields = predList.text.trim().split(/\s+/, 2) + def locoFile = path(process.out.predictions.get(0).get(2)) + def predListLines = predList.text.readLines().findAll { it } assert predList.exists() + assert predListLines.size() == 1 + def predListFields = predListLines[0].split(/\s+/) assert predListFields.size() == 2 assert predListFields[0] == 'BinaryTrait' - assert predListFields[1].endsWith('/plink_simulated_1.loco.gz') + assert predListFields[1] == locoFile.toString() }, { def stablePredictions = process.out.predictions.collect { prediction -> @@ -126,8 +132,6 @@ nextflow_process { } test("homo_sapiens popgen - quantitative plink1 without covariates") { - config "./nextflow.config" - when { process { """ @@ -139,13 +143,14 @@ nextflow_process { ] input[1] = [ - [ id:'plink_simulated_quantitative_phenoname', pheno_col:'QuantitativeTrait', is_binary:false ], + [ id:'plink_simulated_quantitative_phenoname' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) ] input[2] = [[:], []] - - input[3] = [] + input[3] = 'QuantitativeTrait' + input[4] = false + input[5] = [] """ } } @@ -156,15 +161,18 @@ nextflow_process { { assert process.out.predictions.size() == 1 }, { assert process.out.log.size() == 1 }, { assert process.out.predictions.get(0).get(0).id == 'plink_simulated_quantitative_phenoname' }, - { assert process.out.predictions.get(0).get(0).pheno_col == 'QuantitativeTrait' }, + { assert process.out.log.get(0).get(0).id == 'plink_simulated_quantitative_phenoname' }, { assert path(process.out.log.get(0).get(1)).exists() }, { def predList = path(process.out.predictions.get(0).get(1)) - def predListFields = predList.text.trim().split(/\s+/, 2) + def locoFile = path(process.out.predictions.get(0).get(2)) + def predListLines = predList.text.readLines().findAll { it } assert predList.exists() + assert predListLines.size() == 1 + def predListFields = predListLines[0].split(/\s+/) assert predListFields.size() == 2 assert predListFields[0] == 'QuantitativeTrait' - assert predListFields[1].endsWith('/plink_simulated_1.loco.gz') + assert predListFields[1] == locoFile.toString() }, { def stablePredictions = process.out.predictions.collect { prediction -> @@ -180,10 +188,68 @@ nextflow_process { } + test("homo_sapiens popgen - quantitative plink2 with covariates and custom bsize") { + when { + process { + """ + input[0] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.pgen', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.pvar', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.psam', checkIfExists: true) + ] + + input[1] = [ + [ id:'plink_simulated_quantitative_phenoname' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + ] + + input[2] = [ + [ id:'covariates' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) + ] + + input[3] = 'QuantitativeTrait' + input[4] = false + input[5] = 256 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.predictions.size() == 1 }, + { assert process.out.log.size() == 1 }, + { assert process.out.predictions.get(0).get(0).id == 'plink_simulated_quantitative_phenoname' }, + { assert process.out.log.get(0).get(0).id == 'plink_simulated_quantitative_phenoname' }, + { assert path(process.out.log.get(0).get(1)).exists() }, + { + def predList = path(process.out.predictions.get(0).get(1)) + def locoFile = path(process.out.predictions.get(0).get(2)) + def predListLines = predList.text.readLines().findAll { it } + assert predList.exists() + assert predListLines.size() == 1 + def predListFields = predListLines[0].split(/\s+/) + assert predListFields.size() == 2 + assert predListFields[0] == 'QuantitativeTrait' + assert predListFields[1] == locoFile.toString() + }, + { assert path(process.out.predictions.get(0).get(2)).exists() }, + { + def versionInfo = process.out.versions_regenie.get(0) + assert versionInfo[0] == 'REGENIE_STEP1' + assert versionInfo[1] == 'regenie' + assert versionInfo[2] == '4.1.2' + } + ) + } + + } + test("homo_sapiens popgen - plink2 - stub") { options "-stub" - config "./nextflow.config" when { process { @@ -196,7 +262,7 @@ nextflow_process { ] input[1] = [ - [ id:'plink_simulated_quantitative_phenoname', pheno_col:'QuantitativeTrait', is_binary:false ], + [ id:'plink_simulated_quantitative_phenoname' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) ] @@ -205,7 +271,9 @@ nextflow_process { file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) ] - input[3] = [] + input[3] = 'QuantitativeTrait' + input[4] = false + input[5] = 256 """ } } @@ -213,7 +281,19 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { + def stablePredictions = process.out.predictions.collect { prediction -> + [prediction[0], path(prediction[1]).getFileName().toString(), path(prediction[2]).getFileName().toString()] + } + def stableLogs = process.out.log.collect { logTuple -> + [logTuple[0], path(logTuple[1]).getFileName().toString()] + } + assert snapshot( + stablePredictions, + stableLogs, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } ) } diff --git a/modules/nf-core/regenie/step1/tests/main.nf.test.snap b/modules/nf-core/regenie/step1/tests/main.nf.test.snap index 88816b6fc9b1..2c8667ef2536 100644 --- a/modules/nf-core/regenie/step1/tests/main.nf.test.snap +++ b/modules/nf-core/regenie/step1/tests/main.nf.test.snap @@ -4,9 +4,7 @@ [ [ { - "id": "plink_simulated_quantitative_phenoname", - "pheno_col": "QuantitativeTrait", - "is_binary": false + "id": "plink_simulated_quantitative_phenoname" }, "plink_simulated_1.loco.gz:md5,93aa1d97d8f164e57cd8fb2551a482f3" ] @@ -16,7 +14,7 @@ [ "REGENIE_STEP1", "regenie", - "v4.1.2.gz" + "4.1.2" ] ] } @@ -25,16 +23,14 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-18T19:00:00.400329317" + "timestamp": "2026-04-14T15:46:03.219303814" }, "homo_sapiens popgen - quantitative plink1 without covariates": { "content": [ [ [ { - "id": "plink_simulated_quantitative_phenoname", - "pheno_col": "QuantitativeTrait", - "is_binary": false + "id": "plink_simulated_quantitative_phenoname" }, "plink_simulated_1.loco.gz:md5,72bbd85ce5e9b6ce1a9aa8237521ba22" ] @@ -44,7 +40,7 @@ [ "REGENIE_STEP1", "regenie", - "v4.1.2.gz" + "4.1.2" ] ] } @@ -53,65 +49,33 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-18T19:00:08.647419408" + "timestamp": "2026-04-14T15:42:05.540721466" }, "homo_sapiens popgen - plink2 - stub": { "content": [ + [ + [ + { + "id": "plink_simulated_quantitative_phenoname" + }, + "plink_simulated_pred.list", + "plink_simulated_1.loco.gz" + ] + ], + [ + [ + { + "id": "plink_simulated_quantitative_phenoname" + }, + "plink_simulated.log" + ] + ], { - "0": [ - [ - { - "id": "plink_simulated_quantitative_phenoname", - "pheno_col": "QuantitativeTrait", - "is_binary": false - }, - "plink_simulated_pred.list:md5,d41d8cd98f00b204e9800998ecf8427e", - "plink_simulated_1.loco.gz:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - { - "id": "plink_simulated_quantitative_phenoname", - "pheno_col": "QuantitativeTrait", - "is_binary": false - }, - "plink_simulated.log:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - [ - "REGENIE_STEP1", - "regenie", - "v4.1.2.gz" - ] - ], - "log": [ - [ - { - "id": "plink_simulated_quantitative_phenoname", - "pheno_col": "QuantitativeTrait", - "is_binary": false - }, - "plink_simulated.log:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "predictions": [ - [ - { - "id": "plink_simulated_quantitative_phenoname", - "pheno_col": "QuantitativeTrait", - "is_binary": false - }, - "plink_simulated_pred.list:md5,d41d8cd98f00b204e9800998ecf8427e", - "plink_simulated_1.loco.gz:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], "versions_regenie": [ [ "REGENIE_STEP1", "regenie", - "v4.1.2.gz" + "4.1.2" ] ] } @@ -120,16 +84,14 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-16T13:47:26.824816889" + "timestamp": "2026-04-14T15:44:26.571212219" }, "homo_sapiens popgen - binary plink1 with covariates": { "content": [ [ [ { - "id": "plink_simulated_binary_phenoname", - "pheno_col": "BinaryTrait", - "is_binary": true + "id": "plink_simulated_binary_phenoname" }, "plink_simulated_1.loco.gz:md5,ec5520f07607f2989a6d4f5b80a2b16b" ] @@ -139,7 +101,7 @@ [ "REGENIE_STEP1", "regenie", - "v4.1.2.gz" + "4.1.2" ] ] } @@ -148,6 +110,6 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-16T13:47:12.636697182" + "timestamp": "2026-04-14T15:41:05.634909197" } } \ No newline at end of file From ee78d424b5dc535ab4149bb49b7735c061c0952d Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Tue, 14 Apr 2026 16:32:25 +0800 Subject: [PATCH 04/17] Use genotype metadata for REGENIE step1 log output --- modules/nf-core/regenie/step1/main.nf | 3 +-- modules/nf-core/regenie/step1/meta.yml | 6 +++--- modules/nf-core/regenie/step1/tests/main.nf.test | 8 ++++---- modules/nf-core/regenie/step1/tests/main.nf.test.snap | 4 ++-- 4 files changed, 10 insertions(+), 11 deletions(-) diff --git a/modules/nf-core/regenie/step1/main.nf b/modules/nf-core/regenie/step1/main.nf index dcb750ee9a4a..25e10d3a5c27 100644 --- a/modules/nf-core/regenie/step1/main.nf +++ b/modules/nf-core/regenie/step1/main.nf @@ -17,7 +17,7 @@ process REGENIE_STEP1 { output: tuple val(meta2), path("*_pred.list"), path("*.loco.gz"), emit: predictions - tuple val(meta2), path("*.log"), emit: log + tuple val(meta), path("*.log"), emit: log tuple val("${task.process}"), val('regenie'), eval('regenie --version 2>&1 | sed -n "1{s/^v//;s/\\.gz$//;p}"'), topic: versions, emit: versions_regenie when: @@ -30,7 +30,6 @@ process REGENIE_STEP1 { def genotype_flag = plink_genotype_file.name.endsWith('.pgen') ? '--pgen' : '--bed' def prefix = plink_genotype_file.baseName def bsize_arg = bsize ?: 1000 - """ regenie \\ --step 1 \\ diff --git a/modules/nf-core/regenie/step1/meta.yml b/modules/nf-core/regenie/step1/meta.yml index 4e8d16f18916..520edb7bfa56 100644 --- a/modules/nf-core/regenie/step1/meta.yml +++ b/modules/nf-core/regenie/step1/meta.yml @@ -94,11 +94,11 @@ output: ontologies: - edam: "http://edamontology.org/format_3987" # GZIP log: - - - meta2: + - - meta: type: map description: | - Groovy Map containing phenotype file information - e.g. `[ id:'plink_simulated_quantitative_phenoname' ]` + Groovy Map containing genotype information + e.g. `[ id:'plink_simulated' ]` - "*.log": type: file description: REGENIE step 1 log file diff --git a/modules/nf-core/regenie/step1/tests/main.nf.test b/modules/nf-core/regenie/step1/tests/main.nf.test index 0657e726a8c3..5c8444b75654 100644 --- a/modules/nf-core/regenie/step1/tests/main.nf.test +++ b/modules/nf-core/regenie/step1/tests/main.nf.test @@ -43,7 +43,7 @@ nextflow_process { { assert process.out.predictions.size() == 1 }, { assert process.out.log.size() == 1 }, { assert process.out.predictions.get(0).get(0).id == 'plink_simulated_quantitative_phenoname' }, - { assert process.out.log.get(0).get(0).id == 'plink_simulated_quantitative_phenoname' }, + { assert process.out.log.get(0).get(0).id == 'plink_simulated' }, { assert path(process.out.log.get(0).get(1)).exists() }, { def predList = path(process.out.predictions.get(0).get(1)) @@ -104,7 +104,7 @@ nextflow_process { { assert process.out.predictions.size() == 1 }, { assert process.out.log.size() == 1 }, { assert process.out.predictions.get(0).get(0).id == 'plink_simulated_binary_phenoname' }, - { assert process.out.log.get(0).get(0).id == 'plink_simulated_binary_phenoname' }, + { assert process.out.log.get(0).get(0).id == 'plink_simulated' }, { assert path(process.out.log.get(0).get(1)).exists() }, { def predList = path(process.out.predictions.get(0).get(1)) @@ -161,7 +161,7 @@ nextflow_process { { assert process.out.predictions.size() == 1 }, { assert process.out.log.size() == 1 }, { assert process.out.predictions.get(0).get(0).id == 'plink_simulated_quantitative_phenoname' }, - { assert process.out.log.get(0).get(0).id == 'plink_simulated_quantitative_phenoname' }, + { assert process.out.log.get(0).get(0).id == 'plink_simulated' }, { assert path(process.out.log.get(0).get(1)).exists() }, { def predList = path(process.out.predictions.get(0).get(1)) @@ -222,7 +222,7 @@ nextflow_process { { assert process.out.predictions.size() == 1 }, { assert process.out.log.size() == 1 }, { assert process.out.predictions.get(0).get(0).id == 'plink_simulated_quantitative_phenoname' }, - { assert process.out.log.get(0).get(0).id == 'plink_simulated_quantitative_phenoname' }, + { assert process.out.log.get(0).get(0).id == 'plink_simulated' }, { assert path(process.out.log.get(0).get(1)).exists() }, { def predList = path(process.out.predictions.get(0).get(1)) diff --git a/modules/nf-core/regenie/step1/tests/main.nf.test.snap b/modules/nf-core/regenie/step1/tests/main.nf.test.snap index 2c8667ef2536..b278442dd07b 100644 --- a/modules/nf-core/regenie/step1/tests/main.nf.test.snap +++ b/modules/nf-core/regenie/step1/tests/main.nf.test.snap @@ -65,7 +65,7 @@ [ [ { - "id": "plink_simulated_quantitative_phenoname" + "id": "plink_simulated" }, "plink_simulated.log" ] @@ -84,7 +84,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-04-14T15:44:26.571212219" + "timestamp": "2026-04-14T16:31:26.027053996" }, "homo_sapiens popgen - binary plink1 with covariates": { "content": [ From d1b987d7d73688226cf843c04877b49992bf18c0 Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Tue, 14 Apr 2026 19:28:46 +0800 Subject: [PATCH 05/17] Use genotype metadata for REGENIE step1 tag and outputs --- modules/nf-core/regenie/step1/main.nf | 2 +- modules/nf-core/regenie/step1/meta.yml | 16 ++++++++-------- modules/nf-core/regenie/step1/tests/main.nf.test | 8 ++++---- .../regenie/step1/tests/main.nf.test.snap | 16 ++++++++-------- 4 files changed, 21 insertions(+), 21 deletions(-) diff --git a/modules/nf-core/regenie/step1/main.nf b/modules/nf-core/regenie/step1/main.nf index 25e10d3a5c27..708ae9e52eff 100644 --- a/modules/nf-core/regenie/step1/main.nf +++ b/modules/nf-core/regenie/step1/main.nf @@ -16,7 +16,7 @@ process REGENIE_STEP1 { val bsize output: - tuple val(meta2), path("*_pred.list"), path("*.loco.gz"), emit: predictions + tuple val(meta), path("*_pred.list"), path("*.loco.gz"), emit: predictions tuple val(meta), path("*.log"), emit: log tuple val("${task.process}"), val('regenie'), eval('regenie --version 2>&1 | sed -n "1{s/^v//;s/\\.gz$//;p}"'), topic: versions, emit: versions_regenie diff --git a/modules/nf-core/regenie/step1/meta.yml b/modules/nf-core/regenie/step1/meta.yml index 520edb7bfa56..e5d9d8b0a1bf 100644 --- a/modules/nf-core/regenie/step1/meta.yml +++ b/modules/nf-core/regenie/step1/meta.yml @@ -44,9 +44,9 @@ input: - - meta2: type: map description: | - Groovy Map containing phenotype file information - Keep only phenotype provenance in this map - e.g. `[ id:'plink_simulated_quantitative_phenoname' ]` + Groovy Map containing genotype/sample information associated with the phenotype file input + Keep only the shared genotype/sample identifier in this map + e.g. `[ id:'plink_simulated' ]` - pheno: type: file description: Phenotype file passed to `--phenoFile` @@ -56,8 +56,8 @@ input: - - meta3: type: map description: | - Groovy Map containing covariate file information - e.g. `[ id:'covariates' ]` + Groovy Map containing genotype/sample information associated with the covariate input + e.g. `[ id:'plink_simulated' ]` - covar: type: file optional: true @@ -77,11 +77,11 @@ input: output: predictions: - - - meta2: + - - meta: type: map description: | - Groovy Map containing phenotype file information - e.g. `[ id:'plink_simulated_quantitative_phenoname' ]` + Groovy Map containing genotype/sample information + e.g. `[ id:'plink_simulated' ]` - "*_pred.list": type: file description: REGENIE prediction list file diff --git a/modules/nf-core/regenie/step1/tests/main.nf.test b/modules/nf-core/regenie/step1/tests/main.nf.test index 5c8444b75654..86ad64870e7a 100644 --- a/modules/nf-core/regenie/step1/tests/main.nf.test +++ b/modules/nf-core/regenie/step1/tests/main.nf.test @@ -42,7 +42,7 @@ nextflow_process { { assert process.success }, { assert process.out.predictions.size() == 1 }, { assert process.out.log.size() == 1 }, - { assert process.out.predictions.get(0).get(0).id == 'plink_simulated_quantitative_phenoname' }, + { assert process.out.predictions.get(0).get(0).id == 'plink_simulated' }, { assert process.out.log.get(0).get(0).id == 'plink_simulated' }, { assert path(process.out.log.get(0).get(1)).exists() }, { @@ -103,7 +103,7 @@ nextflow_process { { assert process.success }, { assert process.out.predictions.size() == 1 }, { assert process.out.log.size() == 1 }, - { assert process.out.predictions.get(0).get(0).id == 'plink_simulated_binary_phenoname' }, + { assert process.out.predictions.get(0).get(0).id == 'plink_simulated' }, { assert process.out.log.get(0).get(0).id == 'plink_simulated' }, { assert path(process.out.log.get(0).get(1)).exists() }, { @@ -160,7 +160,7 @@ nextflow_process { { assert process.success }, { assert process.out.predictions.size() == 1 }, { assert process.out.log.size() == 1 }, - { assert process.out.predictions.get(0).get(0).id == 'plink_simulated_quantitative_phenoname' }, + { assert process.out.predictions.get(0).get(0).id == 'plink_simulated' }, { assert process.out.log.get(0).get(0).id == 'plink_simulated' }, { assert path(process.out.log.get(0).get(1)).exists() }, { @@ -221,7 +221,7 @@ nextflow_process { { assert process.success }, { assert process.out.predictions.size() == 1 }, { assert process.out.log.size() == 1 }, - { assert process.out.predictions.get(0).get(0).id == 'plink_simulated_quantitative_phenoname' }, + { assert process.out.predictions.get(0).get(0).id == 'plink_simulated' }, { assert process.out.log.get(0).get(0).id == 'plink_simulated' }, { assert path(process.out.log.get(0).get(1)).exists() }, { diff --git a/modules/nf-core/regenie/step1/tests/main.nf.test.snap b/modules/nf-core/regenie/step1/tests/main.nf.test.snap index b278442dd07b..ea41c80d468d 100644 --- a/modules/nf-core/regenie/step1/tests/main.nf.test.snap +++ b/modules/nf-core/regenie/step1/tests/main.nf.test.snap @@ -4,7 +4,7 @@ [ [ { - "id": "plink_simulated_quantitative_phenoname" + "id": "plink_simulated" }, "plink_simulated_1.loco.gz:md5,93aa1d97d8f164e57cd8fb2551a482f3" ] @@ -23,14 +23,14 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-04-14T15:46:03.219303814" + "timestamp": "2026-04-14T20:43:22.874702329" }, "homo_sapiens popgen - quantitative plink1 without covariates": { "content": [ [ [ { - "id": "plink_simulated_quantitative_phenoname" + "id": "plink_simulated" }, "plink_simulated_1.loco.gz:md5,72bbd85ce5e9b6ce1a9aa8237521ba22" ] @@ -49,14 +49,14 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-04-14T15:42:05.540721466" + "timestamp": "2026-04-14T20:45:27.737582442" }, "homo_sapiens popgen - plink2 - stub": { "content": [ [ [ { - "id": "plink_simulated_quantitative_phenoname" + "id": "plink_simulated" }, "plink_simulated_pred.list", "plink_simulated_1.loco.gz" @@ -84,14 +84,14 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-04-14T16:31:26.027053996" + "timestamp": "2026-04-14T20:47:32.641837868" }, "homo_sapiens popgen - binary plink1 with covariates": { "content": [ [ [ { - "id": "plink_simulated_binary_phenoname" + "id": "plink_simulated" }, "plink_simulated_1.loco.gz:md5,ec5520f07607f2989a6d4f5b80a2b16b" ] @@ -110,6 +110,6 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-04-14T15:41:05.634909197" + "timestamp": "2026-04-14T20:44:25.24930827" } } \ No newline at end of file From c80d0128d7d2037ab253e007086fcfd440f6fc15 Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Tue, 14 Apr 2026 21:47:18 +0800 Subject: [PATCH 06/17] Fix REGENIE step1 stub gzip syntax --- modules/nf-core/regenie/step1/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nf-core/regenie/step1/main.nf b/modules/nf-core/regenie/step1/main.nf index 708ae9e52eff..a4660120e09e 100644 --- a/modules/nf-core/regenie/step1/main.nf +++ b/modules/nf-core/regenie/step1/main.nf @@ -49,7 +49,7 @@ process REGENIE_STEP1 { def prefix = plink_genotype_file.baseName """ touch ${prefix}_pred.list - echo | gzip > ${prefix}_1.loco.gz + echo "" | gzip > ${prefix}_1.loco.gz touch ${prefix}.log """ } From 357492478a7bc87b7916a421b8c801fd68f028b5 Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Tue, 14 Apr 2026 22:09:20 +0800 Subject: [PATCH 07/17] Split REGENIE step1 prediction outputs --- modules/nf-core/regenie/step1/main.nf | 3 +- modules/nf-core/regenie/step1/meta.yml | 6 +++ .../nf-core/regenie/step1/tests/main.nf.test | 42 +++++++++++++---- .../regenie/step1/tests/main.nf.test.snap | 47 +++++++++++++++---- 4 files changed, 80 insertions(+), 18 deletions(-) diff --git a/modules/nf-core/regenie/step1/main.nf b/modules/nf-core/regenie/step1/main.nf index a4660120e09e..899f61cb5967 100644 --- a/modules/nf-core/regenie/step1/main.nf +++ b/modules/nf-core/regenie/step1/main.nf @@ -16,7 +16,8 @@ process REGENIE_STEP1 { val bsize output: - tuple val(meta), path("*_pred.list"), path("*.loco.gz"), emit: predictions + tuple val(meta), path("*_pred.list"), emit: predictions + tuple val(meta), path("*.loco.gz"), emit: loco tuple val(meta), path("*.log"), emit: log tuple val("${task.process}"), val('regenie'), eval('regenie --version 2>&1 | sed -n "1{s/^v//;s/\\.gz$//;p}"'), topic: versions, emit: versions_regenie diff --git a/modules/nf-core/regenie/step1/meta.yml b/modules/nf-core/regenie/step1/meta.yml index e5d9d8b0a1bf..4cad445158d6 100644 --- a/modules/nf-core/regenie/step1/meta.yml +++ b/modules/nf-core/regenie/step1/meta.yml @@ -87,6 +87,12 @@ output: description: REGENIE prediction list file pattern: "*_pred.list" ontologies: [] + loco: + - - meta: + type: map + description: | + Groovy Map containing genotype/sample information + e.g. `[ id:'plink_simulated' ]` - "*.loco.gz": type: file description: REGENIE LOCO prediction files diff --git a/modules/nf-core/regenie/step1/tests/main.nf.test b/modules/nf-core/regenie/step1/tests/main.nf.test index 86ad64870e7a..6b14dd3304f1 100644 --- a/modules/nf-core/regenie/step1/tests/main.nf.test +++ b/modules/nf-core/regenie/step1/tests/main.nf.test @@ -41,13 +41,15 @@ nextflow_process { assertAll( { assert process.success }, { assert process.out.predictions.size() == 1 }, + { assert process.out.loco.size() == 1 }, { assert process.out.log.size() == 1 }, { assert process.out.predictions.get(0).get(0).id == 'plink_simulated' }, + { assert process.out.loco.get(0).get(0).id == 'plink_simulated' }, { assert process.out.log.get(0).get(0).id == 'plink_simulated' }, { assert path(process.out.log.get(0).get(1)).exists() }, { def predList = path(process.out.predictions.get(0).get(1)) - def locoFile = path(process.out.predictions.get(0).get(2)) + def locoFile = path(process.out.loco.get(0).get(1)) def predListLines = predList.text.readLines().findAll { it } assert predList.exists() assert predListLines.size() == 1 @@ -58,10 +60,14 @@ nextflow_process { }, { def stablePredictions = process.out.predictions.collect { prediction -> - [prediction[0], prediction[2]] + [prediction[0], path(prediction[1]).getFileName().toString()] + } + def stableLoco = process.out.loco.collect { loco -> + [loco[0], path(loco[1]).getFileName().toString()] } assert snapshot( stablePredictions, + stableLoco, process.out.findAll { key, val -> key.startsWith('versions') } ).match() } @@ -102,13 +108,15 @@ nextflow_process { assertAll( { assert process.success }, { assert process.out.predictions.size() == 1 }, + { assert process.out.loco.size() == 1 }, { assert process.out.log.size() == 1 }, { assert process.out.predictions.get(0).get(0).id == 'plink_simulated' }, + { assert process.out.loco.get(0).get(0).id == 'plink_simulated' }, { assert process.out.log.get(0).get(0).id == 'plink_simulated' }, { assert path(process.out.log.get(0).get(1)).exists() }, { def predList = path(process.out.predictions.get(0).get(1)) - def locoFile = path(process.out.predictions.get(0).get(2)) + def locoFile = path(process.out.loco.get(0).get(1)) def predListLines = predList.text.readLines().findAll { it } assert predList.exists() assert predListLines.size() == 1 @@ -119,10 +127,14 @@ nextflow_process { }, { def stablePredictions = process.out.predictions.collect { prediction -> - [prediction[0], prediction[2]] + [prediction[0], path(prediction[1]).getFileName().toString()] + } + def stableLoco = process.out.loco.collect { loco -> + [loco[0], path(loco[1]).getFileName().toString()] } assert snapshot( stablePredictions, + stableLoco, process.out.findAll { key, val -> key.startsWith('versions') } ).match() } @@ -159,13 +171,15 @@ nextflow_process { assertAll( { assert process.success }, { assert process.out.predictions.size() == 1 }, + { assert process.out.loco.size() == 1 }, { assert process.out.log.size() == 1 }, { assert process.out.predictions.get(0).get(0).id == 'plink_simulated' }, + { assert process.out.loco.get(0).get(0).id == 'plink_simulated' }, { assert process.out.log.get(0).get(0).id == 'plink_simulated' }, { assert path(process.out.log.get(0).get(1)).exists() }, { def predList = path(process.out.predictions.get(0).get(1)) - def locoFile = path(process.out.predictions.get(0).get(2)) + def locoFile = path(process.out.loco.get(0).get(1)) def predListLines = predList.text.readLines().findAll { it } assert predList.exists() assert predListLines.size() == 1 @@ -176,10 +190,14 @@ nextflow_process { }, { def stablePredictions = process.out.predictions.collect { prediction -> - [prediction[0], prediction[2]] + [prediction[0], path(prediction[1]).getFileName().toString()] + } + def stableLoco = process.out.loco.collect { loco -> + [loco[0], path(loco[1]).getFileName().toString()] } assert snapshot( stablePredictions, + stableLoco, process.out.findAll { key, val -> key.startsWith('versions') } ).match() } @@ -220,13 +238,15 @@ nextflow_process { assertAll( { assert process.success }, { assert process.out.predictions.size() == 1 }, + { assert process.out.loco.size() == 1 }, { assert process.out.log.size() == 1 }, { assert process.out.predictions.get(0).get(0).id == 'plink_simulated' }, + { assert process.out.loco.get(0).get(0).id == 'plink_simulated' }, { assert process.out.log.get(0).get(0).id == 'plink_simulated' }, { assert path(process.out.log.get(0).get(1)).exists() }, { def predList = path(process.out.predictions.get(0).get(1)) - def locoFile = path(process.out.predictions.get(0).get(2)) + def locoFile = path(process.out.loco.get(0).get(1)) def predListLines = predList.text.readLines().findAll { it } assert predList.exists() assert predListLines.size() == 1 @@ -235,7 +255,7 @@ nextflow_process { assert predListFields[0] == 'QuantitativeTrait' assert predListFields[1] == locoFile.toString() }, - { assert path(process.out.predictions.get(0).get(2)).exists() }, + { assert path(process.out.loco.get(0).get(1)).exists() }, { def versionInfo = process.out.versions_regenie.get(0) assert versionInfo[0] == 'REGENIE_STEP1' @@ -283,13 +303,17 @@ nextflow_process { { assert process.success }, { def stablePredictions = process.out.predictions.collect { prediction -> - [prediction[0], path(prediction[1]).getFileName().toString(), path(prediction[2]).getFileName().toString()] + [prediction[0], path(prediction[1]).getFileName().toString()] + } + def stableLoco = process.out.loco.collect { loco -> + [loco[0], path(loco[1]).getFileName().toString()] } def stableLogs = process.out.log.collect { logTuple -> [logTuple[0], path(logTuple[1]).getFileName().toString()] } assert snapshot( stablePredictions, + stableLoco, stableLogs, process.out.findAll { key, val -> key.startsWith('versions') } ).match() diff --git a/modules/nf-core/regenie/step1/tests/main.nf.test.snap b/modules/nf-core/regenie/step1/tests/main.nf.test.snap index ea41c80d468d..9a217d5d56a7 100644 --- a/modules/nf-core/regenie/step1/tests/main.nf.test.snap +++ b/modules/nf-core/regenie/step1/tests/main.nf.test.snap @@ -6,7 +6,15 @@ { "id": "plink_simulated" }, - "plink_simulated_1.loco.gz:md5,93aa1d97d8f164e57cd8fb2551a482f3" + "plink_simulated_pred.list" + ] + ], + [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated_1.loco.gz" ] ], { @@ -23,7 +31,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-04-14T20:43:22.874702329" + "timestamp": "2026-04-14T22:03:48.558604476" }, "homo_sapiens popgen - quantitative plink1 without covariates": { "content": [ @@ -32,7 +40,15 @@ { "id": "plink_simulated" }, - "plink_simulated_1.loco.gz:md5,72bbd85ce5e9b6ce1a9aa8237521ba22" + "plink_simulated_pred.list" + ] + ], + [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated_1.loco.gz" ] ], { @@ -49,7 +65,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-04-14T20:45:27.737582442" + "timestamp": "2026-04-14T22:05:57.569991954" }, "homo_sapiens popgen - plink2 - stub": { "content": [ @@ -58,7 +74,14 @@ { "id": "plink_simulated" }, - "plink_simulated_pred.list", + "plink_simulated_pred.list" + ] + ], + [ + [ + { + "id": "plink_simulated" + }, "plink_simulated_1.loco.gz" ] ], @@ -84,7 +107,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-04-14T20:47:32.641837868" + "timestamp": "2026-04-14T22:08:22.46827067" }, "homo_sapiens popgen - binary plink1 with covariates": { "content": [ @@ -93,7 +116,15 @@ { "id": "plink_simulated" }, - "plink_simulated_1.loco.gz:md5,ec5520f07607f2989a6d4f5b80a2b16b" + "plink_simulated_pred.list" + ] + ], + [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated_1.loco.gz" ] ], { @@ -110,6 +141,6 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-04-14T20:44:25.24930827" + "timestamp": "2026-04-14T22:04:58.463271992" } } \ No newline at end of file From 0bf2937b098dd5ed9acdd623ee7ea297f335bb88 Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Wed, 15 Apr 2026 20:05:37 +0800 Subject: [PATCH 08/17] Move REGENIE step1 phenotype selectors into tuples --- modules/nf-core/regenie/step1/main.nf | 4 +- modules/nf-core/regenie/step1/meta.yml | 12 ++-- .../nf-core/regenie/step1/tests/main.nf.test | 58 +++++++++---------- 3 files changed, 36 insertions(+), 38 deletions(-) diff --git a/modules/nf-core/regenie/step1/main.nf b/modules/nf-core/regenie/step1/main.nf index 899f61cb5967..f3df21620c2b 100644 --- a/modules/nf-core/regenie/step1/main.nf +++ b/modules/nf-core/regenie/step1/main.nf @@ -9,10 +9,8 @@ process REGENIE_STEP1 { input: tuple val(meta), path(plink_genotype_file), path(plink_variant_file), path(plink_sample_file) - tuple val(meta2), path(pheno) + tuple val(meta2), path(pheno), val(pheno_col), val(is_binary) tuple val(meta3), path(covar) - val pheno_col - val is_binary val bsize output: diff --git a/modules/nf-core/regenie/step1/meta.yml b/modules/nf-core/regenie/step1/meta.yml index 4cad445158d6..91ba7a90f287 100644 --- a/modules/nf-core/regenie/step1/meta.yml +++ b/modules/nf-core/regenie/step1/meta.yml @@ -53,6 +53,12 @@ input: pattern: "*.{phe,pheno,txt,tsv}" ontologies: - edam: "http://edamontology.org/format_3475" # TSV + - pheno_col: + type: string + description: Phenotype column passed to `--phenoColList` + - is_binary: + type: boolean + description: Whether to add `--bt` for a binary trait - - meta3: type: map description: | @@ -65,12 +71,6 @@ input: pattern: "*.{covar,cov,txt,tsv}" ontologies: - edam: "http://edamontology.org/format_3475" # TSV - - pheno_col: - type: string - description: Phenotype column passed to `--phenoColList` - - is_binary: - type: boolean - description: Whether to add `--bt` for a binary trait - bsize: type: integer description: Optional block size passed to `--bsize`; pass `[]` to use the module default of `1000` diff --git a/modules/nf-core/regenie/step1/tests/main.nf.test b/modules/nf-core/regenie/step1/tests/main.nf.test index 6b14dd3304f1..8ea990bb9ca8 100644 --- a/modules/nf-core/regenie/step1/tests/main.nf.test +++ b/modules/nf-core/regenie/step1/tests/main.nf.test @@ -21,18 +21,18 @@ nextflow_process { ] input[1] = [ - [ id:'plink_simulated_quantitative_phenoname' ], - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true), + 'QuantitativeTrait', + false ] input[2] = [ - [ id:'covariates' ], + [ id:'plink_simulated' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) ] - input[3] = 'QuantitativeTrait' - input[4] = false - input[5] = [] + input[3] = [] """ } } @@ -88,18 +88,18 @@ nextflow_process { ] input[1] = [ - [ id:'plink_simulated_binary_phenoname' ], - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_binary_phenoname.phe', checkIfExists: true) + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_binary_phenoname.phe', checkIfExists: true), + 'BinaryTrait', + true ] input[2] = [ - [ id:'covariates' ], + [ id:'plink_simulated' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) ] - input[3] = 'BinaryTrait' - input[4] = true - input[5] = [] + input[3] = [] """ } } @@ -155,14 +155,14 @@ nextflow_process { ] input[1] = [ - [ id:'plink_simulated_quantitative_phenoname' ], - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true), + 'QuantitativeTrait', + false ] input[2] = [[:], []] - input[3] = 'QuantitativeTrait' - input[4] = false - input[5] = [] + input[3] = [] """ } } @@ -218,18 +218,18 @@ nextflow_process { ] input[1] = [ - [ id:'plink_simulated_quantitative_phenoname' ], - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true), + 'QuantitativeTrait', + false ] input[2] = [ - [ id:'covariates' ], + [ id:'plink_simulated' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) ] - input[3] = 'QuantitativeTrait' - input[4] = false - input[5] = 256 + input[3] = 256 """ } } @@ -282,18 +282,18 @@ nextflow_process { ] input[1] = [ - [ id:'plink_simulated_quantitative_phenoname' ], - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true), + 'QuantitativeTrait', + false ] input[2] = [ - [ id:'covariates' ], + [ id:'plink_simulated' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) ] - input[3] = 'QuantitativeTrait' - input[4] = false - input[5] = 256 + input[3] = 256 """ } } From 1e99539c6d186584d2dc425b1a77a2aaaf05625c Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Sun, 10 May 2026 18:16:00 +0800 Subject: [PATCH 09/17] Emit REGENIE step1 phenotype selectors --- modules/nf-core/regenie/step1/main.nf | 8 ++-- modules/nf-core/regenie/step1/meta.yml | 18 ++++++++ .../nf-core/regenie/step1/tests/main.nf.test | 42 +++++++++++++---- .../regenie/step1/tests/main.nf.test.snap | 46 +++++++++++++------ 4 files changed, 87 insertions(+), 27 deletions(-) diff --git a/modules/nf-core/regenie/step1/main.nf b/modules/nf-core/regenie/step1/main.nf index f3df21620c2b..eef660589b94 100644 --- a/modules/nf-core/regenie/step1/main.nf +++ b/modules/nf-core/regenie/step1/main.nf @@ -1,5 +1,5 @@ process REGENIE_STEP1 { - tag "${meta.id}" + tag "${meta.id}:${pheno_col}" label 'process_medium' conda "${moduleDir}/environment.yml" @@ -14,9 +14,9 @@ process REGENIE_STEP1 { val bsize output: - tuple val(meta), path("*_pred.list"), emit: predictions - tuple val(meta), path("*.loco.gz"), emit: loco - tuple val(meta), path("*.log"), emit: log + tuple val(meta), path("*_pred.list"), val(pheno_col), val(is_binary), emit: predictions + tuple val(meta), path("*.loco.gz"), val(pheno_col), val(is_binary), emit: loco + tuple val(meta), path("*.log"), val(pheno_col), val(is_binary), emit: log tuple val("${task.process}"), val('regenie'), eval('regenie --version 2>&1 | sed -n "1{s/^v//;s/\\.gz$//;p}"'), topic: versions, emit: versions_regenie when: diff --git a/modules/nf-core/regenie/step1/meta.yml b/modules/nf-core/regenie/step1/meta.yml index 91ba7a90f287..3e7c148330c3 100644 --- a/modules/nf-core/regenie/step1/meta.yml +++ b/modules/nf-core/regenie/step1/meta.yml @@ -87,6 +87,12 @@ output: description: REGENIE prediction list file pattern: "*_pred.list" ontologies: [] + - pheno_col: + type: string + description: Phenotype column passed to `--phenoColList` + - is_binary: + type: boolean + description: Whether `--bt` was used for the emitted phenotype loco: - - meta: type: map @@ -99,6 +105,12 @@ output: pattern: "*.loco.gz" ontologies: - edam: "http://edamontology.org/format_3987" # GZIP + - pheno_col: + type: string + description: Phenotype column passed to `--phenoColList` + - is_binary: + type: boolean + description: Whether `--bt` was used for the emitted phenotype log: - - meta: type: map @@ -111,6 +123,12 @@ output: pattern: "*.log" ontologies: - edam: "http://edamontology.org/format_2330" # Text + - pheno_col: + type: string + description: Phenotype column passed to `--phenoColList` + - is_binary: + type: boolean + description: Whether `--bt` was used for the emitted phenotype versions_regenie: - - "${task.process}": type: string diff --git a/modules/nf-core/regenie/step1/tests/main.nf.test b/modules/nf-core/regenie/step1/tests/main.nf.test index 8ea990bb9ca8..58f528c37469 100644 --- a/modules/nf-core/regenie/step1/tests/main.nf.test +++ b/modules/nf-core/regenie/step1/tests/main.nf.test @@ -46,6 +46,12 @@ nextflow_process { { assert process.out.predictions.get(0).get(0).id == 'plink_simulated' }, { assert process.out.loco.get(0).get(0).id == 'plink_simulated' }, { assert process.out.log.get(0).get(0).id == 'plink_simulated' }, + { assert process.out.predictions.get(0).get(2) == 'QuantitativeTrait' }, + { assert process.out.predictions.get(0).get(3) == false }, + { assert process.out.loco.get(0).get(2) == 'QuantitativeTrait' }, + { assert process.out.loco.get(0).get(3) == false }, + { assert process.out.log.get(0).get(2) == 'QuantitativeTrait' }, + { assert process.out.log.get(0).get(3) == false }, { assert path(process.out.log.get(0).get(1)).exists() }, { def predList = path(process.out.predictions.get(0).get(1)) @@ -60,10 +66,10 @@ nextflow_process { }, { def stablePredictions = process.out.predictions.collect { prediction -> - [prediction[0], path(prediction[1]).getFileName().toString()] + [prediction[0], path(prediction[1]).getFileName().toString(), prediction[2], prediction[3]] } def stableLoco = process.out.loco.collect { loco -> - [loco[0], path(loco[1]).getFileName().toString()] + [loco[0], path(loco[1]).getFileName().toString(), loco[2], loco[3]] } assert snapshot( stablePredictions, @@ -113,6 +119,12 @@ nextflow_process { { assert process.out.predictions.get(0).get(0).id == 'plink_simulated' }, { assert process.out.loco.get(0).get(0).id == 'plink_simulated' }, { assert process.out.log.get(0).get(0).id == 'plink_simulated' }, + { assert process.out.predictions.get(0).get(2) == 'BinaryTrait' }, + { assert process.out.predictions.get(0).get(3) == true }, + { assert process.out.loco.get(0).get(2) == 'BinaryTrait' }, + { assert process.out.loco.get(0).get(3) == true }, + { assert process.out.log.get(0).get(2) == 'BinaryTrait' }, + { assert process.out.log.get(0).get(3) == true }, { assert path(process.out.log.get(0).get(1)).exists() }, { def predList = path(process.out.predictions.get(0).get(1)) @@ -127,10 +139,10 @@ nextflow_process { }, { def stablePredictions = process.out.predictions.collect { prediction -> - [prediction[0], path(prediction[1]).getFileName().toString()] + [prediction[0], path(prediction[1]).getFileName().toString(), prediction[2], prediction[3]] } def stableLoco = process.out.loco.collect { loco -> - [loco[0], path(loco[1]).getFileName().toString()] + [loco[0], path(loco[1]).getFileName().toString(), loco[2], loco[3]] } assert snapshot( stablePredictions, @@ -176,6 +188,12 @@ nextflow_process { { assert process.out.predictions.get(0).get(0).id == 'plink_simulated' }, { assert process.out.loco.get(0).get(0).id == 'plink_simulated' }, { assert process.out.log.get(0).get(0).id == 'plink_simulated' }, + { assert process.out.predictions.get(0).get(2) == 'QuantitativeTrait' }, + { assert process.out.predictions.get(0).get(3) == false }, + { assert process.out.loco.get(0).get(2) == 'QuantitativeTrait' }, + { assert process.out.loco.get(0).get(3) == false }, + { assert process.out.log.get(0).get(2) == 'QuantitativeTrait' }, + { assert process.out.log.get(0).get(3) == false }, { assert path(process.out.log.get(0).get(1)).exists() }, { def predList = path(process.out.predictions.get(0).get(1)) @@ -190,10 +208,10 @@ nextflow_process { }, { def stablePredictions = process.out.predictions.collect { prediction -> - [prediction[0], path(prediction[1]).getFileName().toString()] + [prediction[0], path(prediction[1]).getFileName().toString(), prediction[2], prediction[3]] } def stableLoco = process.out.loco.collect { loco -> - [loco[0], path(loco[1]).getFileName().toString()] + [loco[0], path(loco[1]).getFileName().toString(), loco[2], loco[3]] } assert snapshot( stablePredictions, @@ -243,6 +261,12 @@ nextflow_process { { assert process.out.predictions.get(0).get(0).id == 'plink_simulated' }, { assert process.out.loco.get(0).get(0).id == 'plink_simulated' }, { assert process.out.log.get(0).get(0).id == 'plink_simulated' }, + { assert process.out.predictions.get(0).get(2) == 'QuantitativeTrait' }, + { assert process.out.predictions.get(0).get(3) == false }, + { assert process.out.loco.get(0).get(2) == 'QuantitativeTrait' }, + { assert process.out.loco.get(0).get(3) == false }, + { assert process.out.log.get(0).get(2) == 'QuantitativeTrait' }, + { assert process.out.log.get(0).get(3) == false }, { assert path(process.out.log.get(0).get(1)).exists() }, { def predList = path(process.out.predictions.get(0).get(1)) @@ -303,13 +327,13 @@ nextflow_process { { assert process.success }, { def stablePredictions = process.out.predictions.collect { prediction -> - [prediction[0], path(prediction[1]).getFileName().toString()] + [prediction[0], path(prediction[1]).getFileName().toString(), prediction[2], prediction[3]] } def stableLoco = process.out.loco.collect { loco -> - [loco[0], path(loco[1]).getFileName().toString()] + [loco[0], path(loco[1]).getFileName().toString(), loco[2], loco[3]] } def stableLogs = process.out.log.collect { logTuple -> - [logTuple[0], path(logTuple[1]).getFileName().toString()] + [logTuple[0], path(logTuple[1]).getFileName().toString(), logTuple[2], logTuple[3]] } assert snapshot( stablePredictions, diff --git a/modules/nf-core/regenie/step1/tests/main.nf.test.snap b/modules/nf-core/regenie/step1/tests/main.nf.test.snap index 9a217d5d56a7..33743ba9d474 100644 --- a/modules/nf-core/regenie/step1/tests/main.nf.test.snap +++ b/modules/nf-core/regenie/step1/tests/main.nf.test.snap @@ -6,7 +6,9 @@ { "id": "plink_simulated" }, - "plink_simulated_pred.list" + "plink_simulated_pred.list", + "QuantitativeTrait", + false ] ], [ @@ -14,7 +16,9 @@ { "id": "plink_simulated" }, - "plink_simulated_1.loco.gz" + "plink_simulated_1.loco.gz", + "QuantitativeTrait", + false ] ], { @@ -31,7 +35,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-04-14T22:03:48.558604476" + "timestamp": "2026-04-17T14:27:17.114946536" }, "homo_sapiens popgen - quantitative plink1 without covariates": { "content": [ @@ -40,7 +44,9 @@ { "id": "plink_simulated" }, - "plink_simulated_pred.list" + "plink_simulated_pred.list", + "QuantitativeTrait", + false ] ], [ @@ -48,7 +54,9 @@ { "id": "plink_simulated" }, - "plink_simulated_1.loco.gz" + "plink_simulated_1.loco.gz", + "QuantitativeTrait", + false ] ], { @@ -65,7 +73,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-04-14T22:05:57.569991954" + "timestamp": "2026-04-17T14:27:46.324894984" }, "homo_sapiens popgen - plink2 - stub": { "content": [ @@ -74,7 +82,9 @@ { "id": "plink_simulated" }, - "plink_simulated_pred.list" + "plink_simulated_pred.list", + "QuantitativeTrait", + false ] ], [ @@ -82,7 +92,9 @@ { "id": "plink_simulated" }, - "plink_simulated_1.loco.gz" + "plink_simulated_1.loco.gz", + "QuantitativeTrait", + false ] ], [ @@ -90,7 +102,9 @@ { "id": "plink_simulated" }, - "plink_simulated.log" + "plink_simulated.log", + "QuantitativeTrait", + false ] ], { @@ -107,7 +121,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-04-14T22:08:22.46827067" + "timestamp": "2026-04-17T14:28:16.024988753" }, "homo_sapiens popgen - binary plink1 with covariates": { "content": [ @@ -116,7 +130,9 @@ { "id": "plink_simulated" }, - "plink_simulated_pred.list" + "plink_simulated_pred.list", + "BinaryTrait", + true ] ], [ @@ -124,7 +140,9 @@ { "id": "plink_simulated" }, - "plink_simulated_1.loco.gz" + "plink_simulated_1.loco.gz", + "BinaryTrait", + true ] ], { @@ -141,6 +159,6 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-04-14T22:04:58.463271992" + "timestamp": "2026-04-17T14:27:31.70779572" } -} \ No newline at end of file +} From 40cbd5d09bab34fb5347a4bd34272f628896ec96 Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Wed, 13 May 2026 22:05:49 +0800 Subject: [PATCH 10/17] Update REGENIE step1 maintainer metadata --- modules/nf-core/regenie/step1/meta.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/nf-core/regenie/step1/meta.yml b/modules/nf-core/regenie/step1/meta.yml index 3e7c148330c3..4a810cdf9eb4 100644 --- a/modules/nf-core/regenie/step1/meta.yml +++ b/modules/nf-core/regenie/step1/meta.yml @@ -152,9 +152,9 @@ topics: type: eval description: The command used to generate the version of the tool authors: - - "@andongni" + - "@lyh970817" maintainers: - - "@andongni" + - "@lyh970817" containers: conda: linux_amd64: From a3ed826acf7dee672bf52c2af5a05b7b971765be Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Thu, 14 May 2026 21:26:08 +0800 Subject: [PATCH 11/17] Use HTTPS Singularity container for REGENIE step1 --- modules/nf-core/regenie/step1/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nf-core/regenie/step1/main.nf b/modules/nf-core/regenie/step1/main.nf index eef660589b94..9400ec80e5a7 100644 --- a/modules/nf-core/regenie/step1/main.nf +++ b/modules/nf-core/regenie/step1/main.nf @@ -4,7 +4,7 @@ process REGENIE_STEP1 { conda "${moduleDir}/environment.yml" container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container - ? 'oras://community.wave.seqera.io/library/regenie:4.1.2--7c121fb4ecd57890' + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/7a/7a05bf71ea09adc5ebf9f0c656c9b326c0f16ba8e4966914972e58313469a466/data' : 'community.wave.seqera.io/library/regenie:4.1.2--5d361f9fcb2f85cf'}" input: From b3a99806165c65b25d0386a27bc295ac966c555a Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Fri, 15 May 2026 20:51:33 +0800 Subject: [PATCH 12/17] Allow ext.prefix in regenie step1 --- modules/nf-core/regenie/step1/main.nf | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/modules/nf-core/regenie/step1/main.nf b/modules/nf-core/regenie/step1/main.nf index 9400ec80e5a7..c4c0b9697e35 100644 --- a/modules/nf-core/regenie/step1/main.nf +++ b/modules/nf-core/regenie/step1/main.nf @@ -24,15 +24,16 @@ process REGENIE_STEP1 { script: def args = task.ext.args ?: '' - def binary_arg = is_binary ? '--bt' : '' - def covar_arg = covar ? "--covarFile ${covar}" : '' + def input_prefix = plink_genotype_file.baseName + def prefix = task.ext.prefix ?: input_prefix def genotype_flag = plink_genotype_file.name.endsWith('.pgen') ? '--pgen' : '--bed' - def prefix = plink_genotype_file.baseName + def covar_arg = covar ? "--covarFile ${covar}" : '' + def binary_arg = is_binary ? '--bt' : '' def bsize_arg = bsize ?: 1000 """ regenie \\ --step 1 \\ - ${genotype_flag} ${prefix} \\ + ${genotype_flag} ${input_prefix} \\ --phenoFile ${pheno} \\ --phenoColList ${pheno_col} \\ ${covar_arg} \\ @@ -45,7 +46,8 @@ process REGENIE_STEP1 { """ stub: - def prefix = plink_genotype_file.baseName + def input_prefix = plink_genotype_file.baseName + def prefix = task.ext.prefix ?: input_prefix """ touch ${prefix}_pred.list echo "" | gzip > ${prefix}_1.loco.gz From 51b254f3f9f2c9c0cff83cb0c30b0d620ca1c8de Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Tue, 19 May 2026 15:55:31 +0800 Subject: [PATCH 13/17] Remove is_binary from regenie step1 --- modules/nf-core/regenie/step1/main.nf | 10 ++- modules/nf-core/regenie/step1/meta.yml | 12 ---- .../nf-core/regenie/step1/tests/main.nf.test | 67 ++++++++++--------- .../regenie/step1/tests/main.nf.test.snap | 27 +++----- 4 files changed, 48 insertions(+), 68 deletions(-) diff --git a/modules/nf-core/regenie/step1/main.nf b/modules/nf-core/regenie/step1/main.nf index c4c0b9697e35..c22e1e1980d1 100644 --- a/modules/nf-core/regenie/step1/main.nf +++ b/modules/nf-core/regenie/step1/main.nf @@ -9,14 +9,14 @@ process REGENIE_STEP1 { input: tuple val(meta), path(plink_genotype_file), path(plink_variant_file), path(plink_sample_file) - tuple val(meta2), path(pheno), val(pheno_col), val(is_binary) + tuple val(meta2), path(pheno), val(pheno_col) tuple val(meta3), path(covar) val bsize output: - tuple val(meta), path("*_pred.list"), val(pheno_col), val(is_binary), emit: predictions - tuple val(meta), path("*.loco.gz"), val(pheno_col), val(is_binary), emit: loco - tuple val(meta), path("*.log"), val(pheno_col), val(is_binary), emit: log + tuple val(meta), path("*_pred.list"), val(pheno_col), emit: predictions + tuple val(meta), path("*.loco.gz"), val(pheno_col), emit: loco + tuple val(meta), path("*.log"), val(pheno_col), emit: log tuple val("${task.process}"), val('regenie'), eval('regenie --version 2>&1 | sed -n "1{s/^v//;s/\\.gz$//;p}"'), topic: versions, emit: versions_regenie when: @@ -28,7 +28,6 @@ process REGENIE_STEP1 { def prefix = task.ext.prefix ?: input_prefix def genotype_flag = plink_genotype_file.name.endsWith('.pgen') ? '--pgen' : '--bed' def covar_arg = covar ? "--covarFile ${covar}" : '' - def binary_arg = is_binary ? '--bt' : '' def bsize_arg = bsize ?: 1000 """ regenie \\ @@ -37,7 +36,6 @@ process REGENIE_STEP1 { --phenoFile ${pheno} \\ --phenoColList ${pheno_col} \\ ${covar_arg} \\ - ${binary_arg} \\ --bsize ${bsize_arg} \\ --gz \\ --threads ${task.cpus} \\ diff --git a/modules/nf-core/regenie/step1/meta.yml b/modules/nf-core/regenie/step1/meta.yml index 4a810cdf9eb4..a9aa90d4d5ae 100644 --- a/modules/nf-core/regenie/step1/meta.yml +++ b/modules/nf-core/regenie/step1/meta.yml @@ -56,9 +56,6 @@ input: - pheno_col: type: string description: Phenotype column passed to `--phenoColList` - - is_binary: - type: boolean - description: Whether to add `--bt` for a binary trait - - meta3: type: map description: | @@ -90,9 +87,6 @@ output: - pheno_col: type: string description: Phenotype column passed to `--phenoColList` - - is_binary: - type: boolean - description: Whether `--bt` was used for the emitted phenotype loco: - - meta: type: map @@ -108,9 +102,6 @@ output: - pheno_col: type: string description: Phenotype column passed to `--phenoColList` - - is_binary: - type: boolean - description: Whether `--bt` was used for the emitted phenotype log: - - meta: type: map @@ -126,9 +117,6 @@ output: - pheno_col: type: string description: Phenotype column passed to `--phenoColList` - - is_binary: - type: boolean - description: Whether `--bt` was used for the emitted phenotype versions_regenie: - - "${task.process}": type: string diff --git a/modules/nf-core/regenie/step1/tests/main.nf.test b/modules/nf-core/regenie/step1/tests/main.nf.test index 58f528c37469..7154b86bec49 100644 --- a/modules/nf-core/regenie/step1/tests/main.nf.test +++ b/modules/nf-core/regenie/step1/tests/main.nf.test @@ -1,3 +1,16 @@ +def regenieStep1TestConfig = new File(".nf-test/regenie_step1.tests.config") +regenieStep1TestConfig.parentFile.mkdirs() +regenieStep1TestConfig.text = """ +params { + modules_testdata_base_path = System.getenv('NF_MODULES_TESTDATA_BASE_PATH') ?: 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' +} +process { + withName: REGENIE_STEP1 { + ext.args = { meta2.bt ? '--bt' : '' } + } +} +""" + nextflow_process { name "Test Process REGENIE_STEP1" @@ -10,6 +23,8 @@ nextflow_process { tag "regenie/step1" test("homo_sapiens popgen - quantitative plink1 with covariates") { + config regenieStep1TestConfig.absolutePath + when { process { """ @@ -23,8 +38,7 @@ nextflow_process { input[1] = [ [ id:'plink_simulated' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true), - 'QuantitativeTrait', - false + 'QuantitativeTrait' ] input[2] = [ @@ -47,11 +61,8 @@ nextflow_process { { assert process.out.loco.get(0).get(0).id == 'plink_simulated' }, { assert process.out.log.get(0).get(0).id == 'plink_simulated' }, { assert process.out.predictions.get(0).get(2) == 'QuantitativeTrait' }, - { assert process.out.predictions.get(0).get(3) == false }, { assert process.out.loco.get(0).get(2) == 'QuantitativeTrait' }, - { assert process.out.loco.get(0).get(3) == false }, { assert process.out.log.get(0).get(2) == 'QuantitativeTrait' }, - { assert process.out.log.get(0).get(3) == false }, { assert path(process.out.log.get(0).get(1)).exists() }, { def predList = path(process.out.predictions.get(0).get(1)) @@ -66,10 +77,10 @@ nextflow_process { }, { def stablePredictions = process.out.predictions.collect { prediction -> - [prediction[0], path(prediction[1]).getFileName().toString(), prediction[2], prediction[3]] + [prediction[0], path(prediction[1]).getFileName().toString(), prediction[2]] } def stableLoco = process.out.loco.collect { loco -> - [loco[0], path(loco[1]).getFileName().toString(), loco[2], loco[3]] + [loco[0], path(loco[1]).getFileName().toString(), loco[2]] } assert snapshot( stablePredictions, @@ -83,6 +94,8 @@ nextflow_process { } test("homo_sapiens popgen - binary plink1 with covariates") { + config regenieStep1TestConfig.absolutePath + when { process { """ @@ -94,10 +107,9 @@ nextflow_process { ] input[1] = [ - [ id:'plink_simulated' ], + [ id:'plink_simulated', bt:true ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_binary_phenoname.phe', checkIfExists: true), - 'BinaryTrait', - true + 'BinaryTrait' ] input[2] = [ @@ -120,11 +132,8 @@ nextflow_process { { assert process.out.loco.get(0).get(0).id == 'plink_simulated' }, { assert process.out.log.get(0).get(0).id == 'plink_simulated' }, { assert process.out.predictions.get(0).get(2) == 'BinaryTrait' }, - { assert process.out.predictions.get(0).get(3) == true }, { assert process.out.loco.get(0).get(2) == 'BinaryTrait' }, - { assert process.out.loco.get(0).get(3) == true }, { assert process.out.log.get(0).get(2) == 'BinaryTrait' }, - { assert process.out.log.get(0).get(3) == true }, { assert path(process.out.log.get(0).get(1)).exists() }, { def predList = path(process.out.predictions.get(0).get(1)) @@ -139,10 +148,10 @@ nextflow_process { }, { def stablePredictions = process.out.predictions.collect { prediction -> - [prediction[0], path(prediction[1]).getFileName().toString(), prediction[2], prediction[3]] + [prediction[0], path(prediction[1]).getFileName().toString(), prediction[2]] } def stableLoco = process.out.loco.collect { loco -> - [loco[0], path(loco[1]).getFileName().toString(), loco[2], loco[3]] + [loco[0], path(loco[1]).getFileName().toString(), loco[2]] } assert snapshot( stablePredictions, @@ -156,6 +165,8 @@ nextflow_process { } test("homo_sapiens popgen - quantitative plink1 without covariates") { + config regenieStep1TestConfig.absolutePath + when { process { """ @@ -169,8 +180,7 @@ nextflow_process { input[1] = [ [ id:'plink_simulated' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true), - 'QuantitativeTrait', - false + 'QuantitativeTrait' ] input[2] = [[:], []] @@ -189,11 +199,8 @@ nextflow_process { { assert process.out.loco.get(0).get(0).id == 'plink_simulated' }, { assert process.out.log.get(0).get(0).id == 'plink_simulated' }, { assert process.out.predictions.get(0).get(2) == 'QuantitativeTrait' }, - { assert process.out.predictions.get(0).get(3) == false }, { assert process.out.loco.get(0).get(2) == 'QuantitativeTrait' }, - { assert process.out.loco.get(0).get(3) == false }, { assert process.out.log.get(0).get(2) == 'QuantitativeTrait' }, - { assert process.out.log.get(0).get(3) == false }, { assert path(process.out.log.get(0).get(1)).exists() }, { def predList = path(process.out.predictions.get(0).get(1)) @@ -208,10 +215,10 @@ nextflow_process { }, { def stablePredictions = process.out.predictions.collect { prediction -> - [prediction[0], path(prediction[1]).getFileName().toString(), prediction[2], prediction[3]] + [prediction[0], path(prediction[1]).getFileName().toString(), prediction[2]] } def stableLoco = process.out.loco.collect { loco -> - [loco[0], path(loco[1]).getFileName().toString(), loco[2], loco[3]] + [loco[0], path(loco[1]).getFileName().toString(), loco[2]] } assert snapshot( stablePredictions, @@ -238,8 +245,7 @@ nextflow_process { input[1] = [ [ id:'plink_simulated' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true), - 'QuantitativeTrait', - false + 'QuantitativeTrait' ] input[2] = [ @@ -262,11 +268,8 @@ nextflow_process { { assert process.out.loco.get(0).get(0).id == 'plink_simulated' }, { assert process.out.log.get(0).get(0).id == 'plink_simulated' }, { assert process.out.predictions.get(0).get(2) == 'QuantitativeTrait' }, - { assert process.out.predictions.get(0).get(3) == false }, { assert process.out.loco.get(0).get(2) == 'QuantitativeTrait' }, - { assert process.out.loco.get(0).get(3) == false }, { assert process.out.log.get(0).get(2) == 'QuantitativeTrait' }, - { assert process.out.log.get(0).get(3) == false }, { assert path(process.out.log.get(0).get(1)).exists() }, { def predList = path(process.out.predictions.get(0).get(1)) @@ -294,6 +297,7 @@ nextflow_process { test("homo_sapiens popgen - plink2 - stub") { options "-stub" + config regenieStep1TestConfig.absolutePath when { process { @@ -308,8 +312,7 @@ nextflow_process { input[1] = [ [ id:'plink_simulated' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true), - 'QuantitativeTrait', - false + 'QuantitativeTrait' ] input[2] = [ @@ -327,13 +330,13 @@ nextflow_process { { assert process.success }, { def stablePredictions = process.out.predictions.collect { prediction -> - [prediction[0], path(prediction[1]).getFileName().toString(), prediction[2], prediction[3]] + [prediction[0], path(prediction[1]).getFileName().toString(), prediction[2]] } def stableLoco = process.out.loco.collect { loco -> - [loco[0], path(loco[1]).getFileName().toString(), loco[2], loco[3]] + [loco[0], path(loco[1]).getFileName().toString(), loco[2]] } def stableLogs = process.out.log.collect { logTuple -> - [logTuple[0], path(logTuple[1]).getFileName().toString(), logTuple[2], logTuple[3]] + [logTuple[0], path(logTuple[1]).getFileName().toString(), logTuple[2]] } assert snapshot( stablePredictions, diff --git a/modules/nf-core/regenie/step1/tests/main.nf.test.snap b/modules/nf-core/regenie/step1/tests/main.nf.test.snap index 33743ba9d474..252f94259745 100644 --- a/modules/nf-core/regenie/step1/tests/main.nf.test.snap +++ b/modules/nf-core/regenie/step1/tests/main.nf.test.snap @@ -7,8 +7,7 @@ "id": "plink_simulated" }, "plink_simulated_pred.list", - "QuantitativeTrait", - false + "QuantitativeTrait" ] ], [ @@ -17,8 +16,7 @@ "id": "plink_simulated" }, "plink_simulated_1.loco.gz", - "QuantitativeTrait", - false + "QuantitativeTrait" ] ], { @@ -45,8 +43,7 @@ "id": "plink_simulated" }, "plink_simulated_pred.list", - "QuantitativeTrait", - false + "QuantitativeTrait" ] ], [ @@ -55,8 +52,7 @@ "id": "plink_simulated" }, "plink_simulated_1.loco.gz", - "QuantitativeTrait", - false + "QuantitativeTrait" ] ], { @@ -83,8 +79,7 @@ "id": "plink_simulated" }, "plink_simulated_pred.list", - "QuantitativeTrait", - false + "QuantitativeTrait" ] ], [ @@ -93,8 +88,7 @@ "id": "plink_simulated" }, "plink_simulated_1.loco.gz", - "QuantitativeTrait", - false + "QuantitativeTrait" ] ], [ @@ -103,8 +97,7 @@ "id": "plink_simulated" }, "plink_simulated.log", - "QuantitativeTrait", - false + "QuantitativeTrait" ] ], { @@ -131,8 +124,7 @@ "id": "plink_simulated" }, "plink_simulated_pred.list", - "BinaryTrait", - true + "BinaryTrait" ] ], [ @@ -141,8 +133,7 @@ "id": "plink_simulated" }, "plink_simulated_1.loco.gz", - "BinaryTrait", - true + "BinaryTrait" ] ], { From 334c7ba280df3ace2664c8580e99b77f3290988e Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Tue, 19 May 2026 21:31:57 +0800 Subject: [PATCH 14/17] Remove pheno_col from regenie step1 --- modules/nf-core/regenie/step1/main.nf | 11 ++- modules/nf-core/regenie/step1/meta.yml | 12 --- .../nf-core/regenie/step1/tests/main.nf.test | 81 ++++++++----------- .../regenie/step1/tests/nextflow.config | 5 ++ 4 files changed, 42 insertions(+), 67 deletions(-) create mode 100644 modules/nf-core/regenie/step1/tests/nextflow.config diff --git a/modules/nf-core/regenie/step1/main.nf b/modules/nf-core/regenie/step1/main.nf index c22e1e1980d1..aa8328fbdc24 100644 --- a/modules/nf-core/regenie/step1/main.nf +++ b/modules/nf-core/regenie/step1/main.nf @@ -1,5 +1,5 @@ process REGENIE_STEP1 { - tag "${meta.id}:${pheno_col}" + tag "${meta.id}" label 'process_medium' conda "${moduleDir}/environment.yml" @@ -9,14 +9,14 @@ process REGENIE_STEP1 { input: tuple val(meta), path(plink_genotype_file), path(plink_variant_file), path(plink_sample_file) - tuple val(meta2), path(pheno), val(pheno_col) + tuple val(meta2), path(pheno) tuple val(meta3), path(covar) val bsize output: - tuple val(meta), path("*_pred.list"), val(pheno_col), emit: predictions - tuple val(meta), path("*.loco.gz"), val(pheno_col), emit: loco - tuple val(meta), path("*.log"), val(pheno_col), emit: log + tuple val(meta), path("*_pred.list"), emit: predictions + tuple val(meta), path("*.loco.gz"), emit: loco + tuple val(meta), path("*.log"), emit: log tuple val("${task.process}"), val('regenie'), eval('regenie --version 2>&1 | sed -n "1{s/^v//;s/\\.gz$//;p}"'), topic: versions, emit: versions_regenie when: @@ -34,7 +34,6 @@ process REGENIE_STEP1 { --step 1 \\ ${genotype_flag} ${input_prefix} \\ --phenoFile ${pheno} \\ - --phenoColList ${pheno_col} \\ ${covar_arg} \\ --bsize ${bsize_arg} \\ --gz \\ diff --git a/modules/nf-core/regenie/step1/meta.yml b/modules/nf-core/regenie/step1/meta.yml index a9aa90d4d5ae..65b6cb5e7b12 100644 --- a/modules/nf-core/regenie/step1/meta.yml +++ b/modules/nf-core/regenie/step1/meta.yml @@ -53,9 +53,6 @@ input: pattern: "*.{phe,pheno,txt,tsv}" ontologies: - edam: "http://edamontology.org/format_3475" # TSV - - pheno_col: - type: string - description: Phenotype column passed to `--phenoColList` - - meta3: type: map description: | @@ -84,9 +81,6 @@ output: description: REGENIE prediction list file pattern: "*_pred.list" ontologies: [] - - pheno_col: - type: string - description: Phenotype column passed to `--phenoColList` loco: - - meta: type: map @@ -99,9 +93,6 @@ output: pattern: "*.loco.gz" ontologies: - edam: "http://edamontology.org/format_3987" # GZIP - - pheno_col: - type: string - description: Phenotype column passed to `--phenoColList` log: - - meta: type: map @@ -114,9 +105,6 @@ output: pattern: "*.log" ontologies: - edam: "http://edamontology.org/format_2330" # Text - - pheno_col: - type: string - description: Phenotype column passed to `--phenoColList` versions_regenie: - - "${task.process}": type: string diff --git a/modules/nf-core/regenie/step1/tests/main.nf.test b/modules/nf-core/regenie/step1/tests/main.nf.test index 7154b86bec49..66b88da49ebd 100644 --- a/modules/nf-core/regenie/step1/tests/main.nf.test +++ b/modules/nf-core/regenie/step1/tests/main.nf.test @@ -1,19 +1,7 @@ -def regenieStep1TestConfig = new File(".nf-test/regenie_step1.tests.config") -regenieStep1TestConfig.parentFile.mkdirs() -regenieStep1TestConfig.text = """ -params { - modules_testdata_base_path = System.getenv('NF_MODULES_TESTDATA_BASE_PATH') ?: 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' -} -process { - withName: REGENIE_STEP1 { - ext.args = { meta2.bt ? '--bt' : '' } - } -} -""" - nextflow_process { name "Test Process REGENIE_STEP1" + config "./nextflow.config" script "../main.nf" process "REGENIE_STEP1" @@ -23,9 +11,11 @@ nextflow_process { tag "regenie/step1" test("homo_sapiens popgen - quantitative plink1 with covariates") { - config regenieStep1TestConfig.absolutePath when { + params { + module_args = '--phenoColList QuantitativeTrait' + } process { """ input[0] = [ @@ -37,8 +27,7 @@ nextflow_process { input[1] = [ [ id:'plink_simulated' ], - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true), - 'QuantitativeTrait' + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) ] input[2] = [ @@ -60,9 +49,6 @@ nextflow_process { { assert process.out.predictions.get(0).get(0).id == 'plink_simulated' }, { assert process.out.loco.get(0).get(0).id == 'plink_simulated' }, { assert process.out.log.get(0).get(0).id == 'plink_simulated' }, - { assert process.out.predictions.get(0).get(2) == 'QuantitativeTrait' }, - { assert process.out.loco.get(0).get(2) == 'QuantitativeTrait' }, - { assert process.out.log.get(0).get(2) == 'QuantitativeTrait' }, { assert path(process.out.log.get(0).get(1)).exists() }, { def predList = path(process.out.predictions.get(0).get(1)) @@ -77,10 +63,10 @@ nextflow_process { }, { def stablePredictions = process.out.predictions.collect { prediction -> - [prediction[0], path(prediction[1]).getFileName().toString(), prediction[2]] + [prediction[0], path(prediction[1]).getFileName().toString()] } def stableLoco = process.out.loco.collect { loco -> - [loco[0], path(loco[1]).getFileName().toString(), loco[2]] + [loco[0], path(loco[1]).getFileName().toString()] } assert snapshot( stablePredictions, @@ -94,9 +80,11 @@ nextflow_process { } test("homo_sapiens popgen - binary plink1 with covariates") { - config regenieStep1TestConfig.absolutePath when { + params { + module_args = '--phenoColList BinaryTrait --bt' + } process { """ input[0] = [ @@ -107,9 +95,8 @@ nextflow_process { ] input[1] = [ - [ id:'plink_simulated', bt:true ], - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_binary_phenoname.phe', checkIfExists: true), - 'BinaryTrait' + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_binary_phenoname.phe', checkIfExists: true) ] input[2] = [ @@ -131,9 +118,6 @@ nextflow_process { { assert process.out.predictions.get(0).get(0).id == 'plink_simulated' }, { assert process.out.loco.get(0).get(0).id == 'plink_simulated' }, { assert process.out.log.get(0).get(0).id == 'plink_simulated' }, - { assert process.out.predictions.get(0).get(2) == 'BinaryTrait' }, - { assert process.out.loco.get(0).get(2) == 'BinaryTrait' }, - { assert process.out.log.get(0).get(2) == 'BinaryTrait' }, { assert path(process.out.log.get(0).get(1)).exists() }, { def predList = path(process.out.predictions.get(0).get(1)) @@ -148,10 +132,10 @@ nextflow_process { }, { def stablePredictions = process.out.predictions.collect { prediction -> - [prediction[0], path(prediction[1]).getFileName().toString(), prediction[2]] + [prediction[0], path(prediction[1]).getFileName().toString()] } def stableLoco = process.out.loco.collect { loco -> - [loco[0], path(loco[1]).getFileName().toString(), loco[2]] + [loco[0], path(loco[1]).getFileName().toString()] } assert snapshot( stablePredictions, @@ -165,9 +149,11 @@ nextflow_process { } test("homo_sapiens popgen - quantitative plink1 without covariates") { - config regenieStep1TestConfig.absolutePath when { + params { + module_args = '--phenoColList QuantitativeTrait' + } process { """ input[0] = [ @@ -179,8 +165,7 @@ nextflow_process { input[1] = [ [ id:'plink_simulated' ], - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true), - 'QuantitativeTrait' + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) ] input[2] = [[:], []] @@ -198,9 +183,6 @@ nextflow_process { { assert process.out.predictions.get(0).get(0).id == 'plink_simulated' }, { assert process.out.loco.get(0).get(0).id == 'plink_simulated' }, { assert process.out.log.get(0).get(0).id == 'plink_simulated' }, - { assert process.out.predictions.get(0).get(2) == 'QuantitativeTrait' }, - { assert process.out.loco.get(0).get(2) == 'QuantitativeTrait' }, - { assert process.out.log.get(0).get(2) == 'QuantitativeTrait' }, { assert path(process.out.log.get(0).get(1)).exists() }, { def predList = path(process.out.predictions.get(0).get(1)) @@ -215,10 +197,10 @@ nextflow_process { }, { def stablePredictions = process.out.predictions.collect { prediction -> - [prediction[0], path(prediction[1]).getFileName().toString(), prediction[2]] + [prediction[0], path(prediction[1]).getFileName().toString()] } def stableLoco = process.out.loco.collect { loco -> - [loco[0], path(loco[1]).getFileName().toString(), loco[2]] + [loco[0], path(loco[1]).getFileName().toString()] } assert snapshot( stablePredictions, @@ -232,7 +214,11 @@ nextflow_process { } test("homo_sapiens popgen - quantitative plink2 with covariates and custom bsize") { + when { + params { + module_args = '--phenoColList QuantitativeTrait' + } process { """ input[0] = [ @@ -244,8 +230,7 @@ nextflow_process { input[1] = [ [ id:'plink_simulated' ], - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true), - 'QuantitativeTrait' + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) ] input[2] = [ @@ -267,9 +252,6 @@ nextflow_process { { assert process.out.predictions.get(0).get(0).id == 'plink_simulated' }, { assert process.out.loco.get(0).get(0).id == 'plink_simulated' }, { assert process.out.log.get(0).get(0).id == 'plink_simulated' }, - { assert process.out.predictions.get(0).get(2) == 'QuantitativeTrait' }, - { assert process.out.loco.get(0).get(2) == 'QuantitativeTrait' }, - { assert process.out.log.get(0).get(2) == 'QuantitativeTrait' }, { assert path(process.out.log.get(0).get(1)).exists() }, { def predList = path(process.out.predictions.get(0).get(1)) @@ -297,9 +279,11 @@ nextflow_process { test("homo_sapiens popgen - plink2 - stub") { options "-stub" - config regenieStep1TestConfig.absolutePath when { + params { + module_args = '--phenoColList QuantitativeTrait' + } process { """ input[0] = [ @@ -311,8 +295,7 @@ nextflow_process { input[1] = [ [ id:'plink_simulated' ], - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true), - 'QuantitativeTrait' + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) ] input[2] = [ @@ -330,13 +313,13 @@ nextflow_process { { assert process.success }, { def stablePredictions = process.out.predictions.collect { prediction -> - [prediction[0], path(prediction[1]).getFileName().toString(), prediction[2]] + [prediction[0], path(prediction[1]).getFileName().toString()] } def stableLoco = process.out.loco.collect { loco -> - [loco[0], path(loco[1]).getFileName().toString(), loco[2]] + [loco[0], path(loco[1]).getFileName().toString()] } def stableLogs = process.out.log.collect { logTuple -> - [logTuple[0], path(logTuple[1]).getFileName().toString(), logTuple[2]] + [logTuple[0], path(logTuple[1]).getFileName().toString()] } assert snapshot( stablePredictions, diff --git a/modules/nf-core/regenie/step1/tests/nextflow.config b/modules/nf-core/regenie/step1/tests/nextflow.config new file mode 100644 index 000000000000..1fc7b88cdf58 --- /dev/null +++ b/modules/nf-core/regenie/step1/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: REGENIE_STEP1 { + ext.args = params.module_args + } +} From 685940939f9aa79d869f54bf645a5b0bd3f46b86 Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Tue, 19 May 2026 22:37:50 +0800 Subject: [PATCH 15/17] Update regenie step1 snapshots after removing pheno_col --- .../regenie/step1/tests/main.nf.test.snap | 27 +++++++------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/modules/nf-core/regenie/step1/tests/main.nf.test.snap b/modules/nf-core/regenie/step1/tests/main.nf.test.snap index 252f94259745..bd24f854c824 100644 --- a/modules/nf-core/regenie/step1/tests/main.nf.test.snap +++ b/modules/nf-core/regenie/step1/tests/main.nf.test.snap @@ -6,8 +6,7 @@ { "id": "plink_simulated" }, - "plink_simulated_pred.list", - "QuantitativeTrait" + "plink_simulated_pred.list" ] ], [ @@ -15,8 +14,7 @@ { "id": "plink_simulated" }, - "plink_simulated_1.loco.gz", - "QuantitativeTrait" + "plink_simulated_1.loco.gz" ] ], { @@ -42,8 +40,7 @@ { "id": "plink_simulated" }, - "plink_simulated_pred.list", - "QuantitativeTrait" + "plink_simulated_pred.list" ] ], [ @@ -51,8 +48,7 @@ { "id": "plink_simulated" }, - "plink_simulated_1.loco.gz", - "QuantitativeTrait" + "plink_simulated_1.loco.gz" ] ], { @@ -78,8 +74,7 @@ { "id": "plink_simulated" }, - "plink_simulated_pred.list", - "QuantitativeTrait" + "plink_simulated_pred.list" ] ], [ @@ -87,8 +82,7 @@ { "id": "plink_simulated" }, - "plink_simulated_1.loco.gz", - "QuantitativeTrait" + "plink_simulated_1.loco.gz" ] ], [ @@ -96,8 +90,7 @@ { "id": "plink_simulated" }, - "plink_simulated.log", - "QuantitativeTrait" + "plink_simulated.log" ] ], { @@ -123,8 +116,7 @@ { "id": "plink_simulated" }, - "plink_simulated_pred.list", - "BinaryTrait" + "plink_simulated_pred.list" ] ], [ @@ -132,8 +124,7 @@ { "id": "plink_simulated" }, - "plink_simulated_1.loco.gz", - "BinaryTrait" + "plink_simulated_1.loco.gz" ] ], { From 08caec7ddb0ac2a567d3d05f28b9aeab6acf8dd0 Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Wed, 27 May 2026 02:38:16 +0800 Subject: [PATCH 16/17] Add REGENIE splitl0 module --- .../nf-core/regenie/splitl0/environment.yml | 7 + modules/nf-core/regenie/splitl0/main.nf | 58 +++++++ modules/nf-core/regenie/splitl0/meta.yml | 156 ++++++++++++++++++ .../regenie/splitl0/tests/main.nf.test | 148 +++++++++++++++++ .../regenie/splitl0/tests/main.nf.test.snap | 84 ++++++++++ .../regenie/splitl0/tests/nextflow.config | 5 + 6 files changed, 458 insertions(+) create mode 100644 modules/nf-core/regenie/splitl0/environment.yml create mode 100644 modules/nf-core/regenie/splitl0/main.nf create mode 100644 modules/nf-core/regenie/splitl0/meta.yml create mode 100644 modules/nf-core/regenie/splitl0/tests/main.nf.test create mode 100644 modules/nf-core/regenie/splitl0/tests/main.nf.test.snap create mode 100644 modules/nf-core/regenie/splitl0/tests/nextflow.config diff --git a/modules/nf-core/regenie/splitl0/environment.yml b/modules/nf-core/regenie/splitl0/environment.yml new file mode 100644 index 000000000000..98fe8277cc05 --- /dev/null +++ b/modules/nf-core/regenie/splitl0/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::regenie=4.1.2" diff --git a/modules/nf-core/regenie/splitl0/main.nf b/modules/nf-core/regenie/splitl0/main.nf new file mode 100644 index 000000000000..38f3435f52f4 --- /dev/null +++ b/modules/nf-core/regenie/splitl0/main.nf @@ -0,0 +1,58 @@ +process REGENIE_SPLITL0 { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/7a/7a05bf71ea09adc5ebf9f0c656c9b326c0f16ba8e4966914972e58313469a466/data' + : 'community.wave.seqera.io/library/regenie:4.1.2--5d361f9fcb2f85cf'}" + + input: + tuple val(meta), path(plink_genotype_file), path(plink_variant_file), path(plink_sample_file) + tuple val(meta2), path(pheno) + tuple val(meta3), path(covar) + val bsize + val n_jobs + + output: + tuple val(meta), path("*.master"), emit: master + tuple val(meta), path("*_job*.snplist"), emit: snplists + tuple val(meta), path("*.log"), emit: log + tuple val("${task.process}"), val('regenie'), eval('regenie --version 2>&1 | sed -n "1{s/^v//;s/\\.gz$//;p}"'), topic: versions, emit: versions_regenie + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def input_prefix = plink_genotype_file.baseName + def prefix = task.ext.prefix ?: input_prefix + def genotype_flag = plink_genotype_file.name.endsWith('.pgen') ? '--pgen' : '--bed' + def covar_arg = covar ? "--covarFile ${covar}" : '' + def bsize_arg = bsize ?: 1000 + """ + regenie \\ + --step 1 \\ + ${genotype_flag} ${input_prefix} \\ + --phenoFile ${pheno} \\ + ${covar_arg} \\ + --bsize ${bsize_arg} \\ + --gz \\ + --threads ${task.cpus} \\ + ${args} \\ + --out ${prefix} \\ + --split-l0 ${prefix},${n_jobs} + """ + + stub: + def input_prefix = plink_genotype_file.baseName + def prefix = task.ext.prefix ?: input_prefix + def job_count = n_jobs as Integer + def snplist_lines = (1..job_count).collect { job -> "touch ${prefix}_job${job}.snplist" }.join('\n') + def master_lines = (1..job_count).collect { job -> "${prefix}_job${job} ${prefix}_job${job}.snplist" }.join('\\n') + """ + printf 'job snplist\\n${master_lines}\\n' > ${prefix}.master + ${snplist_lines} + touch ${prefix}.log + """ +} diff --git a/modules/nf-core/regenie/splitl0/meta.yml b/modules/nf-core/regenie/splitl0/meta.yml new file mode 100644 index 000000000000..7a2de6f6cce6 --- /dev/null +++ b/modules/nf-core/regenie/splitl0/meta.yml @@ -0,0 +1,156 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "regenie_splitl0" +description: Split REGENIE step 1 level-0 ridge-regression blocks into parallel jobs +keywords: + - regenie + - gwas + - association + - genomics + - parallel +tools: + - "regenie": + description: "Regenie is a C++ program for whole genome regression modelling of large genome-wide association studies (GWAS)." + homepage: "https://rgcgithub.github.io/regenie/" + documentation: "https://rgcgithub.github.io/regenie/options/" + tool_dev_url: "https://github.com/rgcgithub/regenie" + doi: "10.1038/s41588-021-00870-7" + licence: ["MIT"] + identifier: "biotools:regenie" + +input: + - - meta: + type: map + description: | + Groovy Map containing genotype information + Keep only the genotype analysis identifier in this map + REGENIE consumes the staged basename of `plink_genotype_file` as the `--bed` or `--pgen` prefix, so the `.bed/.bim/.fam` or `.pgen/.pvar/.psam` files must share one basename + e.g. `[ id:'cohort' ]` + - plink_genotype_file: + type: file + description: PLINK primary genotype file in BED or PGEN format + pattern: "*.{bed,pgen}" + ontologies: + - edam: "http://edamontology.org/format_3003" # BED + - plink_variant_file: + type: file + description: PLINK variant metadata file in BIM or PVAR format + pattern: "*.{bim,pvar,zst}" + ontologies: [] + - plink_sample_file: + type: file + description: PLINK sample metadata file in FAM or PSAM format + pattern: "*.{fam,psam}" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing genotype/sample information associated with the phenotype file input + Use the same phenotype file and phenotype-selection arguments for all `regenie/splitl0`, `regenie/runl0`, and `regenie/runl1` jobs in the same chunked step 1 analysis + e.g. `[ id:'plink_simulated' ]` + - pheno: + type: file + description: Phenotype file passed to `--phenoFile` + pattern: "*.{phe,pheno,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" # TSV + - - meta3: + type: map + description: | + Groovy Map containing genotype/sample information associated with the covariate input + Use compatible covariate inputs for all stages in the same chunked step 1 analysis + e.g. `[ id:'plink_simulated' ]` + - covar: + type: file + optional: true + description: Optional covariate file passed to `--covarFile`; provide `[]` when absent + pattern: "*.{covar,cov,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" # TSV + - bsize: + type: integer + description: Optional block size passed to `--bsize`; pass `[]` to use the module default of `1000` + - n_jobs: + type: integer + description: Number of level-0 jobs requested with `--split-l0` + +output: + master: + - - meta: + type: map + description: | + Groovy Map containing genotype/sample information + e.g. `[ id:'plink_simulated' ]` + - "*.master": + type: file + description: REGENIE split level-0 master file + pattern: "*.master" + ontologies: + - edam: "http://edamontology.org/format_2330" # Text + snplists: + - - meta: + type: map + description: | + Groovy Map containing genotype/sample information + e.g. `[ id:'plink_simulated' ]` + - "*_job*.snplist": + type: file + description: REGENIE per-job variant list files referenced by the master file + pattern: "*_job*.snplist" + ontologies: + - edam: "http://edamontology.org/format_2330" # Text + log: + - - meta: + type: map + description: | + Groovy Map containing genotype information + e.g. `[ id:'plink_simulated' ]` + - "*.log": + type: file + description: REGENIE split level-0 log file + pattern: "*.log" + ontologies: + - edam: "http://edamontology.org/format_2330" # Text + versions_regenie: + - - "${task.process}": + type: string + description: The process the versions were collected from + - "regenie": + type: string + description: The tool name + - 'regenie --version 2>&1 | sed -n "1{s/^v//;s/\.gz$//;p}"': + type: eval + description: The command used to generate the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - regenie: + type: string + description: The tool name + - 'regenie --version 2>&1 | sed -n "1{s/^v//;s/\.gz$//;p}"': + type: eval + description: The command used to generate the version of the tool + +notes: | + `task.ext.args` is passed directly to REGENIE and can be used for stage-consistent options such as `--phenoColList`, `--bt`, `--loocv`, or `--keep-l0`. + The same phenotype file, phenotype-selection arguments, trait mode arguments such as `--bt`, and compatible genotype/covariate inputs must be used across `regenie/splitl0`, every matching `regenie/runl0` job, and `regenie/runl1`. +authors: + - "@lyh970817" +maintainers: + - "@lyh970817" +containers: + conda: + linux_amd64: + lock_file: "modules/nf-core/regenie/splitl0/.conda-lock/linux_amd64-bd-5d361f9fcb2f85cf_1.txt" + docker: + linux_amd64: + build_id: "bd-5d361f9fcb2f85cf_1" + name: "community.wave.seqera.io/library/regenie:4.1.2--5d361f9fcb2f85cf" + scanId: "sc-cc9eb5ed5eb381dd_2" + singularity: + linux_amd64: + build_id: "bd-7c121fb4ecd57890_1" + name: "oras://community.wave.seqera.io/library/regenie:4.1.2--7c121fb4ecd57890" + https: "https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/7a/7a05bf71ea09adc5ebf9f0c656c9b326c0f16ba8e4966914972e58313469a466/data" diff --git a/modules/nf-core/regenie/splitl0/tests/main.nf.test b/modules/nf-core/regenie/splitl0/tests/main.nf.test new file mode 100644 index 000000000000..3932aca10356 --- /dev/null +++ b/modules/nf-core/regenie/splitl0/tests/main.nf.test @@ -0,0 +1,148 @@ +nextflow_process { + + name "Test Process REGENIE_SPLITL0" + config "./nextflow.config" + script "../main.nf" + process "REGENIE_SPLITL0" + + tag "modules" + tag "modules_nfcore" + tag "regenie" + tag "regenie/splitl0" + + test("homo_sapiens popgen - quantitative plink1 with covariates") { + + when { + params { + module_args = '--phenoColList QuantitativeTrait' + } + process { + """ + input[0] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + + input[1] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + ] + + input[2] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) + ] + + input[3] = 100 + input[4] = 2 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.master.size() == 1 }, + { assert process.out.snplists.size() == 1 }, + { assert process.out.log.size() == 1 }, + { assert process.out.master.get(0).get(0).id == 'plink_simulated' }, + { assert process.out.snplists.get(0).get(0).id == 'plink_simulated' }, + { assert process.out.log.get(0).get(0).id == 'plink_simulated' }, + { + def master = path(process.out.master.get(0).get(1)) + def lines = master.text.readLines().findAll { it } + assert master.exists() + assert lines.size() == 3 + assert lines[0] ==~ /\d+\s+\d+/ + assert lines.drop(1).every { line -> + line.contains('plink_simulated_job') && !line.contains('/') + } + }, + { + def snplists = process.out.snplists.get(0).get(1) + assert snplists.size() == 2 + assert snplists.collect { path(it).getFileName().toString() }.sort() == [ + 'plink_simulated_job1.snplist', + 'plink_simulated_job2.snplist' + ] + }, + { assert path(process.out.log.get(0).get(1)).exists() }, + { + def stableMaster = process.out.master.collect { master -> + [master[0], path(master[1]).getFileName().toString()] + } + def stableSnplists = process.out.snplists.collect { snplist -> + [snplist[0], snplist[1].collect { path(it).getFileName().toString() }.sort()] + } + assert snapshot( + stableMaster, + stableSnplists, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + + } + + test("homo_sapiens popgen - plink1 - stub") { + + options "-stub" + + when { + params { + module_args = '--phenoColList QuantitativeTrait' + } + process { + """ + input[0] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + + input[1] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + ] + + input[2] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) + ] + + input[3] = 100 + input[4] = 2 + """ + } + } + + then { + assertAll( + { assert process.success }, + { + def stableMaster = process.out.master.collect { master -> + [master[0], path(master[1]).getFileName().toString()] + } + def stableSnplists = process.out.snplists.collect { snplist -> + [snplist[0], snplist[1].collect { path(it).getFileName().toString() }.sort()] + } + def stableLogs = process.out.log.collect { log -> + [log[0], path(log[1]).getFileName().toString()] + } + assert snapshot( + stableMaster, + stableSnplists, + stableLogs, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + + } + +} diff --git a/modules/nf-core/regenie/splitl0/tests/main.nf.test.snap b/modules/nf-core/regenie/splitl0/tests/main.nf.test.snap new file mode 100644 index 000000000000..a89bab88ca8f --- /dev/null +++ b/modules/nf-core/regenie/splitl0/tests/main.nf.test.snap @@ -0,0 +1,84 @@ +{ + "homo_sapiens popgen - quantitative plink1 with covariates": { + "content": [ + [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated.master" + ] + ], + [ + [ + { + "id": "plink_simulated" + }, + [ + "plink_simulated_job1.snplist", + "plink_simulated_job2.snplist" + ] + ] + ], + { + "versions_regenie": [ + [ + "REGENIE_SPLITL0", + "regenie", + "4.1.2" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-05-21T22:23:00.198898381" + }, + "homo_sapiens popgen - plink1 - stub": { + "content": [ + [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated.master" + ] + ], + [ + [ + { + "id": "plink_simulated" + }, + [ + "plink_simulated_job1.snplist", + "plink_simulated_job2.snplist" + ] + ] + ], + [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated.log" + ] + ], + { + "versions_regenie": [ + [ + "REGENIE_SPLITL0", + "regenie", + "4.1.2" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-05-21T22:23:14.857699924" + } +} \ No newline at end of file diff --git a/modules/nf-core/regenie/splitl0/tests/nextflow.config b/modules/nf-core/regenie/splitl0/tests/nextflow.config new file mode 100644 index 000000000000..a21fcfdd4745 --- /dev/null +++ b/modules/nf-core/regenie/splitl0/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: REGENIE_SPLITL0 { + ext.args = params.module_args + } +} From 2ccfff63925fd7dc30a152c2a109a63ba52ef03d Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Wed, 27 May 2026 02:39:22 +0800 Subject: [PATCH 17/17] Add REGENIE runl0 module --- modules/nf-core/regenie/runl0/environment.yml | 7 + modules/nf-core/regenie/runl0/main.nf | 55 ++++++ modules/nf-core/regenie/runl0/meta.yml | 160 +++++++++++++++++ .../nf-core/regenie/runl0/tests/main.nf.test | 168 ++++++++++++++++++ .../regenie/runl0/tests/main.nf.test.snap | 66 +++++++ .../regenie/runl0/tests/nextflow.config | 8 + 6 files changed, 464 insertions(+) create mode 100644 modules/nf-core/regenie/runl0/environment.yml create mode 100644 modules/nf-core/regenie/runl0/main.nf create mode 100644 modules/nf-core/regenie/runl0/meta.yml create mode 100644 modules/nf-core/regenie/runl0/tests/main.nf.test create mode 100644 modules/nf-core/regenie/runl0/tests/main.nf.test.snap create mode 100644 modules/nf-core/regenie/runl0/tests/nextflow.config diff --git a/modules/nf-core/regenie/runl0/environment.yml b/modules/nf-core/regenie/runl0/environment.yml new file mode 100644 index 000000000000..98fe8277cc05 --- /dev/null +++ b/modules/nf-core/regenie/runl0/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::regenie=4.1.2" diff --git a/modules/nf-core/regenie/runl0/main.nf b/modules/nf-core/regenie/runl0/main.nf new file mode 100644 index 000000000000..7ad218b5997d --- /dev/null +++ b/modules/nf-core/regenie/runl0/main.nf @@ -0,0 +1,55 @@ +process REGENIE_RUNL0 { + tag "${meta.id}_${job_number}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/7a/7a05bf71ea09adc5ebf9f0c656c9b326c0f16ba8e4966914972e58313469a466/data' + : 'community.wave.seqera.io/library/regenie:4.1.2--5d361f9fcb2f85cf'}" + + input: + tuple val(meta), path(plink_genotype_file), path(plink_variant_file), path(plink_sample_file) + tuple val(meta2), path(master), path(snplist), val(job_number) + tuple val(meta3), path(pheno) + tuple val(meta4), path(covar) + val bsize + + output: + tuple val(meta), path("*_l0_Y*"), emit: l0_predictions + tuple val(meta), path("*.log"), emit: log + tuple val("${task.process}"), val('regenie'), eval('regenie --version 2>&1 | sed -n "1{s/^v//;s/\\.gz$//;p}"'), topic: versions, emit: versions_regenie + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def input_prefix = plink_genotype_file.baseName + def prefix = task.ext.prefix ?: input_prefix + def run_prefix = "${prefix}_job${job_number}" + def genotype_flag = plink_genotype_file.name.endsWith('.pgen') ? '--pgen' : '--bed' + def covar_arg = covar ? "--covarFile ${covar}" : '' + def bsize_arg = bsize ?: 1000 + """ + regenie \\ + --step 1 \\ + ${genotype_flag} ${input_prefix} \\ + --phenoFile ${pheno} \\ + ${covar_arg} \\ + --bsize ${bsize_arg} \\ + --gz \\ + --threads ${task.cpus} \\ + ${args} \\ + --out ${run_prefix} \\ + --run-l0 ${master},${job_number} + """ + + stub: + def input_prefix = plink_genotype_file.baseName + def prefix = task.ext.prefix ?: input_prefix + def run_prefix = "${prefix}_job${job_number}" + """ + touch ${run_prefix}_l0_Y1 + touch ${run_prefix}.log + """ +} diff --git a/modules/nf-core/regenie/runl0/meta.yml b/modules/nf-core/regenie/runl0/meta.yml new file mode 100644 index 000000000000..510ee02cfe8a --- /dev/null +++ b/modules/nf-core/regenie/runl0/meta.yml @@ -0,0 +1,160 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "regenie_runl0" +description: Run one REGENIE step 1 level-0 job from a split master file +keywords: + - regenie + - gwas + - association + - genomics + - parallel +tools: + - "regenie": + description: "Regenie is a C++ program for whole genome regression modelling of large genome-wide association studies (GWAS)." + homepage: "https://rgcgithub.github.io/regenie/" + documentation: "https://rgcgithub.github.io/regenie/options/" + tool_dev_url: "https://github.com/rgcgithub/regenie" + doi: "10.1038/s41588-021-00870-7" + licence: ["MIT"] + identifier: "biotools:regenie" + +input: + - - meta: + type: map + description: | + Groovy Map containing genotype information + Keep only the genotype analysis identifier in this map + REGENIE consumes the staged basename of `plink_genotype_file` as the `--bed` or `--pgen` prefix, so the `.bed/.bim/.fam` or `.pgen/.pvar/.psam` files must share one basename + e.g. `[ id:'cohort' ]` + - plink_genotype_file: + type: file + description: PLINK primary genotype file in BED or PGEN format + pattern: "*.{bed,pgen}" + ontologies: + - edam: "http://edamontology.org/format_3003" # BED + - plink_variant_file: + type: file + description: PLINK variant metadata file in BIM or PVAR format + pattern: "*.{bim,pvar,zst}" + ontologies: [] + - plink_sample_file: + type: file + description: PLINK sample metadata file in FAM or PSAM format + pattern: "*.{fam,psam}" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing split level-0 job information + e.g. `[ id:'plink_simulated' ]` + - master: + type: file + description: REGENIE split level-0 master file from `regenie/splitl0` + pattern: "*.master" + ontologies: + - edam: "http://edamontology.org/format_2330" # Text + - snplist: + type: file + description: Per-job variant list staged because the master file references it; the path is not passed explicitly to REGENIE + pattern: "*_job*.snplist" + ontologies: + - edam: "http://edamontology.org/format_2330" # Text + - job_number: + type: integer + description: Level-0 job number passed as the second value to `--run-l0` + - - meta3: + type: map + description: | + Groovy Map containing genotype/sample information associated with the phenotype file input + Use the same phenotype file and phenotype-selection arguments for all `regenie/splitl0`, `regenie/runl0`, and `regenie/runl1` jobs in the same chunked step 1 analysis + e.g. `[ id:'plink_simulated' ]` + - pheno: + type: file + description: Phenotype file passed to `--phenoFile` + pattern: "*.{phe,pheno,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" # TSV + - - meta4: + type: map + description: | + Groovy Map containing genotype/sample information associated with the covariate input + Use compatible covariate inputs for all stages in the same chunked step 1 analysis + e.g. `[ id:'plink_simulated' ]` + - covar: + type: file + optional: true + description: Optional covariate file passed to `--covarFile`; provide `[]` when absent + pattern: "*.{covar,cov,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" # TSV + - bsize: + type: integer + description: Optional block size passed to `--bsize`; pass `[]` to use the module default of `1000` + +output: + l0_predictions: + - - meta: + type: map + description: | + Groovy Map containing genotype/sample information + e.g. `[ id:'plink_simulated' ]` + - "*_l0_Y*": + type: file + description: REGENIE level-0 prediction files for this job + pattern: "*_l0_Y*" + ontologies: [] + log: + - - meta: + type: map + description: | + Groovy Map containing genotype information + e.g. `[ id:'plink_simulated' ]` + - "*.log": + type: file + description: REGENIE run level-0 log file + pattern: "*.log" + ontologies: + - edam: "http://edamontology.org/format_2330" # Text + versions_regenie: + - - "${task.process}": + type: string + description: The process the versions were collected from + - "regenie": + type: string + description: The tool name + - 'regenie --version 2>&1 | sed -n "1{s/^v//;s/\.gz$//;p}"': + type: eval + description: The command used to generate the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - regenie: + type: string + description: The tool name + - 'regenie --version 2>&1 | sed -n "1{s/^v//;s/\.gz$//;p}"': + type: eval + description: The command used to generate the version of the tool + +notes: | + `task.ext.args` is passed directly to REGENIE and can be used for stage-consistent options such as `--phenoColList`, `--bt`, `--loocv`, or `--keep-l0`. + The same phenotype file, phenotype-selection arguments, trait mode arguments such as `--bt`, and compatible genotype/covariate inputs must be used across `regenie/splitl0`, every matching `regenie/runl0` job, and `regenie/runl1`. +authors: + - "@lyh970817" +maintainers: + - "@lyh970817" +containers: + conda: + linux_amd64: + lock_file: "modules/nf-core/regenie/runl0/.conda-lock/linux_amd64-bd-5d361f9fcb2f85cf_1.txt" + docker: + linux_amd64: + build_id: "bd-5d361f9fcb2f85cf_1" + name: "community.wave.seqera.io/library/regenie:4.1.2--5d361f9fcb2f85cf" + scanId: "sc-cc9eb5ed5eb381dd_2" + singularity: + linux_amd64: + build_id: "bd-7c121fb4ecd57890_1" + name: "oras://community.wave.seqera.io/library/regenie:4.1.2--7c121fb4ecd57890" + https: "https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/7a/7a05bf71ea09adc5ebf9f0c656c9b326c0f16ba8e4966914972e58313469a466/data" diff --git a/modules/nf-core/regenie/runl0/tests/main.nf.test b/modules/nf-core/regenie/runl0/tests/main.nf.test new file mode 100644 index 000000000000..a5d1cda3011a --- /dev/null +++ b/modules/nf-core/regenie/runl0/tests/main.nf.test @@ -0,0 +1,168 @@ +nextflow_process { + + name "Test Process REGENIE_RUNL0" + config "./nextflow.config" + script "../main.nf" + process "REGENIE_RUNL0" + + tag "modules" + tag "modules_nfcore" + tag "regenie" + tag "regenie/splitl0" + tag "regenie/runl0" + + setup { + run("REGENIE_SPLITL0") { + script "../../splitl0/main.nf" + process { + """ + input[0] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + + input[1] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + ] + + input[2] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) + ] + + input[3] = 100 + input[4] = 2 + """ + } + } + } + + test("homo_sapiens popgen - quantitative plink1 with covariates") { + + when { + params { + module_args = '--phenoColList QuantitativeTrait' + } + process { + """ + input[0] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + + input[1] = REGENIE_SPLITL0.out.master + .combine(REGENIE_SPLITL0.out.snplists) + .map { master_meta, master, snplist_meta, snplists -> + [ master_meta, master, snplists.find { snplist -> snplist.getFileName().toString().contains('_job1.snplist') }, 1 ] + } + + input[2] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + ] + + input[3] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) + ] + + input[4] = 100 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.l0_predictions.size() == 1 }, + { assert process.out.log.size() == 1 }, + { assert process.out.l0_predictions.get(0).get(0).id == 'plink_simulated' }, + { assert process.out.log.get(0).get(0).id == 'plink_simulated' }, + { + def predictionFiles = process.out.l0_predictions.get(0).get(1) + predictionFiles = predictionFiles instanceof List ? predictionFiles : [predictionFiles] + assert predictionFiles.size() >= 1 + assert predictionFiles.every { path(it).getFileName().toString().contains('_l0_Y') } + }, + { assert path(process.out.log.get(0).get(1)).exists() }, + { + def stablePredictions = process.out.l0_predictions.collect { prediction -> + def predictionFiles = prediction[1] instanceof List ? prediction[1] : [prediction[1]] + [prediction[0], predictionFiles.collect { path(it).getFileName().toString() }.sort()] + } + assert snapshot( + stablePredictions, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + + } + + test("homo_sapiens popgen - plink1 - stub") { + + options "-stub" + + when { + params { + module_args = '--phenoColList QuantitativeTrait' + } + process { + """ + input[0] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + + input[1] = REGENIE_SPLITL0.out.master + .combine(REGENIE_SPLITL0.out.snplists) + .map { master_meta, master, snplist_meta, snplists -> + [ master_meta, master, snplists.find { snplist -> snplist.getFileName().toString().contains('_job1.snplist') }, 1 ] + } + + input[2] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + ] + + input[3] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) + ] + + input[4] = 100 + """ + } + } + + then { + assertAll( + { assert process.success }, + { + def stablePredictions = process.out.l0_predictions.collect { prediction -> + def predictionFiles = prediction[1] instanceof List ? prediction[1] : [prediction[1]] + [prediction[0], predictionFiles.collect { path(it).getFileName().toString() }.sort()] + } + def stableLogs = process.out.log.collect { log -> + [log[0], path(log[1]).getFileName().toString()] + } + assert snapshot( + stablePredictions, + stableLogs, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + + } + +} diff --git a/modules/nf-core/regenie/runl0/tests/main.nf.test.snap b/modules/nf-core/regenie/runl0/tests/main.nf.test.snap new file mode 100644 index 000000000000..66bd3fc8bbba --- /dev/null +++ b/modules/nf-core/regenie/runl0/tests/main.nf.test.snap @@ -0,0 +1,66 @@ +{ + "homo_sapiens popgen - quantitative plink1 with covariates": { + "content": [ + [ + [ + { + "id": "plink_simulated" + }, + [ + "plink_simulated_job1_l0_Y1" + ] + ] + ], + { + "versions_regenie": [ + [ + "REGENIE_RUNL0", + "regenie", + "4.1.2" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-05-21T22:35:29.736647195" + }, + "homo_sapiens popgen - plink1 - stub": { + "content": [ + [ + [ + { + "id": "plink_simulated" + }, + [ + "plink_simulated_job1_l0_Y1" + ] + ] + ], + [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated_job1.log" + ] + ], + { + "versions_regenie": [ + [ + "REGENIE_RUNL0", + "regenie", + "4.1.2" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-05-21T22:32:15.576585394" + } +} \ No newline at end of file diff --git a/modules/nf-core/regenie/runl0/tests/nextflow.config b/modules/nf-core/regenie/runl0/tests/nextflow.config new file mode 100644 index 000000000000..5334b9809f5e --- /dev/null +++ b/modules/nf-core/regenie/runl0/tests/nextflow.config @@ -0,0 +1,8 @@ +process { + withName: REGENIE_SPLITL0 { + ext.args = params.module_args + } + withName: REGENIE_RUNL0 { + ext.args = params.module_args + } +}