diff --git a/modules/nf-core/regenie/runl0/environment.yml b/modules/nf-core/regenie/runl0/environment.yml new file mode 100644 index 000000000000..98fe8277cc05 --- /dev/null +++ b/modules/nf-core/regenie/runl0/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::regenie=4.1.2" diff --git a/modules/nf-core/regenie/runl0/main.nf b/modules/nf-core/regenie/runl0/main.nf new file mode 100644 index 000000000000..7ad218b5997d --- /dev/null +++ b/modules/nf-core/regenie/runl0/main.nf @@ -0,0 +1,55 @@ +process REGENIE_RUNL0 { + tag "${meta.id}_${job_number}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/7a/7a05bf71ea09adc5ebf9f0c656c9b326c0f16ba8e4966914972e58313469a466/data' + : 'community.wave.seqera.io/library/regenie:4.1.2--5d361f9fcb2f85cf'}" + + input: + tuple val(meta), path(plink_genotype_file), path(plink_variant_file), path(plink_sample_file) + tuple val(meta2), path(master), path(snplist), val(job_number) + tuple val(meta3), path(pheno) + tuple val(meta4), path(covar) + val bsize + + output: + tuple val(meta), path("*_l0_Y*"), emit: l0_predictions + tuple val(meta), path("*.log"), emit: log + tuple val("${task.process}"), val('regenie'), eval('regenie --version 2>&1 | sed -n "1{s/^v//;s/\\.gz$//;p}"'), topic: versions, emit: versions_regenie + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def input_prefix = plink_genotype_file.baseName + def prefix = task.ext.prefix ?: input_prefix + def run_prefix = "${prefix}_job${job_number}" + def genotype_flag = plink_genotype_file.name.endsWith('.pgen') ? '--pgen' : '--bed' + def covar_arg = covar ? 
"--covarFile ${covar}" : '' + def bsize_arg = bsize ?: 1000 + """ + regenie \\ + --step 1 \\ + ${genotype_flag} ${input_prefix} \\ + --phenoFile ${pheno} \\ + ${covar_arg} \\ + --bsize ${bsize_arg} \\ + --gz \\ + --threads ${task.cpus} \\ + ${args} \\ + --out ${run_prefix} \\ + --run-l0 ${master},${job_number} + """ + + stub: + def input_prefix = plink_genotype_file.baseName + def prefix = task.ext.prefix ?: input_prefix + def run_prefix = "${prefix}_job${job_number}" + """ + touch ${run_prefix}_l0_Y1 + touch ${run_prefix}.log + """ +} diff --git a/modules/nf-core/regenie/runl0/meta.yml b/modules/nf-core/regenie/runl0/meta.yml new file mode 100644 index 000000000000..510ee02cfe8a --- /dev/null +++ b/modules/nf-core/regenie/runl0/meta.yml @@ -0,0 +1,160 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "regenie_runl0" +description: Run one REGENIE step 1 level-0 job from a split master file +keywords: + - regenie + - gwas + - association + - genomics + - parallel +tools: + - "regenie": + description: "Regenie is a C++ program for whole genome regression modelling of large genome-wide association studies (GWAS)." + homepage: "https://rgcgithub.github.io/regenie/" + documentation: "https://rgcgithub.github.io/regenie/options/" + tool_dev_url: "https://github.com/rgcgithub/regenie" + doi: "10.1038/s41588-021-00870-7" + licence: ["MIT"] + identifier: "biotools:regenie" + +input: + - - meta: + type: map + description: | + Groovy Map containing genotype information + Keep only the genotype analysis identifier in this map + REGENIE consumes the staged basename of `plink_genotype_file` as the `--bed` or `--pgen` prefix, so the `.bed/.bim/.fam` or `.pgen/.pvar/.psam` files must share one basename + e.g. 
`[ id:'cohort' ]` + - plink_genotype_file: + type: file + description: PLINK primary genotype file in BED or PGEN format + pattern: "*.{bed,pgen}" + ontologies: + - edam: "http://edamontology.org/format_3003" # BED + - plink_variant_file: + type: file + description: PLINK variant metadata file in BIM or PVAR format + pattern: "*.{bim,pvar,pvar.zst}" + ontologies: [] + - plink_sample_file: + type: file + description: PLINK sample metadata file in FAM or PSAM format + pattern: "*.{fam,psam}" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing split level-0 job information + e.g. `[ id:'plink_simulated' ]` + - master: + type: file + description: REGENIE split level-0 master file from `regenie/splitl0` + pattern: "*.master" + ontologies: + - edam: "http://edamontology.org/format_2330" # Text + - snplist: + type: file + description: Per-job variant list staged because the master file references it; the path is not passed explicitly to REGENIE + pattern: "*_job*.snplist" + ontologies: + - edam: "http://edamontology.org/format_2330" # Text + - job_number: + type: integer + description: Level-0 job number passed as the second value to `--run-l0` + - - meta3: + type: map + description: | + Groovy Map containing genotype/sample information associated with the phenotype file input + Use the same phenotype file and phenotype-selection arguments for all `regenie/splitl0`, `regenie/runl0`, and `regenie/runl1` jobs in the same chunked step 1 analysis + e.g. `[ id:'plink_simulated' ]` + - pheno: + type: file + description: Phenotype file passed to `--phenoFile` + pattern: "*.{phe,pheno,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" # TSV + - - meta4: + type: map + description: | + Groovy Map containing genotype/sample information associated with the covariate input + Use compatible covariate inputs for all stages in the same chunked step 1 analysis + e.g. 
`[ id:'plink_simulated' ]` + - covar: + type: file + optional: true + description: Optional covariate file passed to `--covarFile`; provide `[]` when absent + pattern: "*.{covar,cov,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" # TSV + - bsize: + type: integer + description: Optional block size passed to `--bsize`; pass `[]` to use the module default of `1000` + +output: + l0_predictions: + - - meta: + type: map + description: | + Groovy Map containing genotype/sample information + e.g. `[ id:'plink_simulated' ]` + - "*_l0_Y*": + type: file + description: REGENIE level-0 prediction files for this job + pattern: "*_l0_Y*" + ontologies: [] + log: + - - meta: + type: map + description: | + Groovy Map containing genotype information + e.g. `[ id:'plink_simulated' ]` + - "*.log": + type: file + description: REGENIE run level-0 log file + pattern: "*.log" + ontologies: + - edam: "http://edamontology.org/format_2330" # Text + versions_regenie: + - - "${task.process}": + type: string + description: The process the versions were collected from + - "regenie": + type: string + description: The tool name + - 'regenie --version 2>&1 | sed -n "1{s/^v//;s/\.gz$//;p}"': + type: eval + description: The command used to generate the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - regenie: + type: string + description: The tool name + - 'regenie --version 2>&1 | sed -n "1{s/^v//;s/\.gz$//;p}"': + type: eval + description: The command used to generate the version of the tool + +notes: | + `task.ext.args` is passed directly to REGENIE and can be used for stage-consistent options such as `--phenoColList`, `--bt`, `--loocv`, or `--keep-l0`. + The same phenotype file, phenotype-selection arguments, trait mode arguments such as `--bt`, and compatible genotype/covariate inputs must be used across `regenie/splitl0`, every matching `regenie/runl0` job, and `regenie/runl1`. 
+authors: + - "@lyh970817" +maintainers: + - "@lyh970817" +containers: + conda: + linux_amd64: + lock_file: "modules/nf-core/regenie/runl0/.conda-lock/linux_amd64-bd-5d361f9fcb2f85cf_1.txt" + docker: + linux_amd64: + build_id: "bd-5d361f9fcb2f85cf_1" + name: "community.wave.seqera.io/library/regenie:4.1.2--5d361f9fcb2f85cf" + scanId: "sc-cc9eb5ed5eb381dd_2" + singularity: + linux_amd64: + build_id: "bd-7c121fb4ecd57890_1" + name: "oras://community.wave.seqera.io/library/regenie:4.1.2--7c121fb4ecd57890" + https: "https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/7a/7a05bf71ea09adc5ebf9f0c656c9b326c0f16ba8e4966914972e58313469a466/data" diff --git a/modules/nf-core/regenie/runl0/tests/main.nf.test b/modules/nf-core/regenie/runl0/tests/main.nf.test new file mode 100644 index 000000000000..a5d1cda3011a --- /dev/null +++ b/modules/nf-core/regenie/runl0/tests/main.nf.test @@ -0,0 +1,168 @@ +nextflow_process { + + name "Test Process REGENIE_RUNL0" + config "./nextflow.config" + script "../main.nf" + process "REGENIE_RUNL0" + + tag "modules" + tag "modules_nfcore" + tag "regenie" + tag "regenie/splitl0" + tag "regenie/runl0" + + setup { + run("REGENIE_SPLITL0") { + script "../../splitl0/main.nf" + process { + """ + input[0] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + + input[1] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + ] + + input[2] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) 
+ ] + + input[3] = 100 + input[4] = 2 + """ + } + } + } + + test("homo_sapiens popgen - quantitative plink1 with covariates") { + + when { + params { + module_args = '--phenoColList QuantitativeTrait' + } + process { + """ + input[0] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + + input[1] = REGENIE_SPLITL0.out.master + .combine(REGENIE_SPLITL0.out.snplists) + .map { master_meta, master, snplist_meta, snplists -> + [ master_meta, master, snplists.find { snplist -> snplist.getFileName().toString().contains('_job1.snplist') }, 1 ] + } + + input[2] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + ] + + input[3] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) + ] + + input[4] = 100 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.l0_predictions.size() == 1 }, + { assert process.out.log.size() == 1 }, + { assert process.out.l0_predictions.get(0).get(0).id == 'plink_simulated' }, + { assert process.out.log.get(0).get(0).id == 'plink_simulated' }, + { + def predictionFiles = process.out.l0_predictions.get(0).get(1) + predictionFiles = predictionFiles instanceof List ? 
predictionFiles : [predictionFiles] + assert predictionFiles.size() >= 1 + assert predictionFiles.every { path(it).getFileName().toString().contains('_l0_Y') } + }, + { assert path(process.out.log.get(0).get(1)).exists() }, + { + def stablePredictions = process.out.l0_predictions.collect { prediction -> + def predictionFiles = prediction[1] instanceof List ? prediction[1] : [prediction[1]] + [prediction[0], predictionFiles.collect { path(it).getFileName().toString() }.sort()] + } + assert snapshot( + stablePredictions, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + + } + + test("homo_sapiens popgen - plink1 - stub") { + + options "-stub" + + when { + params { + module_args = '--phenoColList QuantitativeTrait' + } + process { + """ + input[0] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + + input[1] = REGENIE_SPLITL0.out.master + .combine(REGENIE_SPLITL0.out.snplists) + .map { master_meta, master, snplist_meta, snplists -> + [ master_meta, master, snplists.find { snplist -> snplist.getFileName().toString().contains('_job1.snplist') }, 1 ] + } + + input[2] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + ] + + input[3] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) + ] + + input[4] = 100 + """ + } + } + + then { + assertAll( + { assert process.success }, + { + def stablePredictions = process.out.l0_predictions.collect { prediction -> + def predictionFiles = 
prediction[1] instanceof List ? prediction[1] : [prediction[1]] + [prediction[0], predictionFiles.collect { path(it).getFileName().toString() }.sort()] + } + def stableLogs = process.out.log.collect { log -> + [log[0], path(log[1]).getFileName().toString()] + } + assert snapshot( + stablePredictions, + stableLogs, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + + } + +} diff --git a/modules/nf-core/regenie/runl0/tests/main.nf.test.snap b/modules/nf-core/regenie/runl0/tests/main.nf.test.snap new file mode 100644 index 000000000000..66bd3fc8bbba --- /dev/null +++ b/modules/nf-core/regenie/runl0/tests/main.nf.test.snap @@ -0,0 +1,66 @@ +{ + "homo_sapiens popgen - quantitative plink1 with covariates": { + "content": [ + [ + [ + { + "id": "plink_simulated" + }, + [ + "plink_simulated_job1_l0_Y1" + ] + ] + ], + { + "versions_regenie": [ + [ + "REGENIE_RUNL0", + "regenie", + "4.1.2" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-05-21T22:35:29.736647195" + }, + "homo_sapiens popgen - plink1 - stub": { + "content": [ + [ + [ + { + "id": "plink_simulated" + }, + [ + "plink_simulated_job1_l0_Y1" + ] + ] + ], + [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated_job1.log" + ] + ], + { + "versions_regenie": [ + [ + "REGENIE_RUNL0", + "regenie", + "4.1.2" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-05-21T22:32:15.576585394" + } +} \ No newline at end of file diff --git a/modules/nf-core/regenie/runl0/tests/nextflow.config b/modules/nf-core/regenie/runl0/tests/nextflow.config new file mode 100644 index 000000000000..5334b9809f5e --- /dev/null +++ b/modules/nf-core/regenie/runl0/tests/nextflow.config @@ -0,0 +1,8 @@ +process { + withName: REGENIE_SPLITL0 { + ext.args = params.module_args + } + withName: REGENIE_RUNL0 { + ext.args = params.module_args + } +} diff --git a/modules/nf-core/regenie/splitl0/environment.yml 
b/modules/nf-core/regenie/splitl0/environment.yml new file mode 100644 index 000000000000..98fe8277cc05 --- /dev/null +++ b/modules/nf-core/regenie/splitl0/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::regenie=4.1.2" diff --git a/modules/nf-core/regenie/splitl0/main.nf b/modules/nf-core/regenie/splitl0/main.nf new file mode 100644 index 000000000000..38f3435f52f4 --- /dev/null +++ b/modules/nf-core/regenie/splitl0/main.nf @@ -0,0 +1,58 @@ +process REGENIE_SPLITL0 { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/7a/7a05bf71ea09adc5ebf9f0c656c9b326c0f16ba8e4966914972e58313469a466/data' + : 'community.wave.seqera.io/library/regenie:4.1.2--5d361f9fcb2f85cf'}" + + input: + tuple val(meta), path(plink_genotype_file), path(plink_variant_file), path(plink_sample_file) + tuple val(meta2), path(pheno) + tuple val(meta3), path(covar) + val bsize + val n_jobs + + output: + tuple val(meta), path("*.master"), emit: master + tuple val(meta), path("*_job*.snplist"), emit: snplists + tuple val(meta), path("*.log"), emit: log + tuple val("${task.process}"), val('regenie'), eval('regenie --version 2>&1 | sed -n "1{s/^v//;s/\\.gz$//;p}"'), topic: versions, emit: versions_regenie + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def input_prefix = plink_genotype_file.baseName + def prefix = task.ext.prefix ?: input_prefix + def genotype_flag = plink_genotype_file.name.endsWith('.pgen') ? '--pgen' : '--bed' + def covar_arg = covar ? 
"--covarFile ${covar}" : '' + def bsize_arg = bsize ?: 1000 + """ + regenie \\ + --step 1 \\ + ${genotype_flag} ${input_prefix} \\ + --phenoFile ${pheno} \\ + ${covar_arg} \\ + --bsize ${bsize_arg} \\ + --gz \\ + --threads ${task.cpus} \\ + ${args} \\ + --out ${prefix} \\ + --split-l0 ${prefix},${n_jobs} + """ + + stub: + def input_prefix = plink_genotype_file.baseName + def prefix = task.ext.prefix ?: input_prefix + def job_count = n_jobs as Integer + def snplist_lines = (1..job_count).collect { job -> "touch ${prefix}_job${job}.snplist" }.join('\n') + def master_lines = (1..job_count).collect { job -> "${prefix}_job${job} ${prefix}_job${job}.snplist" }.join('\\n') + """ + printf 'job snplist\\n${master_lines}\\n' > ${prefix}.master + ${snplist_lines} + touch ${prefix}.log + """ +} diff --git a/modules/nf-core/regenie/splitl0/meta.yml b/modules/nf-core/regenie/splitl0/meta.yml new file mode 100644 index 000000000000..7a2de6f6cce6 --- /dev/null +++ b/modules/nf-core/regenie/splitl0/meta.yml @@ -0,0 +1,156 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "regenie_splitl0" +description: Split REGENIE step 1 level-0 ridge-regression blocks into parallel jobs +keywords: + - regenie + - gwas + - association + - genomics + - parallel +tools: + - "regenie": + description: "Regenie is a C++ program for whole genome regression modelling of large genome-wide association studies (GWAS)." 
+ homepage: "https://rgcgithub.github.io/regenie/" + documentation: "https://rgcgithub.github.io/regenie/options/" + tool_dev_url: "https://github.com/rgcgithub/regenie" + doi: "10.1038/s41588-021-00870-7" + licence: ["MIT"] + identifier: "biotools:regenie" + +input: + - - meta: + type: map + description: | + Groovy Map containing genotype information + Keep only the genotype analysis identifier in this map + REGENIE consumes the staged basename of `plink_genotype_file` as the `--bed` or `--pgen` prefix, so the `.bed/.bim/.fam` or `.pgen/.pvar/.psam` files must share one basename + e.g. `[ id:'cohort' ]` + - plink_genotype_file: + type: file + description: PLINK primary genotype file in BED or PGEN format + pattern: "*.{bed,pgen}" + ontologies: + - edam: "http://edamontology.org/format_3003" # BED + - plink_variant_file: + type: file + description: PLINK variant metadata file in BIM or PVAR format + pattern: "*.{bim,pvar,pvar.zst}" + ontologies: [] + - plink_sample_file: + type: file + description: PLINK sample metadata file in FAM or PSAM format + pattern: "*.{fam,psam}" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing genotype/sample information associated with the phenotype file input + Use the same phenotype file and phenotype-selection arguments for all `regenie/splitl0`, `regenie/runl0`, and `regenie/runl1` jobs in the same chunked step 1 analysis + e.g. `[ id:'plink_simulated' ]` + - pheno: + type: file + description: Phenotype file passed to `--phenoFile` + pattern: "*.{phe,pheno,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" # TSV + - - meta3: + type: map + description: | + Groovy Map containing genotype/sample information associated with the covariate input + Use compatible covariate inputs for all stages in the same chunked step 1 analysis + e.g. 
`[ id:'plink_simulated' ]` + - covar: + type: file + optional: true + description: Optional covariate file passed to `--covarFile`; provide `[]` when absent + pattern: "*.{covar,cov,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" # TSV + - bsize: + type: integer + description: Optional block size passed to `--bsize`; pass `[]` to use the module default of `1000` + - n_jobs: + type: integer + description: Number of level-0 jobs requested with `--split-l0` + +output: + master: + - - meta: + type: map + description: | + Groovy Map containing genotype/sample information + e.g. `[ id:'plink_simulated' ]` + - "*.master": + type: file + description: REGENIE split level-0 master file + pattern: "*.master" + ontologies: + - edam: "http://edamontology.org/format_2330" # Text + snplists: + - - meta: + type: map + description: | + Groovy Map containing genotype/sample information + e.g. `[ id:'plink_simulated' ]` + - "*_job*.snplist": + type: file + description: REGENIE per-job variant list files referenced by the master file + pattern: "*_job*.snplist" + ontologies: + - edam: "http://edamontology.org/format_2330" # Text + log: + - - meta: + type: map + description: | + Groovy Map containing genotype information + e.g. 
`[ id:'plink_simulated' ]` + - "*.log": + type: file + description: REGENIE split level-0 log file + pattern: "*.log" + ontologies: + - edam: "http://edamontology.org/format_2330" # Text + versions_regenie: + - - "${task.process}": + type: string + description: The process the versions were collected from + - "regenie": + type: string + description: The tool name + - 'regenie --version 2>&1 | sed -n "1{s/^v//;s/\.gz$//;p}"': + type: eval + description: The command used to generate the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - regenie: + type: string + description: The tool name + - 'regenie --version 2>&1 | sed -n "1{s/^v//;s/\.gz$//;p}"': + type: eval + description: The command used to generate the version of the tool + +notes: | + `task.ext.args` is passed directly to REGENIE and can be used for stage-consistent options such as `--phenoColList`, `--bt`, `--loocv`, or `--keep-l0`. + The same phenotype file, phenotype-selection arguments, trait mode arguments such as `--bt`, and compatible genotype/covariate inputs must be used across `regenie/splitl0`, every matching `regenie/runl0` job, and `regenie/runl1`. 
+authors: + - "@lyh970817" +maintainers: + - "@lyh970817" +containers: + conda: + linux_amd64: + lock_file: "modules/nf-core/regenie/splitl0/.conda-lock/linux_amd64-bd-5d361f9fcb2f85cf_1.txt" + docker: + linux_amd64: + build_id: "bd-5d361f9fcb2f85cf_1" + name: "community.wave.seqera.io/library/regenie:4.1.2--5d361f9fcb2f85cf" + scanId: "sc-cc9eb5ed5eb381dd_2" + singularity: + linux_amd64: + build_id: "bd-7c121fb4ecd57890_1" + name: "oras://community.wave.seqera.io/library/regenie:4.1.2--7c121fb4ecd57890" + https: "https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/7a/7a05bf71ea09adc5ebf9f0c656c9b326c0f16ba8e4966914972e58313469a466/data" diff --git a/modules/nf-core/regenie/splitl0/tests/main.nf.test b/modules/nf-core/regenie/splitl0/tests/main.nf.test new file mode 100644 index 000000000000..3932aca10356 --- /dev/null +++ b/modules/nf-core/regenie/splitl0/tests/main.nf.test @@ -0,0 +1,148 @@ +nextflow_process { + + name "Test Process REGENIE_SPLITL0" + config "./nextflow.config" + script "../main.nf" + process "REGENIE_SPLITL0" + + tag "modules" + tag "modules_nfcore" + tag "regenie" + tag "regenie/splitl0" + + test("homo_sapiens popgen - quantitative plink1 with covariates") { + + when { + params { + module_args = '--phenoColList QuantitativeTrait' + } + process { + """ + input[0] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + + input[1] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + ] + + input[2] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) + ] + + input[3] = 100 + input[4] = 2 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.master.size() == 1 }, + { assert process.out.snplists.size() == 1 }, + { assert process.out.log.size() == 1 }, + { assert process.out.master.get(0).get(0).id == 'plink_simulated' }, + { assert process.out.snplists.get(0).get(0).id == 'plink_simulated' }, + { assert process.out.log.get(0).get(0).id == 'plink_simulated' }, + { + def master = path(process.out.master.get(0).get(1)) + def lines = master.text.readLines().findAll { it } + assert master.exists() + assert lines.size() == 3 + assert lines[0] ==~ /\d+\s+\d+/ + assert lines.drop(1).every { line -> + line.contains('plink_simulated_job') && !line.contains('/') + } + }, + { + def snplists = process.out.snplists.get(0).get(1) + assert snplists.size() == 2 + assert snplists.collect { path(it).getFileName().toString() }.sort() == [ + 'plink_simulated_job1.snplist', + 'plink_simulated_job2.snplist' + ] + }, + { assert path(process.out.log.get(0).get(1)).exists() }, + { + def stableMaster = process.out.master.collect { master -> + [master[0], path(master[1]).getFileName().toString()] + } + def stableSnplists = process.out.snplists.collect { snplist -> + [snplist[0], snplist[1].collect { path(it).getFileName().toString() }.sort()] + } + assert snapshot( + stableMaster, + stableSnplists, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + + } + + test("homo_sapiens popgen - plink1 - stub") { + + options "-stub" + + when { + params { + module_args = '--phenoColList QuantitativeTrait' + } + process { + """ + input[0] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: 
true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + + input[1] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + ] + + input[2] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) + ] + + input[3] = 100 + input[4] = 2 + """ + } + } + + then { + assertAll( + { assert process.success }, + { + def stableMaster = process.out.master.collect { master -> + [master[0], path(master[1]).getFileName().toString()] + } + def stableSnplists = process.out.snplists.collect { snplist -> + [snplist[0], snplist[1].collect { path(it).getFileName().toString() }.sort()] + } + def stableLogs = process.out.log.collect { log -> + [log[0], path(log[1]).getFileName().toString()] + } + assert snapshot( + stableMaster, + stableSnplists, + stableLogs, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + + } + +} diff --git a/modules/nf-core/regenie/splitl0/tests/main.nf.test.snap b/modules/nf-core/regenie/splitl0/tests/main.nf.test.snap new file mode 100644 index 000000000000..a89bab88ca8f --- /dev/null +++ b/modules/nf-core/regenie/splitl0/tests/main.nf.test.snap @@ -0,0 +1,84 @@ +{ + "homo_sapiens popgen - quantitative plink1 with covariates": { + "content": [ + [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated.master" + ] + ], + [ + [ + { + "id": "plink_simulated" + }, + [ + "plink_simulated_job1.snplist", + "plink_simulated_job2.snplist" + ] + ] + ], + { + "versions_regenie": [ + [ + "REGENIE_SPLITL0", + "regenie", + "4.1.2" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-05-21T22:23:00.198898381" + }, + "homo_sapiens popgen - plink1 - stub": { + "content": [ + [ + [ + { + "id": 
"plink_simulated" + }, + "plink_simulated.master" + ] + ], + [ + [ + { + "id": "plink_simulated" + }, + [ + "plink_simulated_job1.snplist", + "plink_simulated_job2.snplist" + ] + ] + ], + [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated.log" + ] + ], + { + "versions_regenie": [ + [ + "REGENIE_SPLITL0", + "regenie", + "4.1.2" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-05-21T22:23:14.857699924" + } +} \ No newline at end of file diff --git a/modules/nf-core/regenie/splitl0/tests/nextflow.config b/modules/nf-core/regenie/splitl0/tests/nextflow.config new file mode 100644 index 000000000000..a21fcfdd4745 --- /dev/null +++ b/modules/nf-core/regenie/splitl0/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: REGENIE_SPLITL0 { + ext.args = params.module_args + } +} diff --git a/modules/nf-core/regenie/step1/environment.yml b/modules/nf-core/regenie/step1/environment.yml new file mode 100644 index 000000000000..98fe8277cc05 --- /dev/null +++ b/modules/nf-core/regenie/step1/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::regenie=4.1.2" diff --git a/modules/nf-core/regenie/step1/main.nf b/modules/nf-core/regenie/step1/main.nf new file mode 100644 index 000000000000..aa8328fbdc24 --- /dev/null +++ b/modules/nf-core/regenie/step1/main.nf @@ -0,0 +1,53 @@ +process REGENIE_STEP1 { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/7a/7a05bf71ea09adc5ebf9f0c656c9b326c0f16ba8e4966914972e58313469a466/data' + : 'community.wave.seqera.io/library/regenie:4.1.2--5d361f9fcb2f85cf'}" + + input: + tuple val(meta), path(plink_genotype_file), path(plink_variant_file), path(plink_sample_file) + tuple val(meta2), path(pheno) + tuple val(meta3), path(covar) + val bsize + + output: + tuple val(meta), path("*_pred.list"), emit: predictions + tuple val(meta), path("*.loco.gz"), emit: loco + tuple val(meta), path("*.log"), emit: log + tuple val("${task.process}"), val('regenie'), eval('regenie --version 2>&1 | sed -n "1{s/^v//;s/\\.gz$//;p}"'), topic: versions, emit: versions_regenie + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def input_prefix = plink_genotype_file.baseName + def prefix = task.ext.prefix ?: input_prefix + def genotype_flag = plink_genotype_file.name.endsWith('.pgen') ? '--pgen' : '--bed' + def covar_arg = covar ? 
"--covarFile ${covar}" : '' + def bsize_arg = bsize ?: 1000 + """ + regenie \\ + --step 1 \\ + ${genotype_flag} ${input_prefix} \\ + --phenoFile ${pheno} \\ + ${covar_arg} \\ + --bsize ${bsize_arg} \\ + --gz \\ + --threads ${task.cpus} \\ + ${args} \\ + --out ${prefix} + """ + + stub: + def input_prefix = plink_genotype_file.baseName + def prefix = task.ext.prefix ?: input_prefix + """ + touch ${prefix}_pred.list + echo "" | gzip > ${prefix}_1.loco.gz + touch ${prefix}.log + """ +} diff --git a/modules/nf-core/regenie/step1/meta.yml b/modules/nf-core/regenie/step1/meta.yml new file mode 100644 index 000000000000..65b6cb5e7b12 --- /dev/null +++ b/modules/nf-core/regenie/step1/meta.yml @@ -0,0 +1,147 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "regenie_step1" +description: Run REGENIE step 1 to fit whole-genome regression models and emit LOCO predictions +keywords: + - regenie + - gwas + - association + - burden test + - genomics +tools: + - "regenie": + description: "Regenie is a C++ program for whole genome regression modelling of large genome-wide association studies (GWAS)." + homepage: "https://rgcgithub.github.io/regenie/" + documentation: "https://rgcgithub.github.io/regenie/options/" + tool_dev_url: "https://github.com/rgcgithub/regenie" + doi: "10.1038/s41588-021-00870-7" + licence: ["MIT"] + identifier: "biotools:regenie" + +input: + - - meta: + type: map + description: | + Groovy Map containing genotype information + Keep only the genotype analysis identifier in this map + REGENIE consumes the staged basename of `plink_genotype_file` as the `--bed` or `--pgen` prefix, so the `.bed/.bim/.fam` or `.pgen/.pvar/.psam` files must share one basename + e.g. 
`[ id:'cohort' ]` + - plink_genotype_file: + type: file + description: PLINK primary genotype file in BED or PGEN format + pattern: "*.{bed,pgen}" + ontologies: + - edam: "http://edamontology.org/format_3003" # BED + - plink_variant_file: + type: file + description: PLINK variant metadata file in BIM or PVAR format + pattern: "*.{bim,pvar,zst}" + ontologies: [] + - plink_sample_file: + type: file + description: PLINK sample metadata file in FAM or PSAM format + pattern: "*.{fam,psam}" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing genotype/sample information associated with the phenotype file input + Keep only the shared genotype/sample identifier in this map + e.g. `[ id:'plink_simulated' ]` + - pheno: + type: file + description: Phenotype file passed to `--phenoFile` + pattern: "*.{phe,pheno,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" # TSV + - - meta3: + type: map + description: | + Groovy Map containing genotype/sample information associated with the covariate input + e.g. `[ id:'plink_simulated' ]` + - covar: + type: file + optional: true + description: Optional covariate file passed to `--covarFile`; provide `[]` when absent + pattern: "*.{covar,cov,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" # TSV + - bsize: + type: integer + description: Optional block size passed to `--bsize`; pass `[]` to use the module default of `1000` + +output: + predictions: + - - meta: + type: map + description: | + Groovy Map containing genotype/sample information + e.g. `[ id:'plink_simulated' ]` + - "*_pred.list": + type: file + description: REGENIE prediction list file + pattern: "*_pred.list" + ontologies: [] + loco: + - - meta: + type: map + description: | + Groovy Map containing genotype/sample information + e.g. 
`[ id:'plink_simulated' ]` + - "*.loco.gz": + type: file + description: REGENIE LOCO prediction files + pattern: "*.loco.gz" + ontologies: + - edam: "http://edamontology.org/format_3989" # GZIP format + log: + - - meta: + type: map + description: | + Groovy Map containing genotype information + e.g. `[ id:'plink_simulated' ]` + - "*.log": + type: file + description: REGENIE step 1 log file + pattern: "*.log" + ontologies: + - edam: "http://edamontology.org/format_2330" # Text + versions_regenie: + - - "${task.process}": + type: string + description: The process the versions were collected from + - "regenie": + type: string + description: The tool name + - 'regenie --version 2>&1 | sed -n "1{s/^v//;s/\.gz$//;p}"': + type: eval + description: The command used to generate the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - regenie: + type: string + description: The tool name + - 'regenie --version 2>&1 | sed -n "1{s/^v//;s/\.gz$//;p}"': + type: eval + description: The command used to generate the version of the tool +authors: + - "@lyh970817" +maintainers: + - "@lyh970817" +containers: + conda: + linux_amd64: + lock_file: "modules/nf-core/regenie/step1/.conda-lock/linux_amd64-bd-5d361f9fcb2f85cf_1.txt" + docker: + linux_amd64: + build_id: "bd-5d361f9fcb2f85cf_1" + name: "community.wave.seqera.io/library/regenie:4.1.2--5d361f9fcb2f85cf" + scanId: "sc-cc9eb5ed5eb381dd_2" + singularity: + linux_amd64: + build_id: "bd-7c121fb4ecd57890_1" + name: "oras://community.wave.seqera.io/library/regenie:4.1.2--7c121fb4ecd57890" + https: "https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/7a/7a05bf71ea09adc5ebf9f0c656c9b326c0f16ba8e4966914972e58313469a466/data" diff --git a/modules/nf-core/regenie/step1/tests/main.nf.test b/modules/nf-core/regenie/step1/tests/main.nf.test new file mode 100644 index 000000000000..66b88da49ebd --- /dev/null +++ 
b/modules/nf-core/regenie/step1/tests/main.nf.test @@ -0,0 +1,336 @@ +nextflow_process { + + name "Test Process REGENIE_STEP1" + config "./nextflow.config" + script "../main.nf" + process "REGENIE_STEP1" + + tag "modules" + tag "modules_nfcore" + tag "regenie" + tag "regenie/step1" + + test("homo_sapiens popgen - quantitative plink1 with covariates") { + + when { + params { + module_args = '--phenoColList QuantitativeTrait' + } + process { + """ + input[0] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + + input[1] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + ] + + input[2] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) + ] + + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.predictions.size() == 1 }, + { assert process.out.loco.size() == 1 }, + { assert process.out.log.size() == 1 }, + { assert process.out.predictions.get(0).get(0).id == 'plink_simulated' }, + { assert process.out.loco.get(0).get(0).id == 'plink_simulated' }, + { assert process.out.log.get(0).get(0).id == 'plink_simulated' }, + { assert path(process.out.log.get(0).get(1)).exists() }, + { + def predList = path(process.out.predictions.get(0).get(1)) + def locoFile = path(process.out.loco.get(0).get(1)) + assert predList.exists() // checked before .text is read + def predListLines = predList.text.readLines().findAll { it } + assert predListLines.size() == 1 + def predListFields = 
predListLines[0].split(/\s+/) + assert predListFields.size() == 2 + assert predListFields[0] == 'QuantitativeTrait' + assert predListFields[1] == locoFile.toString() + }, + { + def stablePredictions = process.out.predictions.collect { prediction -> + [prediction[0], path(prediction[1]).getFileName().toString()] + } + def stableLoco = process.out.loco.collect { loco -> + [loco[0], path(loco[1]).getFileName().toString()] + } + assert snapshot( + stablePredictions, + stableLoco, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + + } + + test("homo_sapiens popgen - binary plink1 with covariates") { + + when { + params { + module_args = '--phenoColList BinaryTrait --bt' + } + process { + """ + input[0] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + + input[1] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_binary_phenoname.phe', checkIfExists: true) + ] + + input[2] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) + ] + + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.predictions.size() == 1 }, + { assert process.out.loco.size() == 1 }, + { assert process.out.log.size() == 1 }, + { assert process.out.predictions.get(0).get(0).id == 'plink_simulated' }, + { assert process.out.loco.get(0).get(0).id == 'plink_simulated' }, + { assert process.out.log.get(0).get(0).id == 'plink_simulated' }, + { assert path(process.out.log.get(0).get(1)).exists() }, + { + def 
predList = path(process.out.predictions.get(0).get(1)) + def locoFile = path(process.out.loco.get(0).get(1)) + assert predList.exists() // checked before .text is read + def predListLines = predList.text.readLines().findAll { it } + assert predListLines.size() == 1 + def predListFields = predListLines[0].split(/\s+/) + assert predListFields.size() == 2 + assert predListFields[0] == 'BinaryTrait' + assert predListFields[1] == locoFile.toString() + }, + { + def stablePredictions = process.out.predictions.collect { prediction -> + [prediction[0], path(prediction[1]).getFileName().toString()] + } + def stableLoco = process.out.loco.collect { loco -> + [loco[0], path(loco[1]).getFileName().toString()] + } + assert snapshot( + stablePredictions, + stableLoco, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + + } + + test("homo_sapiens popgen - quantitative plink1 without covariates") { + + when { + params { + module_args = '--phenoColList QuantitativeTrait' + } + process { + """ + input[0] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + + input[1] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + ] + + input[2] = [[:], []] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.predictions.size() == 1 }, + { assert process.out.loco.size() == 1 }, + { assert process.out.log.size() == 1 }, + { assert process.out.predictions.get(0).get(0).id == 'plink_simulated' }, + { assert process.out.loco.get(0).get(0).id == 'plink_simulated' }, + { assert 
process.out.log.get(0).get(0).id == 'plink_simulated' }, + { assert path(process.out.log.get(0).get(1)).exists() }, + { + def predList = path(process.out.predictions.get(0).get(1)) + def locoFile = path(process.out.loco.get(0).get(1)) + assert predList.exists() // checked before .text is read + def predListLines = predList.text.readLines().findAll { it } + assert predListLines.size() == 1 + def predListFields = predListLines[0].split(/\s+/) + assert predListFields.size() == 2 + assert predListFields[0] == 'QuantitativeTrait' + assert predListFields[1] == locoFile.toString() + }, + { + def stablePredictions = process.out.predictions.collect { prediction -> + [prediction[0], path(prediction[1]).getFileName().toString()] + } + def stableLoco = process.out.loco.collect { loco -> + [loco[0], path(loco[1]).getFileName().toString()] + } + assert snapshot( + stablePredictions, + stableLoco, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + + } + + test("homo_sapiens popgen - quantitative plink2 with covariates and custom bsize") { + + when { + params { + module_args = '--phenoColList QuantitativeTrait' + } + process { + """ + input[0] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.pgen', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.pvar', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.psam', checkIfExists: true) + ] + + input[1] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + ] + + input[2] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) + ] + + input[3] = 256 + """ + } + } + + then { + assertAll( + { assert process.success }, + { 
assert process.out.predictions.size() == 1 }, + { assert process.out.loco.size() == 1 }, + { assert process.out.log.size() == 1 }, + { assert process.out.predictions.get(0).get(0).id == 'plink_simulated' }, + { assert process.out.loco.get(0).get(0).id == 'plink_simulated' }, + { assert process.out.log.get(0).get(0).id == 'plink_simulated' }, + { assert path(process.out.log.get(0).get(1)).exists() }, + { + def predList = path(process.out.predictions.get(0).get(1)) + def locoFile = path(process.out.loco.get(0).get(1)) + assert predList.exists() // checked before .text is read + def predListLines = predList.text.readLines().findAll { it } + assert predListLines.size() == 1 + def predListFields = predListLines[0].split(/\s+/) + assert predListFields.size() == 2 + assert predListFields[0] == 'QuantitativeTrait' + assert predListFields[1] == locoFile.toString() + }, + { assert path(process.out.loco.get(0).get(1)).exists() }, + { + def versionInfo = process.out.versions_regenie.get(0) + assert versionInfo[0] == 'REGENIE_STEP1' + assert versionInfo[1] == 'regenie' + assert versionInfo[2] == '4.1.2' + } + ) + } + + } + + test("homo_sapiens popgen - plink2 - stub") { + + options "-stub" + + when { + params { + module_args = '--phenoColList QuantitativeTrait' + } + process { + """ + input[0] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.pgen', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.pvar', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.psam', checkIfExists: true) + ] + + input[1] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + ] + + input[2] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', 
checkIfExists: true) + ] + + input[3] = 256 + """ + } + } + + then { + assertAll( + { assert process.success }, + { + def stablePredictions = process.out.predictions.collect { prediction -> + [prediction[0], path(prediction[1]).getFileName().toString()] + } + def stableLoco = process.out.loco.collect { loco -> + [loco[0], path(loco[1]).getFileName().toString()] + } + def stableLogs = process.out.log.collect { logTuple -> + [logTuple[0], path(logTuple[1]).getFileName().toString()] + } + assert snapshot( + stablePredictions, + stableLoco, + stableLogs, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + + } + +} diff --git a/modules/nf-core/regenie/step1/tests/main.nf.test.snap b/modules/nf-core/regenie/step1/tests/main.nf.test.snap new file mode 100644 index 000000000000..bd24f854c824 --- /dev/null +++ b/modules/nf-core/regenie/step1/tests/main.nf.test.snap @@ -0,0 +1,146 @@ +{ + "homo_sapiens popgen - quantitative plink1 with covariates": { + "content": [ + [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated_pred.list" + ] + ], + [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated_1.loco.gz" + ] + ], + { + "versions_regenie": [ + [ + "REGENIE_STEP1", + "regenie", + "4.1.2" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-04-17T14:27:17.114946536" + }, + "homo_sapiens popgen - quantitative plink1 without covariates": { + "content": [ + [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated_pred.list" + ] + ], + [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated_1.loco.gz" + ] + ], + { + "versions_regenie": [ + [ + "REGENIE_STEP1", + "regenie", + "4.1.2" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-04-17T14:27:46.324894984" + }, + "homo_sapiens popgen - plink2 - stub": { + "content": [ + [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated_pred.list" + ] + ], + [ + [ + { + "id": 
"plink_simulated" + }, + "plink_simulated_1.loco.gz" + ] + ], + [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated.log" + ] + ], + { + "versions_regenie": [ + [ + "REGENIE_STEP1", + "regenie", + "4.1.2" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-04-17T14:28:16.024988753" + }, + "homo_sapiens popgen - binary plink1 with covariates": { + "content": [ + [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated_pred.list" + ] + ], + [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated_1.loco.gz" + ] + ], + { + "versions_regenie": [ + [ + "REGENIE_STEP1", + "regenie", + "4.1.2" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-04-17T14:27:31.70779572" + } +} diff --git a/modules/nf-core/regenie/step1/tests/nextflow.config b/modules/nf-core/regenie/step1/tests/nextflow.config new file mode 100644 index 000000000000..1fc7b88cdf58 --- /dev/null +++ b/modules/nf-core/regenie/step1/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 'REGENIE_STEP1' { + ext.args = params.module_args + } +}