diff --git a/CHANGELOG.md b/CHANGELOG.md index 0717298f..e1e8ce5b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -91,6 +91,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [[PR #451](https://github.com/nf-core/proteinfold/pulls/451)] - Remove af2 multimer padding from msa plots. - [[#417](https://github.com/nf-core/proteinfold/issues/417)] - Add `boltz_use_kernels` parameter to enable/disable using optimized Triton-based CUDA kernels CUDA kernels for Boltz inference. - [[#417](https://github.com/nf-core/proteinfold/issues/417)] - Handle incompatible CUDA kernel errors in Boltz by automatically retrying with `--no_kernels` false. +- [[PR #454](https://github.com/nf-core/proteinfold/pulls/454)] - Update `publishDir` patterns for alphafold2 modules and add `save_intermediates` parameter to optionally publish raw intermediate files. ### Parameters diff --git a/conf/modules_alphafold2.config b/conf/modules_alphafold2.config index abf107b8..14767109 100644 --- a/conf/modules_alphafold2.config +++ b/conf/modules_alphafold2.config @@ -60,11 +60,15 @@ process { saveAs: { filename -> if(filename.endsWith('_pae.tsv')){ "paes/$filename" - } else if(filename.equals('versions.yml')){ - null } else { filename } }, - pattern: '*.*' + pattern: '*.tsv' + ], + [ + enabled: params.save_intermediates, + path: { "${params.outdir}/alphafold2/${params.alphafold2_mode}/${meta.id}/" }, + mode: 'copy', + pattern: 'raw/**' ], [ path: { "${params.outdir}/alphafold2/${params.alphafold2_mode}/top_ranked_structures" }, @@ -80,15 +84,34 @@ process { withName: 'RUN_ALPHAFOLD2_MSA' { ext.args = params.alphafold2_max_template_date ? "--max_template_date ${params.alphafold2_max_template_date}" : '' publishDir = [ - path: { "${params.outdir}/alphafold2/${params.alphafold2_mode}" }, - mode: 'copy', - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + [ + path: { "${params.outdir}/alphafold2/${params.alphafold2_mode}/${meta.id}/msa/" }, + mode: 'copy', + pattern: 'raw/*.*', + saveAs: { filename -> + if(filename.equals('versions.yml')) { + null + } else { + filename.toString().replaceFirst(/^raw\//, '') + } + } + ], + [ + enabled: params.save_intermediates, + path: { "${params.outdir}/alphafold2/${params.alphafold2_mode}/${meta.id}/raw/" }, + mode: 'copy', + pattern: 'raw/msas/**', + saveAs: { filename -> filename.toString().replaceFirst(/^raw\//, '') } + ] ] } withName: 'RUN_ALPHAFOLD2_PRED' { accelerator = params.use_gpu? 1 : 0 - ext.args = params.use_gpu ? '--use_gpu_relax=true' : '--use_gpu_relax=false' + ext.args = [ + params.use_gpu ? '--use_gpu_relax=true' : '--use_gpu_relax=false', + (params.alphafold2_random_seed || params.alphafold2_random_seed == 0) ? "--random_seed=${params.alphafold2_random_seed}" : '' // 0 is a valid seed but falsy in Groovy, so test for it explicitly + ].join(' ').trim() publishDir = [ [ path: { "${params.outdir}/alphafold2/${params.alphafold2_mode}/${meta.id}" }, @@ -96,11 +119,15 @@ process { saveAs: { filename -> if(filename.endsWith('_pae.tsv')){ "paes/$filename" - } else if(filename.equals('versions.yml')){ - null } else { filename } }, - pattern: '*.*' + pattern: '*.tsv' + ], + [ + enabled: params.save_intermediates, + path: { "${params.outdir}/alphafold2/${params.alphafold2_mode}/${meta.id}/" }, + mode: 'copy', + pattern: 'raw/**' ], [ path: { "${params.outdir}/alphafold2/${params.alphafold2_mode}/top_ranked_structures" }, diff --git a/modules/local/run_alphafold2/main.nf b/modules/local/run_alphafold2/main.nf index c5d45518..64944464 100644 --- a/modules/local/run_alphafold2/main.nf +++ b/modules/local/run_alphafold2/main.nf @@ -26,9 +26,9 @@ process RUN_ALPHAFOLD2 { path ('uniprot/*') output: - path ("${fasta.baseName}*") + path ("raw/**") , emit: raw tuple val(meta), path ("${meta.id}_alphafold2.pdb") , emit: top_ranked_pdb - tuple val(meta), path ("${fasta.baseName}/ranked*.pdb") , emit: pdb + tuple val(meta), path ("raw/ranked*.pdb") , emit: pdb // TODO: 
re-label multiqc -> plddt so multiqc channel can take in all metrics tuple val(meta), path ("${meta.id}_plddt.tsv") , emit: multiqc tuple val(meta), path ("${meta.id}_alphafold2_msa.tsv") , emit: msa @@ -83,6 +83,9 @@ process RUN_ALPHAFOLD2 { mv "${meta.id}_msa.tsv" "${meta.id}_alphafold2_msa.tsv" + # Can't use fasta.baseName to batch outputs in publishDir + mv "${fasta.baseName}" raw/ + cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python3 --version | sed 's/Python //g') @@ -102,12 +105,12 @@ process RUN_ALPHAFOLD2 { touch "${meta.id}_0_pae.tsv" touch "${meta.id}_ptm.tsv" touch "${meta.id}_iptm.tsv" - mkdir "${fasta.baseName}" - touch "${fasta.baseName}/ranked_0.pdb" - touch "${fasta.baseName}/ranked_1.pdb" - touch "${fasta.baseName}/ranked_2.pdb" - touch "${fasta.baseName}/ranked_3.pdb" - touch "${fasta.baseName}/ranked_4.pdb" + mkdir "raw" + touch "raw/ranked_0.pdb" + touch "raw/ranked_1.pdb" + touch "raw/ranked_2.pdb" + touch "raw/ranked_3.pdb" + touch "raw/ranked_4.pdb" cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/run_alphafold2_msa/main.nf b/modules/local/run_alphafold2_msa/main.nf index f5eafe9d..3c3c9ac8 100644 --- a/modules/local/run_alphafold2_msa/main.nf +++ b/modules/local/run_alphafold2_msa/main.nf @@ -25,9 +25,9 @@ process RUN_ALPHAFOLD2_MSA { path ('uniprot/*') output: - path ("${fasta.baseName}*") - tuple val(meta), path ("${fasta.baseName}/features.pkl"), emit: features - path "versions.yml" , emit: versions + path ("raw/**") , emit: raw + tuple val(meta), path ("raw/features.pkl"), emit: features + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -64,6 +64,9 @@ process RUN_ALPHAFOLD2_MSA { --obsolete_pdbs_path=./clean_obsolete.dat \ $args + # Can't use fasta.baseName to batch outputs in publishDir + mv "${fasta.baseName}" raw/ + cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python3 --version | sed 's/Python //g') @@ -75,8 +78,8 @@ process 
RUN_ALPHAFOLD2_MSA { stub: """ - mkdir ./"${fasta.baseName}" - touch ./"${fasta.baseName}"/features.pkl + mkdir ./raw + touch ./raw/features.pkl cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/run_alphafold2_pred/main.nf b/modules/local/run_alphafold2_pred/main.nf index b45c38f0..b565d419 100644 --- a/modules/local/run_alphafold2_pred/main.nf +++ b/modules/local/run_alphafold2_pred/main.nf @@ -24,9 +24,9 @@ process RUN_ALPHAFOLD2_PRED { path ('uniprot/*') output: - path ("${fasta.baseName}*") + path ("raw/**") , emit: raw tuple val(meta), path ("${meta.id}_alphafold2.pdb") , emit: top_ranked_pdb - tuple val(meta), path ("${fasta.baseName}/ranked*.pdb") , emit: pdb + tuple val(meta), path ("raw/ranked*.pdb") , emit: pdb tuple val(meta), path ("${meta.id}_alphafold2_msa.tsv") , emit: msa // TODO: re-label multiqc -> plddt so multiqc channel can take in all metrics tuple val(meta), path ("${meta.id}_plddt.tsv") , emit: multiqc @@ -62,6 +62,9 @@ process RUN_ALPHAFOLD2_PRED { mv "${meta.id}_msa.tsv" "${meta.id}_alphafold2_msa.tsv" + # Can't use fasta.baseName to batch outputs in publishDir + mv "${fasta.baseName}" raw/ + cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python3 --version | sed 's/Python //g') @@ -79,12 +82,12 @@ process RUN_ALPHAFOLD2_PRED { touch "${meta.id}_plddt.tsv" touch "${meta.id}_alphafold2_msa.tsv" touch "${meta.id}_0_pae.tsv" - mkdir "${fasta.baseName}" - touch "${fasta.baseName}/ranked_0.pdb" - touch "${fasta.baseName}/ranked_1.pdb" - touch "${fasta.baseName}/ranked_2.pdb" - touch "${fasta.baseName}/ranked_3.pdb" - touch "${fasta.baseName}/ranked_4.pdb" + mkdir "raw/" + touch "raw/ranked_0.pdb" + touch "raw/ranked_1.pdb" + touch "raw/ranked_2.pdb" + touch "raw/ranked_3.pdb" + touch "raw/ranked_4.pdb" cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/nextflow.config b/nextflow.config index 24ef5c55..df881154 100644 --- a/nextflow.config +++ b/nextflow.config @@ -13,6 +13,7 @@ 
params { input = null mode = 'alphafold2' // {alphafold2, colabfold, esmfold, rosettafold_all_atom, alphafold3, helixfold3, boltz, rosettafold2na} use_gpu = false + save_intermediates = false split_fasta = false db = null full_dbs = false // true/false, globally sets full_dbs if not independently set diff --git a/nextflow_schema.json b/nextflow_schema.json index fb5b0558..d1a78953 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -44,6 +44,11 @@ "description": "Run on CPUs (default) or GPUs", "fa_icon": "fas fa-microchip" }, + "save_intermediates": { + "type": "boolean", + "description": "Save raw intermediate files (e.g. the full AlphaFold2 prediction directory and MSAs) to the output directory", + "fa_icon": "fas fa-save" + }, "split_fasta": { "type": "boolean", "description": "Split input multi-fasta file in separated fasta files each of them containing one sequence to be folded", diff --git a/tests/alphafold2_download.nf.test.snap b/tests/alphafold2_download.nf.test.snap index 17b19b17..901df40d 100644 --- a/tests/alphafold2_download.nf.test.snap +++ b/tests/alphafold2_download.nf.test.snap @@ -48,13 +48,6 @@ "alphafold2", "alphafold2/standard", "alphafold2/standard/T1024", - "alphafold2/standard/T1024/T1024.1", - "alphafold2/standard/T1024/T1024.1/ranked_0.pdb", - "alphafold2/standard/T1024/T1024.1/ranked_1.pdb", - "alphafold2/standard/T1024/T1024.1/ranked_2.pdb", - "alphafold2/standard/T1024/T1024.1/ranked_3.pdb", - "alphafold2/standard/T1024/T1024.1/ranked_4.pdb", - "alphafold2/standard/T1024/T1024_alphafold2.pdb", "alphafold2/standard/T1024/T1024_alphafold2_msa.tsv", "alphafold2/standard/T1024/T1024_iptm.tsv", "alphafold2/standard/T1024/T1024_plddt.tsv", @@ -62,13 +55,6 @@ "alphafold2/standard/T1024/paes", "alphafold2/standard/T1024/paes/T1024_0_pae.tsv", "alphafold2/standard/T1026", - "alphafold2/standard/T1026/T1026.1", - "alphafold2/standard/T1026/T1026.1/ranked_0.pdb", - "alphafold2/standard/T1026/T1026.1/ranked_1.pdb", - "alphafold2/standard/T1026/T1026.1/ranked_2.pdb", - 
"alphafold2/standard/T1026/T1026.1/ranked_3.pdb", - "alphafold2/standard/T1026/T1026.1/ranked_4.pdb", - "alphafold2/standard/T1026/T1026_alphafold2.pdb", "alphafold2/standard/T1026/T1026_alphafold2_msa.tsv", "alphafold2/standard/T1026/T1026_iptm.tsv", "alphafold2/standard/T1026/T1026_plddt.tsv", @@ -109,23 +95,11 @@ "file.txt:md5,d41d8cd98f00b204e9800998ecf8427e" ], "uniref90.fasta:md5,d41d8cd98f00b204e9800998ecf8427e", - "ranked_0.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", - "ranked_1.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", - "ranked_2.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", - "ranked_3.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", - "ranked_4.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", - "T1024_alphafold2.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", "T1024_alphafold2_msa.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1024_iptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1024_plddt.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1024_ptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1024_0_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", - "ranked_0.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", - "ranked_1.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", - "ranked_2.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", - "ranked_3.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", - "ranked_4.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", - "T1026_alphafold2.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026_alphafold2_msa.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026_iptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026_plddt.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", @@ -140,9 +114,9 @@ ] ], "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.2" + "nf-test": "0.9.2", + "nextflow": "25.10.3" }, - "timestamp": "2026-01-13T14:33:28.434588" + "timestamp": "2026-01-29T21:35:56.027817716" } } \ No newline at end of file diff --git a/tests/alphafold2_split.nf.test.snap b/tests/alphafold2_split.nf.test.snap index ab2cad9a..917420c5 100644 --- a/tests/alphafold2_split.nf.test.snap +++ 
b/tests/alphafold2_split.nf.test.snap @@ -29,31 +29,17 @@ "alphafold2", "alphafold2/split_msa_prediction", "alphafold2/split_msa_prediction/T1024", - "alphafold2/split_msa_prediction/T1024.1", - "alphafold2/split_msa_prediction/T1024.1/features.pkl", - "alphafold2/split_msa_prediction/T1024/T1024.1", - "alphafold2/split_msa_prediction/T1024/T1024.1/ranked_0.pdb", - "alphafold2/split_msa_prediction/T1024/T1024.1/ranked_1.pdb", - "alphafold2/split_msa_prediction/T1024/T1024.1/ranked_2.pdb", - "alphafold2/split_msa_prediction/T1024/T1024.1/ranked_3.pdb", - "alphafold2/split_msa_prediction/T1024/T1024.1/ranked_4.pdb", - "alphafold2/split_msa_prediction/T1024/T1024_alphafold2.pdb", "alphafold2/split_msa_prediction/T1024/T1024_alphafold2_msa.tsv", "alphafold2/split_msa_prediction/T1024/T1024_plddt.tsv", + "alphafold2/split_msa_prediction/T1024/msa", + "alphafold2/split_msa_prediction/T1024/msa/features.pkl", "alphafold2/split_msa_prediction/T1024/paes", "alphafold2/split_msa_prediction/T1024/paes/T1024_0_pae.tsv", "alphafold2/split_msa_prediction/T1026", - "alphafold2/split_msa_prediction/T1026.1", - "alphafold2/split_msa_prediction/T1026.1/features.pkl", - "alphafold2/split_msa_prediction/T1026/T1026.1", - "alphafold2/split_msa_prediction/T1026/T1026.1/ranked_0.pdb", - "alphafold2/split_msa_prediction/T1026/T1026.1/ranked_1.pdb", - "alphafold2/split_msa_prediction/T1026/T1026.1/ranked_2.pdb", - "alphafold2/split_msa_prediction/T1026/T1026.1/ranked_3.pdb", - "alphafold2/split_msa_prediction/T1026/T1026.1/ranked_4.pdb", - "alphafold2/split_msa_prediction/T1026/T1026_alphafold2.pdb", "alphafold2/split_msa_prediction/T1026/T1026_alphafold2_msa.tsv", "alphafold2/split_msa_prediction/T1026/T1026_plddt.tsv", + "alphafold2/split_msa_prediction/T1026/msa", + "alphafold2/split_msa_prediction/T1026/msa/features.pkl", "alphafold2/split_msa_prediction/T1026/paes", "alphafold2/split_msa_prediction/T1026/paes/T1026_0_pae.tsv", "alphafold2/split_msa_prediction/top_ranked_structures", 
@@ -71,25 +57,13 @@ "pipeline_info/nf_core_proteinfold_software_mqc_versions.yml" ], [ - "features.pkl:md5,d41d8cd98f00b204e9800998ecf8427e", - "ranked_0.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", - "ranked_1.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", - "ranked_2.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", - "ranked_3.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", - "ranked_4.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", - "T1024_alphafold2.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", "T1024_alphafold2_msa.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1024_plddt.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", - "T1024_0_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "features.pkl:md5,d41d8cd98f00b204e9800998ecf8427e", - "ranked_0.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", - "ranked_1.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", - "ranked_2.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", - "ranked_3.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", - "ranked_4.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", - "T1026_alphafold2.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", + "T1024_0_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026_alphafold2_msa.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026_plddt.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "features.pkl:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026_0_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1024.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", @@ -100,9 +74,9 @@ ] ], "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.2" + "nf-test": "0.9.2", + "nextflow": "25.10.3" }, - "timestamp": "2026-01-13T14:34:11.513463" + "timestamp": "2026-01-29T21:36:12.286799433" } } \ No newline at end of file diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap index e26cec3d..57347ffb 100644 --- a/tests/default.nf.test.snap +++ b/tests/default.nf.test.snap @@ -22,13 +22,6 @@ "alphafold2", "alphafold2/standard", "alphafold2/standard/T1024", - "alphafold2/standard/T1024/T1024.1", - 
"alphafold2/standard/T1024/T1024.1/ranked_0.pdb", - "alphafold2/standard/T1024/T1024.1/ranked_1.pdb", - "alphafold2/standard/T1024/T1024.1/ranked_2.pdb", - "alphafold2/standard/T1024/T1024.1/ranked_3.pdb", - "alphafold2/standard/T1024/T1024.1/ranked_4.pdb", - "alphafold2/standard/T1024/T1024_alphafold2.pdb", "alphafold2/standard/T1024/T1024_alphafold2_msa.tsv", "alphafold2/standard/T1024/T1024_iptm.tsv", "alphafold2/standard/T1024/T1024_plddt.tsv", @@ -36,13 +29,6 @@ "alphafold2/standard/T1024/paes", "alphafold2/standard/T1024/paes/T1024_0_pae.tsv", "alphafold2/standard/T1026", - "alphafold2/standard/T1026/T1026.1", - "alphafold2/standard/T1026/T1026.1/ranked_0.pdb", - "alphafold2/standard/T1026/T1026.1/ranked_1.pdb", - "alphafold2/standard/T1026/T1026.1/ranked_2.pdb", - "alphafold2/standard/T1026/T1026.1/ranked_3.pdb", - "alphafold2/standard/T1026/T1026.1/ranked_4.pdb", - "alphafold2/standard/T1026/T1026_alphafold2.pdb", "alphafold2/standard/T1026/T1026_alphafold2_msa.tsv", "alphafold2/standard/T1026/T1026_iptm.tsv", "alphafold2/standard/T1026/T1026_plddt.tsv", @@ -64,23 +50,11 @@ "pipeline_info/nf_core_proteinfold_software_mqc_versions.yml" ], [ - "ranked_0.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", - "ranked_1.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", - "ranked_2.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", - "ranked_3.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", - "ranked_4.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", - "T1024_alphafold2.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", "T1024_alphafold2_msa.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1024_iptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1024_plddt.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1024_ptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1024_0_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", - "ranked_0.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", - "ranked_1.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", - "ranked_2.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", - 
"ranked_3.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", - "ranked_4.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", - "T1026_alphafold2.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026_alphafold2_msa.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026_iptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026_plddt.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", @@ -96,8 +70,8 @@ ], "meta": { "nf-test": "0.9.2", - "nextflow": "25.04.7" + "nextflow": "25.10.3" }, - "timestamp": "2025-10-23T16:51:41.928836" + "timestamp": "2026-01-29T21:37:26.752545973" } } \ No newline at end of file