diff --git a/CHANGELOG.md b/CHANGELOG.md index e38e42eef..31fba1d5f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -122,6 +122,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [[#507](https://github.com/nf-core/proteinfold/issues/507)] - Implement missing full tests and check that the others work before release 2.0.0. - [[PR #509](https://github.com/nf-core/proteinfold/pulls/509)] - Setup gpu environment for AWS full tests. - [[PR #538](https://github.com/nf-core/proteinfold/pulls/538)] - Fix alphafold2_random_seed type. +- [[PR #549](https://github.com/nf-core/proteinfold/pulls/549)] - Create `EXTRACT_METRICS` module and remove extract_metrics.py from AlphaFold2 module. +- [[PR #527](https://github.com/nf-core/proteinfold/pulls/527)] - Auto-detect multimer vs monomer and validate unique split. +- [[PR #566](https://github.com/nf-core/proteinfold/pulls/566)] - Remove extract_metrics.py from HelixFold3 module. ### Parameters diff --git a/conf/dbs.config b/conf/dbs.config index ff6aad469..1fd91b3ef 100644 --- a/conf/dbs.config +++ b/conf/dbs.config @@ -11,12 +11,13 @@ params { // Dated prefixes, can be modified for alternate versions alphafold2_params_prefix = "alphafold_params_2022-12-06" + colabfold_params_prefix = "alphafold_params_2022-12-06" uniref30_prefix = "UniRef30_2023_02" // AlphaFold2 links alphafold2_bfd_link = 'https://storage.googleapis.com/alphafold-databases/casp14_versions/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt.tar.gz' alphafold2_small_bfd_link = 'https://storage.googleapis.com/alphafold-databases/reduced_dbs/bfd-first_non_consensus_sequences.fasta.gz' - alphafold2_params_link = 'https://storage.googleapis.com/alphafold/alphafold_params_2022-12-06.tar' + alphafold2_params_link = "https://storage.googleapis.com/alphafold/${params.alphafold2_params_prefix}.tar" alphafold2_mgnify_link = 'https://ftp.ebi.ac.uk/pub/databases/metagenomics/peptide_database/2024_04/mgy_clusters.fa.gz' 
alphafold2_pdb70_link = 'https://wwwuser.gwdguser.de/~compbiol/data/hhsuite/databases/hhsuite_dbs/pdb70_from_mmcif_220313.tar.gz' alphafold2_pdb_mmcif_link = 'rsync.rcsb.org::ftp_data/structures/divided/mmCIF/' //Other sources available: 'rsync.rcsb.org::ftp_data/structures/divided/mmCIF/' ftp.pdbj.org::ftp_data/structures/divided/mmCIF/ rsync.ebi.ac.uk::pub/databases/pdb/data/structures/divided/mmCIF/ @@ -78,19 +79,14 @@ params { boltz2_mols_path = "${params.boltz_db}/params/mols/" // Colabfold links - colabfold_db_link = 'https://opendata.mmseqs.org/colabfold/colabfold_envdb_202108.db.tar.gz' - colabfold_uniref30_link = 'https://opendata.mmseqs.org/colabfold/uniref30_2302.db.tar.gz' + colabfold_db_link = 'https://opendata.mmseqs.org/colabfold/colabfold_envdb_202108.db.tar.gz' + colabfold_uniref30_link = 'https://opendata.mmseqs.org/colabfold/uniref30_2302.db.tar.gz' + colabfold_alphafold2_params_link = "https://storage.googleapis.com/alphafold/${params.colabfold_params_prefix}.tar" // Colabfold paths colabfold_envdb_path = "${params.colabfold_db}/colabfold_envdb/*" colabfold_uniref30_path = "${params.colabfold_db}/colabfold_uniref30/*" - // Are all these params options needed? - colabfold_alphafold2_params_tags = [ - "alphafold2_multimer_v1" : "alphafold_params_colab_2021-10-27", - "alphafold2_multimer_v2" : "alphafold_params_colab_2022-03-02", - "alphafold2_multimer_v3" : "alphafold_params_colab_2022-12-06", - "alphafold2_ptm" : "alphafold_params_2021-07-14" - ] + colabfold_alphafold2_params_path = "${params.colabfold_db}/params/${params.colabfold_params_prefix}" // RoseTTAFold_All_Atom links rosettafold_all_atom_uniref30_link = 'https://wwwuser.gwdguser.de/~compbiol/uniclust/2023_02/UniRef30_2023_02_hhsuite.tar.gz' diff --git a/conf/modules_alphafold2.config b/conf/modules_alphafold2.config index 19e67a880..53649e39f 100644 --- a/conf/modules_alphafold2.config +++ b/conf/modules_alphafold2.config @@ -102,16 +102,6 @@ process { params.alphafold2_random_seed ? 
"--random_seed=${params.alphafold2_random_seed}" : '' ].join(' ').trim() publishDir = [ - [ - path: { "${params.outdir}/alphafold2/${params.alphafold2_mode}/${meta.id}" }, - mode: 'copy', - saveAs: { filename -> - if(filename.endsWith('_pae.tsv')){ - "paes/$filename" - } else { filename } - }, - pattern: '*.tsv' - ], [ enabled: params.save_intermediates, path: { "${params.outdir}/alphafold2/${params.alphafold2_mode}/${meta.id}/" }, @@ -163,16 +153,6 @@ process { params.alphafold2_random_seed ? "--random_seed=${params.alphafold2_random_seed}" : '' ].join(' ').trim() publishDir = [ - [ - path: { "${params.outdir}/alphafold2/${params.alphafold2_mode}/${meta.id}" }, - mode: 'copy', - saveAs: { filename -> - if(filename.endsWith('_pae.tsv')){ - "paes/$filename" - } else { filename } - }, - pattern: '*.tsv' - ], [ enabled: params.save_intermediates, path: { "${params.outdir}/alphafold2/${params.alphafold2_mode}/${meta.id}/" }, @@ -187,4 +167,27 @@ process { ] ] } + + withName: 'EXTRACT_METRICS_AF2_STANDARD' { + container = 'nf-core/proteinfold_alphafold2_standard:2.0.0' + } + + withName: 'EXTRACT_METRICS_AF2_PRED' { + container = 'nf-core/proteinfold_alphafold2_pred:2.0.0' + } + + withName: 'EXTRACT_METRICS_AF2_STANDARD|EXTRACT_METRICS_AF2_PRED|EXTRACT_METRICS' { + publishDir = [ + [ + path: { "${params.outdir}/alphafold2/${params.alphafold2_mode}/${meta.id}" }, + mode: 'copy', + saveAs: { filename -> + if(filename.endsWith('_pae.tsv')){ + "paes/$filename" + } else { filename } + }, + pattern: '*.tsv' + ] + ] + } } diff --git a/conf/modules_boltz.config b/conf/modules_boltz.config index fd656c166..574167f86 100644 --- a/conf/modules_boltz.config +++ b/conf/modules_boltz.config @@ -14,7 +14,7 @@ process { // Provide args plus configure processes to use correct directory names // for Boltz parameters and models, which are downloaded as part of the workflow withName: '.*ARIA2_COLABFOLD_PARAMS:UNTAR' { - ext.prefix = { 
"${params.colabfold_alphafold2_params_tags[params.colabfold_model_preset] }" } + ext.prefix = { "${params.colabfold_params_prefix}" } publishDir = [ path: {"${params.outdir}/DBs/${params.mode}/params"}, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, diff --git a/conf/modules_colabfold.config b/conf/modules_colabfold.config index 3bc01d143..6ac6d4996 100644 --- a/conf/modules_colabfold.config +++ b/conf/modules_colabfold.config @@ -21,7 +21,7 @@ process { // Configure UNTAR processes to use correct directory names withName: '.*ARIA2_COLABFOLD_PARAMS:UNTAR' { - ext.prefix = { "${params.colabfold_alphafold2_params_tags[params.colabfold_model_preset] }" } + ext.prefix = { "${params.colabfold_params_prefix}" } publishDir = [ path: {"${params.outdir}/DBs/${params.mode}/params"}, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, diff --git a/conf/modules_helixfold3.config b/conf/modules_helixfold3.config index 21f931c71..4c9a96870 100644 --- a/conf/modules_helixfold3.config +++ b/conf/modules_helixfold3.config @@ -109,6 +109,31 @@ process { params.helixfold3_precision ? "--precision ${params.helixfold3_precision}" : "--precision 'bf16'", params.helixfold3_infer_times ? 
"--infer_times ${params.helixfold3_infer_times}" : "--infer_times 4" ].join(' ').trim() + publishDir = [ + [ + enabled: params.save_intermediates, + path: { "${params.outdir}/helixfold3/${meta.id}/raw" }, + mode: 'copy', + pattern: 'raw/**', + saveAs: { filename -> filename.toString().replaceFirst(/^raw\//, '') } + ], + [ + path: { "${params.outdir}/helixfold3/top_ranked_structures" }, + mode: 'copy', + saveAs: { "${meta.id}.pdb" }, + pattern: '*_helixfold3.pdb' + ], + [ + path: { "${params.outdir}/helixfold3/top_ranked_structures" }, + mode: 'copy', + saveAs: { "${meta.id}.cif" }, + pattern: '*_helixfold3.cif' + ] + ] + } + + withName: 'EXTRACT_METRICS_HELIXFOLD3' { + container = 'nf-core/proteinfold_helixfold3:2.0.0' publishDir = [ [ path: { "${params.outdir}/helixfold3/${meta.id}" }, @@ -135,25 +160,6 @@ process { path: { "${params.outdir}/helixfold3/${meta.id}" }, mode: 'copy', pattern: '*_helixfold3_msa.tsv' - ], - [ - enabled: params.save_intermediates, - path: { "${params.outdir}/helixfold3/${meta.id}/raw" }, - mode: 'copy', - pattern: 'raw/**', - saveAs: { filename -> filename.toString().replaceFirst(/^raw\//, '') } - ], - [ - path: { "${params.outdir}/helixfold3/top_ranked_structures" }, - mode: 'copy', - saveAs: { "${meta.id}.pdb" }, - pattern: '*_helixfold3.pdb' - ], - [ - path: { "${params.outdir}/helixfold3/top_ranked_structures" }, - mode: 'copy', - saveAs: { "${meta.id}.cif" }, - pattern: '*_helixfold3.cif' ] ] } diff --git a/conf/test.config b/conf/test.config index 73a4c294e..c7d7de85b 100644 --- a/conf/test.config +++ b/conf/test.config @@ -28,7 +28,7 @@ params { // Input data to test alphafold2 analysis mode = 'alphafold2' alphafold2_mode = 'standard' - input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.2/samplesheet.csv' + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v2.0/samplesheet.csv' alphafold2_db = "${projectDir}/assets/dummy_db_dir" } diff --git 
a/conf/test_alphafold3_download.config b/conf/test_alphafold3_download.config index 0ad82e81c..11f10d8b2 100644 --- a/conf/test_alphafold3_download.config +++ b/conf/test_alphafold3_download.config @@ -27,7 +27,7 @@ params { // Input data to test alphafold2 analysis mode = 'alphafold3' - input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.2/samplesheet.csv' + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v2.0/samplesheet.csv' alphafold3_params_path = "${projectDir}/assets/dummy_db_dir" } diff --git a/conf/test_alphafold3_standard.config b/conf/test_alphafold3_standard.config index ab4ac3403..d6cb70988 100644 --- a/conf/test_alphafold3_standard.config +++ b/conf/test_alphafold3_standard.config @@ -27,7 +27,7 @@ params { // Input data to test alphafold2 analysis mode = 'alphafold3' - input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.2/samplesheet.csv' + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v2.0/samplesheet.csv' alphafold3_db = "${projectDir}/assets/dummy_db_dir" } diff --git a/conf/test_alphafold_download.config b/conf/test_alphafold_download.config index 7c22be5ee..287bbca0c 100644 --- a/conf/test_alphafold_download.config +++ b/conf/test_alphafold_download.config @@ -28,7 +28,7 @@ params { // Input data to test alphafold2 analysis mode = 'alphafold2' alphafold2_mode = 'standard' - input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.2/samplesheet.csv' + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v2.0/samplesheet.csv' } process { diff --git a/conf/test_alphafold_split.config b/conf/test_alphafold_split.config index 390ba4de7..c74f487be 100644 --- a/conf/test_alphafold_split.config +++ b/conf/test_alphafold_split.config @@ -28,7 +28,7 @@ params { // Input data to test alphafold2 splitting MSA from prediction analysis mode = 'alphafold2' alphafold2_mode = 
'split_msa_prediction' - input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.2/samplesheet.csv' + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v2.0/samplesheet.csv' alphafold2_db = "${projectDir}/assets/dummy_db_dir" } diff --git a/conf/test_boltz.config b/conf/test_boltz.config index cb420af9b..6e2441e1c 100644 --- a/conf/test_boltz.config +++ b/conf/test_boltz.config @@ -27,7 +27,7 @@ params { // Input data for full test of boltz mode = 'boltz' colabfold_model_preset = 'alphafold2_ptm' - input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.2/samplesheet.csv' + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v2.0/samplesheet.csv' colabfold_db = "${projectDir}/assets/dummy_db_dir" boltz_db = "${projectDir}/assets/dummy_db_dir" } diff --git a/conf/test_colabfold_download.config b/conf/test_colabfold_download.config index f3b527b5f..313a4ec68 100644 --- a/conf/test_colabfold_download.config +++ b/conf/test_colabfold_download.config @@ -28,7 +28,7 @@ params { // Input data to test colabfold analysis mode = 'colabfold' use_msa_server = true - input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.2/samplesheet.csv' + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v2.0/samplesheet.csv' } process { diff --git a/conf/test_colabfold_local.config b/conf/test_colabfold_local.config index 3019f1963..2a896542d 100644 --- a/conf/test_colabfold_local.config +++ b/conf/test_colabfold_local.config @@ -26,7 +26,7 @@ params { // Input data to test colabfold with the colabfold webserver analysis mode = 'colabfold' colabfold_db = "${projectDir}/assets/dummy_db_dir" - input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.2/samplesheet.csv' + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v2.0/samplesheet.csv' } process { diff --git 
a/conf/test_colabfold_webserver.config b/conf/test_colabfold_webserver.config index e2273b359..206a3aeba 100644 --- a/conf/test_colabfold_webserver.config +++ b/conf/test_colabfold_webserver.config @@ -27,7 +27,7 @@ params { mode = 'colabfold' use_msa_server = true colabfold_db = "${projectDir}/assets/dummy_db_dir" - input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.2/samplesheet.csv' + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v2.0/samplesheet.csv' } process { diff --git a/conf/test_esmfold.config b/conf/test_esmfold.config index 344ea1bf9..0bffe195d 100644 --- a/conf/test_esmfold.config +++ b/conf/test_esmfold.config @@ -26,7 +26,7 @@ params { // Input data to test esmfold mode = 'esmfold' esmfold_db = "${projectDir}/assets/dummy_db_dir" - input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.2/samplesheet.csv' + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v2.0/samplesheet.csv' } process { diff --git a/conf/test_full.config b/conf/test_full.config index 76f2e58a3..1b83f5bfc 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -18,6 +18,14 @@ params { mode = 'alphafold2' alphafold2_mode = 'standard' use_gpu = true - input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.2/samplesheet.csv' + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v2.0/samplesheet.csv' alphafold2_db = 's3://proteinfold-dataset/test-data/mini_dbs' } + +docker.pullStrategy = 'lazy' + +process { + withName: 'RUN_ALPHAFOLD2' { + beforeScript = System.getenv('TOWER_WORKFLOW_ID') ? 
'unset LD_LIBRARY_PATH && export LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu:/usr/local/nvidia/lib64:/usr/local/nvidia/lib && export OPENMM_CUDA_COMPILER=/opt/conda/bin/nvcc && export TMPDIR=/tmp' : null + } +} diff --git a/conf/test_full_alphafold_multimer.config b/conf/test_full_alphafold_multimer.config index 27354e081..e79fa4e87 100644 --- a/conf/test_full_alphafold_multimer.config +++ b/conf/test_full_alphafold_multimer.config @@ -19,6 +19,21 @@ params { alphafold2_mode = 'standard' alphafold2_model_preset = 'multimer' use_gpu = true - input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.2/samplesheet_multimer.csv' + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v2.0/samplesheet_multimer.csv' alphafold2_db = 's3://proteinfold-dataset/test-data/mini_dbs' } + +docker.pullStrategy = 'lazy' + +process { + withName: 'RUN_ALPHAFOLD2' { + memory = '60 GB' + beforeScript = System.getenv('TOWER_WORKFLOW_ID') ? 'unset LD_LIBRARY_PATH && export LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu:/usr/local/nvidia/lib64:/usr/local/nvidia/lib && export OPENMM_CUDA_COMPILER=/opt/conda/bin/nvcc && export TMPDIR=/tmp' : null + } +} + +process { + withName: 'RUN_ALPHAFOLD2_PRED' { + ext.args = "--num_multimer_predictions_per_model 1" + } +} diff --git a/conf/test_full_alphafold_split.config b/conf/test_full_alphafold_split.config index 2c69828fd..e23ecc2c6 100644 --- a/conf/test_full_alphafold_split.config +++ b/conf/test_full_alphafold_split.config @@ -18,6 +18,14 @@ params { mode = 'alphafold2' alphafold2_mode = 'split_msa_prediction' use_gpu = true - input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.2/samplesheet.csv' + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v2.0/samplesheet.csv' alphafold2_db = 's3://proteinfold-dataset/test-data/mini_dbs' } + +docker.pullStrategy = 'lazy' + +process { + withName: 'RUN_ALPHAFOLD2_PRED' { + beforeScript = 
System.getenv('TOWER_WORKFLOW_ID') ? 'unset LD_LIBRARY_PATH && export LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu:/usr/local/nvidia/lib64:/usr/local/nvidia/lib && export OPENMM_CUDA_COMPILER=/opt/conda/bin/nvcc && export TMPDIR=/tmp' : null + } +} diff --git a/conf/test_full_boltz.config b/conf/test_full_boltz.config index 3fb5e30d2..a8f59e073 100644 --- a/conf/test_full_boltz.config +++ b/conf/test_full_boltz.config @@ -15,9 +15,19 @@ params { config_profile_description = 'Minimal test dataset to check pipeline function' // Input data for full test of boltz - mode = 'boltz' - colabfold_model_preset = 'alphafold2_ptm' - use_gpu = true - input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.2/samplesheet.csv' - colabfold_db = 's3://proteinfold-dataset/test-data/mini_dbs' + mode = 'boltz' + colabfold_model_preset = 'alphafold2_ptm' + use_gpu = true + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v2.0/samplesheet.csv' + colabfold_db = 's3://proteinfold-dataset/test-data/mini_dbs' + boltz_db = 's3://proteinfold-dataset/test-data/mini_dbs/' +} + +docker.pullStrategy = 'lazy' + +process { + withName: 'RUN_BOLTZ' { + memory = '60 GB' + ext.args = '--output_format "pdb" --write_full_pae --cache ./ --recycling_steps 1' + } } diff --git a/conf/test_full_colabfold_local.config b/conf/test_full_colabfold_local.config index 1a9f9d6d8..22bb7fa5f 100644 --- a/conf/test_full_colabfold_local.config +++ b/conf/test_full_colabfold_local.config @@ -16,15 +16,23 @@ params { // Input data to test colabfold with a local server analysis - mode = 'colabfold' - colabfold_model_preset = 'alphafold2_ptm' - use_gpu = true - input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.2/samplesheet.csv' - colabfold_db = 's3://proteinfold-dataset/test-data/mini_dbs' + mode = 'colabfold' + colabfold_model_preset = 'alphafold2_ptm' + use_gpu = true + input = params.pipelines_testdata_base_path + 
'proteinfold/testdata/samplesheet/v2.0/samplesheet.csv' + colabfold_db = 's3://proteinfold-dataset/test-data/mini_dbs' + colabfold_use_gpu_relax = false + colabfold_use_amber = false + colabfold_use_templates = false } +docker.pullStrategy = 'lazy' + process { withName:MMSEQS_COLABFOLDSEARCH { memory = 16.GB } + withName: 'COLABFOLD_BATCH' { + beforeScript = System.getenv('TOWER_WORKFLOW_ID') ? 'unset LD_LIBRARY_PATH && export LD_LIBRARY_PATH="/usr/lib/x86_64-linux-gnu:/usr/local/nvidia/lib64:/usr/local/nvidia/lib:/usr/local/lib:/usr/local/targets/x86_64-linux/lib:/usr/local/cuda-12.9/targets/x86_64-linux/lib:/opt/conda/lib"' : null + } } diff --git a/conf/test_full_colabfold_webserver.config b/conf/test_full_colabfold_webserver.config index 5bd673200..00917e6c4 100644 --- a/conf/test_full_colabfold_webserver.config +++ b/conf/test_full_colabfold_webserver.config @@ -15,9 +15,20 @@ params { config_profile_description = 'Minimal test dataset to check pipeline function' // Input data for full test of colabfold with Colabfold server - mode = 'colabfold' - use_msa_server = true - colabfold_model_preset = 'alphafold2_ptm' - input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.2/samplesheet.csv' - colabfold_db = 's3://proteinfold-dataset/test-data/mini_dbs' + mode = 'colabfold' + use_msa_server = true + colabfold_model_preset = 'alphafold2_ptm' + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v2.0/samplesheet.csv' + colabfold_db = 's3://proteinfold-dataset/test-data/mini_dbs' + colabfold_use_gpu_relax = false + colabfold_use_amber = false + colabfold_use_templates = false +} + +docker.pullStrategy = 'lazy' + +process { + withName: 'COLABFOLD_BATCH' { + beforeScript = System.getenv('TOWER_WORKFLOW_ID') ? 
'unset LD_LIBRARY_PATH && export LD_LIBRARY_PATH="/usr/lib/x86_64-linux-gnu:/usr/local/nvidia/lib64:/usr/local/nvidia/lib:/usr/local/lib:/usr/local/targets/x86_64-linux/lib:/usr/local/cuda-12.9/targets/x86_64-linux/lib:/opt/conda/lib"' : null + } } diff --git a/conf/test_full_colabfold_webserver_multimer.config b/conf/test_full_colabfold_webserver_multimer.config index b2ef29d0c..1bbe55359 100644 --- a/conf/test_full_colabfold_webserver_multimer.config +++ b/conf/test_full_colabfold_webserver_multimer.config @@ -15,9 +15,21 @@ params { config_profile_description = 'Minimal test dataset to check pipeline function' // Input data for full test of colabfold with Colabfold server - mode = 'colabfold' - use_msa_server = true - colabfold_model_preset = 'alphafold2_multimer_v3' - input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.2/samplesheet_multimer.csv' - colabfold_db = 's3://proteinfold-dataset/test-data/mini_dbs' + mode = 'colabfold' + use_msa_server = true + colabfold_model_preset = 'alphafold2_multimer_v3' + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v2.0/samplesheet_multimer.csv' + colabfold_db = 's3://proteinfold-dataset/test-data/mini_dbs' + colabfold_use_gpu_relax = false + colabfold_use_amber = false + colabfold_use_templates = false +} + +docker.pullStrategy = 'lazy' + +process { + withName: 'COLABFOLD_BATCH' { + ext.args = '' + beforeScript = System.getenv('TOWER_WORKFLOW_ID') ? 
'unset LD_LIBRARY_PATH && export LD_LIBRARY_PATH="/usr/lib/x86_64-linux-gnu:/usr/local/nvidia/lib64:/usr/local/nvidia/lib:/usr/local/lib:/usr/local/targets/x86_64-linux/lib:/usr/local/cuda-12.9/targets/x86_64-linux/lib:/opt/conda/lib"' : null + } } diff --git a/conf/test_full_esmfold.config b/conf/test_full_esmfold.config index bd61bb504..e234a731f 100644 --- a/conf/test_full_esmfold.config +++ b/conf/test_full_esmfold.config @@ -18,6 +18,14 @@ params { mode = 'esmfold' use_gpu = true esmfold_model_preset = 'monomer' - input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.2/samplesheet.csv' + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v2.0/samplesheet.csv' esmfold_db = 's3://proteinfold-dataset/test-data/mini_dbs' } + +docker.pullStrategy = 'lazy' + +process { + withName: 'RUN_ESMFOLD' { + beforeScript = System.getenv('TOWER_WORKFLOW_ID') ? 'unset LD_LIBRARY_PATH && export LD_LIBRARY_PATH="/conda/lib:/usr/local/cuda-11.7/lib64:/conda/lib/python3.9/site-packages/nvidia/cusparse/lib:/conda/lib:/usr/local/cuda/lib64"' : null + } +} diff --git a/conf/test_full_esmfold_multimer.config b/conf/test_full_esmfold_multimer.config index 18f358268..4004e2c4f 100644 --- a/conf/test_full_esmfold_multimer.config +++ b/conf/test_full_esmfold_multimer.config @@ -18,6 +18,14 @@ params { mode = 'esmfold' esmfold_model_preset = 'multimer' use_gpu = true - input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.2/samplesheet_multimer.csv' + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v2.0/samplesheet_multimer.csv' esmfold_db = 's3://proteinfold-dataset/test-data/mini_dbs' } + +docker.pullStrategy = 'lazy' + +process { + withName: 'RUN_ESMFOLD' { + beforeScript = System.getenv('TOWER_WORKFLOW_ID') ? 
'unset LD_LIBRARY_PATH && export LD_LIBRARY_PATH="/conda/lib:/usr/local/cuda-11.7/lib64:/conda/lib/python3.9/site-packages/nvidia/cusparse/lib:/conda/lib:/usr/local/cuda/lib64"' : null + } +} diff --git a/conf/test_full_helixfold3.config b/conf/test_full_helixfold3.config index ccc98f485..02ca9e44e 100644 --- a/conf/test_full_helixfold3.config +++ b/conf/test_full_helixfold3.config @@ -17,6 +17,14 @@ params { // Input data for full test of helixfold3 mode = 'helixfold3' use_gpu = true - input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.2/samplesheet.csv' + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v2.0/samplesheet.csv' helixfold3_db = 's3://proteinfold-dataset/test-data/mini_dbs/' } + +docker.pullStrategy = 'lazy' + +process { + withName: 'RUN_HELIXFOLD3' { + beforeScript = System.getenv('TOWER_WORKFLOW_ID') ? 'unset LD_LIBRARY_PATH && export LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda-12.8/lib64"' : null + } +} diff --git a/conf/test_full_rosettafold2na.config b/conf/test_full_rosettafold2na.config index 56ed2135a..9c165d288 100644 --- a/conf/test_full_rosettafold2na.config +++ b/conf/test_full_rosettafold2na.config @@ -20,3 +20,5 @@ params { input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v2.0/rna_complex_samplesheet.csv' rosettafold2na_db = 's3://proteinfold-dataset/test-data/mini_dbs' } + +docker.pullStrategy = 'lazy' diff --git a/conf/test_full_rosettafold_all_atom.config b/conf/test_full_rosettafold_all_atom.config index 8536a86b2..bdb6adc68 100644 --- a/conf/test_full_rosettafold_all_atom.config +++ b/conf/test_full_rosettafold_all_atom.config @@ -17,6 +17,8 @@ params { // Input data for full test of rosettafold_all_atom mode = 'rosettafold_all_atom' use_gpu = true - input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.2/samplesheet.csv' + input = params.pipelines_testdata_base_path + 
'proteinfold/testdata/samplesheet/v2.0/samplesheet.csv' rosettafold_all_atom_db = 's3://proteinfold-dataset/test-data/mini_dbs/' } + +docker.pullStrategy = 'lazy' diff --git a/conf/test_helixfold3.config b/conf/test_helixfold3.config index a8b3e4d5e..99595b02c 100644 --- a/conf/test_helixfold3.config +++ b/conf/test_helixfold3.config @@ -26,7 +26,7 @@ params { // Input data to test helixfold3 mode = 'helixfold3' helixfold3_db = "${projectDir}/assets/dummy_db_dir" - input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.2/samplesheet.csv' + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v2.0/samplesheet.csv' } process { diff --git a/conf/test_rosettafold_all_atom.config b/conf/test_rosettafold_all_atom.config index 13b19ce33..86805b9a8 100644 --- a/conf/test_rosettafold_all_atom.config +++ b/conf/test_rosettafold_all_atom.config @@ -26,7 +26,7 @@ params { // Input data to test rosettafold_all_atom mode = 'rosettafold_all_atom' rosettafold_all_atom_db = "${projectDir}/assets/dummy_db_dir" - input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.2/samplesheet.csv' + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v2.0/samplesheet.csv' } process { diff --git a/conf/test_split_fasta.config b/conf/test_split_fasta.config index f9463eb68..5ee897804 100644 --- a/conf/test_split_fasta.config +++ b/conf/test_split_fasta.config @@ -27,7 +27,7 @@ params { mode = 'colabfold' split_fasta = true colabfold_db = "${projectDir}/assets/dummy_db_dir" - input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.2/samplesheet_multimer.csv' + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v2.0/samplesheet_multimer.csv' } process { diff --git a/docs/usage.md b/docs/usage.md index d2282dffe..d30877e77 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -35,7 +35,7 @@ The samplesheet can have as many columns as you 
desire, however, there is a stri An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. -Each FASTA file should contain a single protein sequence unless using multimer mode. To provide a FASTA file with multiple sequences for individual folding, you can use one or more FASTA files with the `--split_fasta` parameter. This will treat each sequence in the FASTA file as a separate entry, folding them individually and in parallel, as if each sequence were listed separately in the samplesheet. +To provide a FASTA file with multiple sequences for individual folding, use one or more FASTA files with the `--split_fasta` parameter. This will treat each sequence in the FASTA file as a separate entry, folding them individually and in parallel, as if each sequence were listed separately in the samplesheet. ## Running the pipeline @@ -85,7 +85,6 @@ Each mode has specific reference data requirements. To support all modes the `-- │ ├── af3.bin │   ├── alphafold_params_2021-07-14 │   ├── alphafold_params_2022-12-06 -│   ├── alphafold_params_colab_2022-12-06 │   ├── boltz1_conf.ckpt │   ├── boltz2_aff.ckpt │   ├── boltz2_conf.ckpt @@ -124,6 +123,14 @@ Each mode has specific reference data requirements. To support all modes the `-- │   └── pdb_seqres.txt ├── rfam │   └── Rfam-14.9_rep_seq.fasta +├── RNA +│ ├── Rfam.full_region +│ ├── Rfam.cm.* +│ ├── id_mapping.tsv.gz +│ ├── rfam_annotations.tsv.gz +│ ├── rnacentral.fasta.* +│ ├── nt.* +│ └── ... ├── small_bfd │   └── bfd-first_non_consensus_sequences.fasta ├── uniprot @@ -140,6 +147,8 @@ Each mode has specific reference data requirements. 
To support all modes the `--    └── uniref90.fasta ``` + + Alternatively, the required data layout for each of the individual modes is described in the mode-specific usage documentation: - [AlphaFold2](./usage/alphafold2.md) @@ -194,20 +203,9 @@ You can override Foldseek arguments with: ```bash --foldseek_easysearch_arg "" ``` -Note that the pipeline will create the following files in your working directory: - -```bash -work # Directory containing the nextflow working files - # Finished results in specified location (defined with --outdir) -.nextflow_log # Log file from Nextflow -# Other nextflow hidden files, eg. history of pipeline runs and old logs. -``` - -If you wish to repeatedly use the same parameters for multiple runs, rather than specifying each flag in the command, you can specify these in a params file. - Pipeline settings can be provided in a `yaml` or `json` file via `-params-file `. > [!WARNING] > Do not use `-c ` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args). diff --git a/docs/usage/alphafold2.md b/docs/usage/alphafold2.md index b4bcc8b84..dc4895def 100644 --- a/docs/usage/alphafold2.md +++ b/docs/usage/alphafold2.md @@ -13,16 +13,12 @@ nextflow run nf-core/proteinfold \ --mode alphafold2 \ --alphafold2_db \ --use_gpu \ - --alphafold2_model_preset \ -profile ``` > [!NOTE] > By default, this will run a fork of AlphaFold2 where MSA generation is split from the neural network inference. This enables more efficient utilization of resources by allowing the CPU-bound MSA generation to be executed without occupying an idle GPU. If you want to run the original implementation of AlphaFold2 you can use the `--alphafold2_mode standard`.
However, please be advised that this will cause the allocated GPU to idle while MSAs are generated. -> [!WARNING] -> `--alphafold2_model_preset ` is used to infer how to handle multi-entry fasta files. Choosing `monomer_ptm`, `monomer` or `monomer_casp14` will result in a multi-entry fasta being processed as a series of monomer entries rather than as a single oligomeric complex. - ## File Structure The file structure of `--alphafold2_db` must be as follows: @@ -127,11 +123,13 @@ Without setting the `--alphafold2_db` flag, all of the required data files will See the [AlphaFold2](https://github.com/google-deepmind/alphafold) documentation for a full description of additional arguments. The arguments supported by the proteinfold workflow are described briefly below: -| Parameter | Default | Description | -| -------------------------------- | ------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `--alphafold2_full_dbs` | `false` | bfd is a large environmental sequence database used to identify homologs. small bfd is a redundancy recuced version of the bfd database which can reduce the execution time of homolog search but may reduce the depth of the resulting MSA in some cases. `--alphafold2_full_dbs` ensures that the full version of bfd is used for search. | -| `--alphafold2_random_seed` | `null` | AlphaFold2 model inference is a stochastic process. Fixing a numerical random seed ensures that results are reproducible between runs. | -| `--alphafold2_max_template_date` | `2038-01-19` | Structural templates from the PDB are used as additional context when making predictions. 
Molecules with solved structures in the PDB can be trivially predicted by using these structures as inputs. When benchmarking model performance it can be useful to restrict the use of templates to those deposited before a fixed date to ensure solved structures do not bias predictions. | +| Parameter | Default | Description | +| -------------------------------- | ----------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `--alphafold2_full_dbs` | `false` | bfd is a large environmental sequence database used to identify homologs. small bfd is a redundancy recuced version of the bfd database which can reduce the execution time of homolog search but may reduce the depth of the resulting MSA in some cases. `--alphafold2_full_dbs` ensures that the full version of bfd is used for search. | +| `--alphafold2_random_seed` | `null` | AlphaFold2 model inference is a stochastic process. Fixing a numerical random seed ensures that results are reproducible between runs | +| `--alphafold2_max_template_date` | `2038-01-19` | Structural templates from the PDB are used as additional context when making predictions. Molecules with solved structures in the PDB can be trivially predicted by using these structures as inputs. When benchmarking model performance it can be useful to restrict the use of templates to those deposited before a fixed date to ensure solved structures do not bias predictions. | +| `--alphafold2_params_prefix` | `alphafold_params_2022-12-06` | Specify the alphafold2 params used for prediction | +| `--alphafold2_model_preset` | `monomer_ptm` | Specify the alphafold2 monomer preset used for prediction. 
| > You can override any of these parameters via the command line or a params file. diff --git a/docs/usage/colabfold.md b/docs/usage/colabfold.md index b58be1a47..adfda9b96 100644 --- a/docs/usage/colabfold.md +++ b/docs/usage/colabfold.md @@ -14,14 +14,10 @@ nextflow run nf-core/proteinfold \ --outdir \ --mode colabfold \ --colabfold_db \ - --colabfold_model_preset "" \ --use_gpu \ -profile ``` -> [!WARNING] -> `--colabfold_model_preset` is used to infer how to handle multi-entry fasta files. Choosing `alphafold2_ptm` will result in a multi-entry fasta being processed as a series of monomer entries rather than as a single oligomeric complex. - By default, `--mode colabfold` will generate MSA files required for structure prediction using a local execution of the [ColabFold](https://github.com/sokrypton/ColabFold) search protocol. This protocol uses [MMseqs2](https://github.com/soedinglab/MMseqs2) to search a uniref30 expandable profile database and construct paired alignments using taxonomic labels. MSAs are enriched with additional unpaired sequences by searching an expandable profile databased of environmental sequences. > [!NOTE] @@ -47,23 +43,23 @@ The file structure of `--colabfold_db` must be as follows: │   ├── uniref30_2302_db_aln.dbtype │   └── ... 
└── params/ -    └── alphafold_params_colab_2022-12-06/ +    └── alphafold_params_2022-12-06/ ├── LICENSE - ├── params_model_1_multimer_v2.npz ├── params_model_1_multimer_v3.npz ├── params_model_1.npz - ├── params_model_2_multimer_v2.npz + ├── params_model_1_ptm.npz ├── params_model_2_multimer_v3.npz ├── params_model_2.npz - ├── params_model_3_multimer_v2.npz + ├── params_model_2_ptm.npz ├── params_model_3_multimer_v3.npz ├── params_model_3.npz - ├── params_model_4_multimer_v2.npz + ├── params_model_3_ptm.npz ├── params_model_4_multimer_v3.npz ├── params_model_4.npz - ├── params_model_5_multimer_v2.npz + ├── params_model_4_ptm.npz ├── params_model_5_multimer_v3.npz - └── params_model_5.npz + ├── params_model_5.npz + └── params_model_5_ptm.npz ``` @@ -89,7 +85,6 @@ nextflow run nf-core/proteinfold \ --outdir \ --mode colabfold \ --colabfold_db \ - --colabfold_model_preset \ --use_msa_server \ --use_gpu \ -profile @@ -102,12 +97,13 @@ nextflow run nf-core/proteinfold \ See the [ColabFold](https://github.com/sokrypton/ColabFold) documentation for a full description of additional arguments. The arguments supported by the proteinfold workflow are described briefly below: -| Parameter | Default | Description | -| --------------------------- | ------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `--colabfold_num_recycles` | `3` | The AlphaFold2 model used by ColabFold provides initial structure predictions as a recycled model input in an iterative refinement process. This parameter controls the number of times model outputs are recycled. Increasing the number of recycles has been found to improve performance for some challening cases. 
| -| `--colabfold_use_amber` | `true` | ColabFold outputs will sometimes contain phsyical violations such as steric clashes. These clashes can be resolved by post-processing the outputs with a short relaxation using the Amber Force Field. Non-clashing atoms are pinned to starting coordinates such that the relaxation has a minimal impact on final structures. | -| `--colabfold_db_load_mode` | `0` | Specify the way that MMSeqs2 will load the required databases in memory | -| `--colabfold_use_templates` | `false` | Use PDB templates to support predictions. The ColabFold notebooks do not use templates by default. | -| `--colabfold_create_index` | `false` | Create index for ColabFold databases during setup. On network filesystems it can be more performant to re-compute the index on the fly | +| Parameter | Default | Description | +| -------------------------------------- | ----------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `--colabfold_num_recycles` | `3` | The AlphaFold2 model used by ColabFold provides initial structure predictions as a recycled model input in an iterative refinement process. This parameter controls the number of times model outputs are recycled. Increasing the number of recycles has been found to improve performance for some challening cases. | +| `--colabfold_use_amber` | `true` | ColabFold outputs will sometimes contain phsyical violations such as steric clashes. These clashes can be resolved by post-processing the outputs with a short relaxation using the Amber Force Field. Non-clashing atoms are pinned to starting coordinates such that the relaxation has a minimal impact on final structures. 
| +| `--colabfold_db_load_mode` | `0` | Specify the way that MMSeqs2 will load the required databases in memory | +| `--colabfold_params_prefix` | `alphafold_params_2022-12-06` | Specify the alphafold2 params used for prediction. | +| `--colabfold_use_templates` | `false` | Use PDB templates to support predictions. The ColabFold notebooks do not use templates by default. | +| `--colabfold_create_index` | `false` | Create index for ColabFold databases during setup. On network filesystems it can be more performant to re-compute the index on the fly | > You can override any of these parameters via the command line or a params file. diff --git a/docs/usage/esmfold.md b/docs/usage/esmfold.md index ab5fea16c..b5a2a5e9f 100644 --- a/docs/usage/esmfold.md +++ b/docs/usage/esmfold.md @@ -13,7 +13,6 @@ nextflow run nf-core/proteinfold \ --input samplesheet.csv \ --outdir \ --mode esmfold \ - --esmfold_model_preset \ --esmfold_db \ --use_gpu \ -profile @@ -22,9 +21,6 @@ nextflow run nf-core/proteinfold \ > [!NOTE] > ESMFold does not require searching large sequence databases for sequences homologous to the prediction target and instead relies on a pre-trained protein language model (pLM) to inform predictions. -> [!WARNING] -> `--esmfold_model_preset` is used to infer how to handle multi-entry fasta files. Choosing `monomer` will result in a multi-entry fasta being processed as a series of monomer entries rather than as a single oligomeric complex.
- ## File Structure The file structure of `--esmfold_db` must be as follows: diff --git a/main.nf b/main.nf index 9ddbee2c5..a77180d1a 100644 --- a/main.nf +++ b/main.nf @@ -35,19 +35,8 @@ include { ROSETTAFOLD2NA } from './workflows/rosettafold2na' include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_proteinfold_pipeline' include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_proteinfold_pipeline' -include { getColabfoldAlphafold2Params } from './subworkflows/local/utils_nfcore_proteinfold_pipeline' -include { getColabfoldAlphafold2ParamsPath } from './subworkflows/local/utils_nfcore_proteinfold_pipeline' include { POST_PROCESSING } from './subworkflows/local/post_processing' -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - COLABFOLD PARAMETER VALUES -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -params.colabfold_alphafold2_params_link = getColabfoldAlphafold2Params() -params.colabfold_alphafold2_params_path = getColabfoldAlphafold2ParamsPath() - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ NAMED WORKFLOWS FOR PIPELINE @@ -261,7 +250,6 @@ workflow NFCORE_PROTEINFOLD { COLABFOLD ( ch_samplesheet, ch_versions, - params.colabfold_model_preset, PREPARE_COLABFOLD_DBS.out.params, PREPARE_COLABFOLD_DBS.out.colabfold_db, PREPARE_COLABFOLD_DBS.out.uniref30, diff --git a/modules/local/colabfold_batch/main.nf b/modules/local/colabfold_batch/main.nf index ee567e2d8..52e6d1c62 100644 --- a/modules/local/colabfold_batch/main.nf +++ b/modules/local/colabfold_batch/main.nf @@ -6,9 +6,7 @@ process COLABFOLD_BATCH { container "nf-core/proteinfold_colabfold:2.0.0" input: - tuple val(meta), path(fasta) - val colabfold_model_preset - path ('params/*') + tuple val(meta), path(fasta), path('params/*') path ('colabfold_db/*') path ('uniref30/*') val numRec @@ -49,7 +47,6 @@ process COLABFOLD_BATCH { $args 
\\ --num-recycle ${numRec} \\ --data \$PWD \\ - --model-type ${colabfold_model_preset} \\ ${fasta} \\ raw/ diff --git a/modules/local/extract_metrics/environment.yml b/modules/local/extract_metrics/environment.yml new file mode 100644 index 000000000..596ce5d42 --- /dev/null +++ b/modules/local/extract_metrics/environment.yml @@ -0,0 +1,9 @@ +channels: + - conda-forge + - bioconda +dependencies: + - python=3.11 + - numpy + - biopython + - jax + - jaxlib diff --git a/modules/local/extract_metrics/main.nf b/modules/local/extract_metrics/main.nf new file mode 100644 index 000000000..e6fc04b69 --- /dev/null +++ b/modules/local/extract_metrics/main.nf @@ -0,0 +1,77 @@ +/* + * Extract metrics from structure prediction serialized outputs + */ +process EXTRACT_METRICS { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + + input: + tuple val(meta), path(raw), val(mode), path(features) + + output: + tuple val(meta), path("${meta.id}_plddt.tsv") , emit: multiqc + tuple val(meta), path("${meta.id}_alphafold2_msa.tsv") , emit: msa + tuple val(meta), path("${meta.id}_*_pae.tsv") , optional: true, emit: paes + tuple val(meta), path("${meta.id}_0_pae.tsv") , optional: true, emit: pae + tuple val(meta), path("${meta.id}_ptm.tsv") , optional: true, emit: ptms + tuple val(meta), path("${meta.id}_iptm.tsv") , optional: true, emit: iptms + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + if [[ "${mode}" != "alphafold2" ]]; then + echo "Unsupported mode for EXTRACT_METRICS: ${mode}" >&2 + exit 1 + fi + + # Handle both regular files and symlink-staged files from Nextflow work dirs. + mapfile -t ranked_structs < <(find -L . -name "ranked*.pdb" | sort) + if [[ "${'$'}{#ranked_structs[@]}" -eq 0 ]]; then + echo "Could not find ranked AlphaFold2 structures in raw output" >&2 + exit 1 + fi + + features_pkl=\$(find -L . 
-name "features.pkl" | head -n 1) + if [[ -z "\$features_pkl" && "${features}" != "NO_FILE" ]]; then + features_pkl="${features}" + fi + if [[ -z "\$features_pkl" ]]; then + echo "Could not find features.pkl in raw output" >&2 + exit 1 + fi + + mapfile -t pkl_files < <(find -L . -name "*.pkl" | sort) + + extract_metrics.py --name ${meta.id} \ + --pkls "\$features_pkl" "${'$'}{pkl_files[@]}" \ + --structs "${'$'}{ranked_structs[@]}" + + mv "${meta.id}_msa.tsv" "${meta.id}_alphafold2_msa.tsv" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python3 --version 2>/dev/null | sed 's/Python //g' || echo "unknown") + numpy: \$(python3 -c "import numpy; print(numpy.__version__)" 2>/dev/null || echo "unknown") + END_VERSIONS + """ + + stub: + """ + touch "${meta.id}_plddt.tsv" + touch "${meta.id}_alphafold2_msa.tsv" + touch "${meta.id}_0_pae.tsv" + touch "${meta.id}_ptm.tsv" + touch "${meta.id}_iptm.tsv" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python3 --version 2>/dev/null | sed 's/Python //g' || echo "unknown") + numpy: \$(python3 -c "import numpy; print(numpy.__version__)" 2>/dev/null || echo "unknown") + END_VERSIONS + """ +} diff --git a/modules/local/extract_metrics_helixfold3/main.nf b/modules/local/extract_metrics_helixfold3/main.nf new file mode 100644 index 000000000..1dd092a56 --- /dev/null +++ b/modules/local/extract_metrics_helixfold3/main.nf @@ -0,0 +1,57 @@ +/* + * Extract metrics from HelixFold3 outputs + */ +process EXTRACT_METRICS_HELIXFOLD3 { + tag "$meta.id" + label 'process_single' + + container "nf-core/proteinfold_helixfold3:2.0.0" + + input: + tuple val(meta), path(raw) + + output: + tuple val(meta), path("${meta.id}_plddt.tsv") , emit: multiqc + tuple val(meta), path("${meta.id}_helixfold3_msa.tsv") , emit: msa + tuple val(meta), path("${meta.id}_1_pae.tsv") , emit: pae + tuple val(meta), path("${meta.id}_*_pae.tsv") , emit: paes + tuple val(meta), path("${meta.id}_ptm.tsv") , emit: ptms + tuple 
val(meta), path("${meta.id}_iptm.tsv") , optional: true, emit: iptms + path ("versions.yml") , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + mamba run --name helixfold extract_metrics.py --name ${meta.id} \\ + --structs ${raw}/${raw.baseName}-rank*/predicted_structure.pdb \\ + --pkls "${raw}/final_features.pkl" \\ + --jsons ${raw}/${raw.baseName}-rank*/all_results.json + + mv "${meta.id}_msa.tsv" "${meta.id}_helixfold3_msa.tsv" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python3 --version 2>/dev/null | sed 's/Python //g' || echo "unknown") + END_VERSIONS + """ + + stub: + """ + touch "${meta.id}_plddt.tsv" + touch "${meta.id}_helixfold3_msa.tsv" + touch "${meta.id}_ptm.tsv" + touch "${meta.id}_iptm.tsv" + touch "${meta.id}_1_pae.tsv" + touch "${meta.id}_2_pae.tsv" + touch "${meta.id}_3_pae.tsv" + touch "${meta.id}_4_pae.tsv" + touch "${meta.id}_5_pae.tsv" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python3 --version 2>/dev/null | sed 's/Python //g' || echo "unknown") + END_VERSIONS + """ +} diff --git a/modules/local/run_alphafold2/main.nf b/modules/local/run_alphafold2/main.nf index eea900702..ea431d36c 100644 --- a/modules/local/run_alphafold2/main.nf +++ b/modules/local/run_alphafold2/main.nf @@ -9,9 +9,8 @@ process RUN_ALPHAFOLD2 { container "nf-core/proteinfold_alphafold2_standard:2.0.0" input: - tuple val(meta), path(fasta) + tuple val(meta), path(fasta), val(alphafold2_model_preset) val db_preset - val alphafold2_model_preset val uniref30_prefix path ('params/*') path ('bfd/*') @@ -26,16 +25,9 @@ process RUN_ALPHAFOLD2 { path ('uniprot/*') output: - path ("raw/**") , emit: raw + tuple val(meta), path ("raw/**") , emit: raw tuple val(meta), path ("${meta.id}_alphafold2.pdb") , emit: top_ranked_pdb tuple val(meta), path ("raw/ranked*.pdb") , emit: pdb - tuple val(meta), path ("${meta.id}_plddt.tsv") , emit: multiqc - tuple val(meta), path 
("${meta.id}_alphafold2_msa.tsv") , emit: msa - // Note: alphafold2_model_preset == "monomer" the pae file won't exist, thus the optional - tuple val(meta), path ("${meta.id}_*_pae.tsv") , optional: true, emit: paes - tuple val(meta), path ("${meta.id}_0_pae.tsv") , optional: true, emit: pae - tuple val(meta), path ("${meta.id}_ptm.tsv") , optional: true, emit: ptms - tuple val(meta), path ("${meta.id}_iptm.tsv") , optional: true, emit: iptms path "versions.yml" , emit: versions when: @@ -76,12 +68,6 @@ process RUN_ALPHAFOLD2 { cp "${fasta.baseName}"/ranked_0.pdb ./"${meta.id}"_alphafold2.pdb - extract_metrics.py --name ${meta.id} \\ - --pkls ${fasta.baseName}/features.pkl ${fasta.baseName}/*.pkl \\ - --structs ${fasta.baseName}/ranked*.pdb - - mv "${meta.id}_msa.tsv" "${meta.id}_alphafold2_msa.tsv" - # Can't use fasta.baseName to batch outputs in publishDir mv "${fasta.baseName}" raw/ @@ -99,11 +85,6 @@ process RUN_ALPHAFOLD2 { stub: """ touch "${meta.id}_alphafold2.pdb" - touch "${meta.id}_plddt.tsv" - touch "${meta.id}_alphafold2_msa.tsv" - touch "${meta.id}_0_pae.tsv" - touch "${meta.id}_ptm.tsv" - touch "${meta.id}_iptm.tsv" mkdir "raw" touch "raw/ranked_0.pdb" touch "raw/ranked_1.pdb" diff --git a/modules/local/run_alphafold2/meta.yml b/modules/local/run_alphafold2/meta.yml new file mode 100644 index 000000000..c5dc9ec51 --- /dev/null +++ b/modules/local/run_alphafold2/meta.yml @@ -0,0 +1,170 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json + +name: "run_alphafold2" +description: Run AlphaFold2 to predict protein 3D structures from amino acid sequences within the nf-core/proteinfold pipeline. The module executes AlphaFold2 inference using pretrained models and sequence databases to generate predicted protein structures, multiple sequence alignments, and confidence metrics such as pLDDT and predicted aligned error (PAE). 
+ +keywords: + - protein folding + - structure prediction + - deep learning + - alphafold + - msa + - pdb + +tools: + - "alphafold2": + description: DeepMind's deep learning-based protein structure prediction system that predicts accurate 3D protein structures from amino acid sequences using multiple sequence alignments and structural templates. + homepage: "https://github.com/google-deepmind/alphafold" + documentation: "https://github.com/google-deepmind/alphafold" + tool_dev_url: "https://github.com/google-deepmind/alphafold" + doi: "https://doi.org/10.1038/s41586-021-03819-2" + licence: "Apache-2.0" + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'protein1' ]` + - fasta: + type: file + description: Input FASTA file containing the protein sequence to be folded + pattern: "*.{fa,fasta}" + + - - db_preset: + type: string + description: | + AlphaFold2 database preset specifying whether full (full_dbs) or reduced databases (reduced_dbs) should be used + + - - alphafold2_model_preset: + type: string + description: | + AlphaFold2 model preset defining the prediction mode (e.g. 
monomer or multimer) + + - - uniref30_prefix: + type: string + description: Prefix used to locate the UniRef30 database + + - - params: + type: directory + description: AlphaFold2 model parameter files + + - - bfd: + type: directory + description: BFD sequence database used for MSA generation when running with full databases + + - - small_bfd: + type: directory + description: Reduced BFD database used for MSA generation when running with reduced databases + + - - mgnify: + type: directory + description: MGnify metagenomic sequence database used for MSA generation + + - - pdb70: + type: directory + description: PDB70 database used for structural template search in monomer mode + + - - pdb_mmcif: + type: directory + description: PDB mmCIF structure files used for structural templates + + - - uniref30: + type: directory + description: UniRef30 sequence database used for MSA generation + + - - uniref90: + type: directory + description: UniRef90 sequence database used for MSA generation + + - - pdb_seqres: + type: directory + description: PDB SeqRes database used for multimer template search + + - - uniprot: + type: directory + description: UniProt sequence database used for multimer predictions + +output: + top_ranked_structure: + - - meta: + type: map + description: Groovy Map containing sample information + - "*_alphafold2.pdb": + type: file + description: Top-ranked predicted protein structure generated by AlphaFold2 + pattern: "*.pdb" + + structures: + - - meta: + type: map + description: Groovy Map containing sample information + - "ranked*.pdb": + type: file + description: Predicted protein structures ranked by AlphaFold2 confidence + pattern: "*.pdb" + + raw_prediction_output: + - - raw: + type: directory + description: Raw AlphaFold2 prediction directory containing intermediate outputs including features, model predictions, and additional structure files + + msa_features: + - - meta: + type: map + description: Groovy Map containing sample information + - 
"*_alphafold2_msa.tsv": + type: file + description: Tab-separated file containing multiple sequence alignment statistics and metrics + pattern: "*.tsv" + + plddt: + - - meta: + type: map + description: Groovy Map containing sample information + - "*_plddt.tsv": + type: file + description: Per-residue pLDDT confidence scores extracted from AlphaFold2 predictions + pattern: "*.tsv" + + pae: + - - meta: + type: map + description: Groovy Map containing sample information + - "*_pae.tsv": + type: file + description: Predicted aligned error matrices representing pairwise residue confidence + pattern: "*.tsv" + + ptm: + - - meta: + type: map + description: Groovy Map containing sample information + - "*_ptm.tsv": + type: file + description: Predicted TM-score confidence metric produced by AlphaFold2 + pattern: "*.tsv" + + iptm: + - - meta: + type: map + description: Groovy Map containing sample information + - "*_iptm.tsv": + type: file + description: Interface predicted TM-score metric for multimer predictions + pattern: "*.tsv" + + versions: + - - "${task.process}": + type: string + description: Name of the executed process + - "alphafold2": + type: string + description: Version of AlphaFold2 used + +authors: + - "@sblaizerwize" + +maintainers: + - "@JoseEspinosa" diff --git a/modules/local/run_alphafold2_msa/main.nf b/modules/local/run_alphafold2_msa/main.nf index 551683cd5..4a4412afc 100644 --- a/modules/local/run_alphafold2_msa/main.nf +++ b/modules/local/run_alphafold2_msa/main.nf @@ -8,9 +8,8 @@ process RUN_ALPHAFOLD2_MSA { container "nf-core/proteinfold_alphafold2_msa:2.0.0" input: - tuple val(meta), path(fasta) + tuple val(meta), path(fasta), val(alphafold2_model_preset) val db_preset - val alphafold2_model_preset val uniref30_prefix path ('params/*') path ('bfd/*') diff --git a/modules/local/run_alphafold2_pred/main.nf b/modules/local/run_alphafold2_pred/main.nf index 30a581a32..872c950a4 100644 --- a/modules/local/run_alphafold2_pred/main.nf +++ 
b/modules/local/run_alphafold2_pred/main.nf @@ -9,8 +9,7 @@ process RUN_ALPHAFOLD2_PRED { container "nf-core/proteinfold_alphafold2_pred:2.0.0" input: - tuple val(meta), path(fasta), path(features) - val alphafold2_model_preset + tuple val(meta), path(fasta), path(features), val(alphafold2_model_preset) path ('params/*') path ('bfd/*') path ('small_bfd/*') @@ -24,16 +23,9 @@ process RUN_ALPHAFOLD2_PRED { path ('uniprot/*') output: - path ("raw/**") , emit: raw + tuple val(meta), path ("raw/**") , emit: raw tuple val(meta), path ("${meta.id}_alphafold2.pdb") , emit: top_ranked_pdb tuple val(meta), path ("raw/ranked*.pdb") , emit: pdb - tuple val(meta), path ("${meta.id}_alphafold2_msa.tsv") , emit: msa - tuple val(meta), path ("${meta.id}_plddt.tsv") , emit: multiqc - //Note: alphafold2_model_preset == "monomer" the pae file won't exist. - tuple val(meta), path ("${meta.id}_*_pae.tsv") , optional: true, emit: paes - tuple val(meta), path ("${meta.id}_0_pae.tsv") , optional: true, emit: pae - tuple val(meta), path ("${meta.id}_ptm.tsv") , optional: true, emit: ptms - tuple val(meta), path ("${meta.id}_iptm.tsv") , optional: true, emit: iptms path "versions.yml" , emit: versions when: @@ -55,12 +47,6 @@ process RUN_ALPHAFOLD2_PRED { cp "${fasta.baseName}"/ranked_0.pdb ./"${meta.id}"_alphafold2.pdb - extract_metrics.py --name ${meta.id} \\ - --pkls ${features} ${fasta.baseName}/*.pkl \\ - --structs ${fasta.baseName}/ranked*.pdb - - mv "${meta.id}_msa.tsv" "${meta.id}_alphafold2_msa.tsv" - # Can't use fasta.baseName to batch outputs in publishDir mv "${fasta.baseName}" raw/ @@ -78,9 +64,6 @@ process RUN_ALPHAFOLD2_PRED { stub: """ touch "${meta.id}_alphafold2.pdb" - touch "${meta.id}_plddt.tsv" - touch "${meta.id}_alphafold2_msa.tsv" - touch "${meta.id}_0_pae.tsv" mkdir "raw/" touch "raw/ranked_0.pdb" touch "raw/ranked_1.pdb" diff --git a/modules/local/run_boltz/meta.yml b/modules/local/run_boltz/meta.yml new file mode 100644 index 000000000..4472a3360 --- /dev/null +++ 
b/modules/local/run_boltz/meta.yml @@ -0,0 +1,129 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json + +name: "run_boltz" +description: Run Boltz to predict protein 3D structures from amino acid sequences within the nf-core/proteinfold pipeline. The module executes Boltz inference using pretrained checkpoints and produces predicted structures, multiple sequence alignments, and confidence metrics such as pLDDT and PAE. + +keywords: + - protein folding + - structure prediction + - deep learning + - alphafold-like + - msa + - pdb + +tools: + - "boltz": + description: Deep learning-based protein structure prediction framework that predicts 3D protein structures and associated confidence metrics from amino acid sequences. + homepage: "https://github.com/jwohlwend/boltz" + documentation: "https://www.biorxiv.org/content/10.1101/2025.06.14.659707v1" + tool_dev_url: "https://github.com/jwohlwend/boltz" + doi: "https://www.biorxiv.org/content/10.1101/2024.11.19.624167v1" + licence: "MIT" + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'protein1' ]` + - fasta: + type: file + description: Input FASTA file containing the protein sequence to be folded + pattern: "*.{fa,fasta}" + + - - files: + type: file + description: Optional cached files used during Boltz prediction + + - - boltz1_conf_ckpt: + type: file + description: Boltz pretrained checkpoint used for structure confidence prediction + pattern: "*.ckpt" + + - - ccd: + type: file + description: Chemical component dictionary used during structure generation + pattern: "*.pkl" + + - - boltz2_aff_ckpt: + type: file + description: Boltz pretrained checkpoint used for affinity prediction + pattern: "*.ckpt" + + - - boltz2_conf_ckpt: + type: file + description: Boltz pretrained checkpoint used for confidence prediction + pattern: "*.ckpt" + + - - mols: + type: file + description: Auxiliary molecular data used by Boltz during structure prediction + +output: + top_ranked_structure: + - - meta: + type: map + description: Groovy Map containing sample information + - "*_boltz.pdb": + type: file + description: Top-ranked predicted protein structure generated by Boltz + pattern: "*.pdb" + + structures: + - - meta: + type: map + description: Groovy Map containing sample information + - "*.pdb": + type: file + description: Predicted protein structures from Boltz models + pattern: "*.pdb" + + msa_features: + - - meta: + type: map + description: Groovy Map containing sample information + - "*.npz": + type: file + description: Processed multiple sequence alignment features used during prediction + pattern: "*.npz" + + confidence_metrics: + - - meta: + type: map + description: Groovy Map containing sample information + - "*.json": + type: file + description: Prediction confidence metrics produced by Boltz + pattern: "*.json" + + plddt: + - - meta: + type: map + description: Groovy Map containing sample information + - "*.npz": + type: file + description: Per-residue pLDDT confidence scores + pattern: "*.npz" + + pae: + - - meta: + type: map + description: 
Groovy Map containing sample information + - "*.npz": + type: file + description: Predicted aligned error matrices representing pairwise uncertainty + pattern: "*.npz" + + versions: + - - "${task.process}": + type: string + description: Name of the executed process + - "boltz": + type: string + description: Version of Boltz used + +authors: + - "@sblaizerwize" +maintainers: + - "@JoseEspinosa" diff --git a/modules/local/run_helixfold3/main.nf b/modules/local/run_helixfold3/main.nf index b4bda4fc7..586fee50c 100644 --- a/modules/local/run_helixfold3/main.nf +++ b/modules/local/run_helixfold3/main.nf @@ -26,17 +26,10 @@ process RUN_HELIXFOLD3 { path ('maxit_src') output: - path ("raw/**") , emit: raw + tuple val(meta), path ("raw/**") , emit: raw tuple val(meta), path ("${meta.id}_helixfold3.pdb") , emit: top_ranked_pdb tuple val(meta), path ("${meta.id}_helixfold3.cif") , emit: main_cif - tuple val(meta), path ("raw/ranked*.pdb") , emit: pdb - tuple val(meta), path ("${meta.id}_plddt.tsv") , emit: multiqc - tuple val(meta), path ("${meta.id}_helixfold3_msa.tsv") , emit: msa - // If ${meta.id}-rank*/all_results.json" doesn't have PAE vales in the key, this will be empty - tuple val(meta), path ("${meta.id}_1_pae.tsv") , emit: pae - tuple val(meta), path ("${meta.id}_*_pae.tsv") , emit: paes - tuple val(meta), path ("${meta.id}_ptm.tsv") , emit: ptms - tuple val(meta), path ("${meta.id}_iptm.tsv") , optional: true, emit: iptms + tuple val(meta), path ("raw/ranked*.pdb") , emit: pdb path ("versions.yml") , emit: versions when: @@ -83,17 +76,10 @@ process RUN_HELIXFOLD3 { cp "${fasta.baseName}/${fasta.baseName}-rank1/predicted_structure.pdb" "./${meta.id}_helixfold3.pdb" cp "${fasta.baseName}/${fasta.baseName}-rank1/predicted_structure.cif" "./${meta.id}_helixfold3.cif" - mamba run --name helixfold extract_metrics.py --name ${meta.id} \\ - --structs ${fasta.baseName}/${fasta.baseName}-rank*/predicted_structure.pdb \\ - --pkls "${fasta.baseName}/final_features.pkl" \\ - 
--jsons ${fasta.baseName}/${fasta.baseName}-rank*/all_results.json - mkdir -p raw for i in 1 2 3 4 5; do cp "${fasta.baseName}/${fasta.baseName}-rank\$i/predicted_structure.pdb" "raw/ranked_\$i.pdb" done - - mv "${meta.id}_msa.tsv" "${meta.id}_helixfold3_msa.tsv" mv "${fasta.baseName}" raw/ cat <<-END_VERSIONS > versions.yml @@ -110,15 +96,6 @@ process RUN_HELIXFOLD3 { """ touch "${meta.id}_helixfold3.cif" touch "${meta.id}_helixfold3.pdb" - touch "${meta.id}_plddt.tsv" - touch "${meta.id}_helixfold3_msa.tsv" - touch "${meta.id}_ptm.tsv" - touch "${meta.id}_iptm.tsv" - touch "${meta.id}_1_pae.tsv" - touch "${meta.id}_2_pae.tsv" - touch "${meta.id}_3_pae.tsv" - touch "${meta.id}_4_pae.tsv" - touch "${meta.id}_5_pae.tsv" mkdir -p raw touch "raw/ranked_1.pdb" touch "raw/ranked_2.pdb" diff --git a/nextflow.config b/nextflow.config index 6fc57c216..93fb8b6a9 100644 --- a/nextflow.config +++ b/nextflow.config @@ -25,7 +25,7 @@ params { alphafold2_mode = 'split_msa_prediction' // {standard, split_msa_prediction} alphafold2_max_template_date = '2038-01-19' alphafold2_full_dbs = null // true full_dbs, false reduced_dbs - alphafold2_model_preset = 'monomer_ptm' // for AF2 {monomer, monomer_casp14, monomer_ptm, multimer} + alphafold2_model_preset = 'monomer_ptm' // single-entry FASTA only: {monomer, monomer_casp14, monomer_ptm} alphafold2_db = null alphafold2_random_seed = null @@ -103,7 +103,6 @@ params { boltz2_mols_path = null // Colabfold parameters - colabfold_model_preset = "alphafold2_ptm" // {'alphafold2_ptm', 'alphafold2_multimer_v1', 'alphafold2_multimer_v2', 'alphafold2_multimer_v3'} colabfold_num_recycles = 3 colabfold_use_amber = true colabfold_db = null @@ -112,16 +111,17 @@ params { colabfold_create_index = false // Colabfold links - colabfold_db_link = null - colabfold_uniref30_link = null + colabfold_db_link = null + colabfold_uniref30_link = null + colabfold_alphafold2_params_link = null // Colabfold paths - colabfold_envdb_path = null - 
colabfold_uniref30_path = null + colabfold_envdb_path = null + colabfold_uniref30_path = null + colabfold_alphafold2_params_path = null // samplesheets with multimers must be >2021-07 // Esmfold parameters esmfold_db = null - esmfold_model_preset = "monomer" esmfold_num_recycles = 4 // Esmfold links diff --git a/nextflow_schema.json b/nextflow_schema.json index 5535e40e3..fc9cf4974 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -130,8 +130,8 @@ "alphafold2_model_preset": { "type": "string", "default": "monomer_ptm", - "description": "Model preset for 'AlphaFold2' mode", - "enum": ["monomer", "monomer_casp14", "monomer_ptm", "multimer"], + "description": "Model preset used only for single-entry FASTA inputs in 'AlphaFold2' mode", + "enum": ["monomer", "monomer_casp14", "monomer_ptm"], "fa_icon": "fas fa-stream" }, "alphafold2_random_seed": { @@ -158,18 +158,6 @@ "fa_icon": "fas fa-coins", "description": "ColabFold options.", "properties": { - "colabfold_model_preset": { - "type": "string", - "default": "alphafold2_ptm", - "description": "Model preset for 'colabfold' mode", - "enum": [ - "alphafold2_ptm", - "alphafold2_multimer_v1", - "alphafold2_multimer_v2", - "alphafold2_multimer_v3" - ], - "fa_icon": "fas fa-stream" - }, "colabfold_num_recycles": { "type": "integer", "default": 3, @@ -202,6 +190,18 @@ "type": "boolean", "description": "Create databases indexes when running colabfold_local mode", "fa_icon": "fas fa-bezier-curve" + }, + "colabfold_params_prefix": { + "type": "string", + "default": "alphafold_params_2022-12-06", + "description": "Alphafold2 parameters version", + "enum": [ + "alphafold_params_2022-12-06", + "alphafold_params_2022-03-02", + "alphafold_params_2022-01-19", + "alphafold_params_2021-07-14" + ], + "fa_icon": "fas fa-database" } } }, @@ -219,13 +219,6 @@ "minimum": 1, "maximum": 20, "errorMessage": "Number of recycles must be a whole number between 1 and 20" - }, - "esmfold_model_preset": { - "type": "string", - 
"description": "Specifies whether is a 'monomer' or 'multimer' prediction", - "enum": ["monomer", "multimer"], - "fa_icon": "fas fa-stream", - "default": "monomer" } } }, @@ -706,8 +699,9 @@ }, "colabfold_alphafold2_params_link": { "type": "string", - "description": "Link to the Alphafold2 parameters for Colabfold", - "fa_icon": "fas fa-link" + "description": "Link to the Alphafold2 monomer parameters for ColabFold", + "fa_icon": "fas fa-link", + "default": "https://storage.googleapis.com/alphafold/alphafold_params_2022-12-06.tar" } } }, @@ -739,13 +733,9 @@ }, "colabfold_alphafold2_params_path": { "type": "string", - "description": "Link to the Alphafold2 parameters for Colabfold", - "fa_icon": "fas fa-folder-open" - }, - "colabfold_alphafold2_params_tags": { - "type": "object", - "description": "Dictionary with Alphafold2 parameters tags", - "fa_icon": "fas fa-stream" + "description": "Path to the AlphaFold2 parameters for Colabfold", + "fa_icon": "fas fa-folder-open", + "default": "null/params/alphafold_params_2022-12-06" } } }, diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index 8f9eec03d..c83af9c22 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -21,9 +21,9 @@ { "@id": "./", "@type": "Dataset", - "creativeWorkStatus": "Stable", - "datePublished": "2026-03-12T11:05:10+00:00", - "description": "

\n \n \n \"nf-core/proteinfold\"\n \n

\n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/proteinfold)\n[![GitHub Actions CI Status](https://github.com/nf-core/proteinfold/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/proteinfold/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/proteinfold/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/proteinfold/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/proteinfold/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.13135393-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.13135393)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.10.2-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/proteinfold)\n\n[![Get help on 
Slack](http://img.shields.io/badge/slack-nf--core%20%23proteinfold-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/proteinfold)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/proteinfold** is a bioinformatics best-practice analysis pipeline for Protein 3D structure prediction.\n\nThe pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!\n\nOn release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. 
The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/proteinfold/results).\n\n## Pipeline summary\n\n![Alt text](docs/images/nf-core-proteinfold_metro_map_1.1.0.png?raw=true \"nf-core-proteinfold 1.1.0 metro map\")\n\n1. Choice of protein structure prediction method:\n\n i. [AlphaFold2](https://github.com/deepmind/alphafold) - Regular AlphaFold2 (MSA computation and model inference in the same process)\n\n ii. [AlphaFold2 split](https://github.com/luisas/alphafold_split) - AlphaFold2 MSA computation and model inference in separate processes\n\n iii. [AlphaFold3](https://github.com/deepmind/alphafold) - Regular AlphaFold3 (MSA computation and model inference in the same process)\n\n iv. [ColabFold](https://github.com/sokrypton/ColabFold) - MMseqs2 API server followed by ColabFold\n\n v. [ColabFold](https://github.com/sokrypton/ColabFold) - MMseqs2 local search followed by ColabFold\n\n vi. [ESMFold](https://github.com/facebookresearch/esm) - Regular ESM\n\n vii. [RoseTTAFold-All-Atom](https://github.com/baker-laboratory/RoseTTAFold-All-Atom/) - Regular RFAA\n\n viii. [HelixFold3](https://github.com/PaddlePaddle/PaddleHelix/tree/dev/apps/protein_folding/helixfold3) - Regular HF3\n\n ix. [Boltz](https://github.com/jwohlwend/boltz/) - Regular Boltz-1\n\n x. [RosettaFold2NA](https://github.com/uw-ipd/RoseTTAFold2NA) - Regular RF2NA\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/proteinfold \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\nThe pipeline takes care of downloading the databases and parameters required by AlphaFold2, Colabfold, ESMFold RoseTTAFold-All-Atom or RosettaFold2NA. In case you have already downloaded the required files, you can skip this step by providing the path to the databases using the corresponding parameter [`--alphafold2_db`], [`--colabfold_db`], [`--esmfold_db`] or ['--rosettafold_all_atom_db']. Please refer to the [usage documentation](https://nf-co.re/proteinfold/usage) to check the directory structure you must provide for each database.\n\n- The typical command to run AlphaFold2 mode is shown below:\n\n ```console\n nextflow run nf-core/proteinfold \\\n --input samplesheet.csv \\\n --outdir \\\n --mode alphafold2 \\\n --alphafold2_db \\\n --alphafold2_full_dbs \\\n --alphafold2_model_preset monomer \\\n --use_gpu \\\n -profile \n ```\n\n- Here is the command to run AlphaFold2 splitting the MSA from the prediction execution:\n\n ```console\n nextflow run nf-core/proteinfold \\\n --input samplesheet.csv \\\n --outdir \\\n --mode alphafold2 \\\n --alphafold2_mode split_msa_prediction \\\n --alphafold2_db \\\n --alphafold2_full_dbs \\\n --alphafold2_model_preset monomer \\\n --use_gpu \\\n -profile \n ```\n\n- The AlphaFold3 mode can be run using the command below:\n\n ```console\n nextflow run nf-core/proteinfold \\\n --input samplesheet.csv \\\n --outdir \\\n --mode alphafold3 \\\n --alphafold3_db \\\n --use_gpu \\\n -profile \n ```\n\n > [!WARNING]\n > The AlphaFold3 weights are not provided by this pipeline. 
Users must obtain the weights directly from DeepMind according to their [terms of use](https://github.com/deepmind/alphafold/blob/main/WEIGHTS_TERMS_OF_USE.md) and [prohibited use policy](https://github.com/deepmind/alphafold/blob/main/WEIGHTS_PROHIBITED_USE_POLICY.md). Please ensure you comply with all terms and conditions before using AlphaFold3. For more information about AlphaFold3 usage and requirements, please refer to the [official AlphaFold3 repository](https://github.com/deepmind/alphafold).\n\n- Below, the command to run colabfold_local mode:\n\n ```console\n nextflow run nf-core/proteinfold \\\n --input samplesheet.csv \\\n --outdir \\\n --mode colabfold \\\n --colabfold_db \\\n --num_recycles_colabfold 3 \\\n --use_amber \\\n --colabfold_model_preset \"alphafold2_ptm\" \\\n --use_gpu \\\n --db_load_mode 0\n -profile \n ```\n\n- The typical command to run colabfold_webserver mode would be:\n\n ```console\n nextflow run nf-core/proteinfold \\\n --input samplesheet.csv \\\n --outdir \\\n --mode colabfold \\\n --use_msa_server \\\n --msa_server_url \\\n --colabfold_db \\\n --num_recycles_colabfold 3 \\\n --use_amber \\\n --colabfold_model_preset \"alphafold2_ptm\" \\\n --use_gpu \\\n -profile \n ```\n\n > [!WARNING]\n > If you aim to carry out a large amount of predictions using the colabfold_webserver mode, please setup and use your own custom MMSeqs2 API Server. 
You can find instructions [here](https://github.com/sokrypton/ColabFold/tree/main/MsaServer).\n\n- The esmfold mode can be run using the command below:\n\n ```console\n nextflow run nf-core/proteinfold \\\n --input samplesheet.csv \\\n --outdir \\\n --mode esmfold \\\n --esmfold_model_preset \\\n --esmfold_db \\\n --num_recycles_esmfold 4 \\\n --use_gpu \\\n -profile \n ```\n\n- The rosettafold_all_atom mode can be run using the command below:\n\n ```console\n nextflow run nf-core/proteinfold \\\n --input samplesheet.csv \\\n --outdir \\\n --mode rosettafold_all_atom \\\n --rosettafold_all_atom_db \\\n --use_gpu \\\n -profile \n ```\n\n- The helixfold3 mode can be run using the command below:\n\n ```console\n nextflow run nf-core/proteinfold \\\n --input samplesheet.csv \\\n --outdir \\\n --mode helixfold3 \\\n --helixfold3_db \\\n --use_gpu \\\n -profile \n ```\n\n- The RosettaFold2NA mode can be run using the command below:\n\n ```console\n nextflow run nf-core/proteinfold \\\n --input samplesheet.csv \\\n --outdir \\\n --mode rosettafold2na \\\n --rosettafold2na_db \\\n --use_gpu \\\n -profile \n ```\n\n- The boltz mode can be run using the command below:\n\n ```console\n nextflow run nf-core/proteinfold \\\n --input samplesheet.csv \\\n --outdir \\\n --mode boltz \\\n --boltz_ccd_path \\\n --boltz_model_path \\\n --use_gpu \\\n -profile \n ```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/proteinfold/usage) and the [parameter documentation](https://nf-co.re/proteinfold/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/proteinfold/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/proteinfold/output).\n\n## Adding new modes to the pipeline\n\nFor details on how to contribute new modes to the pipeline please refer to the [Howto contribute new modes](https://nf-co.re/proteinfold/usage/HOWTO_CONTRIBUTE_NEW_MODES).\n\n## Credits\n\nnf-core/proteinfold was originally written by Athanasios Baltzis ([@athbaltzis](https://github.com/athbaltzis)), Jose Espinosa-Carrasco ([@JoseEspinosa](https://github.com/JoseEspinosa)), Luisa Santus ([@luisas](https://github.com/luisas)) and Leila Mansouri ([@l-mansouri](https://github.com/l-mansouri)) from [The Comparative Bioinformatics Group](https://www.crg.eu/en/cedric_notredame) at [The Centre for Genomic Regulation, Spain](https://www.crg.eu/) under the umbrella of the [BovReg project](https://www.bovreg.eu/) and Harshil Patel ([@drpatelh](https://github.com/drpatelh)) from [Seqera Labs, Spain](https://seqera.io/).\n\nMany thanks to others who have helped out and contributed along the way too, including (but not limited to): Norman Goodacre and Waleed Osman from Interline Therapeutics ([@interlinetx](https://github.com/interlinetx)), Martin Steinegger ([@martin-steinegger](https://github.com/martin-steinegger)) and Raoul J.P. 
Bonnal ([@rjpbonnal](https://github.com/rjpbonnal))\n\nWe would also like to thanks to the AWS Open Data Sponsorship Program for generously providing the resources necessary to host the data utilized in the testing, development, and deployment of nf-core proteinfold.\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#proteinfold` channel](https://nfcore.slack.com/channels/proteinfold) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/proteinfold for your analysis, please cite it using the following doi: [10.5281/zenodo.7437038](https://doi.org/10.5281/zenodo.7437038)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", + "creativeWorkStatus": "InProgress", + "datePublished": "2026-01-13T11:55:37+00:00", + "description": "

\n \n \n \"nf-core/proteinfold\"\n \n

\n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/proteinfold)\n[![GitHub Actions CI Status](https://github.com/nf-core/proteinfold/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/proteinfold/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/proteinfold/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/proteinfold/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/proteinfold/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.13135393-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.13135393)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.10.2-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/proteinfold)\n\n[![Get help on 
Slack](http://img.shields.io/badge/slack-nf--core%20%23proteinfold-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/proteinfold)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/proteinfold** is a bioinformatics best-practice analysis pipeline for Protein 3D structure prediction.\n\nThe pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!\n\nOn release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. 
The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/proteinfold/results).\n\n## Pipeline summary\n\n![Alt text](docs/images/nf-core-proteinfold_metro_map_2.0.0.png?raw=true \"nf-core-proteinfold 2.0.0 metro map\")\n\n| Mode | Protein | RNA | Small-molecule | PTM | Constraints | pLM | MSA server | Split MSA |\n| :------------------------------------------------------------------------------------------------- | :-----: | :-: | :------------: | :-: | :---------: | :-: | :--------: | :-------: |\n| [AlphaFold2](https://github.com/deepmind/alphafold) | \u2705 | \u274c | \u274c | \u274c | \u274c | \u274c | \u274c | \u2705 |\n| [ESMFold](https://github.com/facebookresearch/esm) | \u2705 | \u274c | \u274c | \u274c | \u274c | \u2705 | \u274c | \u274c |\n| [ColabFold](https://github.com/sokrypton/ColabFold) | \u2705 | \u274c | \u274c | \u274c | \u274c | \u274c | \u2705 | \u2705 |\n| [RoseTTAFold2NA](https://github.com/uw-ipd/RoseTTAFold2NA) | \u2705 | \u2705 | \u274c | \u274c | \u274c | \u274c | \u274c | \u274c |\n| [RoseTTAFold-All-Atom](https://github.com/baker-laboratory/RoseTTAFold-All-Atom/) | \u2705 | \u2705 | \u2705 | \u2705 | \u274c | \u274c | \u274c | \u274c |\n| [AlphaFold3](https://github.com/google-deepmind/alphafold3) | \u2705 | \u2705 | \u2705 | \u2705 | \u274c | \u274c | \u274c | \u274c |\n| [HelixFold3](https://github.com/PaddlePaddle/PaddleHelix/tree/dev/apps/protein_folding/helixfold3) | \u2705 | \u2705 | \u2705 | \u2705 | \u274c | \u274c | \u274c | \u274c |\n| [Boltz](https://github.com/jwohlwend/boltz/) | \u2705 | \u2705 | \u2705 | \u2705 | \u2705 | \u274c | \u2705 | \u2705 |\n\n**nf-core/proteinfold** supports multiple tools for general molecular structure prediction. Each of the methods have overlapping functionality which can be utilized within the pipeline. All tools support predicting protein structure from an input amino acid sequence. The pipeline is composed of the following steps:\n\n1. 
Split input fasta file (Optional): The pipeline can split large batches of monomeric sequences (eg an entire genome) from a multi-entry fasta input using the `--split_fasta` flag.\n\n2. Prepare databases for chosen methods: The pipeline downloads any required reference data.\n\n3. Structure prediction:\n\n i. Combined: MSA Search + Model Inference: Structures are predicted from MSAs generated using built-in homolog search pipelines.\n\n ii. Split: AlphaFold2 MSA Search + Model Inference: The AlphaFold2 MSA generation pipeline is executed independently and then provided as input for AlphaFold2 structure prediction.\n\n iii. Split: ColabFold MSA Search + Model Inference: The ColabFold MSA generation pipeline is used to produce input MSAs which can be used by ColabFold and Boltz.\n\n iv. pLM: Protein Language Model: The ESMFold model is used to predict structures without generating an MSA.\n\n4. Generate Report: The pipeline produces an interactive HTML report to visualize structure prediction outputs.\n\n5. Comparison Report: The structures predicted by parallel modes are combined in an interactive HTML report.\n\n6. MultiQC: The overall QC statistics are summarized.\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n```csv title=\"samplesheet.csv\"\nid,fasta\nT1024,T1024.fasta\nT1026,T1026.fasta\n```\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/proteinfold \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \\\n --mode \n```\n\nThe pipeline takes care of downloading the databases and parameters required by each of the modes. 
In case you have already downloaded the required files, you can skip this step by providing the path to the databases using the `--db` parameter.\n\n```bash\nnextflow run nf-core/proteinfold \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \\\n --mode \\\n --db \n```\n\n> [!WARNING]\n> The reference data for most methods is extremely large and may exceed individual user disk allocations on shared HPC systems.\n\nIn order to run multiple methods simultaneously where reference data is stored at different locations, the `--db` flag can be overwritten for each specific mode (e.g. `--alphafold2_db`, `--colabfold_db`, `--esmfold_db` and `--rosettafold_all_atom_db`). Please refer to the [usage documentation](https://nf-co.re/proteinfold/usage) to check the directory structure you must provide for each database.\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/proteinfold/usage) and the [parameter documentation](https://nf-co.re/proteinfold/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/proteinfold/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/proteinfold/output).\n\n## Adding new modes to the pipeline\n\nFor details on how to contribute new modes to the pipeline please refer to the [Howto contribute new modes](https://nf-co.re/proteinfold/usage/HOWTO_CONTRIBUTE_NEW_MODES).\n\n## Credits\n\nnf-core/proteinfold was originally written by Athanasios Baltzis 
([@athbaltzis](https://github.com/athbaltzis)), Jose Espinosa-Carrasco ([@JoseEspinosa](https://github.com/JoseEspinosa)), Luisa Santus ([@luisas](https://github.com/luisas)) and Leila Mansouri ([@l-mansouri](https://github.com/l-mansouri)) from [The Comparative Bioinformatics Group](https://www.crg.eu/en/cedric_notredame) at [The Centre for Genomic Regulation, Spain](https://www.crg.eu/) under the umbrella of the [BovReg project](https://www.bovreg.eu/) and Harshil Patel ([@drpatelh](https://github.com/drpatelh)) from [Seqera Labs, Spain](https://seqera.io/).\n\nMany thanks to others who have helped out and contributed along the way too, including (but not limited to): Norman Goodacre and Waleed Osman from Interline Therapeutics ([@interlinetx](https://github.com/interlinetx)), Martin Steinegger ([@martin-steinegger](https://github.com/martin-steinegger)) and Raoul J.P. Bonnal ([@rjpbonnal](https://github.com/rjpbonnal))\n\nWe would also like to thanks to the AWS Open Data Sponsorship Program for generously providing the resources necessary to host the data utilized in the testing, development, and deployment of nf-core proteinfold.\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#proteinfold` channel](https://nfcore.slack.com/channels/proteinfold) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/proteinfold for your analysis, please cite it using the following doi: [10.5281/zenodo.7437038](https://doi.org/10.5281/zenodo.7437038)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven 
Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", "hasPart": [ { "@id": "main.nf" diff --git a/subworkflows/local/prepare_colabfold_dbs.nf b/subworkflows/local/prepare_colabfold_dbs.nf index 78e5d750b..43b03bb42 100644 --- a/subworkflows/local/prepare_colabfold_dbs.nf +++ b/subworkflows/local/prepare_colabfold_dbs.nf @@ -13,10 +13,10 @@ workflow PREPARE_COLABFOLD_DBS { take: colabfold_db // directory: path/to/colabfold/DBs and params use_msa_server // bool: Specifies whether to use web msa server - colabfold_alphafold2_params_path // directory: /path/to/colabfold/alphafold2/params/ + colabfold_alphafold2_params_path // directory: /path/to/colabfold/params/ colabfold_envdb_path // directory: /path/to/colabfold/db/ colabfold_uniref30_path // directory: /path/to/uniref30/colabfold/ - colabfold_alphafold2_params_link // string: Specifies the link to download colabfold alphafold2 params + colabfold_alphafold2_params_link // string: Specifies the link to download colabfold params colabfold_db_link // string: Specifies the link to download colabfold db colabfold_uniref30_link // string: Specifies the link to download uniref30 colabfold_create_index // boolean: Create index for colabfold db @@ -28,7 +28,7 @@ workflow PREPARE_COLABFOLD_DBS { ch_versions = channel.empty() if (colabfold_db) { - ch_params = channel.value(file(colabfold_alphafold2_params_path, type: 'any', checkIfExists: true)) + ch_params = channel.value(file(colabfold_alphafold2_params_path, type: 'any', checkIfExists: true)) if (!use_msa_server) { ch_colabfold_db = channel.value(file(colabfold_envdb_path, type: 'any', checkIfExists: true)) ch_uniref30 = channel.value(file(colabfold_uniref30_path, type: 'any', checkIfExists: true)) @@ -38,10 +38,13 @@ workflow PREPARE_COLABFOLD_DBS { ARIA2_COLABFOLD_PARAMS ( 
colabfold_alphafold2_params_link ) + ch_params = ARIA2_COLABFOLD_PARAMS .out .db - .map { dir -> dir.listFiles().findAll { it -> it.isFile() } } + .map { + dir -> dir.listFiles().findAll { it -> it.isFile() } + } ch_versions = ch_versions.mix(ARIA2_COLABFOLD_PARAMS.out.versions) diff --git a/subworkflows/local/utils_nfcore_proteinfold_pipeline/main.nf b/subworkflows/local/utils_nfcore_proteinfold_pipeline/main.nf index 5ef5e402e..0fe23fcc0 100644 --- a/subworkflows/local/utils_nfcore_proteinfold_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_proteinfold_pipeline/main.nf @@ -108,16 +108,17 @@ ${colors.purple} nf-core/rnaseq ${workflow.manifest.version}${colors.reset} } if (params.split_fasta) { - ch_samplesheet.map { _meta, fasta -> + ch_samplesheet = ch_samplesheet.map { meta, fasta -> validateFasta(fasta) + [meta, fasta] } // Split the fasta file into individual files for each sequence ch_samplesheet - .map { _meta,fasta -> fasta } - .splitFasta( record: [header: true, sequence: true] ) - .collectFile { item -> - [ "${cleanHeader(item["header"])}.fa", ">" + cleanHeader(item["header"]) + '\n' +item["sequence"] ] + .map { meta, fasta -> [meta.id, fasta] } + .splitFasta( record: [header: true, sequence: true], elem: 1 ) + .collectFile { sample_id, item -> + [ "${cleanHeader(sample_id.toString())}_${cleanHeader(item["header"])}.fa", ">" + cleanHeader(item["header"]) + '\n' +item["sequence"] ] } .map { file -> [[id: file.baseName], file] @@ -193,33 +194,6 @@ def validateInputParameters() { } } -// -// Get link to Colabfold Alphafold2 parameters -// -def getColabfoldAlphafold2Params() { - def link = null - if (params.colabfold_alphafold2_params_tags) { - if (params.colabfold_alphafold2_params_tags.containsKey(params.colabfold_model_preset.toString())) { - link = "https://storage.googleapis.com/alphafold/" + params.colabfold_alphafold2_params_tags[ params.colabfold_model_preset.toString() ] + '.tar' - } - } - return link -} - -// -// Get path to Colabfold 
Alphafold2 parameters -// -def getColabfoldAlphafold2ParamsPath() { - def path = null - params.colabfold_model_preset.toString() - if (params.colabfold_alphafold2_params_tags) { - if (params.colabfold_alphafold2_params_tags.containsKey(params.colabfold_model_preset.toString())) { - path = "${params.colabfold_db}/params/" + params.colabfold_alphafold2_params_tags[ params.colabfold_model_preset.toString() ] - } - } - return path -} - def modeChannel(ch, mode) { return ch.map { meta, value -> def meta_clone = meta.clone() @@ -228,6 +202,16 @@ def modeChannel(ch, mode) { } } +def countMolecularEntitiesInFasta(fasta) { + return fasta.text + .readLines() + .count { line -> line.trim().startsWith('>') } +} + +def resolveModelPresetByFastaEntities(fasta, monomerPreset, multimerPreset = 'multimer') { + return countMolecularEntitiesInFasta(fasta) > 1 ? multimerPreset : monomerPreset +} + // // Generate methods description for MultiQC // @@ -296,11 +280,13 @@ def cleanHeader(header) { .replaceAll("/","_") .replaceAll(",", "") .replaceAll(";","") + .replaceAll("\\|","_") } def validateFasta(fasta) { + def lines = fasta.text.readLines() // extract headers - def headers = fasta.findAll { it -> it.startsWith('>') } + def headers = lines.findAll { it -> it.startsWith('>') } // if headers are not unique, throw an error if (headers.size() != headers.unique().size()) { throw new Exception("Invalid FASTA file. 
The headers are not unique.") diff --git a/tests/alphafold2_download.nf.test.snap b/tests/alphafold2_download.nf.test.snap index 48d464d74..0da3496c0 100644 --- a/tests/alphafold2_download.nf.test.snap +++ b/tests/alphafold2_download.nf.test.snap @@ -1,7 +1,7 @@ { "-profile test_alphafold2_download": { "content": [ - 25, + 23, { "ARIA2": { "aria2": null @@ -16,10 +16,6 @@ "sed": 4.9, "rsync": "3.3.0" }, - "GENERATE_REPORT": { - "python": "3.12.7", - "generate_report.py": "Python 3.12.7" - }, "RUN_ALPHAFOLD2": { "python": "unknown", "alphafold2": "unknown", @@ -56,20 +52,6 @@ "DBs/alphafold2/uniref90/uniref90.fasta", "alphafold2", "alphafold2/standard", - "alphafold2/standard/T1024", - "alphafold2/standard/T1024/T1024_alphafold2_msa.tsv", - "alphafold2/standard/T1024/T1024_iptm.tsv", - "alphafold2/standard/T1024/T1024_plddt.tsv", - "alphafold2/standard/T1024/T1024_ptm.tsv", - "alphafold2/standard/T1024/paes", - "alphafold2/standard/T1024/paes/T1024_0_pae.tsv", - "alphafold2/standard/T1026", - "alphafold2/standard/T1026/T1026_alphafold2_msa.tsv", - "alphafold2/standard/T1026/T1026_iptm.tsv", - "alphafold2/standard/T1026/T1026_plddt.tsv", - "alphafold2/standard/T1026/T1026_ptm.tsv", - "alphafold2/standard/T1026/paes", - "alphafold2/standard/T1026/paes/T1026_0_pae.tsv", "alphafold2/standard/top_ranked_structures", "alphafold2/standard/top_ranked_structures/T1024.pdb", "alphafold2/standard/top_ranked_structures/T1026.pdb", @@ -78,9 +60,7 @@ "multiqc/alphafold2_multiqc_plots", "multiqc/alphafold2_multiqc_report.html", "pipeline_info", - "pipeline_info/nf_core_proteinfold_software_mqc_versions.yml", - "reports", - "reports/test_alphafold2_report.html" + "pipeline_info/nf_core_proteinfold_software_mqc_versions.yml" ], [ "mgy_clusters.fa:md5,d41d8cd98f00b204e9800998ecf8427e", @@ -103,26 +83,15 @@ "file.txt:md5,d41d8cd98f00b204e9800998ecf8427e" ], "uniref90.fasta:md5,d41d8cd98f00b204e9800998ecf8427e", - "T1024_alphafold2_msa.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", - 
"T1024_iptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", - "T1024_plddt.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", - "T1024_ptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", - "T1024_0_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", - "T1026_alphafold2_msa.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", - "T1026_iptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", - "T1026_plddt.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", - "T1026_ptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", - "T1026_0_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1024.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", - "alphafold2_multiqc_report.html:md5,d41d8cd98f00b204e9800998ecf8427e", - "test_alphafold2_report.html:md5,d41d8cd98f00b204e9800998ecf8427e" + "alphafold2_multiqc_report.html:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.2", "nextflow": "25.10.4" }, - "timestamp": "2026-03-12T16:09:42.635737909" + "timestamp": "2026-03-19T10:32:36.607419477" } } \ No newline at end of file diff --git a/tests/alphafold2_split.nf.test.snap b/tests/alphafold2_split.nf.test.snap index ef128e119..3b9bad862 100644 --- a/tests/alphafold2_split.nf.test.snap +++ b/tests/alphafold2_split.nf.test.snap @@ -1,8 +1,12 @@ { "-profile test_alphafold2_split": { "content": [ - 7, + 9, { + "EXTRACT_METRICS_AF2_PRED": { + "python": "3.11.14", + "numpy": "1.24.3" + }, "GENERATE_REPORT": { "python": "3.12.7", "generate_report.py": "Python 3.12.7" @@ -30,14 +34,18 @@ "alphafold2/split_msa_prediction", "alphafold2/split_msa_prediction/T1024", "alphafold2/split_msa_prediction/T1024/T1024_alphafold2_msa.tsv", + "alphafold2/split_msa_prediction/T1024/T1024_iptm.tsv", "alphafold2/split_msa_prediction/T1024/T1024_plddt.tsv", + "alphafold2/split_msa_prediction/T1024/T1024_ptm.tsv", "alphafold2/split_msa_prediction/T1024/msa", "alphafold2/split_msa_prediction/T1024/msa/features.pkl", "alphafold2/split_msa_prediction/T1024/paes", 
"alphafold2/split_msa_prediction/T1024/paes/T1024_0_pae.tsv", "alphafold2/split_msa_prediction/T1026", "alphafold2/split_msa_prediction/T1026/T1026_alphafold2_msa.tsv", + "alphafold2/split_msa_prediction/T1026/T1026_iptm.tsv", "alphafold2/split_msa_prediction/T1026/T1026_plddt.tsv", + "alphafold2/split_msa_prediction/T1026/T1026_ptm.tsv", "alphafold2/split_msa_prediction/T1026/msa", "alphafold2/split_msa_prediction/T1026/msa/features.pkl", "alphafold2/split_msa_prediction/T1026/paes", @@ -56,11 +64,15 @@ ], [ "T1024_alphafold2_msa.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "T1024_iptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1024_plddt.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "T1024_ptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "features.pkl:md5,d41d8cd98f00b204e9800998ecf8427e", "T1024_0_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026_alphafold2_msa.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "T1026_iptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026_plddt.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "T1026_ptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "features.pkl:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026_0_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1024.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", @@ -70,9 +82,9 @@ ] ], "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.2", "nextflow": "25.10.4" }, - "timestamp": "2026-03-12T16:12:35.47858571" + "timestamp": "2026-03-19T10:44:23.044329052" } } \ No newline at end of file diff --git a/tests/alphafold3.nf.test.snap b/tests/alphafold3.nf.test.snap index b24775c3e..f3b915061 100644 --- a/tests/alphafold3.nf.test.snap +++ b/tests/alphafold3.nf.test.snap @@ -105,9 +105,9 @@ ] ], "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.2", "nextflow": "25.10.4" }, - "timestamp": "2026-03-12T16:14:34.760694724" + "timestamp": "2026-03-19T10:45:00.535114114" } } \ No newline at end of file diff --git a/tests/boltz.nf.test.snap b/tests/boltz.nf.test.snap index 7e776118e..da54ec2c6 100644 --- 
a/tests/boltz.nf.test.snap +++ b/tests/boltz.nf.test.snap @@ -95,9 +95,9 @@ ] ], "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.2", "nextflow": "25.10.4" }, - "timestamp": "2026-03-12T16:15:43.464789542" + "timestamp": "2026-03-19T10:45:31.65078569" } } \ No newline at end of file diff --git a/tests/colabfold_download.nf.test.snap b/tests/colabfold_download.nf.test.snap index 08fcb40c4..90f7a0809 100644 --- a/tests/colabfold_download.nf.test.snap +++ b/tests/colabfold_download.nf.test.snap @@ -25,7 +25,7 @@ "DBs", "DBs/colabfold", "DBs/colabfold/params", - "DBs/colabfold/params/alphafold_params_2021-07-14", + "DBs/colabfold/params/alphafold_params_2022-12-06", "colabfold", "colabfold/T1024", "colabfold/T1024/T1024_colabfold_msa.tsv", @@ -72,9 +72,9 @@ ] ], "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.2", "nextflow": "25.10.4" }, - "timestamp": "2026-03-12T16:17:44.266981229" + "timestamp": "2026-03-19T10:45:50.324882546" } } \ No newline at end of file diff --git a/tests/colabfold_local.nf.test.snap b/tests/colabfold_local.nf.test.snap index a806bc9cd..458b30647 100644 --- a/tests/colabfold_local.nf.test.snap +++ b/tests/colabfold_local.nf.test.snap @@ -66,9 +66,9 @@ ] ], "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.2", "nextflow": "25.10.4" }, - "timestamp": "2026-03-12T16:16:34.340030626" + "timestamp": "2026-03-19T10:46:08.625591792" } } \ No newline at end of file diff --git a/tests/colabfold_webserver.nf.test.snap b/tests/colabfold_webserver.nf.test.snap index 4354b1bbd..b27d4f5ee 100644 --- a/tests/colabfold_webserver.nf.test.snap +++ b/tests/colabfold_webserver.nf.test.snap @@ -62,9 +62,9 @@ ] ], "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.2", "nextflow": "25.10.4" }, - "timestamp": "2026-03-12T16:20:48.487472959" + "timestamp": "2026-03-19T10:46:25.311723197" } } \ No newline at end of file diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap index 2cd8ceba2..7cfc6becd 100644 --- a/tests/default.nf.test.snap +++ 
b/tests/default.nf.test.snap @@ -2,10 +2,6 @@ "-profile test": { "content": [ { - "GENERATE_REPORT": { - "python": "3.12.7", - "generate_report.py": "Python 3.12.7" - }, "RUN_ALPHAFOLD2": { "python": "unknown", "alphafold2": "unknown", @@ -21,20 +17,6 @@ [ "alphafold2", "alphafold2/standard", - "alphafold2/standard/T1024", - "alphafold2/standard/T1024/T1024_alphafold2_msa.tsv", - "alphafold2/standard/T1024/T1024_iptm.tsv", - "alphafold2/standard/T1024/T1024_plddt.tsv", - "alphafold2/standard/T1024/T1024_ptm.tsv", - "alphafold2/standard/T1024/paes", - "alphafold2/standard/T1024/paes/T1024_0_pae.tsv", - "alphafold2/standard/T1026", - "alphafold2/standard/T1026/T1026_alphafold2_msa.tsv", - "alphafold2/standard/T1026/T1026_iptm.tsv", - "alphafold2/standard/T1026/T1026_plddt.tsv", - "alphafold2/standard/T1026/T1026_ptm.tsv", - "alphafold2/standard/T1026/paes", - "alphafold2/standard/T1026/paes/T1026_0_pae.tsv", "alphafold2/standard/top_ranked_structures", "alphafold2/standard/top_ranked_structures/T1024.pdb", "alphafold2/standard/top_ranked_structures/T1026.pdb", @@ -43,31 +25,18 @@ "multiqc/alphafold2_multiqc_plots", "multiqc/alphafold2_multiqc_report.html", "pipeline_info", - "pipeline_info/nf_core_proteinfold_software_mqc_versions.yml", - "reports", - "reports/test_alphafold2_report.html" + "pipeline_info/nf_core_proteinfold_software_mqc_versions.yml" ], [ - "T1024_alphafold2_msa.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", - "T1024_iptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", - "T1024_plddt.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", - "T1024_ptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", - "T1024_0_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", - "T1026_alphafold2_msa.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", - "T1026_iptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", - "T1026_plddt.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", - "T1026_ptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", - "T1026_0_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", 
"T1024.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", - "alphafold2_multiqc_report.html:md5,d41d8cd98f00b204e9800998ecf8427e", - "test_alphafold2_report.html:md5,d41d8cd98f00b204e9800998ecf8427e" + "alphafold2_multiqc_report.html:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.2", "nextflow": "25.10.4" }, - "timestamp": "2026-03-12T16:13:26.007551223" + "timestamp": "2026-03-19T10:46:42.057505938" } } \ No newline at end of file diff --git a/tests/esmfold.nf.test.snap b/tests/esmfold.nf.test.snap index be4ec57b1..bb6d17a65 100644 --- a/tests/esmfold.nf.test.snap +++ b/tests/esmfold.nf.test.snap @@ -46,9 +46,9 @@ ] ], "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.2", "nextflow": "25.10.4" }, - "timestamp": "2026-03-12T16:21:39.018391209" + "timestamp": "2026-03-19T10:46:58.740037475" } } \ No newline at end of file diff --git a/tests/helixfold3.nf.test.snap b/tests/helixfold3.nf.test.snap index 0455c881e..7439d0fbc 100644 --- a/tests/helixfold3.nf.test.snap +++ b/tests/helixfold3.nf.test.snap @@ -1,8 +1,11 @@ { "-profile test_helixfold3": { "content": [ - 7, + 9, { + "EXTRACT_METRICS_HELIXFOLD3": { + "python": "3.9.23" + }, "GENERATE_REPORT": { "python": "3.12.7", "generate_report.py": "Python 3.12.7" @@ -84,9 +87,9 @@ ] ], "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.2", "nextflow": "25.10.4" }, - "timestamp": "2026-03-12T16:22:20.586950258" + "timestamp": "2026-03-18T18:28:50.194322071" } } \ No newline at end of file diff --git a/tests/rosettafold2na.nf.test.snap b/tests/rosettafold2na.nf.test.snap index b2202a8a9..fba2461e1 100644 --- a/tests/rosettafold2na.nf.test.snap +++ b/tests/rosettafold2na.nf.test.snap @@ -48,9 +48,9 @@ ] ], "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.2", "nextflow": "25.10.4" }, - "timestamp": "2026-03-12T16:23:41.931720663" + "timestamp": "2026-03-19T11:02:13.845843304" } } \ No newline at end of file diff --git 
a/tests/rosettafold_all_atom.nf.test.snap b/tests/rosettafold_all_atom.nf.test.snap index c15a6ac5e..c4bf56a41 100644 --- a/tests/rosettafold_all_atom.nf.test.snap +++ b/tests/rosettafold_all_atom.nf.test.snap @@ -62,9 +62,9 @@ ] ], "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.2", "nextflow": "25.10.4" }, - "timestamp": "2026-03-12T16:23:04.191225337" + "timestamp": "2026-03-19T11:02:32.490601321" } } \ No newline at end of file diff --git a/tests/split_fasta.nf.test.snap b/tests/split_fasta.nf.test.snap index 0d7e0336a..e6f05f185 100644 --- a/tests/split_fasta.nf.test.snap +++ b/tests/split_fasta.nf.test.snap @@ -24,21 +24,21 @@ }, [ "colabfold", - "colabfold/H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues", - "colabfold/H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues/H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues_colabfold_msa.tsv", - "colabfold/H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues/H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues_plddt.tsv", - "colabfold/H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues/H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues_ptm.tsv", - "colabfold/H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues/paes", - "colabfold/H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues/paes/H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues_0_pae.tsv", - "colabfold/H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues", - "colabfold/H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues/H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues_colabfold_msa.tsv", - 
"colabfold/H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues/H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues_plddt.tsv", - "colabfold/H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues/H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues_ptm.tsv", - "colabfold/H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues/paes", - "colabfold/H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues/paes/H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues_0_pae.tsv", + "colabfold/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues", + "colabfold/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues_colabfold_msa.tsv", + "colabfold/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues_plddt.tsv", + "colabfold/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues_ptm.tsv", + "colabfold/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues/paes", + "colabfold/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues/paes/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues_0_pae.tsv", + "colabfold/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues", + "colabfold/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues_colabfold_msa.tsv", + 
"colabfold/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues_plddt.tsv", + "colabfold/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues_ptm.tsv", + "colabfold/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues/paes", + "colabfold/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues/paes/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues_0_pae.tsv", "colabfold/top_ranked_structures", - "colabfold/top_ranked_structures/H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues.pdb", - "colabfold/top_ranked_structures/H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues.pdb", + "colabfold/top_ranked_structures/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues.pdb", + "colabfold/top_ranked_structures/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues.pdb", "multifasta", "multifasta/input.csv", "multiqc", @@ -51,24 +51,24 @@ "reports/test_alphafold2_report.html" ], [ - "H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues_colabfold_msa.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", - "H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues_plddt.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", - "H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues_ptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", - "H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues_0_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", - "H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues_colabfold_msa.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", - 
"H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues_plddt.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", - "H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues_ptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", - "H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues_0_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", - "H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", - "H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", + "H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues_colabfold_msa.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues_plddt.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues_ptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues_0_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues_colabfold_msa.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues_plddt.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues_ptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues_0_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", + "H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", 
"input.csv:md5,d41d8cd98f00b204e9800998ecf8427e", "test_alphafold2_report.html:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.2", "nextflow": "25.10.4" }, - "timestamp": "2026-03-12T16:25:13.497245275" + "timestamp": "2026-03-19T11:02:49.945409863" } } \ No newline at end of file diff --git a/workflows/alphafold2.nf b/workflows/alphafold2.nf index 1c17cea1c..26c4c2297 100644 --- a/workflows/alphafold2.nf +++ b/workflows/alphafold2.nf @@ -10,6 +10,9 @@ include { RUN_ALPHAFOLD2 } from '../modules/local/run_alphafold2' include { RUN_ALPHAFOLD2_MSA } from '../modules/local/run_alphafold2_msa' include { RUN_ALPHAFOLD2_PRED } from '../modules/local/run_alphafold2_pred' +include { EXTRACT_METRICS as EXTRACT_METRICS_AF2_STANDARD } from '../modules/local/extract_metrics' +include { EXTRACT_METRICS as EXTRACT_METRICS_AF2_PRED } from '../modules/local/extract_metrics' +include { resolveModelPresetByFastaEntities } from '../subworkflows/local/utils_nfcore_proteinfold_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -30,7 +33,7 @@ workflow ALPHAFOLD2 { ch_versions // channel: [ path(versions.yml) ] alphafold2_full_dbs // boolean: Use full databases (otherwise reduced version) alphafold2_mode // string: Mode to run Alphafold2 in - alphafold2_model_preset // string: Specifies the model preset to use for Alphafold2 + alphafold2_model_preset // string: Model preset used for single-entry FASTA inputs uniref30_prefix // string: Prefix for uniref30 database files ch_alphafold2_params // channel: path(alphafold2_params) ch_bfd // channel: path(bfd) @@ -50,25 +53,22 @@ workflow ALPHAFOLD2 { ch_msa = channel.empty() ch_pae = channel.empty() ch_multiqc_report = channel.empty() + ch_no_file = channel.fromPath("$projectDir/assets/NO_FILE") - if (alphafold2_model_preset != 'multimer') { - ch_samplesheet - .map { - meta, fasta -> - [ meta, fasta.splitFasta(file:true) ] - } - .transpose() - 
.set { ch_samplesheet } - } + ch_samplesheet + .map { meta, fasta -> + def resolved_model_preset = resolveModelPresetByFastaEntities(fasta, alphafold2_model_preset, 'multimer') + [ meta, fasta, resolved_model_preset ] + } + .set { ch_samplesheet_prepared } if (alphafold2_mode == 'standard') { // // SUBWORKFLOW: Run Alphafold2 standard mode // RUN_ALPHAFOLD2 ( - ch_samplesheet, + ch_samplesheet_prepared, alphafold2_full_dbs, - alphafold2_model_preset, uniref30_prefix, ch_alphafold2_params, ch_bfd, @@ -83,7 +83,17 @@ workflow ALPHAFOLD2 { ch_uniprot ) - RUN_ALPHAFOLD2 + EXTRACT_METRICS_AF2_STANDARD( + RUN_ALPHAFOLD2 + .out + .raw + .join(ch_no_file) + .map { meta, raw, no_file -> + [ meta, raw, "alphafold2", no_file ] + } + ) + + EXTRACT_METRICS_AF2_STANDARD .out .multiqc .map { it -> it[1] } @@ -95,18 +105,18 @@ workflow ALPHAFOLD2 { ch_pdb = ch_pdb.mix(RUN_ALPHAFOLD2.out.pdb) ch_top_ranked_pdb = ch_top_ranked_pdb.mix(RUN_ALPHAFOLD2.out.top_ranked_pdb) - ch_msa = ch_msa.mix(RUN_ALPHAFOLD2.out.msa) - ch_pae = ch_pae.mix(RUN_ALPHAFOLD2.out.pae) + ch_msa = ch_msa.mix(EXTRACT_METRICS_AF2_STANDARD.out.msa) + ch_pae = ch_pae.mix(EXTRACT_METRICS_AF2_STANDARD.out.pae) ch_versions = ch_versions.mix(RUN_ALPHAFOLD2.out.versions) + ch_versions = ch_versions.mix(EXTRACT_METRICS_AF2_STANDARD.out.versions) } else if (alphafold2_mode == 'split_msa_prediction') { // // SUBWORKFLOW: Run Alphafold2 split mode, MSA and predicition // RUN_ALPHAFOLD2_MSA ( - ch_samplesheet, + ch_samplesheet_prepared, alphafold2_full_dbs, - alphafold2_model_preset, uniref30_prefix, ch_alphafold2_params, ch_bfd, @@ -123,13 +133,15 @@ workflow ALPHAFOLD2 { ch_versions = ch_versions.mix(RUN_ALPHAFOLD2_MSA.out.versions) //synchronize - ch_samplesheet + ch_samplesheet_prepared .join(RUN_ALPHAFOLD2_MSA.out.features) + .map { meta, fasta, resolved_model_preset, features -> + [ meta, fasta, features, resolved_model_preset ] + } .set { ch_fasta_features } RUN_ALPHAFOLD2_PRED ( ch_fasta_features, - 
alphafold2_model_preset, ch_alphafold2_params, ch_bfd, ch_small_bfd, @@ -143,7 +155,15 @@ workflow ALPHAFOLD2 { ch_uniprot ) - RUN_ALPHAFOLD2_PRED + EXTRACT_METRICS_AF2_PRED( + RUN_ALPHAFOLD2_PRED + .out + .raw + .join(ch_fasta_features) + .map { meta, raw, _fasta, features, _preset -> [ meta, raw, "alphafold2", features ] } + ) + + EXTRACT_METRICS_AF2_PRED .out .multiqc .map { it -> it[1] } @@ -155,9 +175,10 @@ workflow ALPHAFOLD2 { ch_top_ranked_pdb = ch_top_ranked_pdb.mix(RUN_ALPHAFOLD2_PRED.out.top_ranked_pdb) ch_pdb = ch_pdb.mix(RUN_ALPHAFOLD2_PRED.out.pdb) - ch_msa = ch_msa.mix(RUN_ALPHAFOLD2_PRED.out.msa) - ch_pae = ch_pae.mix(RUN_ALPHAFOLD2_PRED.out.pae) + ch_msa = ch_msa.mix(EXTRACT_METRICS_AF2_PRED.out.msa) + ch_pae = ch_pae.mix(EXTRACT_METRICS_AF2_PRED.out.pae) ch_versions = ch_versions.mix(RUN_ALPHAFOLD2_PRED.out.versions) + ch_versions = ch_versions.mix(EXTRACT_METRICS_AF2_PRED.out.versions) } ch_pdb diff --git a/workflows/colabfold.nf b/workflows/colabfold.nf index 312a22b4a..45129c6db 100644 --- a/workflows/colabfold.nf +++ b/workflows/colabfold.nf @@ -30,7 +30,6 @@ workflow COLABFOLD { take: ch_samplesheet // channel: samplesheet read in from --input ch_versions // channel: [ path(versions.yml) ] - colabfold_model_preset // string: Specifies the model preset to use for colabfold ch_colabfold_params // channel: path(colabfold_params) ch_colabfold_db // channel: path(colabfold_db) ch_uniref30 // channel: path(uniref30) @@ -50,9 +49,8 @@ workflow COLABFOLD { ch_versions = ch_versions.mix(MULTIFASTA_TO_CSV.out.versions) COLABFOLD_BATCH( - MULTIFASTA_TO_CSV.out.input_csv, - colabfold_model_preset, - ch_colabfold_params, + MULTIFASTA_TO_CSV.out.input_csv + .combine(ch_colabfold_params), [], [], num_recycles @@ -63,7 +61,6 @@ workflow COLABFOLD { // // MODULE: Run mmseqs // - //Multimer mode MULTIFASTA_TO_CSV( ch_samplesheet ) @@ -79,9 +76,8 @@ workflow COLABFOLD { // MODULE: Run colabfold // COLABFOLD_BATCH( - MMSEQS_COLABFOLDSEARCH.out.a3m, - 
colabfold_model_preset, - ch_colabfold_params, + MMSEQS_COLABFOLDSEARCH.out.a3m + .combine(ch_colabfold_params), ch_colabfold_db, ch_uniref30, num_recycles diff --git a/workflows/esmfold.nf b/workflows/esmfold.nf index 5a221d986..1a04854a8 100644 --- a/workflows/esmfold.nf +++ b/workflows/esmfold.nf @@ -9,6 +9,7 @@ // include { RUN_ESMFOLD } from '../modules/local/run_esmfold' include { MULTIFASTA_TO_SINGLEFASTA } from '../modules/local/multifasta_to_singlefasta' +include { countMolecularEntitiesInFasta } from '../subworkflows/local/utils_nfcore_proteinfold_pipeline' include { modeChannel } from '../subworkflows/local/utils_nfcore_proteinfold_pipeline' @@ -36,25 +37,32 @@ workflow ESMFOLD { // // MODULE: Run esmfold // - if (params.esmfold_model_preset != 'monomer') { - MULTIFASTA_TO_SINGLEFASTA( - ch_samplesheet - ) - ch_versions = ch_versions.mix(MULTIFASTA_TO_SINGLEFASTA.out.versions) - RUN_ESMFOLD( - MULTIFASTA_TO_SINGLEFASTA.out.input_fasta, - ch_esmfold_params, - ch_num_recycles - ) - ch_versions = ch_versions.mix(RUN_ESMFOLD.out.versions) - } else { - RUN_ESMFOLD( - ch_samplesheet, - ch_esmfold_params, - ch_num_recycles - ) - ch_versions = ch_versions.mix(RUN_ESMFOLD.out.versions) - } + ch_samplesheet + .map { meta, fasta -> + [ meta, fasta, countMolecularEntitiesInFasta(fasta) ] + } + .branch { it -> + multimer: it[2] > 1 + monomer: it[2] <= 1 + } + .set { ch_input_by_entity_count } + + MULTIFASTA_TO_SINGLEFASTA( + ch_input_by_entity_count.multimer.map { meta, fasta, _entity_count -> + [ meta, fasta ] + } + ) + ch_versions = ch_versions.mix(MULTIFASTA_TO_SINGLEFASTA.out.versions) + RUN_ESMFOLD( + ch_input_by_entity_count.monomer + .map { meta, fasta, _entity_count -> + [ meta, fasta ] + } + .mix(MULTIFASTA_TO_SINGLEFASTA.out.input_fasta), + ch_esmfold_params, + ch_num_recycles + ) + ch_versions = ch_versions.mix(RUN_ESMFOLD.out.versions) RUN_ESMFOLD .out diff --git a/workflows/helixfold3.nf b/workflows/helixfold3.nf index 9defb5c43..21e10f3fc 100644 --- 
a/workflows/helixfold3.nf +++ b/workflows/helixfold3.nf @@ -9,6 +9,7 @@ // include { RUN_HELIXFOLD3 } from '../modules/local/run_helixfold3' include { FASTA2JSON } from '../modules/local/fasta2json' +include { EXTRACT_METRICS_HELIXFOLD3 } from '../modules/local/extract_metrics_helixfold3' include { modeChannel } from '../subworkflows/local/utils_nfcore_proteinfold_pipeline' @@ -77,7 +78,9 @@ workflow HELIXFOLD3 { ch_helixfold3_maxit_src ) - RUN_HELIXFOLD3 + EXTRACT_METRICS_HELIXFOLD3(RUN_HELIXFOLD3.out.raw) + + EXTRACT_METRICS_HELIXFOLD3 .out .multiqc .map { it -> it[1] } @@ -89,6 +92,7 @@ workflow HELIXFOLD3 { ch_pdb = ch_pdb.mix(RUN_HELIXFOLD3.out.pdb) ch_versions = ch_versions.mix(RUN_HELIXFOLD3.out.versions) + ch_versions = ch_versions.mix(EXTRACT_METRICS_HELIXFOLD3.out.versions) RUN_HELIXFOLD3 .out @@ -111,8 +115,8 @@ workflow HELIXFOLD3 { } .set { ch_pdb_final } - modeChannel(RUN_HELIXFOLD3.out.msa, "helixfold3").set { ch_msa_final } - modeChannel(RUN_HELIXFOLD3.out.pae, "helixfold3").set { ch_pae_final } + modeChannel(EXTRACT_METRICS_HELIXFOLD3.out.msa, "helixfold3").set { ch_msa_final } + modeChannel(EXTRACT_METRICS_HELIXFOLD3.out.pae, "helixfold3").set { ch_pae_final } emit: top_ranked_pdb = ch_top_ranked_pdb // channel: [ meta, /path/to/*.pdb ]