From bf0cb6da5056634cdcf7b72a082e0dd804b33f7a Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Thu, 26 Feb 2026 11:19:45 +1100 Subject: [PATCH 01/53] feat(dockerfile): update mmseqs-colabfoldsearch dockerfile to download gpu compiled mmseqs --- modules/local/mmseqs_colabfoldsearch/Dockerfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/local/mmseqs_colabfoldsearch/Dockerfile b/modules/local/mmseqs_colabfoldsearch/Dockerfile index 1c57a9f16..eb7bc3200 100644 --- a/modules/local/mmseqs_colabfoldsearch/Dockerfile +++ b/modules/local/mmseqs_colabfoldsearch/Dockerfile @@ -22,10 +22,10 @@ RUN apt-get update && \ pip install --no-cache-dir --break-system-packages \ "colabfold @ git+https://github.com/sokrypton/ColabFold.git@e8ebd9a" && \ \ - wget -q https://github.com/soedinglab/MMseqs2/releases/download/${MMSEQS2_VERSION}/mmseqs-linux-sse41.tar.gz && \ - tar xzf mmseqs-linux-sse41.tar.gz && \ + wget -q https://github.com/soedinglab/MMseqs2/releases/download/${MMSEQS2_VERSION}/mmseqs-linux-gpu.tar.gz && \ + tar xzf mmseqs-linux-gpu.tar.gz && \ cp mmseqs/bin/* /usr/local/bin/ && \ - rm -rf mmseqs mmseqs-linux-sse41.tar.gz && \ + rm -rf mmseqs mmseqs-linux-gpu.tar.gz && \ \ apt-get remove -y \ python3-dev \ From 67ded38467a3d01af4d6f8d14a27c28cace7f9a7 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Thu, 26 Feb 2026 11:45:39 +1100 Subject: [PATCH 02/53] feat(gpu_option): add gpu search option to nextflow config --- nextflow.config | 1 + 1 file changed, 1 insertion(+) diff --git a/nextflow.config b/nextflow.config index 3c2a5d403..12161806b 100644 --- a/nextflow.config +++ b/nextflow.config @@ -110,6 +110,7 @@ params { colabfold_db_load_mode = 0 colabfold_use_templates = false colabfold_create_index = false + colabfold_enable_gpu_search = true // Colabfold links colabfold_db_link = null From b83052858703e0823bac72f03ba0f452592363ba Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Thu, 26 Feb 2026 13:31:42 +1100 Subject: [PATCH 03/53] feat(config): add 
configuration for gpu padded dbs --- conf/modules_colabfold.config | 3 +++ 1 file changed, 3 insertions(+) diff --git a/conf/modules_colabfold.config b/conf/modules_colabfold.config index 35cc33330..0e166cbd9 100644 --- a/conf/modules_colabfold.config +++ b/conf/modules_colabfold.config @@ -32,6 +32,9 @@ process { withName: '.*PREPARE_COLABFOLD_DBS:ARIA2_UNIREF30:UNTAR' { ext.prefix = 'colabfold_uniref30' } + withName: '.*PREPARE_COLABFOLD_DBS:MMSEQS_CREATEINDEX_UNIPROT30_PADDED' { + ext.prefix = 'colabfold_uniref30_gpu' + } } // TODO: Differentiate between local and server MSA? From 84604aab17558c9bda3e73b7a6d9d672a3404618 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Thu, 26 Feb 2026 13:33:27 +1100 Subject: [PATCH 04/53] feat(mmseqs): add future import for mmseqs makepaddedseqdb --- subworkflows/local/prepare_colabfold_dbs.nf | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/prepare_colabfold_dbs.nf b/subworkflows/local/prepare_colabfold_dbs.nf index 7a4e5fe3a..15825a614 100644 --- a/subworkflows/local/prepare_colabfold_dbs.nf +++ b/subworkflows/local/prepare_colabfold_dbs.nf @@ -1,8 +1,9 @@ // // Download all the required databases and params by Colabfold // -include { MMSEQS_CREATEINDEX as MMSEQS_CREATEINDEX_COLABFOLDDB } from '../../modules/nf-core/mmseqs/createindex/main' -include { MMSEQS_CREATEINDEX as MMSEQS_CREATEINDEX_UNIPROT30 } from '../../modules/nf-core/mmseqs/createindex/main' +include { MMSEQS_CREATEINDEX as MMSEQS_CREATEINDEX_COLABFOLDDB } from '../../modules/nf-core/mmseqs/createindex/main' +include { MMSEQS_CREATEINDEX as MMSEQS_CREATEINDEX_UNIPROT30 } from '../../modules/nf-core/mmseqs/createindex/main' +include { MMSEQS_CREATEINDEX as MMSEQS_CREATEINDEX_UNIPROT30_PADDED } from '../../modules/nf-core/mmseqs/createindex/main' // TODO: Waiting on RP merge in nf-core/modules include { ARIA2_UNCOMPRESS as ARIA2_COLABFOLD_PARAMS } from './aria2_uncompress' include { ARIA2_UNCOMPRESS as ARIA2_COLABFOLD_DB } from 
'./aria2_uncompress' From abe18b8905eff765b4ae2cbb1798c3ed92f37294 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Thu, 26 Feb 2026 13:34:37 +1100 Subject: [PATCH 05/53] feat(mmseqs): add inputs for precomputed databases and add output channels --- subworkflows/local/prepare_colabfold_dbs.nf | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/subworkflows/local/prepare_colabfold_dbs.nf b/subworkflows/local/prepare_colabfold_dbs.nf index 15825a614..95ce7bd16 100644 --- a/subworkflows/local/prepare_colabfold_dbs.nf +++ b/subworkflows/local/prepare_colabfold_dbs.nf @@ -17,16 +17,19 @@ workflow PREPARE_COLABFOLD_DBS { colabfold_alphafold2_params_path // directory: /path/to/colabfold/alphafold2/params/ colabfold_envdb_path // directory: /path/to/colabfold/db/ colabfold_uniref30_path // directory: /path/to/uniref30/colabfold/ + colabfold_uniref30_path_padded // directory: /path/to/uniref30/colabfold_padded/ + colabfold_enable_gpu_search // boolean: Enable GPU accelerated search (collect or create GPU padded databases) colabfold_alphafold2_params_link // string: Specifies the link to download colabfold alphafold2 params colabfold_db_link // string: Specifies the link to download colabfold db colabfold_uniref30_link // string: Specifies the link to download uniref30 colabfold_create_index // boolean: Create index for colabfold db main: - ch_params = channel.empty() - ch_colabfold_db = channel.empty() - ch_uniref30 = channel.empty() - ch_versions = channel.empty() + ch_params = channel.empty() + ch_colabfold_db = channel.empty() + ch_uniref30 = channel.empty() + ch_uniref30_padded = channel.empty() + ch_versions = channel.empty() if (colabfold_db) { ch_params = channel.value(file(colabfold_alphafold2_params_path, type: 'any')) From 19f023a3d36509057af2ec38022152dbf18cd525 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Thu, 26 Feb 2026 13:35:03 +1100 Subject: [PATCH 06/53] feat(mmseqs): emit output channels --- subworkflows/local/prepare_colabfold_dbs.nf 
| 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/subworkflows/local/prepare_colabfold_dbs.nf b/subworkflows/local/prepare_colabfold_dbs.nf index 95ce7bd16..66b81eedd 100644 --- a/subworkflows/local/prepare_colabfold_dbs.nf +++ b/subworkflows/local/prepare_colabfold_dbs.nf @@ -113,8 +113,9 @@ workflow PREPARE_COLABFOLD_DBS { } emit: - params = ch_params - colabfold_db = ch_colabfold_db - uniref30 = ch_uniref30 - versions = ch_versions + params = ch_params + colabfold_db = ch_colabfold_db + uniref30 = ch_uniref30 + uniref30_padded = ch_uniref30_padded + versions = ch_versions } From 5047707dee7cbe8e894cc520334fe0ba58c69f63 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Thu, 26 Feb 2026 13:35:26 +1100 Subject: [PATCH 07/53] feat(mmseqs): add logic for mmseqs gpu database creation --- subworkflows/local/prepare_colabfold_dbs.nf | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/subworkflows/local/prepare_colabfold_dbs.nf b/subworkflows/local/prepare_colabfold_dbs.nf index 66b81eedd..0aee34dd2 100644 --- a/subworkflows/local/prepare_colabfold_dbs.nf +++ b/subworkflows/local/prepare_colabfold_dbs.nf @@ -112,6 +112,24 @@ workflow PREPARE_COLABFOLD_DBS { } } + if (colabfold_enable_gpu_search) { + // TODO: Blocked and awaiting PR merge in nf-core/modules + MMSEQS_CREATEINDEX_UINPROT30_PADDED( + ch_uniref30. 
+ map { path_str -> + def db_file = file(path_str) + [ [id: "uniprot30_gpu"], db_file ] + } + ) + + ch_uniprot30_padded = MMSEQS_CREATEINDEX_UNIPROT30_PADDED + .out + .db_padded + .map { _meta, dir -> + file("${dir}/*") + } + } + emit: params = ch_params colabfold_db = ch_colabfold_db From 0a1f47d34f657a1164c0c7133e66a63769cf3374 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Thu, 26 Feb 2026 13:40:15 +1100 Subject: [PATCH 08/53] feat(mmseqs): add gpu search options to input of prepare dbs --- main.nf | 2 ++ 1 file changed, 2 insertions(+) diff --git a/main.nf b/main.nf index 9ddbee2c5..4bc7f3c59 100644 --- a/main.nf +++ b/main.nf @@ -248,6 +248,8 @@ workflow NFCORE_PROTEINFOLD { params.colabfold_alphafold2_params_path, params.colabfold_envdb_path, params.colabfold_uniref30_path, + params.colabfold_uniref30_path_padded, + params.colabfold_enable_gpu_search, params.colabfold_alphafold2_params_link, params.colabfold_db_link, params.colabfold_uniref30_link, From a84c2956a2dc5827f87ccc3748c922c468263a59 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Thu, 26 Feb 2026 13:40:36 +1100 Subject: [PATCH 09/53] feat(mmseqs): add check for gpu search and swap in padded dbs if enabled --- main.nf | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/main.nf b/main.nf index 4bc7f3c59..a9c4c1736 100644 --- a/main.nf +++ b/main.nf @@ -260,16 +260,26 @@ workflow NFCORE_PROTEINFOLD { // // WORKFLOW: Run nf-core/colabfold workflow // - COLABFOLD ( - ch_samplesheet, - ch_versions, - params.colabfold_model_preset, - PREPARE_COLABFOLD_DBS.out.params, - PREPARE_COLABFOLD_DBS.out.colabfold_db, - PREPARE_COLABFOLD_DBS.out.uniref30, - params.colabfold_num_recycles - ) - + if (params.colabfold_enable_gpu_search) { + COLABFOLD ( + ch_samplesheet, + ch_versions, + params.colabfold_model_preset, + PREPARE_COLABFOLD_DBS.out.params, + PREPARE_COLABFOLD_DBS.out.colabfold_db, + PREPARE_COLABFOLD_DBS.out.uniref30_padded, + params.colabfold_num_recycles + } 
else { + COLABFOLD ( + ch_samplesheet, + ch_versions, + params.colabfold_model_preset, + PREPARE_COLABFOLD_DBS.out.params, + PREPARE_COLABFOLD_DBS.out.colabfold_db, + PREPARE_COLABFOLD_DBS.out.uniref30, + params.colabfold_num_recycles + ) + } ch_multiqc = ch_multiqc.mix(COLABFOLD.out.multiqc_report) ch_versions = ch_versions.mix(COLABFOLD.out.versions) ch_report_input = ch_report_input From f97c5e6bb5020418312b48f1ef2a24d9623bb7e8 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Thu, 26 Feb 2026 13:47:25 +1100 Subject: [PATCH 10/53] fix(syntax): fix missing closing bracket --- main.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/main.nf b/main.nf index a9c4c1736..46ca18194 100644 --- a/main.nf +++ b/main.nf @@ -269,6 +269,7 @@ workflow NFCORE_PROTEINFOLD { PREPARE_COLABFOLD_DBS.out.colabfold_db, PREPARE_COLABFOLD_DBS.out.uniref30_padded, params.colabfold_num_recycles + ) } else { COLABFOLD ( ch_samplesheet, From 792db458c9850664db3f7d43d1a6694122af6513 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Thu, 26 Feb 2026 14:09:20 +1100 Subject: [PATCH 11/53] feat(mmseqs): add mmseqs/makepaddedseqdb to nf-core modules --- .../mmseqs/makepaddedseqdb/environment.yml | 7 ++ .../nf-core/mmseqs/makepaddedseqdb/main.nf | 51 +++++++++++++++ .../nf-core/mmseqs/makepaddedseqdb/meta.yml | 64 +++++++++++++++++++ .../mmseqs/makepaddedseqdb/tests/main.nf.test | 49 ++++++++++++++ .../makepaddedseqdb/tests/main.nf.test.snap | 36 +++++++++++ .../makepaddedseqdb/tests/nextflow.config | 5 ++ 6 files changed, 212 insertions(+) create mode 100644 modules/nf-core/mmseqs/makepaddedseqdb/environment.yml create mode 100644 modules/nf-core/mmseqs/makepaddedseqdb/main.nf create mode 100644 modules/nf-core/mmseqs/makepaddedseqdb/meta.yml create mode 100644 modules/nf-core/mmseqs/makepaddedseqdb/tests/main.nf.test create mode 100644 modules/nf-core/mmseqs/makepaddedseqdb/tests/main.nf.test.snap create mode 100644 modules/nf-core/mmseqs/makepaddedseqdb/tests/nextflow.config diff --git 
a/modules/nf-core/mmseqs/makepaddedseqdb/environment.yml b/modules/nf-core/mmseqs/makepaddedseqdb/environment.yml new file mode 100644 index 000000000..072223f2a --- /dev/null +++ b/modules/nf-core/mmseqs/makepaddedseqdb/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::mmseqs2=18.8cc5c diff --git a/modules/nf-core/mmseqs/makepaddedseqdb/main.nf b/modules/nf-core/mmseqs/makepaddedseqdb/main.nf new file mode 100644 index 000000000..61857c179 --- /dev/null +++ b/modules/nf-core/mmseqs/makepaddedseqdb/main.nf @@ -0,0 +1,51 @@ +process MMSEQS_MAKEPADDEDSEQDB { + tag "${meta.id}" + label 'process_low' + conda "${moduleDir}/environment.yml" + + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/fe/fe49c17754753d6cd9a31e5894117edaf1c81e3d6053a12bf6dc8f3af1dffe23/data' + : 'community.wave.seqera.io/library/mmseqs2:18.8cc5c--af05c9a98d9f6139'}" + + input: + tuple val(meta), path(db_in) + + output: + tuple val(meta), path("${prefix}/"), emit: db_padded + tuple val("${task.process}"), val('mmseqs'), eval('mmseqs version'), topic: versions, emit: versions_mmseqs + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '*.dbtype' + prefix = task.ext.prefix ?: "${meta.id}" + if ("${db_in}" == "${prefix}") { + error("Input and output names of databases are the same, set prefix in module configuration to disambiguate!") + } + """ + DB_TARGET_PATH_NAME=\$(find -L "${db_in}/" -maxdepth 1 -name "${args2}" | sed 's/\\.[^.]*\$//' | sed -e 'N;s/^\\(.*\\).*\\n\\1.*\$/\\1\\n\\1/;D' ) + mkdir -p ${prefix} + mmseqs \\ + makepaddedseqdb \\ + \$DB_TARGET_PATH_NAME \\ + ${prefix}/${prefix} \\ + ${args} + """ + + stub: + def 
args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + echo ${args} + mkdir -p ${prefix} + touch ${prefix}/${prefix} + touch ${prefix}/${prefix}.dbtype + touch ${prefix}/${prefix}.index + touch ${prefix}/${prefix}.lookup + touch ${prefix}/${prefix}_h + touch ${prefix}/${prefix}_h.dbtype + touch ${prefix}/${prefix}_h.index + """ +} diff --git a/modules/nf-core/mmseqs/makepaddedseqdb/meta.yml b/modules/nf-core/mmseqs/makepaddedseqdb/meta.yml new file mode 100644 index 000000000..8858d9bb9 --- /dev/null +++ b/modules/nf-core/mmseqs/makepaddedseqdb/meta.yml @@ -0,0 +1,64 @@ +name: "mmseqs_makepaddedseqdb" +description: Create an MMseqs padded database from an existing MMseqs database +keywords: + - protein sequence + - databases + - clustering + - searching + - indexing + - mmseqs2 +tools: + - "mmseqs": + description: "MMseqs2: ultra fast and sensitive sequence search and clustering + suite" + homepage: "https://github.com/soedinglab/MMseqs2" + documentation: "https://mmseqs.com/latest/userguide.pdf" + tool_dev_url: "https://github.com/soedinglab/MMseqs2" + doi: "10.1093/bioinformatics/btw006" + licence: + - "GPL v3" + identifier: biotools:mmseqs +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - db_in: + type: directory + description: Input of existing MMseqs database +output: + db_padded: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test', single_end:false ]` + - "${prefix}/": + type: directory + description: The padded MMseqs2 database + versions_mmseqs: + - - ${task.process}: + type: string + description: The name of the process + - mmseqs: + type: string + description: The name of the tool + - mmseqs version: + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - mmseqs: + type: string + description: The name of the tool + - mmseqs version: + type: eval + description: The expression to obtain the version of the tool +authors: + - "@nbtm-sh" +maintainers: + - "@nbtm-sh" diff --git a/modules/nf-core/mmseqs/makepaddedseqdb/tests/main.nf.test b/modules/nf-core/mmseqs/makepaddedseqdb/tests/main.nf.test new file mode 100644 index 000000000..6c3ae4b7e --- /dev/null +++ b/modules/nf-core/mmseqs/makepaddedseqdb/tests/main.nf.test @@ -0,0 +1,49 @@ +nextflow_process { + + name "Test Process MMSEQS_MAKEPADDEDSEQDB" + script "../main.nf" + process "MMSEQS_MAKEPADDEDSEQDB" + tag "modules" + tag "modules_nfcore" + tag "mmseqs" + tag "mmseqs/makepaddedseqdb" + tag "mmseqs/createdb" + + config "./nextflow.config" + + setup { + run("MMSEQS_CREATEDB") { + script "../../../mmseqs/createdb/main.nf" + process { + """ + input[0] = [ [ id:'test_query' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fasta/contigs.fasta', checkIfExists: true) + ] + """ + } + } + } + + test("mmseqs_db sarscov2 contigs") { + + when { + params { + module_prefix = "test_query_gpu" + } + process { + """ + input[0] = MMSEQS_CREATEDB.out.db + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(sanitizeOutput(process.out)).match() + } + ) + } + + } +} diff --git a/modules/nf-core/mmseqs/makepaddedseqdb/tests/main.nf.test.snap b/modules/nf-core/mmseqs/makepaddedseqdb/tests/main.nf.test.snap new file mode 100644 index 000000000..8ce6f93de --- /dev/null 
+++ b/modules/nf-core/mmseqs/makepaddedseqdb/tests/main.nf.test.snap @@ -0,0 +1,36 @@ +{ + "mmseqs_db sarscov2 contigs": { + "content": [ + { + "db_padded": [ + [ + { + "id": "test_query" + }, + [ + "test_query_gpu:md5,5b24585ba92fd826c78b8664c63b4e95", + "test_query_gpu.dbtype:md5,01d39098f2bfee5c808a3b4ff54deac2", + "test_query_gpu.index:md5,5946b4989d08320d9daca503155ba693", + "test_query_gpu.lookup:md5,3eb85c645034a0717db62ef0a3da5479", + "test_query_gpu_h:md5,a9fca4931be476b8f302cc27b5dff9b0", + "test_query_gpu_h.dbtype:md5,740bab4f9ec8808aedb68d6b1281aeb2", + "test_query_gpu_h.index:md5,ce0ca30c2e57677077cc23823ef17206" + ] + ] + ], + "versions_mmseqs": [ + [ + "MMSEQS_MAKEPADDEDSEQDB", + "mmseqs", + "18.8cc5c" + ] + ] + } + ], + "timestamp": "2026-02-25T10:33:19.910807101", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.04.6" + } + } +} \ No newline at end of file diff --git a/modules/nf-core/mmseqs/makepaddedseqdb/tests/nextflow.config b/modules/nf-core/mmseqs/makepaddedseqdb/tests/nextflow.config new file mode 100644 index 000000000..735a3eb09 --- /dev/null +++ b/modules/nf-core/mmseqs/makepaddedseqdb/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: "MMSEQS_MAKEPADDEDSEQDB" { + ext.prefix = params.module_prefix + } +} From 2e8b218fc1e55efe70674896bc53b8c2f91e7f14 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Thu, 26 Feb 2026 14:10:15 +1100 Subject: [PATCH 12/53] feat(mmseqs): add mmseqs/makepaddedseqdb to modules.json --- modules.json | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/modules.json b/modules.json index 387857a74..baca22a62 100644 --- a/modules.json +++ b/modules.json @@ -27,6 +27,11 @@ "git_sha": "38697a933bef7041bb935c9b8374d9948ce6c794", "installed_by": ["modules"] }, + "mmseqs/makepaddedseqdb": { + "branch": "master", + "git_sha": "40a31ec6d922ce6022a3a78f9f1e832b33de6a6d", + "installed_by": ["modules"] + }, "mmseqs/tsv2exprofiledb": { "branch": "master", "git_sha": "151460db852d636979d9ff3ee631e2268060d4c3", From 
1cc54458e01e06b0dc96545fd75d7754723e3845 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Thu, 26 Feb 2026 14:13:10 +1100 Subject: [PATCH 13/53] feat(mmseqs): update include to point to new module --- subworkflows/local/prepare_colabfold_dbs.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/subworkflows/local/prepare_colabfold_dbs.nf b/subworkflows/local/prepare_colabfold_dbs.nf index 0aee34dd2..ae320278f 100644 --- a/subworkflows/local/prepare_colabfold_dbs.nf +++ b/subworkflows/local/prepare_colabfold_dbs.nf @@ -1,9 +1,9 @@ // // Download all the required databases and params by Colabfold // -include { MMSEQS_CREATEINDEX as MMSEQS_CREATEINDEX_COLABFOLDDB } from '../../modules/nf-core/mmseqs/createindex/main' -include { MMSEQS_CREATEINDEX as MMSEQS_CREATEINDEX_UNIPROT30 } from '../../modules/nf-core/mmseqs/createindex/main' -include { MMSEQS_CREATEINDEX as MMSEQS_CREATEINDEX_UNIPROT30_PADDED } from '../../modules/nf-core/mmseqs/createindex/main' // TODO: Waiting on RP merge in nf-core/modules +include { MMSEQS_CREATEINDEX as MMSEQS_CREATEINDEX_COLABFOLDDB } from '../../modules/nf-core/mmseqs/createindex/main' +include { MMSEQS_CREATEINDEX as MMSEQS_CREATEINDEX_UNIPROT30 } from '../../modules/nf-core/mmseqs/createindex/main' +include { MMSEQS_MAKEPADDEDSEQDB as MMSEQS_CREATEINDEX_UNIPROT30_PADDED } from '../../modules/nf-core/mmseqs/createindex/main' include { ARIA2_UNCOMPRESS as ARIA2_COLABFOLD_PARAMS } from './aria2_uncompress' include { ARIA2_UNCOMPRESS as ARIA2_COLABFOLD_DB } from './aria2_uncompress' From 585efc3b7ae7703e24c68434d9b289f494dcaf6c Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Thu, 26 Feb 2026 14:15:53 +1100 Subject: [PATCH 14/53] fix(path): fix path to mmseqs/makepaddedseqdb --- subworkflows/local/prepare_colabfold_dbs.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/prepare_colabfold_dbs.nf b/subworkflows/local/prepare_colabfold_dbs.nf index ae320278f..93cfbbbde 100644 --- 
a/subworkflows/local/prepare_colabfold_dbs.nf +++ b/subworkflows/local/prepare_colabfold_dbs.nf @@ -3,7 +3,7 @@ // include { MMSEQS_CREATEINDEX as MMSEQS_CREATEINDEX_COLABFOLDDB } from '../../modules/nf-core/mmseqs/createindex/main' include { MMSEQS_CREATEINDEX as MMSEQS_CREATEINDEX_UNIPROT30 } from '../../modules/nf-core/mmseqs/createindex/main' -include { MMSEQS_MAKEPADDEDSEQDB as MMSEQS_CREATEINDEX_UNIPROT30_PADDED } from '../../modules/nf-core/mmseqs/createindex/main' +include { MMSEQS_MAKEPADDEDSEQDB as MMSEQS_CREATEINDEX_UNIPROT30_PADDED } from '../../modules/nf-core/mmseqs/makepaddedseqdb/main' include { ARIA2_UNCOMPRESS as ARIA2_COLABFOLD_PARAMS } from './aria2_uncompress' include { ARIA2_UNCOMPRESS as ARIA2_COLABFOLD_DB } from './aria2_uncompress' From b0bed32173860b807a963f40835452ad92dff602 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Thu, 26 Feb 2026 14:18:52 +1100 Subject: [PATCH 15/53] fix(mmseqs): change name from createindex to makepaddedseqdb --- subworkflows/local/prepare_colabfold_dbs.nf | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/subworkflows/local/prepare_colabfold_dbs.nf b/subworkflows/local/prepare_colabfold_dbs.nf index 93cfbbbde..da1b12f3f 100644 --- a/subworkflows/local/prepare_colabfold_dbs.nf +++ b/subworkflows/local/prepare_colabfold_dbs.nf @@ -1,9 +1,9 @@ // // Download all the required databases and params by Colabfold // -include { MMSEQS_CREATEINDEX as MMSEQS_CREATEINDEX_COLABFOLDDB } from '../../modules/nf-core/mmseqs/createindex/main' -include { MMSEQS_CREATEINDEX as MMSEQS_CREATEINDEX_UNIPROT30 } from '../../modules/nf-core/mmseqs/createindex/main' -include { MMSEQS_MAKEPADDEDSEQDB as MMSEQS_CREATEINDEX_UNIPROT30_PADDED } from '../../modules/nf-core/mmseqs/makepaddedseqdb/main' +include { MMSEQS_CREATEINDEX as MMSEQS_CREATEINDEX_COLABFOLDDB } from '../../modules/nf-core/mmseqs/createindex/main' +include { MMSEQS_CREATEINDEX as MMSEQS_CREATEINDEX_UNIPROT30 } from 
'../../modules/nf-core/mmseqs/createindex/main' +include { MMSEQS_MAKEPADDEDSEQDB as MMSEQS_MAKEPADDEDSEQDB_UNIPROT30_PADDED } from '../../modules/nf-core/mmseqs/makepaddedseqdb/main' include { ARIA2_UNCOMPRESS as ARIA2_COLABFOLD_PARAMS } from './aria2_uncompress' include { ARIA2_UNCOMPRESS as ARIA2_COLABFOLD_DB } from './aria2_uncompress' @@ -114,7 +114,7 @@ workflow PREPARE_COLABFOLD_DBS { if (colabfold_enable_gpu_search) { // TODO: Blocked and awaiting PR merge in nf-core/modules - MMSEQS_CREATEINDEX_UINPROT30_PADDED( + MMSEQS_MAKEPADDEDSEQDB_UINPROT30_PADDED( ch_uniref30. map { path_str -> def db_file = file(path_str) @@ -122,7 +122,7 @@ workflow PREPARE_COLABFOLD_DBS { } ) - ch_uniprot30_padded = MMSEQS_CREATEINDEX_UNIPROT30_PADDED + ch_uniprot30_padded = MMSEQS_MAKEPADDEDSEQDB_UNIPROT30_PADDED .out .db_padded .map { _meta, dir -> From 7c67723eeb4ce2ef3cc9a658198d842f0600c77e Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Thu, 26 Feb 2026 14:23:53 +1100 Subject: [PATCH 16/53] feat(mmseqs): add default param value --- nextflow.config | 1 + 1 file changed, 1 insertion(+) diff --git a/nextflow.config b/nextflow.config index 12161806b..f3980c252 100644 --- a/nextflow.config +++ b/nextflow.config @@ -119,6 +119,7 @@ params { // Colabfold paths colabfold_envdb_path = null colabfold_uniref30_path = null + colabfold_uniref30_path_padded = null // Esmfold parameters esmfold_db = null From 5c0a5e345dc9b0b2f586169e2071ca3d0efca2c0 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 27 Feb 2026 11:19:30 +1100 Subject: [PATCH 17/53] fix(mmseqs): swap in padded databases if gpu support is enabled --- subworkflows/local/prepare_colabfold_dbs.nf | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/subworkflows/local/prepare_colabfold_dbs.nf b/subworkflows/local/prepare_colabfold_dbs.nf index da1b12f3f..0f2bb3cfc 100644 --- a/subworkflows/local/prepare_colabfold_dbs.nf +++ b/subworkflows/local/prepare_colabfold_dbs.nf @@ -34,9 +34,13 @@ workflow 
PREPARE_COLABFOLD_DBS { if (colabfold_db) { ch_params = channel.value(file(colabfold_alphafold2_params_path, type: 'any')) if (!use_msa_server) { + println colabfold_envdb_path ch_colabfold_db = channel.value(file(colabfold_envdb_path, type: 'any')) ch_uniref30 = channel.value(file(colabfold_uniref30_path, type: 'any')) } + if (colabfold_enable_gpu_search) { + ch_uniref30_padded = channel.value(file(colabfold_uniref30_path_padded, type: 'any')) + } } else { ARIA2_COLABFOLD_PARAMS ( @@ -112,23 +116,6 @@ workflow PREPARE_COLABFOLD_DBS { } } - if (colabfold_enable_gpu_search) { - // TODO: Blocked and awaiting PR merge in nf-core/modules - MMSEQS_MAKEPADDEDSEQDB_UINPROT30_PADDED( - ch_uniref30. - map { path_str -> - def db_file = file(path_str) - [ [id: "uniprot30_gpu"], db_file ] - } - ) - - ch_uniprot30_padded = MMSEQS_MAKEPADDEDSEQDB_UNIPROT30_PADDED - .out - .db_padded - .map { _meta, dir -> - file("${dir}/*") - } - } emit: params = ch_params From eb30ae59f56aa18b841ab059acaadf54ef330b7d Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 27 Feb 2026 11:21:24 +1100 Subject: [PATCH 18/53] fix(mmseqs): enable gpu by default for testing --- modules/local/mmseqs_colabfoldsearch/main.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/local/mmseqs_colabfoldsearch/main.nf b/modules/local/mmseqs_colabfoldsearch/main.nf index 5cd2b362a..0319ef798 100644 --- a/modules/local/mmseqs_colabfoldsearch/main.nf +++ b/modules/local/mmseqs_colabfoldsearch/main.nf @@ -27,6 +27,7 @@ process MMSEQS_COLABFOLDSEARCH { """ colabfold_search \\ $args \\ + --gpu 1 \\ --threads $task.cpus ${fasta} \\ ./db \\ --af3-json \\ From d93255fcafb53164c197d0cf8c401600c310790a Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 27 Feb 2026 11:26:30 +1100 Subject: [PATCH 19/53] feat(mmseqs): add input to enable and disable GPU search --- modules/local/mmseqs_colabfoldsearch/main.nf | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git 
a/modules/local/mmseqs_colabfoldsearch/main.nf b/modules/local/mmseqs_colabfoldsearch/main.nf index 0319ef798..a80f9bb20 100644 --- a/modules/local/mmseqs_colabfoldsearch/main.nf +++ b/modules/local/mmseqs_colabfoldsearch/main.nf @@ -3,12 +3,13 @@ process MMSEQS_COLABFOLDSEARCH { label 'process_high_memory' label 'process_high' - container "nf-core/proteinfold_mmseqs_colabfoldsearch:2.0.0" + container "/home/z3545907/mmseqs_colabfoldsearch.sif" input: tuple val(meta), path(fasta) path ('db/*') path ('db/*') + val colabfold_enable_gpu_search output: tuple val(meta), path("**.a3m"), emit: a3m @@ -25,9 +26,13 @@ process MMSEQS_COLABFOLDSEARCH { def args = task.ext.args ?: '' """ + GPU_ARG="" + if [ "${colabfold_enable_gpu_search}" == "1" ]; then + GPU_ARG="--gpu 1" + fi colabfold_search \\ $args \\ - --gpu 1 \\ + \${GPU_ARG} \\ --threads $task.cpus ${fasta} \\ ./db \\ --af3-json \\ From a40831a6c7f97bfab2b9338517e4e4ec242f781e Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 27 Feb 2026 11:29:25 +1100 Subject: [PATCH 20/53] feat(mmseqs): add check to enable gpu search --- workflows/colabfold.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/workflows/colabfold.nf b/workflows/colabfold.nf index f085c0c7a..9fcbeded9 100644 --- a/workflows/colabfold.nf +++ b/workflows/colabfold.nf @@ -71,7 +71,8 @@ workflow COLABFOLD { MMSEQS_COLABFOLDSEARCH ( MULTIFASTA_TO_CSV.out.input_csv, ch_colabfold_db, - ch_uniref30 + ch_uniref30, + params.colabfold_enable_gpu_search ?: "1", "0" ) ch_versions = ch_versions.mix(MMSEQS_COLABFOLDSEARCH.out.versions) From 58eb3fd53f8b07950c8e8fb0eb3f5f48ae1c6716 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 27 Feb 2026 11:30:20 +1100 Subject: [PATCH 21/53] feat(mmseqs): add colabfold padded dbs path to config file --- nextflow.config | 1 + 1 file changed, 1 insertion(+) diff --git a/nextflow.config b/nextflow.config index f3980c252..8fc197136 100644 --- a/nextflow.config +++ b/nextflow.config @@ -118,6 +118,7 @@ params { // 
Colabfold paths colabfold_envdb_path = null + colabfold_envdb_path_padded = null colabfold_uniref30_path = null colabfold_uniref30_path_padded = null From 29a73251bb5cf35d85f84ab6c7e3d19eacbfdbb5 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 27 Feb 2026 11:34:20 +1100 Subject: [PATCH 22/53] feat(mmseqs): add colabfold envdb padded path to prepare dbs input --- subworkflows/local/prepare_colabfold_dbs.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/subworkflows/local/prepare_colabfold_dbs.nf b/subworkflows/local/prepare_colabfold_dbs.nf index 0f2bb3cfc..424c33949 100644 --- a/subworkflows/local/prepare_colabfold_dbs.nf +++ b/subworkflows/local/prepare_colabfold_dbs.nf @@ -16,6 +16,7 @@ workflow PREPARE_COLABFOLD_DBS { use_msa_server // bool: Specifies whether to use web msa server colabfold_alphafold2_params_path // directory: /path/to/colabfold/alphafold2/params/ colabfold_envdb_path // directory: /path/to/colabfold/db/ + colabfold_envdb_path_padded // directory: /path/to/colabfold/db/ colabfold_uniref30_path // directory: /path/to/uniref30/colabfold/ colabfold_uniref30_path_padded // directory: /path/to/uniref30/colabfold_padded/ colabfold_enable_gpu_search // boolean: Enable GPU accelerated search (collect or create GPU padded databases) From afe7ae9bfc5b68d281c580f5babf2f743f97ecc2 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 27 Feb 2026 11:34:45 +1100 Subject: [PATCH 23/53] feat(mmseqs): collect padded colabfold envdbs if gpu search enabled --- subworkflows/local/prepare_colabfold_dbs.nf | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/subworkflows/local/prepare_colabfold_dbs.nf b/subworkflows/local/prepare_colabfold_dbs.nf index 424c33949..6c6fc3048 100644 --- a/subworkflows/local/prepare_colabfold_dbs.nf +++ b/subworkflows/local/prepare_colabfold_dbs.nf @@ -26,21 +26,22 @@ workflow PREPARE_COLABFOLD_DBS { colabfold_create_index // boolean: Create index for colabfold db main: - ch_params = channel.empty() - 
ch_colabfold_db = channel.empty() - ch_uniref30 = channel.empty() - ch_uniref30_padded = channel.empty() - ch_versions = channel.empty() + ch_params = channel.empty() + ch_colabfold_db = channel.empty() + ch_colabfold_db_padded = channel.empty() + ch_uniref30 = channel.empty() + ch_uniref30_padded = channel.empty() + ch_versions = channel.empty() if (colabfold_db) { ch_params = channel.value(file(colabfold_alphafold2_params_path, type: 'any')) if (!use_msa_server) { - println colabfold_envdb_path ch_colabfold_db = channel.value(file(colabfold_envdb_path, type: 'any')) ch_uniref30 = channel.value(file(colabfold_uniref30_path, type: 'any')) } if (colabfold_enable_gpu_search) { ch_uniref30_padded = channel.value(file(colabfold_uniref30_path_padded, type: 'any')) + ch_colabfold_db_padded = channel.value(file(colabfold_envdb_path_padded, type: 'any')) } } else { From 952e80c279acbbbb1b7f98c920180ab3f4df2dea Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 27 Feb 2026 11:35:06 +1100 Subject: [PATCH 24/53] feat(mmseqs): emit padded db channel --- subworkflows/local/prepare_colabfold_dbs.nf | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/subworkflows/local/prepare_colabfold_dbs.nf b/subworkflows/local/prepare_colabfold_dbs.nf index 6c6fc3048..adcc82a21 100644 --- a/subworkflows/local/prepare_colabfold_dbs.nf +++ b/subworkflows/local/prepare_colabfold_dbs.nf @@ -120,9 +120,10 @@ workflow PREPARE_COLABFOLD_DBS { emit: - params = ch_params - colabfold_db = ch_colabfold_db - uniref30 = ch_uniref30 - uniref30_padded = ch_uniref30_padded - versions = ch_versions + params = ch_params + colabfold_db = ch_colabfold_db + colabfold_db_padded = ch_colabfold_db_padded + uniref30 = ch_uniref30 + uniref30_padded = ch_uniref30_padded + versions = ch_versions } From 18d581715cca6dce48fa53f520041dae06855773 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 27 Feb 2026 11:36:17 +1100 Subject: [PATCH 25/53] feat(mmseqs): add colabfold envdbs padded param to 
prepare dbs input --- main.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/main.nf b/main.nf index 46ca18194..72edfad4c 100644 --- a/main.nf +++ b/main.nf @@ -247,6 +247,7 @@ workflow NFCORE_PROTEINFOLD { params.use_msa_server, params.colabfold_alphafold2_params_path, params.colabfold_envdb_path, + params.colabfold_envdb_path_padded, params.colabfold_uniref30_path, params.colabfold_uniref30_path_padded, params.colabfold_enable_gpu_search, From 6ff19b1bf0d82855c42f235c90907a9cecfc9e95 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 27 Feb 2026 11:37:10 +1100 Subject: [PATCH 26/53] feat(mmseqs): swap in padded dbs if gpu search enabled --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 72edfad4c..946fc31cc 100644 --- a/main.nf +++ b/main.nf @@ -267,7 +267,7 @@ workflow NFCORE_PROTEINFOLD { ch_versions, params.colabfold_model_preset, PREPARE_COLABFOLD_DBS.out.params, - PREPARE_COLABFOLD_DBS.out.colabfold_db, + PREPARE_COLABFOLD_DBS.out.colabfold_db_padded, PREPARE_COLABFOLD_DBS.out.uniref30_padded, params.colabfold_num_recycles ) From 83ec51cef2800d31721afe68e486dea19510cfcc Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 27 Feb 2026 11:48:35 +1100 Subject: [PATCH 27/53] fix(mmseqs): fix syntax error --- workflows/colabfold.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/colabfold.nf b/workflows/colabfold.nf index 9fcbeded9..6aebd67aa 100644 --- a/workflows/colabfold.nf +++ b/workflows/colabfold.nf @@ -72,7 +72,7 @@ workflow COLABFOLD { MULTIFASTA_TO_CSV.out.input_csv, ch_colabfold_db, ch_uniref30, - params.colabfold_enable_gpu_search ?: "1", "0" + params.colabfold_enable_gpu_search ? 
"1" : "0" ) ch_versions = ch_versions.mix(MMSEQS_COLABFOLDSEARCH.out.versions) From ce9959cfa4349dd8cd08727c7da7048a29962add Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 27 Feb 2026 12:00:26 +1100 Subject: [PATCH 28/53] feat(mmseqs): add default database prefixes to dbs.config --- conf/dbs.config | 2 ++ 1 file changed, 2 insertions(+) diff --git a/conf/dbs.config b/conf/dbs.config index 684548dfa..ea7441657 100644 --- a/conf/dbs.config +++ b/conf/dbs.config @@ -83,7 +83,9 @@ params { // Colabfold paths colabfold_envdb_path = "${params.colabfold_db}/colabfold_envdb/*" + colabfold_envdb_path = "${params.colabfold_db}/colabfold_envdb_padded/*" colabfold_uniref30_path = "${params.colabfold_db}/colabfold_uniref30/*" + colabfold_uniref30_path = "${params.colabfold_db}/colabfold_uniref30_padded/*" // Are all these params options needed? colabfold_alphafold2_params_tags = [ "alphafold2_multimer_v1" : "alphafold_params_colab_2021-10-27", From 32453a2a1b67e070d7d2c2b0a8ad3cd9c6665ee3 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 27 Feb 2026 12:46:37 +1100 Subject: [PATCH 29/53] fix(mmseqs): fix config variable names --- conf/dbs.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/dbs.config b/conf/dbs.config index ea7441657..789d057ca 100644 --- a/conf/dbs.config +++ b/conf/dbs.config @@ -83,9 +83,9 @@ params { // Colabfold paths colabfold_envdb_path = "${params.colabfold_db}/colabfold_envdb/*" - colabfold_envdb_path = "${params.colabfold_db}/colabfold_envdb_padded/*" + colabfold_envdb_path_padded = "${params.colabfold_db}/colabfold_envdb_padded/*" colabfold_uniref30_path = "${params.colabfold_db}/colabfold_uniref30/*" - colabfold_uniref30_path = "${params.colabfold_db}/colabfold_uniref30_padded/*" + colabfold_uniref30_path_padded = "${params.colabfold_db}/colabfold_uniref30_padded/*" // Are all these params options needed? 
colabfold_alphafold2_params_tags = [ "alphafold2_multimer_v1" : "alphafold_params_colab_2021-10-27", From 45c09f257ec31dad8486189a103f47b061b56936 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 27 Feb 2026 13:04:18 +1100 Subject: [PATCH 30/53] fix(mmseqs): update docker path to temporary docker registry --- modules/local/mmseqs_colabfoldsearch/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/mmseqs_colabfoldsearch/main.nf b/modules/local/mmseqs_colabfoldsearch/main.nf index a80f9bb20..c27cb61db 100644 --- a/modules/local/mmseqs_colabfoldsearch/main.nf +++ b/modules/local/mmseqs_colabfoldsearch/main.nf @@ -3,7 +3,7 @@ process MMSEQS_COLABFOLDSEARCH { label 'process_high_memory' label 'process_high' - container "/home/z3545907/mmseqs_colabfoldsearch.sif" + container "docker.io/nbtmsh/mmseqs_colabfoldsearch:latest" input: tuple val(meta), path(fasta) From d1ee15cda6d5dc0f3eb17806ecd42d9a9b7a2937 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 27 Feb 2026 13:39:03 +1100 Subject: [PATCH 31/53] feat(mmseqs): update changelog to include contributions --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d7f77b6d3..eaaa24cdf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -113,6 +113,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [[PR #492](https://github.com/nf-core/proteinfold/pulls/492)] - Clean TODOs from code and create issues instead for 2.0.0 release preparation. - [[PR #493](https://github.com/nf-core/proteinfold/pulls/493)] - Standardise Dockerfiles labels and bump version 2.0.0 to prepare release. - [[#494](https://github.com/nf-core/proteinfold/issues/494)] - Publish Colabfold DBs when downloaded to be directly consumable using `colabfold_db` parameter. +- [[PR #497](https://github.com/nf-core/proteinfold/pull/497)] - Add MMseqs Colabfold Search GPU Support. 
### Parameters From f2b0600fbd0fba098be0393b08e568965d81a872 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 27 Feb 2026 13:41:51 +1100 Subject: [PATCH 32/53] feat(mmseqs): update usage documentation to include colabfold_enable_gpu_search flag --- docs/usage.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/usage.md b/docs/usage.md index 0bd3680f0..f7f53e98d 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -183,7 +183,7 @@ To provide the predownloaded AlphaFold3 databases and parameters you can specify -Colabfold mode can be used with local database search using the following command: +Colabfold mode can be used with local database search. GPU MSA search is enabled by default. You may disable GPU MSA search with `--colabfold_enable_gpu_search false`: ```bash nextflow run nf-core/proteinfold \ From 96812dba177a888503f46211d088d143e98589f7 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 27 Feb 2026 13:46:09 +1100 Subject: [PATCH 33/53] feat(mmseqs): update nextflow schema to include new flags --- nextflow_schema.json | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 40b72bdd2..c0094dc9a 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -1364,5 +1364,19 @@ { "$ref": "#/$defs/helixfold3_dbs_and_parameters_paths_options" } - ] + ], + "properties": { + "colabfold_enable_gpu_search": { + "type": "boolean", + "default": true + }, + "colabfold_envdb_path_padded": { + "type": "string", + "default": "null/colabfold_envdb_padded/*" + }, + "colabfold_uniref30_path_padded": { + "type": "string", + "default": "null/colabfold_uniref30_padded/*" + } + } } From 7e168d51d75fa07fb394fd282448d822b533e852 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 27 Feb 2026 14:04:01 +1100 Subject: [PATCH 34/53] feat(mmseqs): update existing tests to disable gpu search --- tests/colabfold_local.nf.test | 1 + 1 file changed, 1 insertion(+) diff --git
a/tests/colabfold_local.nf.test b/tests/colabfold_local.nf.test index 8b6d1a511..68afbf55d 100644 --- a/tests/colabfold_local.nf.test +++ b/tests/colabfold_local.nf.test @@ -11,6 +11,7 @@ nextflow_pipeline { when { params { outdir = "$outputDir" + colabfold_enable_gpu_search = false } } From ab3f11a0595f83e6f410e93d34d431241ed44aee Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 27 Feb 2026 14:04:22 +1100 Subject: [PATCH 35/53] feat(mmseqs): add new test where GPU search is enabled --- tests/colabfold_local_gpu.nf.test | 38 +++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 tests/colabfold_local_gpu.nf.test diff --git a/tests/colabfold_local_gpu.nf.test b/tests/colabfold_local_gpu.nf.test new file mode 100644 index 000000000..8b6d1a511 --- /dev/null +++ b/tests/colabfold_local_gpu.nf.test @@ -0,0 +1,38 @@ +nextflow_pipeline { + + name "Test colabfold local mode stub" + script "../main.nf" + tag "pipeline" + tag "test_colabfold_local" + profile "test_colabfold_local" + + test("-profile test_colabfold_local") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + // Early failure no need to test the rest of snapshots + assert workflow.success + assertAll( + { assert snapshot( + // Number of successful tasks + workflow.trace.succeeded().size(), + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we test pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/nf_core_proteinfold_software_mqc_versions.yml"), + // All stable path name, with a relative path + stable_name, + // All files with 
stable contents + stable_path + ).match() } + ) + } + } +} From 42089559f809cdb32219e19be2dece1d0caae5e7 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 27 Feb 2026 14:19:17 +1100 Subject: [PATCH 36/53] revert(mmseqs): disable gpu search by default --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 51872f0e0..dec78b162 100644 --- a/nextflow.config +++ b/nextflow.config @@ -110,7 +110,7 @@ params { colabfold_db_load_mode = 0 colabfold_use_templates = false colabfold_create_index = false - colabfold_enable_gpu_search = true + colabfold_enable_gpu_search = false // Colabfold links colabfold_db_link = null From 4a823e322df9d977df06f7b867952e9fa5ef8592 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Wed, 11 Mar 2026 09:57:15 +1100 Subject: [PATCH 37/53] feat(gpu-docs): add inital documentation for gpu databases --- docs/gpu-dbs.md | 80 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 docs/gpu-dbs.md diff --git a/docs/gpu-dbs.md b/docs/gpu-dbs.md new file mode 100644 index 000000000..11a9e6770 --- /dev/null +++ b/docs/gpu-dbs.md @@ -0,0 +1,80 @@ +# Using padded databases in proteinfold +Proteinfold can make use of GPU MSA search for faster searching. However, this requires creating padded databases for the GPU hardware you wish to use. + +## Requirements +- mmseqs-gpu +- uniref30 database +- colabfold envdb database +- NVIDIA Ampere GPU or newer +- CUDA 12.4 or newer + +## Database structure +Proteinfold can make use of the `--db` flag to load in all required databases. In order to load the padded databases, the database should be structured as such. Below is a truncated version of the database tree. It is important to note that the padded database files have the same prefix as the CPU files. +``` +. 
+├── boltz1.ckpt +├── ccd.pkl +├── colabfold_envdb +│   ├── colabfold_envdb_202108_sample_h.tsv +│   ├── colabfold_envdb_202108_sample_seq.tsv +│   └── colabfold_envdb_202108_sample.tsv +├── colabfold_envdb_padded +│   ├── colabfold_envdb_202108_db_seq_h.index +│   ├── colabfold_envdb_202108_db_seq.index +│   └── colabfold_envdb_202108_db_seq.lookup +├── colabfold_uniref30 +│   ├── uniref30_2302_db_seq_h.dbtype +│   ├── uniref30_2302_db_seq_h.index +│   ├── uniref30_2302_db_seq.index +│   └── uniref30_2302_db_taxonomy +├── colabfold_uniref30_gpu +│   ├── uniref30_2302_db.idx.index +│   ├── uniref30_2302_db.index +│   └── uniref30_2302_db.lookup +├── colabfold_uniref30.old +│   ├── uniref30_2302_db_seq.7 +│   ├── uniref30_2302_db_seq.dbtype +│   └── uniref30_2302_db_seq.index +├── colabfold_uniref30_padded +│   ├── uniref30_2302_db_seq_h.index +│   ├── uniref30_2302_db_seq.index +│   └── uniref30_2302_db_taxonomy +├── mgnify +│   └── mgy_clusters.fa +├── pdb100 +│   ├── pdb100_2021Mar03_pdb.ffdata +│   └── pdb100_2021Mar03_pdb.ffindex +├── pdb70 +│   ├── pdb70_hhm.ffindex +│   └── pdb_filter.dat +├── pdb_mmcif +│   ├── mmcif_files +│   └── obsolete.dat +├── pdb_seqres +│   └── pdb_seqres.txt +├── rfam +│   └── Rfam-14.9_rep_seq.fasta +├── small_bfd +│   └── bfd-first_non_consensus_sequences.fasta +├── uniprot +│   └── uniprot.fasta +├── uniref30 +│   └── UniRef30_2023_02_hhm.ffindex +└── uniref90 + └── uniref90.fasta +``` + +## Obtaining MMseqs-GPU +MMseqs has two x86 builds available for Linux. This requires the GPU version of MMseqs. It can be obtained via this command: + +```bash +wget https://github.com/soedinglab/MMseqs2/releases/download/18-8cc5c/mmseqs-linux-gpu.tar.gz +tar xvf mmseqs-linux-gpu.tar.gz +``` + +## Downloading UniRef30 Database +Firstly, you must obtain the UniRef database. The database file is approx. 55GB. 
+```bash +wget https://opendata.mmseqs.org/colabfold/uniref30_2302.db.tar.gz +tar xvf uniref30_2302.db.tar.gz +``` From c1e51de49284d44724dd690dc5f10df855cbe478 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Wed, 11 Mar 2026 13:12:13 +1100 Subject: [PATCH 38/53] feat(gpu): update gpu databases --- docs/gpu-dbs.md | 152 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 151 insertions(+), 1 deletion(-) diff --git a/docs/gpu-dbs.md b/docs/gpu-dbs.md index 11a9e6770..07c69a509 100644 --- a/docs/gpu-dbs.md +++ b/docs/gpu-dbs.md @@ -73,8 +73,158 @@ tar xvf mmseqs-linux-gpu.tar.gz ``` ## Downloading UniRef30 Database -Firstly, you must obtain the UniRef database. The database file is approx. 55GB. +Firstly, you must obtain the UniRef database. The database file is approx. 55GB. You may be able to get faster downloads by using `aria2c` with the `-x 8` option. ```bash wget https://opendata.mmseqs.org/colabfold/uniref30_2302.db.tar.gz tar xvf uniref30_2302.db.tar.gz ``` + +## Downloading the Colabfold EnvDB +Next, you will need to download the Colabfold EnvDB. This database is approx. 120GB. +```bash +wget https://opendata.mmseqs.org/colabfold/colabfold_envdb_202108.db.tar.gz +tar xvf colabfold_envdb_202108.db.tar.gz + +``` + +## CPU Database structure +By now, your directory structure should look something like this +[T.B.D.] + +## Create padded database +Next, we need to create the padded databases. For this, it is recommended to duplicate the databases. 
+```bash +mkdir colabfold_uniref30_padded +mmseqs makepaddedseqdb ./colabfold_uniref30/uniref30_2302_db_seq ./colabfold_uniref30_padded/uniref30_2302_db_seq +mmseqs makepaddedseqdb ./colabfold_uniref30/uniref30_2302_db ./colabfold_uniref30_padded/uniref30_2302_db +mkdir colabfold_envdb_padded +~/app/mmseqs/mmseqs/bin/mmseqs makepaddedseqdb ./colabfold_envdb/colabfold_envdb_202108_db ./colabfold_envdb_padded/colabfold_envdb_202108_db +~/app/mmseqs/mmseqs/bin/mmseqs makepaddedseqdb ./colabfold_envdb/colabfold_envdb_202108_db_seq ./colabfold_envdb_padded/colabfold_envdb_202108_db_seq +cp ./colabfold_envdb/colabfold_envdb_202108_db_aln.* ./colabfold_envdb_padded/ +``` + +You should now have a directory structure that looks something similar to this +``` +. +├── colabfold_envdb +│   ├── colabfold_envdb_202108_db.0 +│   ├── colabfold_envdb_202108_db.1 +│   ├── colabfold_envdb_202108_db.10 +│   ├── colabfold_envdb_202108_db.11 +│   ├── colabfold_envdb_202108_db.12 +│   ├── colabfold_envdb_202108_db.13 +│   ├── colabfold_envdb_202108_db.14 +│   ├── colabfold_envdb_202108_db.15 +│   ├── colabfold_envdb_202108_db.2 +│   ├── colabfold_envdb_202108_db.3 +│   ├── colabfold_envdb_202108_db.4 +│   ├── colabfold_envdb_202108_db.5 +│   ├── colabfold_envdb_202108_db.6 +│   ├── colabfold_envdb_202108_db.7 +│   ├── colabfold_envdb_202108_db.8 +│   ├── colabfold_envdb_202108_db.9 +│   ├── colabfold_envdb_202108_db_aln.0 +│   ├── colabfold_envdb_202108_db_aln.1 +│   ├── colabfold_envdb_202108_db_aln.10 +│   ├── colabfold_envdb_202108_db_aln.11 +│   ├── colabfold_envdb_202108_db_aln.12 +│   ├── colabfold_envdb_202108_db_aln.13 +│   ├── colabfold_envdb_202108_db_aln.14 +│   ├── colabfold_envdb_202108_db_aln.15 +│   ├── colabfold_envdb_202108_db_aln.2 +│   ├── colabfold_envdb_202108_db_aln.3 +│   ├── colabfold_envdb_202108_db_aln.4 +│   ├── colabfold_envdb_202108_db_aln.5 +│   ├── colabfold_envdb_202108_db_aln.6 +│   ├── colabfold_envdb_202108_db_aln.7 +│   ├── 
colabfold_envdb_202108_db_aln.8 +│   ├── colabfold_envdb_202108_db_aln.9 +│   ├── colabfold_envdb_202108_db_aln.dbtype +│   ├── colabfold_envdb_202108_db_aln.index +│   ├── colabfold_envdb_202108_db.dbtype +│   ├── colabfold_envdb_202108_db_h +│   ├── colabfold_envdb_202108_db_h.dbtype +│   ├── colabfold_envdb_202108_db_h.index +│   ├── colabfold_envdb_202108_db.idx +│   ├── colabfold_envdb_202108_db.idx.dbtype +│   ├── colabfold_envdb_202108_db.idx.index +│   ├── colabfold_envdb_202108_db.index +│   ├── colabfold_envdb_202108_db_seq.0 +│   ├── colabfold_envdb_202108_db_seq.1 +│   ├── colabfold_envdb_202108_db_seq.10 +│   ├── colabfold_envdb_202108_db_seq.11 +│   ├── colabfold_envdb_202108_db_seq.12 +│   ├── colabfold_envdb_202108_db_seq.13 +│   ├── colabfold_envdb_202108_db_seq.14 +│   ├── colabfold_envdb_202108_db_seq.15 +│   ├── colabfold_envdb_202108_db_seq.2 +│   ├── colabfold_envdb_202108_db_seq.3 +│   ├── colabfold_envdb_202108_db_seq.4 +│   ├── colabfold_envdb_202108_db_seq.5 +│   ├── colabfold_envdb_202108_db_seq.6 +│   ├── colabfold_envdb_202108_db_seq.7 +│   ├── colabfold_envdb_202108_db_seq.8 +│   ├── colabfold_envdb_202108_db_seq.9 +│   ├── colabfold_envdb_202108_db_seq.dbtype +│   ├── colabfold_envdb_202108_db_seq_h +│   ├── colabfold_envdb_202108_db_seq_h.dbtype +│   ├── colabfold_envdb_202108_db_seq_h.index +│   ├── colabfold_envdb_202108_db_seq.index +│   ├── colabfold_envdb_202108_sample_aln.tsv +│   ├── colabfold_envdb_202108_sample_h.tsv +│   ├── colabfold_envdb_202108_sample_seq.tsv +│   └── colabfold_envdb_202108_sample.tsv +├── colabfold_envdb_padded +│   ├── colabfold_envdb_202108_db +│   ├── colabfold_envdb_202108_db.dbtype +│   ├── colabfold_envdb_202108_db_h +│   ├── colabfold_envdb_202108_db_h.dbtype +│   ├── colabfold_envdb_202108_db_h.index +│   ├── colabfold_envdb_202108_db.index +│   ├── colabfold_envdb_202108_db.lookup +│   ├── colabfold_envdb_202108_db_seq +│   ├── colabfold_envdb_202108_db_seq.dbtype +│   ├── 
colabfold_envdb_202108_db_seq_h +│   ├── colabfold_envdb_202108_db_seq_h.dbtype +│   ├── colabfold_envdb_202108_db_seq_h.index +│   ├── colabfold_envdb_202108_db_seq.index +│   └── colabfold_envdb_202108_db_seq.lookup +├── colabfold_uniref30 +│   ├── uniref30_2302_db +│   ├── uniref30_2302_db_aln +│   ├── uniref30_2302_db_aln.dbtype +│   ├── uniref30_2302_db_aln.index +│   ├── uniref30_2302_db.dbtype +│   ├── uniref30_2302_db.GPU_READY +│   ├── uniref30_2302_db_h +│   ├── uniref30_2302_db_h.dbtype +│   ├── uniref30_2302_db_h.index +│   ├── uniref30_2302_db.idx +│   ├── uniref30_2302_db.idx.dbtype +│   ├── uniref30_2302_db.idx.index +│   ├── uniref30_2302_db.index +│   ├── uniref30_2302_db.lookup +│   ├── uniref30_2302_db_mapping +│   ├── uniref30_2302_db_seq +│   ├── uniref30_2302_db_seq.dbtype +│   ├── uniref30_2302_db_seq_h +│   ├── uniref30_2302_db_seq_h.dbtype +│   ├── uniref30_2302_db_seq_h.index +│   ├── uniref30_2302_db_seq.index +│   └── uniref30_2302_db_taxonomy +└── colabfold_uniref30_padded + ├── uniref30_2302_db + ├── uniref30_2302_db.dbtype + ├── uniref30_2302_db_h + ├── uniref30_2302_db_h.dbtype + ├── uniref30_2302_db_h.index + ├── uniref30_2302_db.index + ├── uniref30_2302_db.lookup + ├── uniref30_2302_db_seq + ├── uniref30_2302_db_seq.dbtype + ├── uniref30_2302_db_seq_h + ├── uniref30_2302_db_seq_h.dbtype + ├── uniref30_2302_db_seq_h.index + ├── uniref30_2302_db_seq.index + └── uniref30_2302_db_seq.lookup +``` From eb5afc1ae6a9e020903a5bf89cf3d916bd99a26d Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Wed, 11 Mar 2026 13:30:09 +1100 Subject: [PATCH 39/53] feat(docs): add examples --- docs/gpu-dbs.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/docs/gpu-dbs.md b/docs/gpu-dbs.md index 07c69a509..ebec3f1b4 100644 --- a/docs/gpu-dbs.md +++ b/docs/gpu-dbs.md @@ -228,3 +228,18 @@ You should now have a directory structure that looks something similar to this ├── uniref30_2302_db_seq.index └── uniref30_2302_db_seq.lookup ``` + +## 
Running colabfold +You will need to set the `--colabfold_enable_gpu_search true` flag. Below is an example command you can use to run with GPU search enabled: +```bash +nextflow run ./main.nf \ + --input "samplesheet.csv" \ + --outdir "output" \ + --mode "colabfold" \ + --use_gpu \ + --db /path/to/db/root \ + --use_msa_server false \ + --colabfold_enable_gpu_search true \ + --colabfold_model_preset alphafold2_ptm +``` + From e41c5d865a049f96b9755c47864ba276d94575aa Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Wed, 11 Mar 2026 13:33:15 +1100 Subject: [PATCH 40/53] revert(module): remove mmseqs/makepaddedseqdb --- .../mmseqs/makepaddedseqdb/environment.yml | 7 -- .../nf-core/mmseqs/makepaddedseqdb/main.nf | 51 --------------- .../nf-core/mmseqs/makepaddedseqdb/meta.yml | 64 ------------------- .../mmseqs/makepaddedseqdb/tests/main.nf.test | 49 -------------- .../makepaddedseqdb/tests/main.nf.test.snap | 36 ----------- .../makepaddedseqdb/tests/nextflow.config | 5 -- 6 files changed, 212 deletions(-) delete mode 100644 modules/nf-core/mmseqs/makepaddedseqdb/environment.yml delete mode 100644 modules/nf-core/mmseqs/makepaddedseqdb/main.nf delete mode 100644 modules/nf-core/mmseqs/makepaddedseqdb/meta.yml delete mode 100644 modules/nf-core/mmseqs/makepaddedseqdb/tests/main.nf.test delete mode 100644 modules/nf-core/mmseqs/makepaddedseqdb/tests/main.nf.test.snap delete mode 100644 modules/nf-core/mmseqs/makepaddedseqdb/tests/nextflow.config diff --git a/modules/nf-core/mmseqs/makepaddedseqdb/environment.yml b/modules/nf-core/mmseqs/makepaddedseqdb/environment.yml deleted file mode 100644 index 072223f2a..000000000 --- a/modules/nf-core/mmseqs/makepaddedseqdb/environment.yml +++ /dev/null @@ -1,7 +0,0 @@ ---- -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json -channels: - - conda-forge - - bioconda -dependencies: - - bioconda::mmseqs2=18.8cc5c diff --git a/modules/nf-core/mmseqs/makepaddedseqdb/main.nf 
b/modules/nf-core/mmseqs/makepaddedseqdb/main.nf deleted file mode 100644 index 61857c179..000000000 --- a/modules/nf-core/mmseqs/makepaddedseqdb/main.nf +++ /dev/null @@ -1,51 +0,0 @@ -process MMSEQS_MAKEPADDEDSEQDB { - tag "${meta.id}" - label 'process_low' - conda "${moduleDir}/environment.yml" - - container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container - ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/fe/fe49c17754753d6cd9a31e5894117edaf1c81e3d6053a12bf6dc8f3af1dffe23/data' - : 'community.wave.seqera.io/library/mmseqs2:18.8cc5c--af05c9a98d9f6139'}" - - input: - tuple val(meta), path(db_in) - - output: - tuple val(meta), path("${prefix}/"), emit: db_padded - tuple val("${task.process}"), val('mmseqs'), eval('mmseqs version'), topic: versions, emit: versions_mmseqs - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '*.dbtype' - prefix = task.ext.prefix ?: "${meta.id}" - if ("${db_in}" == "${prefix}") { - error("Input and output names of databases are the same, set prefix in module configuration to disambiguate!") - } - """ - DB_TARGET_PATH_NAME=\$(find -L "${db_in}/" -maxdepth 1 -name "${args2}" | sed 's/\\.[^.]*\$//' | sed -e 'N;s/^\\(.*\\).*\\n\\1.*\$/\\1\\n\\1/;D' ) - mkdir -p ${prefix} - mmseqs \\ - makepaddedseqdb \\ - \$DB_TARGET_PATH_NAME \\ - ${prefix}/${prefix} \\ - ${args} - """ - - stub: - def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" - """ - echo ${args} - mkdir -p ${prefix} - touch ${prefix}/${prefix} - touch ${prefix}/${prefix}.dbtype - touch ${prefix}/${prefix}.index - touch ${prefix}/${prefix}.lookup - touch ${prefix}/${prefix}_h - touch ${prefix}/${prefix}_h.dbtype - touch ${prefix}/${prefix}_h.index - """ -} diff --git a/modules/nf-core/mmseqs/makepaddedseqdb/meta.yml b/modules/nf-core/mmseqs/makepaddedseqdb/meta.yml deleted file mode 100644 index 8858d9bb9..000000000 --- 
a/modules/nf-core/mmseqs/makepaddedseqdb/meta.yml +++ /dev/null @@ -1,64 +0,0 @@ -name: "mmseqs_makepaddedseqdb" -description: Create an MMseqs padded database from an existing MMseqs database -keywords: - - protein sequence - - databases - - clustering - - searching - - indexing - - mmseqs2 -tools: - - "mmseqs": - description: "MMseqs2: ultra fast and sensitive sequence search and clustering - suite" - homepage: "https://github.com/soedinglab/MMseqs2" - documentation: "https://mmseqs.com/latest/userguide.pdf" - tool_dev_url: "https://github.com/soedinglab/MMseqs2" - doi: "10.1093/bioinformatics/btw006" - licence: - - "GPL v3" - identifier: biotools:mmseqs -input: - - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'test', single_end:false ]` - - db_in: - type: directory - description: Input of existing MMseqs database -output: - db_padded: - - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'test', single_end:false ]` - - "${prefix}/": - type: directory - description: The padded MMseqs2 database - versions_mmseqs: - - - ${task.process}: - type: string - description: The name of the process - - mmseqs: - type: string - description: The name of the tool - - mmseqs version: - type: eval - description: The expression to obtain the version of the tool -topics: - versions: - - - ${task.process}: - type: string - description: The name of the process - - mmseqs: - type: string - description: The name of the tool - - mmseqs version: - type: eval - description: The expression to obtain the version of the tool -authors: - - "@nbtm-sh" -maintainers: - - "@nbtm-sh" diff --git a/modules/nf-core/mmseqs/makepaddedseqdb/tests/main.nf.test b/modules/nf-core/mmseqs/makepaddedseqdb/tests/main.nf.test deleted file mode 100644 index 6c3ae4b7e..000000000 --- a/modules/nf-core/mmseqs/makepaddedseqdb/tests/main.nf.test +++ /dev/null @@ -1,49 +0,0 @@ -nextflow_process { - - name "Test Process 
MMSEQS_MAKEPADDEDSEQDB" - script "../main.nf" - process "MMSEQS_MAKEPADDEDSEQDB" - tag "modules" - tag "modules_nfcore" - tag "mmseqs" - tag "mmseqs/makepaddedseqdb" - tag "mmseqs/createdb" - - config "./nextflow.config" - - setup { - run("MMSEQS_CREATEDB") { - script "../../../mmseqs/createdb/main.nf" - process { - """ - input[0] = [ [ id:'test_query' ], - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fasta/contigs.fasta', checkIfExists: true) - ] - """ - } - } - } - - test("mmseqs_db sarscov2 contigs") { - - when { - params { - module_prefix = "test_query_gpu" - } - process { - """ - input[0] = MMSEQS_CREATEDB.out.db - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(sanitizeOutput(process.out)).match() - } - ) - } - - } -} diff --git a/modules/nf-core/mmseqs/makepaddedseqdb/tests/main.nf.test.snap b/modules/nf-core/mmseqs/makepaddedseqdb/tests/main.nf.test.snap deleted file mode 100644 index 8ce6f93de..000000000 --- a/modules/nf-core/mmseqs/makepaddedseqdb/tests/main.nf.test.snap +++ /dev/null @@ -1,36 +0,0 @@ -{ - "mmseqs_db sarscov2 contigs": { - "content": [ - { - "db_padded": [ - [ - { - "id": "test_query" - }, - [ - "test_query_gpu:md5,5b24585ba92fd826c78b8664c63b4e95", - "test_query_gpu.dbtype:md5,01d39098f2bfee5c808a3b4ff54deac2", - "test_query_gpu.index:md5,5946b4989d08320d9daca503155ba693", - "test_query_gpu.lookup:md5,3eb85c645034a0717db62ef0a3da5479", - "test_query_gpu_h:md5,a9fca4931be476b8f302cc27b5dff9b0", - "test_query_gpu_h.dbtype:md5,740bab4f9ec8808aedb68d6b1281aeb2", - "test_query_gpu_h.index:md5,ce0ca30c2e57677077cc23823ef17206" - ] - ] - ], - "versions_mmseqs": [ - [ - "MMSEQS_MAKEPADDEDSEQDB", - "mmseqs", - "18.8cc5c" - ] - ] - } - ], - "timestamp": "2026-02-25T10:33:19.910807101", - "meta": { - "nf-test": "0.9.4", - "nextflow": "25.04.6" - } - } -} \ No newline at end of file diff --git a/modules/nf-core/mmseqs/makepaddedseqdb/tests/nextflow.config 
b/modules/nf-core/mmseqs/makepaddedseqdb/tests/nextflow.config deleted file mode 100644 index 735a3eb09..000000000 --- a/modules/nf-core/mmseqs/makepaddedseqdb/tests/nextflow.config +++ /dev/null @@ -1,5 +0,0 @@ -process { - withName: "MMSEQS_MAKEPADDEDSEQDB" { - ext.prefix = params.module_prefix - } -} From 6523786ccc47ca0e7d35b2666462c940c6962dd8 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Wed, 11 Mar 2026 13:48:47 +1100 Subject: [PATCH 41/53] revert(modules): remove mmseqs/makepaddedseqdb --- modules.json | 51 ++++++++++++++++++++++++++++++++++----------------- 1 file changed, 34 insertions(+), 17 deletions(-) diff --git a/modules.json b/modules.json index baca22a62..53254aec4 100644 --- a/modules.json +++ b/modules.json @@ -8,51 +8,62 @@ "aria2": { "branch": "master", "git_sha": "7d0ccc097c60d23a195099b326a5882e135c7949", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/aria2/aria2.diff" }, "foldseek/easysearch": { "branch": "master", "git_sha": "a02efd7783000a416d5d2f1b2bc86b8d41b9f439", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/foldseek/easysearch/foldseek-easysearch.diff" }, "gunzip": { "branch": "master", "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "mmseqs/createindex": { "branch": "master", "git_sha": "38697a933bef7041bb935c9b8374d9948ce6c794", - "installed_by": ["modules"] - }, - "mmseqs/makepaddedseqdb": { - "branch": "master", - "git_sha": "40a31ec6d922ce6022a3a78f9f1e832b33de6a6d", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "mmseqs/tsv2exprofiledb": { "branch": "master", "git_sha": "151460db852d636979d9ff3ee631e2268060d4c3", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "multiqc": { "branch": "master", "git_sha": "f0719ae309075ae4a291533883847c3f7c441dad", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], 
"patch": "modules/nf-core/multiqc/multiqc.diff" }, "untar": { "branch": "master", "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/untar/untar.diff" }, "unzip": { "branch": "master", "git_sha": "4dd9d8439a429c7ee566e0e2347f76ddeef27e66", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] } } }, @@ -61,20 +72,26 @@ "utils_nextflow_pipeline": { "branch": "master", "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "65f5e638d901a51534c68fd5c1c19e8112fb4df1", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfschema_plugin": { "branch": "master", "git_sha": "fdc08b8b1ae74f56686ce21f7ea11ad11990ce57", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] } } } } } -} +} \ No newline at end of file From 3efd7dcfd5527ee0cec8982304e5ece49eed2460 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Wed, 11 Mar 2026 14:09:32 +1100 Subject: [PATCH 42/53] fix(schema): update schema to default to false --- modules.json | 46 ++++++++++++-------------------------------- nextflow_schema.json | 4 +--- 2 files changed, 13 insertions(+), 37 deletions(-) diff --git a/modules.json b/modules.json index 53254aec4..387857a74 100644 --- a/modules.json +++ b/modules.json @@ -8,62 +8,46 @@ "aria2": { "branch": "master", "git_sha": "7d0ccc097c60d23a195099b326a5882e135c7949", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/aria2/aria2.diff" }, "foldseek/easysearch": { "branch": "master", "git_sha": "a02efd7783000a416d5d2f1b2bc86b8d41b9f439", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/foldseek/easysearch/foldseek-easysearch.diff" }, "gunzip": { "branch": "master", "git_sha": 
"5c460c5a4736974abde2843294f35307ee2b0e5e", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "mmseqs/createindex": { "branch": "master", "git_sha": "38697a933bef7041bb935c9b8374d9948ce6c794", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "mmseqs/tsv2exprofiledb": { "branch": "master", "git_sha": "151460db852d636979d9ff3ee631e2268060d4c3", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "multiqc": { "branch": "master", "git_sha": "f0719ae309075ae4a291533883847c3f7c441dad", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/multiqc/multiqc.diff" }, "untar": { "branch": "master", "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/untar/untar.diff" }, "unzip": { "branch": "master", "git_sha": "4dd9d8439a429c7ee566e0e2347f76ddeef27e66", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] } } }, @@ -72,26 +56,20 @@ "utils_nextflow_pipeline": { "branch": "master", "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "65f5e638d901a51534c68fd5c1c19e8112fb4df1", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfschema_plugin": { "branch": "master", "git_sha": "fdc08b8b1ae74f56686ce21f7ea11ad11990ce57", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] } } } } } -} \ No newline at end of file +} diff --git a/nextflow_schema.json b/nextflow_schema.json index c0094dc9a..38e3c2a16 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -945,7 +945,6 @@ } } }, - "generic_options": { "title": "Generic options", "type": "object", @@ -1367,8 +1366,7 @@ ], "properties": { "colabfold_enable_gpu_search": { - "type": "boolean", - "default": true + "type": "boolean" 
}, "colabfold_envdb_path_padded": { "type": "string", From 0e064ff3c44e4d301e18153e8a9704b1ebd6145e Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Wed, 11 Mar 2026 14:16:01 +1100 Subject: [PATCH 43/53] fix(docs): pretty --- docs/gpu-dbs.md | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/docs/gpu-dbs.md b/docs/gpu-dbs.md index ebec3f1b4..bdf210ad0 100644 --- a/docs/gpu-dbs.md +++ b/docs/gpu-dbs.md @@ -1,7 +1,9 @@ # Using padded databases in proteinfold + Proteinfold can make use of GPU MSA search for faster searching. However, this requires creating padded databases for the GPU hardware you wish to use. ## Requirements + - mmseqs-gpu - uniref30 database - colabfold envdb database @@ -9,7 +11,9 @@ Proteinfold can make use of GPU MSA search for faster searching. However, this r - CUDA 12.4 or newer ## Database structure + Proteinfold can make use of the `--db` flag to load in all required databases. In order to load the padded databases, the database should be structured as such. Below is a truncated version of the database tree. It is important to note that the padded database files have the same prefix as the CPU files. + ``` . ├── boltz1.ckpt @@ -65,6 +69,7 @@ Proteinfold can make use of the `--db` flag to load in all required databases. I ``` ## Obtaining MMseqs-GPU + MMseqs has two x86 builds available for Linux. This requires the GPU version of MMseqs. It can be obtained via this command: ```bash @@ -73,14 +78,18 @@ tar xvf mmseqs-linux-gpu.tar.gz ``` ## Downloading UniRef30 Database + Firstly, you must obtain the UniRef database. The database file is approx. 55GB. You may be able to get faster downloads by using `aria2c` with the `-x 8` option. + ```bash wget https://opendata.mmseqs.org/colabfold/uniref30_2302.db.tar.gz tar xvf uniref30_2302.db.tar.gz ``` ## Downloading the Colabfold EnvDB + Next, you will need to download the Colabfold EnvDB. This database is approx. 120GB. 
+ ```bash wget https://opendata.mmseqs.org/colabfold/colabfold_envdb_202108.db.tar.gz tar xvf colabfold_envdb_202108.db.tar.gz @@ -88,11 +97,14 @@ tar xvf colabfold_envdb_202108.db.tar.gz ``` ## CPU Database structure + By now, your directory structure should look something like this [T.B.D.] ## Create padded database + Next, we need to create the padded databases. For this, it is recommended to duplicate the databases. + ```bash mkdir colabfold_uniref30_padded mmseqs makepaddedseqdb ./colabfold_uniref30/uniref30_2302_db_seq ./colabfold_uniref30_padded/uniref30_2302_db_seq @@ -104,6 +116,7 @@ cp ./colabfold_envdb/colabfold_envdb_202108_db_aln.* ./colabfold_envdb_padded/ ``` You should now have a directory structure that looks something similar to this + ``` . ├── colabfold_envdb @@ -230,7 +243,9 @@ You should now have a directory structure that looks something similar to this ``` ## Running colabfold + You will need to set the `--colabfold_enable_gpu_search true` flag. Below is an example command you can use to run with GPU search enabled: + ```bash nextflow run ./main.nf \ --input "samplesheet.csv" \ @@ -242,4 +257,3 @@ nextflow run ./main.nf \ --colabfold_enable_gpu_search true \ --colabfold_model_preset alphafold2_ptm ``` - From eed63bd06e82324ddb516db5672d34b5a630ba16 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Wed, 11 Mar 2026 14:32:50 +1100 Subject: [PATCH 44/53] fix(pretty): mmseqs config file --- conf/modules_colabfold.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules_colabfold.config b/conf/modules_colabfold.config index 53a9f2b80..bbb77dfb4 100644 --- a/conf/modules_colabfold.config +++ b/conf/modules_colabfold.config @@ -43,7 +43,7 @@ process { if (params.use_gpu) { accelerator = 1 } - + ext.args = [ params.use_gpu ? '--use-gpu-relax' : '', params.colabfold_use_amber ? 
'--amber' : '', From 95428afe35fc03944c68c0d7bf74ce455ce5c376 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Wed, 11 Mar 2026 14:34:01 +1100 Subject: [PATCH 45/53] revert(mmseqs): remove mmseqs makepaddedseqdb --- subworkflows/local/prepare_colabfold_dbs.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/subworkflows/local/prepare_colabfold_dbs.nf b/subworkflows/local/prepare_colabfold_dbs.nf index 311dfeb92..2630631a6 100644 --- a/subworkflows/local/prepare_colabfold_dbs.nf +++ b/subworkflows/local/prepare_colabfold_dbs.nf @@ -3,7 +3,6 @@ // include { MMSEQS_CREATEINDEX as MMSEQS_CREATEINDEX_COLABFOLDDB } from '../../modules/nf-core/mmseqs/createindex/main' include { MMSEQS_CREATEINDEX as MMSEQS_CREATEINDEX_UNIPROT30 } from '../../modules/nf-core/mmseqs/createindex/main' -include { MMSEQS_MAKEPADDEDSEQDB as MMSEQS_MAKEPADDEDSEQDB_UNIPROT30_PADDED } from '../../modules/nf-core/mmseqs/makepaddedseqdb/main' include { ARIA2_UNCOMPRESS as ARIA2_COLABFOLD_PARAMS } from './aria2_uncompress' include { ARIA2_UNCOMPRESS as ARIA2_COLABFOLD_DB } from './aria2_uncompress' From 68ab6d757e79b3bd975246d2b0c696e8f4bd9ab4 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Thu, 12 Mar 2026 09:31:30 +1100 Subject: [PATCH 46/53] fix(docs): fix mmseqs documentation --- docs/gpu-dbs.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/gpu-dbs.md b/docs/gpu-dbs.md index bdf210ad0..95a0340e7 100644 --- a/docs/gpu-dbs.md +++ b/docs/gpu-dbs.md @@ -110,8 +110,8 @@ mkdir colabfold_uniref30_padded mmseqs makepaddedseqdb ./colabfold_uniref30/uniref30_2302_db_seq ./colabfold_uniref30_padded/uniref30_2302_db_seq mmseqs makepaddedseqdb ./colabfold_uniref30/uniref30_2302_db ./colabfold_uniref30_padded/uniref30_2302_db mkdir colabfold_envdb_padded -~/app/mmseqs/mmseqs/bin/mmseqs makepaddedseqdb ./colabfold_envdb/colabfold_envdb_202108_db ./colabfold_envdb_padded/colabfold_envdb_202108_db -~/app/mmseqs/mmseqs/bin/mmseqs makepaddedseqdb ./colabfold_envdb/colabfold_envdb_202108_db_seq 
./colabfold_envdb_padded/colabfold_envdb_202108_db_seq +mmseqs makepaddedseqdb ./colabfold_envdb/colabfold_envdb_202108_db ./colabfold_envdb_padded/colabfold_envdb_202108_db +mmseqs makepaddedseqdb ./colabfold_envdb/colabfold_envdb_202108_db_seq ./colabfold_envdb_padded/colabfold_envdb_202108_db_seq cp ./colabfold_envdb/colabfold_envdb_202108_db_aln.* ./colabfold_envdb_padded/ ``` From b6508530a2f4c876d009b3fc8e42c0f29b26f60f Mon Sep 17 00:00:00 2001 From: Nathan Date: Fri, 13 Mar 2026 09:37:24 +1100 Subject: [PATCH 47/53] Update docs/usage.md Co-authored-by: Keiran Rowell <42729651+keiran-rowell@users.noreply.github.com> --- docs/usage.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/usage.md b/docs/usage.md index f7f53e98d..1c8511c7e 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -183,7 +183,7 @@ To provide the predownloaded AlphaFold3 databases and parameters you can specify -Colabfold mode can be used with local database search. GPU MSA search is enabeld by default. You may disable GPU MSA search with `--colabfold_enable_gpu_search false`: +Colabfold mode can be used with local database search. GPU MSA search is disabled by default. You may enable GPU MSA search with `--colabfold_enable_gpu_search true`: ```bash nextflow run nf-core/proteinfold \ From d0ce03db1b487711e6bfb8e232d1f804376fa7d4 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 13 Mar 2026 09:46:27 +1100 Subject: [PATCH 48/53] fix(docs): remove unused files --- docs/gpu-dbs.md | 41 ++++------------------------------------- 1 file changed, 4 insertions(+), 37 deletions(-) diff --git a/docs/gpu-dbs.md b/docs/gpu-dbs.md index 95a0340e7..f0da5b5c3 100644 --- a/docs/gpu-dbs.md +++ b/docs/gpu-dbs.md @@ -16,8 +16,6 @@ Proteinfold can make use of the `--db` flag to load in all required databases. I ``` .
-├── boltz1.ckpt -├── ccd.pkl ├── colabfold_envdb │   ├── colabfold_envdb_202108_sample_h.tsv │   ├── colabfold_envdb_202108_sample_seq.tsv @@ -31,41 +29,10 @@ Proteinfold can make use of the `--db` flag to load in all required databases. I │   ├── uniref30_2302_db_seq_h.index │   ├── uniref30_2302_db_seq.index │   └── uniref30_2302_db_taxonomy -├── colabfold_uniref30_gpu -│   ├── uniref30_2302_db.idx.index -│   ├── uniref30_2302_db.index -│   └── uniref30_2302_db.lookup -├── colabfold_uniref30.old -│   ├── uniref30_2302_db_seq.7 -│   ├── uniref30_2302_db_seq.dbtype -│   └── uniref30_2302_db_seq.index -├── colabfold_uniref30_padded -│   ├── uniref30_2302_db_seq_h.index -│   ├── uniref30_2302_db_seq.index -│   └── uniref30_2302_db_taxonomy -├── mgnify -│   └── mgy_clusters.fa -├── pdb100 -│   ├── pdb100_2021Mar03_pdb.ffdata -│   └── pdb100_2021Mar03_pdb.ffindex -├── pdb70 -│   ├── pdb70_hhm.ffindex -│   └── pdb_filter.dat -├── pdb_mmcif -│   ├── mmcif_files -│   └── obsolete.dat -├── pdb_seqres -│   └── pdb_seqres.txt -├── rfam -│   └── Rfam-14.9_rep_seq.fasta -├── small_bfd -│   └── bfd-first_non_consensus_sequences.fasta -├── uniprot -│   └── uniprot.fasta -├── uniref30 -│   └── UniRef30_2023_02_hhm.ffindex -└── uniref90 - └── uniref90.fasta +└── colabfold_uniref30_padded +    ├── uniref30_2302_db_seq_h.index +    ├── uniref30_2302_db_seq.index +    └── uniref30_2302_db_taxonomy ``` ## Obtaining MMseqs-GPU From c2d830e0cb7178524adf85679c370ad2feb1498a Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 13 Mar 2026 09:47:23 +1100 Subject: [PATCH 49/53] feat(tests): add gpu tag ot colabfold local test --- tests/colabfold_local_gpu.nf.test | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/colabfold_local_gpu.nf.test b/tests/colabfold_local_gpu.nf.test index 8b6d1a511..eced61bb3 100644 --- a/tests/colabfold_local_gpu.nf.test +++ b/tests/colabfold_local_gpu.nf.test @@ -3,6 +3,7 @@ nextflow_pipeline { name "Test colabfold local mode stub" script "../main.nf" tag 
"pipeline" + tag "gpu" tag "test_colabfold_local" profile "test_colabfold_local" From 78164b39ca7a5325e2b419d570455a37a68e7607 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 13 Mar 2026 09:51:19 +1100 Subject: [PATCH 50/53] fix(docs): emphasise setting flags to use gpu search --- docs/gpu-dbs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/gpu-dbs.md b/docs/gpu-dbs.md index f0da5b5c3..33d938c53 100644 --- a/docs/gpu-dbs.md +++ b/docs/gpu-dbs.md @@ -1,6 +1,6 @@ # Using padded databases in proteinfold -Proteinfold can make use of GPU MSA search for faster searching. However, this requires creating padded databases for the GPU hardware you wish to use. +Proteinfold can make use of GPU MSA search for faster searching. However, this requires creating padded databases for the GPU hardware you wish to use, and setting the appropriate flags. ## Requirements From 189ae02c64cbc720fec9baa225838c5a7163036b Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 13 Mar 2026 09:54:00 +1100 Subject: [PATCH 51/53] fix(docs): fix caps --- docs/gpu-dbs.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/gpu-dbs.md b/docs/gpu-dbs.md index 33d938c53..c3051c094 100644 --- a/docs/gpu-dbs.md +++ b/docs/gpu-dbs.md @@ -53,9 +53,9 @@ wget https://opendata.mmseqs.org/colabfold/uniref30_2302.db.tar.gz tar xvf uniref30_2302.db.tar.gz ``` -## Downloading the Colabfold EnvDB +## Downloading the Colabfold envdb -Next, you will need to download the Colabfold EnvDB. This database is approx. 120GB. +Next, you will need to download the Colabfold envdb. This database is approx. 120GB. 
```bash wget https://opendata.mmseqs.org/colabfold/colabfold_envdb_202108.db.tar.gz From bda02f9daa0aef703c594130fd8174f3024663c4 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 13 Mar 2026 09:54:15 +1100 Subject: [PATCH 52/53] feat(docs): add cpu database structure --- docs/gpu-dbs.md | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/docs/gpu-dbs.md b/docs/gpu-dbs.md index c3051c094..bb571a1df 100644 --- a/docs/gpu-dbs.md +++ b/docs/gpu-dbs.md @@ -66,8 +66,19 @@ tar xvf colabfold_envdb_202108.db.tar.gz ## CPU Database structure By now, your directory structure should look something like this -[T.B.D.] +``` +. +├── colabfold_envdb +│   ├── colabfold_envdb_202108_sample_h.tsv +│   ├── colabfold_envdb_202108_sample_seq.tsv +│   └── colabfold_envdb_202108_sample.tsv +└── colabfold_uniref30 +    ├── uniref30_2302_db_seq_h.dbtype +    ├── uniref30_2302_db_seq_h.index +    ├── uniref30_2302_db_seq.index +    └── uniref30_2302_db_taxonomy +``` ## Create padded database Next, we need to create the padded databases. For this, it is recommended to duplicate the databases. From 363236bc6da203e54d06607f4fe180f47346a9db Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 13 Mar 2026 09:55:20 +1100 Subject: [PATCH 53/53] fix(docs): move obtaining gpu-mmseqs step --- docs/gpu-dbs.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/gpu-dbs.md b/docs/gpu-dbs.md index bb571a1df..bc2ceaf17 100644 --- a/docs/gpu-dbs.md +++ b/docs/gpu-dbs.md @@ -10,6 +10,15 @@ Proteinfold can make use of GPU MSA search for faster searching. However, this r - NVIDIA Ampere GPU or newer - CUDA 12.4 or newer +## Obtaining MMseqs-GPU + +MMseqs has two x86 builds available for Linux. This requires the GPU version of MMseqs. 
It can be obtained via this command: + +```bash +wget https://github.com/soedinglab/MMseqs2/releases/download/18-8cc5c/mmseqs-linux-gpu.tar.gz +tar xvf mmseqs-linux-gpu.tar.gz +``` + ## Database structure Proteinfold can make use of the `--db` flag to load in all required databases. In order to load the padded databases, the database should be structured as such. Below is a truncated version of the database tree. It is important to note that the padded database files have the same prefix as the CPU files. @@ -35,15 +44,6 @@ Proteinfold can make use of the `--db` flag to load in all required databases. I    └── uniref30_2302_db_taxonomy ``` -## Obtaining MMseqs-GPU - -MMseqs has two x86 builds available for Linux. This requires the GPU version of MMseqs. It can be obtained via this command: - -```bash -wget https://github.com/soedinglab/MMseqs2/releases/download/18-8cc5c/mmseqs-linux-gpu.tar.gz -tar xvf mmseqs-linux-gpu.tar.gz -``` - ## Downloading UniRef30 Database Firstly, you must obtain the UniRef database. The database file is approx. 55GB. You may be able to get faster downloads by using `aria2c` with the `-x 8` option.