From 5f76d5f8999724431ddd7dd5150b960af785547c Mon Sep 17 00:00:00 2001
From: "keiran.rowell" <k.rowell@unsw.edu.au>
Date: Wed, 28 Jan 2026 13:58:15 +1100
Subject: [PATCH 01/19] Bulk MultiQC reporting as a module

---
 modules/nf-core/multiqc/environment.yml |   2 +
 multiqc_proteinfold/__init__.py         |   6 +
 multiqc_proteinfold/multiqc_config.yaml |   7 +
 multiqc_proteinfold/proteinfold.py      | 271 ++++++++++++++++++++++++
 setup.py                                |  28 +++
 5 files changed, 314 insertions(+)
 create mode 100644 multiqc_proteinfold/__init__.py
 create mode 100644 multiqc_proteinfold/multiqc_config.yaml
 create mode 100644 multiqc_proteinfold/proteinfold.py
 create mode 100644 setup.py

diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml
index d02016a00..757dd1bc2 100644
--- a/modules/nf-core/multiqc/environment.yml
+++ b/modules/nf-core/multiqc/environment.yml
@@ -5,3 +5,5 @@ channels:
   - bioconda
 dependencies:
   - bioconda::multiqc=1.32
+  - pip
+    - ${projectDir} # Install proteinfold_multiqc as a local plugin
diff --git a/multiqc_proteinfold/__init__.py b/multiqc_proteinfold/__init__.py
new file mode 100644
index 000000000..d57194bdf
--- /dev/null
+++ b/multiqc_proteinfold/__init__.py
@@ -0,0 +1,6 @@
+"""MultiQC plugin for proteinfold pipeline outputs converting metrics to simple tsvs."""
+
+from .proteinfold import MultiqcModule
+
+__version__ = '0.1.0'
+__all__ = ["MultiqcModule"]
diff --git a/multiqc_proteinfold/multiqc_config.yaml b/multiqc_proteinfold/multiqc_config.yaml
new file mode 100644
index 000000000..78e16e57b
--- /dev/null
+++ b/multiqc_proteinfold/multiqc_config.yaml
@@ -0,0 +1,7 @@
+sp:
+  proteinfold:
+    fn: '*_{plddt,msa,ptm,iptm,pae}.tsv'
+
+custom_logo: "/srv/scratch/z3374843/MultiQC/multiqc/logo/SBF_logo.png"
+custom_logo_url: "https://doi.org/10.26190/4KQF-M552"
+custom_logo_title: "Structural Biology Facility - Mark Wainwright Analytical Centre - University of New South Wales"
diff --git a/multiqc_proteinfold/proteinfold.py b/multiqc_proteinfold/proteinfold.py
new file mode 100644
index 000000000..c196b5200
--- /dev/null
+++ b/multiqc_proteinfold/proteinfold.py
@@ -0,0 +1,271 @@
+from pathlib import Path
+import pandas as pd
+
+from multiqc.base_module import BaseMultiqcModule, ModuleNoSamplesFound
+from multiqc.plots import bargraph, linegraph
+from multiqc import config  # I want the ranks to merge by default, not a user problem
+from multiqc.plots.table_object import ColumnDict
+
+from typing import Dict, Any, cast
+
+
+class MultiqcModule(BaseMultiqcModule):
+    """
+    The module parses results generated by a variety of protein structure prediction programs in the [ProteinFold](https://nf-co.re/proteinfold/) pipeline.
+    This includes (as of release v2.0):
+        - [AlphaFold2](https://github.com/google-deepmind/alphafold)
+        - [AlphaFold3](https://github.com/google-deepmind/alphafold3)
+        - [ColabFold](https://github.com/sokrypton/ColabFold)
+        - [ESMFold](https://github.com/facebookresearch/esm)
+        - [RoseTTAFold-All-Atom](https://github.com/baker-laboratory/RoseTTAFold-All-Atom)
+        - [RoseTTAFold2-Nucleic-Acids](https://github.com/uw-ipd/RoseTTAFold2NA) [Wait on merge]
+        - [HelixFold3](https://github.com/PaddlePaddle/PaddleHelix/tree/dev/apps/protein_folding/helixfold3)
+        - [Boltz](https://github.com/jwohlwend/boltz)
+
+    This is intended to provide a summary of useful metrics for mass 'folding' a large set of proteins, either in terms of fishing for mulitmer interactions or comparing methods across whole proteomes.
+    It provides a visual 'at-a-glance' report of relevant metrics (average pLDDT, ipTM, *etc*) and does not replace the per-protein interactive plot from GENEREATE_REPORT in  nfcore/proteinfold
+    """
+
+    def __init__(self):
+        super(MultiqcModule, self).__init__(
+            name="ProteinFold",
+            anchor="proteinfold",
+            href=[
+                "https://nf-co.re/proteinfold",
+                "https://github.com/google-deepmind/alphafold",
+                "https://github.com/google-deepmind/alphafold3",
+                "https://github.com/sokrypton/ColabFold",
+                "https://github.com/facebookresearch/esm",
+                "https://github.com/baker-laboratory/RoseTTAFold-All-Atom",
+                "https://github.com/uw-ipd/RoseTTAFold2NA",
+                "https://github.com/PaddlePaddle/PaddleHelix/tree/dev/apps/protein_folding/helixfold3",
+                "https://github.com/jwohlwend/boltz",
+            ],
+            info="ProteinFold - protein structure inference methods through a single nextflow pipeline interface",
+            doi=[
+                "10.1038/s41586-021-03819-2",
+                "10.1038/s41592-022-01488-1",
+                "10.1126/science.ade2574",
+                "10.1126/science.adl2528",
+                "10.1038/s41592-023-02086-5",
+                "10.48550/arXiv.2408.16975",
+                "10.1101/2024.11.19.624167",
+            ],
+        )
+
+        # Want to treat the ranked inference runs as 'sub-samples' for grouping logic, even if not separate files
+        if not hasattr(config, "table_sample_merge"):
+            config.table_sample_merge = {}
+
+        # Some codes generated 5 inferences for 5 models and all 25 are processed. If a user sets more they're an expert and can custom handle
+        config.table_sample_merge = {
+            "rank_0": ["_rank_0"],
+            "rank_1": ["_rank_1"],
+            "rank_2": ["_rank_2"],
+            "rank_3": ["_rank_3"],
+            "rank_4": ["_rank_4"],
+            "rank_5": ["_rank_5"],
+            "rank_6": ["_rank_6"],
+            "rank_7": ["_rank_7"],
+            "rank_8": ["_rank_8"],
+            "rank_9": ["_rank_9"],
+            "rank_10": ["_rank_10"],
+            "rank_11": ["_rank_11"],
+            "rank_12": ["_rank_12"],
+            "rank_13": ["_rank_13"],
+            "rank_14": ["_rank_14"],
+            "rank_15": ["_rank_15"],
+            "rank_16": ["_rank_16"],
+            "rank_17": ["_rank_17"],
+            "rank_18": ["_rank_18"],
+            "rank_19": ["_rank_19"],
+            "rank_20": ["_rank_20"],
+            "rank_21": ["_rank_21"],
+            "ranmodek_22": ["_rank_22"],
+            "rank_23": ["_rank_23"],
+            "rank_24": ["_rank_24"],
+        }
+
+        mode_dict = {
+            "alphafold2": "AlphaFold2",
+            "alphafold3": "AlphaFold3",
+            "colabfold": "ColabFold",
+            "esmfold": "ESMFold",
+            "rosettafold2na": "RoseTTAFold2-Nucleic-Acids",
+            "rosettafold_all_atom": "RoseTTAFold-All-Atom",
+            "helixfold3": "HelixFold3",
+            "boltz": "Boltz",
+        }
+
+        self.proteinfold_data: Dict[str, Any] = {}
+        # I want to enable sample grouping: https://docs.seqera.io/multiqc/reports/customisation#sample-grouping
+
+        for f in self.find_log_files("proteinfold"):
+            self.add_data_source(f)
+
+            raw_samplename = f["s_name"].split("_")[0]
+            filepath = Path(f["root"]) / f["fn"]
+            mode = "UNKNOWN"
+            for parent in filepath.parents:
+                if parent.name in mode_dict:  # traverse up the filepath until you hit a mode labelled dir
+                    mode = mode_dict[parent.name]
+                    break
+
+            mode_samplename = f"{raw_samplename}_{mode}"
+            samplename = self.clean_s_name(mode_samplename, f)
+            print(filepath)
+            print(samplename)
+
+            self.proteinfold_data.setdefault(samplename, {})  # Set default creates if doesn't already exist
+
+            if f["fn"].endswith("_msa.tsv"):
+                self.proteinfold_data[samplename]["msa_depth"] = f["f"].count("\n")
+
+            if f["fn"].endswith("_plddt.tsv"):
+                df = pd.read_csv(filepath, sep="\t")
+                rank_cols = [col for col in df.columns if col.startswith("rank_")]
+
+                # Full plddt data frame for plotting purposes
+                plddt_data = {col: df.set_index("Positions")[col].to_dict() for col in rank_cols}
+                self.proteinfold_data[samplename]["plddt"] = plddt_data
+
+                rank_means = df[rank_cols].mean().to_dict()
+                # Parent sample entry should still have the top ranked value
+                self.proteinfold_data[samplename]["mean_plddt"] = rank_means.get("rank_0")
+
+                for rank in rank_cols:
+                    rank_num = rank.split("rank_")[1]
+                    subsample = f"{samplename}_rank_{rank_num}"
+
+                    self.proteinfold_data.setdefault(subsample, {})
+                    self.proteinfold_data[subsample]["mean_plddt"] = rank_means[rank]
+
+            if f["fn"].endswith("_iptm.tsv") and not f["fn"].endswith("_chainwise_iptm.tsv"):
+                iptm_series = cast(
+                    pd.Series, pd.read_csv(filepath, sep="\t", header=None, index_col=0).squeeze(axis=1)
+                )  # Squeeze makes a series accessible on index label
+                if (
+                    0 in iptm_series.index
+                ):  # Since pandas infers the index as an int this is an exact int match not a greedy string match
+                    self.proteinfold_data[samplename]["iptm"] = iptm_series.loc[
+                        0
+                    ]  # Remember loc is an *int* match on rank 0 index
+
+                for rank_num in iptm_series.index:
+                    subsample = f"{samplename}_rank_{rank_num}"
+                    self.proteinfold_data.setdefault(subsample, {})["iptm"] = iptm_series.loc[rank_num]
+
+            if f["fn"].endswith("_ptm.tsv") and not f["fn"].endswith("_chainwise_ptm.tsv"):
+                ptm_series = cast(
+                    pd.Series, pd.read_csv(filepath, sep="\t", header=None, index_col=0).squeeze(axis=1)
+                )  # squeeze(axis=1) keeps a Series even for a single row; a bare squeeze() would collapse it to a scalar
+                if 0 in ptm_series.index:
+                    self.proteinfold_data[samplename]["ptm"] = ptm_series.loc[0]
+
+                for rank_num in ptm_series.index:
+                    subsample = f"{samplename}_rank_{rank_num}"
+                    self.proteinfold_data.setdefault(subsample, {})["ptm"] = ptm_series.loc[rank_num]
+
+        self.write_data_file(
+            self.proteinfold_data, "proteinfold_data"
+        )  # I want to structure and rename from avg_plDDT to summary_stats
+        self.general_stats_table()
+        # Toggleable per-residue pLDDT plots of all ranks
+        self.plddt_line_plot()
+
+    def general_stats_table(self):
+        """
+        Put protein structure prediction metrics into a general table for all different Deep Learning methods
+        """
+        # Check for empty metrics so that columns with no data are dropped
+
+        has_iptm = any(
+            sample_data.get("iptm") and sample_data.get("iptm") != 0.0 for sample_data in self.proteinfold_data.values()
+        )
+        has_ptm = any(
+            sample_data.get("ptm") and sample_data.get("ptm") != 0.0 for sample_data in self.proteinfold_data.values()
+        )
+
+        headers: Dict[str, ColumnDict] = {
+            "msa_depth": {
+                "title": "Related sequence depth (MSA)",
+                "description": "The number of related sequences (across the whole protein) that could be retrieved from the MSA (Multiple Sequence Alignment) stage",
+            },
+            "mean_plddt": {
+                "title": "Structure confidence (average pLDDT)",
+                "description": "Structure prediction confidence score across all residues in the top ranked protein structure - from the mean pLDDT (predicted Local Distance Difference Test) value",
+                "max": 100,
+                "min": 0,
+                "cond_formatting_rules": {
+                    "very-low": [{"lt": 50}],
+                    "low": [{"gt": 50}, {"lt": 70}],
+                    "high": [{"gt": 70}, {"lt": 90}],
+                    "very-high": [{"gt": 90}],
+                },
+                "cond_formatting_colours": [
+                    {"very-low": "#f0743e"},
+                    {"low": "#f9d613"},
+                    {"high": "#60c2e8"},
+                    {"very-high": "#014ecc"},
+                ],
+            },
+        }
+
+        if has_iptm:
+            headers["iptm"] = {
+                "title": "Interface accuracy (ipTM)",
+                "description": "Accuracy of the relative positions of two protein subunits from a mulitmer calcuation - from the ipTM (interface predicted Template Modelling) score",
+                "max": 1,
+                "min": 0,
+                "format": "{:,.2f}",
+                "scale": "Purples",
+            }
+
+        if has_ptm:
+            headers["ptm"] = {
+                "title": "Global accuracy (TM)",
+                "description": "Global accuracy of the protein folded, less sensitive to localised inaccuracies than raw 3D atomic deviations (RMSD) - from the pTM (predicted Template Modelling) score",
+                "max": 1,
+                "min": 0,
+                "format": "{:,.2f}",
+                "scale": "Blues",
+            }
+
+        self.general_stats_addcols(self.proteinfold_data, headers)
+
+    def plddt_line_plot(self):
+        """Line plot showing pLDDT confidence across residue position of selected sample, for all ranks"""
+
+        parent_samples = {}
+
+        for sample, metrics in self.proteinfold_data.items():
+            if "plddt" in metrics:
+                if "_rank_" not in sample:  # The parent sample already has the plddt data for all ranks
+                    parent_samples[sample] = metrics["plddt"]
+
+        data_labels = []  # Need a data_labels list for the sample switcher
+        plot_data_list = []
+
+        # The populated data_labels plot config section is what the switcher uses
+        for parent_sample, rank_data in parent_samples.items():
+            data_labels.append({"name": parent_sample, "ylab": "pLDDT score"})
+            plot_data_list.append(rank_data)
+
+        pconfig = {
+            "id": "proteinfold_plddt_lineplot",
+            "title": "ProteinFold: pLDDT by Position",
+            "xlab": "Residue Position",
+            "ylab": "pLDDT Score",
+            "ymin": 0,
+            "ymax": 100,
+            "data_labels": data_labels,
+        }
+
+        plot_html = linegraph.plot(plot_data_list, pconfig)
+
+        self.add_section(
+            name="pLDDT Confidence by residue",
+            anchor="proteinfold-plddt-per-res",
+            description="Per-residue confidence scores across all predicted ranks",
+            plot=plot_html,
+        )
diff --git a/setup.py b/setup.py
new file mode 100644
index 000000000..c0402302f
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,28 @@
+from setuptools import setup, find_packages
+
+setup(
+    name='multiqc-proteinfold',
+    version='0.1.0',
+    author='Keiran Rowell',
+    author_email='k.rowell@unsw.edu.au',
+    description='MultiQC plugin for proteinfold pipeline metric tsv outputs',
+    url='https://github.com/nf-core/proteinfold',
+    packages=find_packages(),
+    include_package_data=True,
+    entry_points={
+        'multiqc.modules.v1': [
+            'proteinfold = multiqc_proteinfold.proteinfold:MultiqcModule',
+        ],
+    },
+    install_requires=[
+        'multiqc>=1.15',
+        'pandas',
+    ],
+    classifiers=[
+        'Development Status :: 4 - Beta',
+        'Intended Audience :: Science/Research',
+        'License :: OSI Approved :: MIT License',
+        'Programming Language :: Python :: 3',
+    ],
+    python_requires='>=3.8',
+)

From 74337a52ab3016891a8337ca909abe16a7a72aeb Mon Sep 17 00:00:00 2001
From: "keiran.rowell" <k.rowell@unsw.edu.au>
Date: Wed, 28 Jan 2026 14:09:41 +1100
Subject: [PATCH 02/19] Add line which installs the multiqc plugin from setup.py

---
 modules/nf-core/multiqc/main.nf | 1 +
 1 file changed, 1 insertion(+)

diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf
index 8f2086b4f..d2862ffc1 100644
--- a/modules/nf-core/multiqc/main.nf
+++ b/modules/nf-core/multiqc/main.nf
@@ -32,6 +32,7 @@ process MULTIQC {
     def replace = replace_names ? "--replace-names ${replace_names}" : ''
     def samples = sample_names ? "--sample-names ${sample_names}" : ''
     """
+    pip install ${workflow.projectDir}
     multiqc \\
         --force \\
         $args \\

From 8d3bf30b4a08d7e6609b983b84f0e5bd6e1ddee0 Mon Sep 17 00:00:00 2001
From: "keiran.rowell" <k.rowell@unsw.edu.au>
Date: Wed, 28 Jan 2026 14:13:48 +1100
Subject: [PATCH 03/19] Make prettier happy...

---
 multiqc_proteinfold/multiqc_config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/multiqc_proteinfold/multiqc_config.yaml b/multiqc_proteinfold/multiqc_config.yaml
index 78e16e57b..fd662497b 100644
--- a/multiqc_proteinfold/multiqc_config.yaml
+++ b/multiqc_proteinfold/multiqc_config.yaml
@@ -1,6 +1,6 @@
 sp:
   proteinfold:
-    fn: '*_{plddt,msa,ptm,iptm,pae}.tsv'
+    fn: "*_{plddt,msa,ptm,iptm,pae}.tsv"
 
 custom_logo: "/srv/scratch/z3374843/MultiQC/multiqc/logo/SBF_logo.png"
 custom_logo_url: "https://doi.org/10.26190/4KQF-M552"

From 652b877970e27ae5ce49fd433ef8991889e24900 Mon Sep 17 00:00:00 2001
From: "keiran.rowell" <k.rowell@unsw.edu.au>
Date: Wed, 28 Jan 2026 16:21:32 +1100
Subject: [PATCH 04/19] Add the multiqc proteinfold extra config channel

---
 main.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/main.nf b/main.nf
index 9c95ae0f1..9c72b965a 100644
--- a/main.nf
+++ b/main.nf
@@ -578,7 +578,7 @@ workflow NFCORE_PROTEINFOLD {
     }
 
     ch_multiqc_config        = channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true).first()
-    ch_multiqc_custom_config = params.multiqc_config ? channel.fromPath( params.multiqc_config ).first()  : channel.empty()
+    ch_multiqc_custom_config = params.multiqc_config ? channel.fromPath( params.multiqc_config ).first()  : channel.fromPath("${projectDir}/multiqc_proteinfold/multiqc_proteinfold_config.yml")
     ch_multiqc_logo          = params.multiqc_logo   ? channel.fromPath( params.multiqc_logo ).first()    : channel.empty()
     ch_multiqc_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true)
     ch_report_template     = channel.value(file("$projectDir/assets/report_template.html", checkIfExists: true))

From 429ee78fe6fe8d9940fba2e90ffd4002081c24f2 Mon Sep 17 00:00:00 2001
From: Keiran Rowell <54380465+keiran-rowell-unsw@users.noreply.github.com>
Date: Mon, 30 Mar 2026 15:08:23 +1100
Subject: [PATCH 05/19] rank typo

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 multiqc_proteinfold/proteinfold.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/multiqc_proteinfold/proteinfold.py b/multiqc_proteinfold/proteinfold.py
index c196b5200..598529c22 100644
--- a/multiqc_proteinfold/proteinfold.py
+++ b/multiqc_proteinfold/proteinfold.py
@@ -81,7 +81,7 @@ def __init__(self):
             "rank_19": ["_rank_19"],
             "rank_20": ["_rank_20"],
             "rank_21": ["_rank_21"],
-            "ranmodek_22": ["_rank_22"],
+            "rank_22": ["_rank_22"],
             "rank_23": ["_rank_23"],
             "rank_24": ["_rank_24"],
         }

From b98d5c76dee85f7d654774692f8e27ae668fb6a8 Mon Sep 17 00:00:00 2001
From: Keiran Rowell <54380465+keiran-rowell-unsw@users.noreply.github.com>
Date: Mon, 30 Mar 2026 15:08:48 +1100
Subject: [PATCH 06/19] use logger not print

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 multiqc_proteinfold/proteinfold.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/multiqc_proteinfold/proteinfold.py b/multiqc_proteinfold/proteinfold.py
index 598529c22..e5026500a 100644
--- a/multiqc_proteinfold/proteinfold.py
+++ b/multiqc_proteinfold/proteinfold.py
@@ -113,8 +113,8 @@ def __init__(self):
 
             mode_samplename = f"{raw_samplename}_{mode}"
             samplename = self.clean_s_name(mode_samplename, f)
-            print(filepath)
-            print(samplename)
+            self.log.debug(filepath)
+            self.log.debug(samplename)
 
             self.proteinfold_data.setdefault(samplename, {})  # Set default creates if doesn't already exist
 

From 3e2c9946329c69c0871915bbbaabeeb274589452 Mon Sep 17 00:00:00 2001
From: Keiran Rowell <54380465+keiran-rowell-unsw@users.noreply.github.com>
Date: Mon, 30 Mar 2026 15:09:43 +1100
Subject: [PATCH 07/19] remove unused multiqc modules

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 multiqc_proteinfold/proteinfold.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/multiqc_proteinfold/proteinfold.py b/multiqc_proteinfold/proteinfold.py
index e5026500a..1e2eb3cd3 100644
--- a/multiqc_proteinfold/proteinfold.py
+++ b/multiqc_proteinfold/proteinfold.py
@@ -1,8 +1,8 @@
 from pathlib import Path
 import pandas as pd
 
-from multiqc.base_module import BaseMultiqcModule, ModuleNoSamplesFound
-from multiqc.plots import bargraph, linegraph
+from multiqc.base_module import BaseMultiqcModule
+from multiqc.plots import linegraph
 from multiqc import config  # I want the ranks to merge by default, not a user problem
 from multiqc.plots.table_object import ColumnDict
 

From 9cc19118fa78bb835fd8009e699e925c8e1cde6c Mon Sep 17 00:00:00 2001
From: Keiran Rowell <54380465+keiran-rowell-unsw@users.noreply.github.com>
Date: Mon, 30 Mar 2026 15:10:30 +1100
Subject: [PATCH 08/19] typos

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 multiqc_proteinfold/proteinfold.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/multiqc_proteinfold/proteinfold.py b/multiqc_proteinfold/proteinfold.py
index 1e2eb3cd3..48f61926b 100644
--- a/multiqc_proteinfold/proteinfold.py
+++ b/multiqc_proteinfold/proteinfold.py
@@ -22,8 +22,8 @@ class MultiqcModule(BaseMultiqcModule):
         - [HelixFold3](https://github.com/PaddlePaddle/PaddleHelix/tree/dev/apps/protein_folding/helixfold3)
         - [Boltz](https://github.com/jwohlwend/boltz)
 
-    This is intended to provide a summary of useful metrics for mass 'folding' a large set of proteins, either in terms of fishing for mulitmer interactions or comparing methods across whole proteomes.
-    It provides a visual 'at-a-glance' report of relevant metrics (average pLDDT, ipTM, *etc*) and does not replace the per-protein interactive plot from GENEREATE_REPORT in  nfcore/proteinfold
+    This is intended to provide a summary of useful metrics for mass 'folding' a large set of proteins, either in terms of fishing for multimer interactions or comparing methods across whole proteomes.
+    It provides a visual 'at-a-glance' report of relevant metrics (average pLDDT, ipTM, *etc*) and does not replace the per-protein interactive plot from GENERATE_REPORT in  nfcore/proteinfold
     """
 
     def __init__(self):

From 07d32a54b64d83870e10dd06700dcb1bd05f399f Mon Sep 17 00:00:00 2001
From: Keiran Rowell <54380465+keiran-rowell-unsw@users.noreply.github.com>
Date: Mon, 30 Mar 2026 15:11:36 +1100
Subject: [PATCH 09/19] --user in pip install for containers in docker etc

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 modules/nf-core/multiqc/main.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf
index d2862ffc1..1d1a9ae0d 100644
--- a/modules/nf-core/multiqc/main.nf
+++ b/modules/nf-core/multiqc/main.nf
@@ -32,7 +32,7 @@ process MULTIQC {
     def replace = replace_names ? "--replace-names ${replace_names}" : ''
     def samples = sample_names ? "--sample-names ${sample_names}" : ''
     """
-    pip install ${workflow.projectDir}
+    PYTHONUSERBASE="\$PWD/.multiqc_plugins" pip install --user ${workflow.projectDir}
     multiqc \\
         --force \\
         $args \\

From 5e31fd979a98866ce921b03430d0c55d3f20f70f Mon Sep 17 00:00:00 2001
From: Keiran Rowell <54380465+keiran-rowell-unsw@users.noreply.github.com>
Date: Mon, 30 Mar 2026 15:11:58 +1100
Subject: [PATCH 10/19] Remove facility logo for codebase PR

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 multiqc_proteinfold/multiqc_config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/multiqc_proteinfold/multiqc_config.yaml b/multiqc_proteinfold/multiqc_config.yaml
index fd662497b..a3d6bde1c 100644
--- a/multiqc_proteinfold/multiqc_config.yaml
+++ b/multiqc_proteinfold/multiqc_config.yaml
@@ -2,6 +2,6 @@ sp:
   proteinfold:
     fn: "*_{plddt,msa,ptm,iptm,pae}.tsv"
 
-custom_logo: "/srv/scratch/z3374843/MultiQC/multiqc/logo/SBF_logo.png"
+custom_logo: "path/to/custom/logo.png"
 custom_logo_url: "https://doi.org/10.26190/4KQF-M552"
 custom_logo_title: "Structural Biology Facility - Mark Wainwright Analytical Centre - University of New South Wales"

From e9fdc14f72e8589cdf8f73c4bd452b773ddd19fe Mon Sep 17 00:00:00 2001
From: Keiran Rowell <54380465+keiran-rowell-unsw@users.noreply.github.com>
Date: Mon, 30 Mar 2026 15:13:09 +1100
Subject: [PATCH 11/19] typos

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 multiqc_proteinfold/proteinfold.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/multiqc_proteinfold/proteinfold.py b/multiqc_proteinfold/proteinfold.py
index 48f61926b..40aeba041 100644
--- a/multiqc_proteinfold/proteinfold.py
+++ b/multiqc_proteinfold/proteinfold.py
@@ -214,7 +214,7 @@ def general_stats_table(self):
         if has_iptm:
             headers["iptm"] = {
                 "title": "Interface accuracy (ipTM)",
-                "description": "Accuracy of the relative positions of two protein subunits from a mulitmer calcuation - from the ipTM (interface predicted Template Modelling) score",
+                "description": "Accuracy of the relative positions of two protein subunits from a multimer calculation - from the ipTM (interface predicted Template Modelling) score",
                 "max": 1,
                 "min": 0,
                 "format": "{:,.2f}",

From dc729086ac10db9e8be60914635e7421020993a6 Mon Sep 17 00:00:00 2001
From: "keiran.rowell" <k.rowell@unsw.edu.au>
Date: Mon, 30 Mar 2026 15:30:11 +1100
Subject: [PATCH 12/19] Make sure user can specify sample merge rules

---
 multiqc_proteinfold/proteinfold.py | 35 +++++-------------------------
 1 file changed, 6 insertions(+), 29 deletions(-)

diff --git a/multiqc_proteinfold/proteinfold.py b/multiqc_proteinfold/proteinfold.py
index 40aeba041..94b2b7caa 100644
--- a/multiqc_proteinfold/proteinfold.py
+++ b/multiqc_proteinfold/proteinfold.py
@@ -58,34 +58,11 @@ def __init__(self):
             config.table_sample_merge = {}
 
         # Some codes generated 5 inferences for 5 models and all 25 are processed. If a user sets more they're an expert and can custom handle
-        config.table_sample_merge = {
-            "rank_0": ["_rank_0"],
-            "rank_1": ["_rank_1"],
-            "rank_2": ["_rank_2"],
-            "rank_3": ["_rank_3"],
-            "rank_4": ["_rank_4"],
-            "rank_5": ["_rank_5"],
-            "rank_6": ["_rank_6"],
-            "rank_7": ["_rank_7"],
-            "rank_8": ["_rank_8"],
-            "rank_9": ["_rank_9"],
-            "rank_10": ["_rank_10"],
-            "rank_11": ["_rank_11"],
-            "rank_12": ["_rank_12"],
-            "rank_13": ["_rank_13"],
-            "rank_14": ["_rank_14"],
-            "rank_15": ["_rank_15"],
-            "rank_16": ["_rank_16"],
-            "rank_17": ["_rank_17"],
-            "rank_18": ["_rank_18"],
-            "rank_19": ["_rank_19"],
-            "rank_20": ["_rank_20"],
-            "rank_21": ["_rank_21"],
-            "rank_22": ["_rank_22"],
-            "rank_23": ["_rank_23"],
-            "rank_24": ["_rank_24"],
-        }
-
+        rank_merge = {f"rank_{i}": [f"_rank_{i}"] for i in range(25)}
+        
+        # Load user sample merge config in preference to rank_N default ranking, if user config exists
+        config.table_sample_merge = {**rank_merge, **getattr(config, "table_sample_merge", {})}
+        
         mode_dict = {
             "alphafold2": "AlphaFold2",
             "alphafold3": "AlphaFold3",
@@ -103,7 +80,7 @@ def __init__(self):
         for f in self.find_log_files("proteinfold"):
             self.add_data_source(f)
 
-            raw_samplename = f["s_name"].split("_")[0]
+            raw_samplename = f["s_name"].split("rank_")[0]
             filepath = Path(f["root"]) / f["fn"]
             mode = "UNKNOWN"
             for parent in filepath.parents:

From a9a894e31b22ff47c3fefbd953f07ee2eb2e6f96 Mon Sep 17 00:00:00 2001
From: "keiran.rowell" <k.rowell@unsw.edu.au>
Date: Mon, 30 Mar 2026 15:30:50 +1100
Subject: [PATCH 13/19] Find multiqc yml packages, pip install -e was doing
 this for me silently

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index c0402302f..9cf7ec497 100644
--- a/setup.py
+++ b/setup.py
@@ -7,7 +7,7 @@
     author_email='k.rowell@unsw.edu.au',
     description='MultiQC plugin for proteinfold pipeline metric tsv outputs',
     url='https://github.com/nf-core/proteinfold',
-    packages=find_packages(),
+    packages={"multiqc_proteinfold": ["*.yaml"]},
     include_package_data=True,
     entry_points={
         'multiqc.modules.v1': [

From d96418e332ad117f3b750ac37aedda91b77ba6fe Mon Sep 17 00:00:00 2001
From: "keiran.rowell" <k.rowell@unsw.edu.au>
Date: Mon, 30 Mar 2026 15:33:15 +1100
Subject: [PATCH 14/19] Remove trailing whitespace

---
 multiqc_proteinfold/proteinfold.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/multiqc_proteinfold/proteinfold.py b/multiqc_proteinfold/proteinfold.py
index 94b2b7caa..90fbc0302 100644
--- a/multiqc_proteinfold/proteinfold.py
+++ b/multiqc_proteinfold/proteinfold.py
@@ -59,10 +59,10 @@ def __init__(self):
 
         # Some codes generated 5 inferences for 5 models and all 25 are processed. If a user sets more they're an expert and can custom handle
         rank_merge = {f"rank_{i}": [f"_rank_{i}"] for i in range(25)}
-        
+
         # Load user sample merge config in preference to rank_N default ranking, if user config exists
         config.table_sample_merge = {**rank_merge, **getattr(config, "table_sample_merge", {})}
-        
+
         mode_dict = {
             "alphafold2": "AlphaFold2",
             "alphafold3": "AlphaFold3",

From cbb27d2854b419e4a273c1628ed0d01d1e2694c8 Mon Sep 17 00:00:00 2001
From: "keiran.rowell" <k.rowell@unsw.edu.au>
Date: Mon, 30 Mar 2026 15:39:02 +1100
Subject: [PATCH 15/19] remove SBF logo and custom links

---
 multiqc_proteinfold/multiqc_config.yaml | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/multiqc_proteinfold/multiqc_config.yaml b/multiqc_proteinfold/multiqc_config.yaml
index a3d6bde1c..8324880ec 100644
--- a/multiqc_proteinfold/multiqc_config.yaml
+++ b/multiqc_proteinfold/multiqc_config.yaml
@@ -1,7 +1,3 @@
 sp:
   proteinfold:
     fn: "*_{plddt,msa,ptm,iptm,pae}.tsv"
-
-custom_logo: "path/to/custom/logo.png"
-custom_logo_url: "https://doi.org/10.26190/4KQF-M552"
-custom_logo_title: "Structural Biology Facility - Mark Wainwright Analytical Centre - University of New South Wales"

From d98df4830f57211320e3faf5df02249b7ca80cfa Mon Sep 17 00:00:00 2001
From: "keiran.rowell" <k.rowell@unsw.edu.au>
Date: Mon, 30 Mar 2026 16:01:56 +1100
Subject: [PATCH 16/19] PYTHONPATH with pip install target

---
 modules/nf-core/multiqc/main.nf | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf
index 1d1a9ae0d..f17c8bbbe 100644
--- a/modules/nf-core/multiqc/main.nf
+++ b/modules/nf-core/multiqc/main.nf
@@ -32,8 +32,8 @@ process MULTIQC {
     def replace = replace_names ? "--replace-names ${replace_names}" : ''
     def samples = sample_names ? "--sample-names ${sample_names}" : ''
     """
-    PYTHONUSERBASE="\$PWD/.multiqc_plugins" pip install --user ${workflow.projectDir}
-    multiqc \\
+    pip install --target "\$PWD/.multiqc_plugins" ${workflow.projectDir}
+    PYTHONPATH="\$PWD/.multiqc_plugins" multiqc \\
         --force \\
         $args \\
         $config \\

From 67ae996f426aaca2774b3477a34fb446668e40eb Mon Sep 17 00:00:00 2001
From: "keiran.rowell" <k.rowell@unsw.edu.au>
Date: Mon, 30 Mar 2026 16:02:59 +1100
Subject: [PATCH 17/19] pip install of the proteinfold multiqc plugin happens
 in the multiqc module main.nf script itself

---
 modules/nf-core/multiqc/environment.yml | 2 --
 1 file changed, 2 deletions(-)

diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml
index 757dd1bc2..d02016a00 100644
--- a/modules/nf-core/multiqc/environment.yml
+++ b/modules/nf-core/multiqc/environment.yml
@@ -5,5 +5,3 @@ channels:
   - bioconda
 dependencies:
   - bioconda::multiqc=1.32
-  - pip
-    - ${projectDir} # Install proteinfold_multiqc as a local plugin

From 2337df18bbf55eda38cdb2b11b2368c6f0e15e74 Mon Sep 17 00:00:00 2001
From: "keiran.rowell" <k.rowell@unsw.edu.au>
Date: Mon, 30 Mar 2026 16:03:24 +1100
Subject: [PATCH 18/19] more robust multiqc install

---
 main.nf                            | 2 +-
 multiqc_proteinfold/proteinfold.py | 2 +-
 setup.py                           | 3 ++-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/main.nf b/main.nf
index 9c72b965a..1d3ea5bbd 100644
--- a/main.nf
+++ b/main.nf
@@ -578,7 +578,7 @@ workflow NFCORE_PROTEINFOLD {
     }
 
     ch_multiqc_config        = channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true).first()
-    ch_multiqc_custom_config = params.multiqc_config ? channel.fromPath( params.multiqc_config ).first()  : channel.fromPath("${projectDir}/multiqc_proteinfold/multiqc_proteinfold_config.yml")
+    ch_multiqc_custom_config = params.multiqc_config ? channel.fromPath( params.multiqc_config ).first()  : channel.fromPath("${projectDir}/multiqc_proteinfold/multiqc_config.yml")
     ch_multiqc_logo          = params.multiqc_logo   ? channel.fromPath( params.multiqc_logo ).first()    : channel.empty()
     ch_multiqc_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true)
     ch_report_template     = channel.value(file("$projectDir/assets/report_template.html", checkIfExists: true))
diff --git a/multiqc_proteinfold/proteinfold.py b/multiqc_proteinfold/proteinfold.py
index 90fbc0302..b8693b14c 100644
--- a/multiqc_proteinfold/proteinfold.py
+++ b/multiqc_proteinfold/proteinfold.py
@@ -154,7 +154,7 @@ def general_stats_table(self):
         """
         Put protein structure prediction metrics into a general table for all different Deep Learning methods
         """
-        # Check for empy metrics to drop those columns where not appropriate
+        # Check for empty metrics to drop those columns where not appropriate
 
         has_iptm = any(
             sample_data.get("iptm") and sample_data.get("iptm") != 0.0 for sample_data in self.proteinfold_data.values()
diff --git a/setup.py b/setup.py
index 9cf7ec497..23bf31f9a 100644
--- a/setup.py
+++ b/setup.py
@@ -7,7 +7,8 @@
     author_email='k.rowell@unsw.edu.au',
     description='MultiQC plugin for proteinfold pipeline metric tsv outputs',
     url='https://github.com/nf-core/proteinfold',
-    packages={"multiqc_proteinfold": ["*.yaml"]},
+    packages=["multiqc_proteinfold"],
+    package_data={"multiqc_proteinfold": ["*.yaml", "*.yml"]},
     include_package_data=True,
     entry_points={
         'multiqc.modules.v1': [

From 6ccf05952740f80be3307660ce87447ab9dc96cc Mon Sep 17 00:00:00 2001
From: "keiran.rowell" <k.rowell@unsw.edu.au>
Date: Mon, 30 Mar 2026 16:15:17 +1100
Subject: [PATCH 19/19] match import .yml naming while avoiding double multiqc
 collision

---
 main.nf                                                         | 2 +-
 .../{multiqc_config.yaml => multiqc_proteinfold_config.yml}     | 0
 2 files changed, 1 insertion(+), 1 deletion(-)
 rename multiqc_proteinfold/{multiqc_config.yaml => multiqc_proteinfold_config.yml} (100%)

diff --git a/main.nf b/main.nf
index 1d3ea5bbd..9c72b965a 100644
--- a/main.nf
+++ b/main.nf
@@ -578,7 +578,7 @@ workflow NFCORE_PROTEINFOLD {
     }
 
     ch_multiqc_config        = channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true).first()
-    ch_multiqc_custom_config = params.multiqc_config ? channel.fromPath( params.multiqc_config ).first()  : channel.fromPath("${projectDir}/multiqc_proteinfold/multiqc_config.yml")
+    ch_multiqc_custom_config = params.multiqc_config ? channel.fromPath( params.multiqc_config ).first()  : channel.fromPath("${projectDir}/multiqc_proteinfold/multiqc_proteinfold_config.yml")
     ch_multiqc_logo          = params.multiqc_logo   ? channel.fromPath( params.multiqc_logo ).first()    : channel.empty()
     ch_multiqc_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true)
     ch_report_template     = channel.value(file("$projectDir/assets/report_template.html", checkIfExists: true))
diff --git a/multiqc_proteinfold/multiqc_config.yaml b/multiqc_proteinfold/multiqc_proteinfold_config.yml
similarity index 100%
rename from multiqc_proteinfold/multiqc_config.yaml
rename to multiqc_proteinfold/multiqc_proteinfold_config.yml