From 5f76d5f8999724431ddd7dd5150b960af785547c Mon Sep 17 00:00:00 2001 From: "keiran.rowell" Date: Wed, 28 Jan 2026 13:58:15 +1100 Subject: [PATCH 01/19] Bulk MultiQC reporting as a module --- modules/nf-core/multiqc/environment.yml | 2 + multiqc_proteinfold/__init__.py | 6 + multiqc_proteinfold/multiqc_config.yaml | 7 + multiqc_proteinfold/proteinfold.py | 271 ++++++++++++++++++++++++ setup.py | 28 +++ 5 files changed, 314 insertions(+) create mode 100644 multiqc_proteinfold/__init__.py create mode 100644 multiqc_proteinfold/multiqc_config.yaml create mode 100644 multiqc_proteinfold/proteinfold.py create mode 100644 setup.py diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml index d02016a00..757dd1bc2 100644 --- a/modules/nf-core/multiqc/environment.yml +++ b/modules/nf-core/multiqc/environment.yml @@ -5,3 +5,5 @@ channels: - bioconda dependencies: - bioconda::multiqc=1.32 + - pip + - ${projectDir} # Install proteinfold_multiqc as a local plugin diff --git a/multiqc_proteinfold/__init__.py b/multiqc_proteinfold/__init__.py new file mode 100644 index 000000000..d57194bdf --- /dev/null +++ b/multiqc_proteinfold/__init__.py @@ -0,0 +1,6 @@ +"""MultiQC plugin for proteinfold pipeline outputs converting metrics to simple tsvs.""" + +from .proteinfold import MultiqcModule + +__version__ = '0.1.0' +__all__ = ["MultiqcModule"] diff --git a/multiqc_proteinfold/multiqc_config.yaml b/multiqc_proteinfold/multiqc_config.yaml new file mode 100644 index 000000000..78e16e57b --- /dev/null +++ b/multiqc_proteinfold/multiqc_config.yaml @@ -0,0 +1,7 @@ +sp: + proteinfold: + fn: '*_{plddt,msa,ptm,iptm,pae}.tsv' + +custom_logo: "/srv/scratch/z3374843/MultiQC/multiqc/logo/SBF_logo.png" +custom_logo_url: "https://doi.org/10.26190/4KQF-M552" +custom_logo_title: "Structural Biology Facility - Mark Wainwright Analytical Centre - University of New South Wales" diff --git a/multiqc_proteinfold/proteinfold.py b/multiqc_proteinfold/proteinfold.py new 
file mode 100644 index 000000000..c196b5200 --- /dev/null +++ b/multiqc_proteinfold/proteinfold.py @@ -0,0 +1,271 @@ +from pathlib import Path +import pandas as pd + +from multiqc.base_module import BaseMultiqcModule, ModuleNoSamplesFound +from multiqc.plots import bargraph, linegraph +from multiqc import config # I want the ranks to merge by default, not a user problem +from multiqc.plots.table_object import ColumnDict + +from typing import Dict, Any, cast + + +class MultiqcModule(BaseMultiqcModule): + """ + The module parses results generated by a variety of protein structure prediction programs in the [ProteinFold](https://nf-co.re/proteinfold/) pipeline. + This includes (as of release v2.0): + - [AlphaFold2](https://github.com/google-deepmind/alphafold) + - [AlphaFold3](https://github.com/google-deepmind/alphafold3) + - [ColabFold](https://github.com/sokrypton/ColabFold) + - [ESMFold](https://github.com/facebookresearch/esm) + - [RoseTTaFold-All-Atom](https://github.com/baker-laboratory/RoseTTAFold-All-Atom) + - [RoseTTaFold2-Nucleic-Acids](https://github.com/uw-ipd/RoseTTAFold2NA) [Wait on merge] + - [HelixFold3](https://github.com/PaddlePaddle/PaddleHelix/tree/dev/apps/protein_folding/helixfold3) + - [Boltz](https://github.com/jwohlwend/boltz) + + This is intended to provide a summary of useful metrics for mass 'folding' a large set of proteins, either in terms of fishing for mulitmer interactions or comparing methods across whole proteomes. 
+ It provides a visual 'at-a-glance' report of relevant metrics (average pLDDT, ipTM, *etc*) and does not replace the per-protein interactive plot from GENEREATE_REPORT in nfcore/proteinfold + """ + + def __init__(self): + super(MultiqcModule, self).__init__( + name="ProteinFold", + anchor="proteinfold", + href=[ + "https://nf-co.re/proteinfold", + "https://github.com/google-deepmind/alphafold", + "https://github.com/google-deepmind/alphafold3", + "https://github.com/sokrypton/ColabFold", + "https://github.com/facebookresearch/esm", + "https://github.com/baker-laboratory/RoseTTAFold-All-Atom", + "https://github.com/uw-ipd/RoseTTAFold2NA", + "https://github.com/PaddlePaddle/PaddleHelix/tree/dev/apps/protein_folding/helixfold3", + "https://github.com/jwohlwend/boltz", + ], + info="ProteinFold - protein structure inference methods through a single nextflow pipeline interface", + doi=[ + "10.1038/s41586-021-03819-2", + "10.1038/s41592-022-01488-1", + "10.1126/science.ade2574", + "10.1126/science.adl2528", + "10.1038/s41592-023-02086-5", + "10.48550/arXiv.2408.16975", + "10.1101/2024.11.19.624167", + ], + ) + + # Want to treat the ranked inference runs as 'sub-samples' for grouping logic, even if not separate files + if not hasattr(config, "table_sample_merge"): + config.table_sample_merge = {} + + # Some codes generated 5 inferences for 5 models and all 25 are processed. 
If a user sets more they're an expert and can custom handle + config.table_sample_merge = { + "rank_0": ["_rank_0"], + "rank_1": ["_rank_1"], + "rank_2": ["_rank_2"], + "rank_3": ["_rank_3"], + "rank_4": ["_rank_4"], + "rank_5": ["_rank_5"], + "rank_6": ["_rank_6"], + "rank_7": ["_rank_7"], + "rank_8": ["_rank_8"], + "rank_9": ["_rank_9"], + "rank_10": ["_rank_10"], + "rank_11": ["_rank_11"], + "rank_12": ["_rank_12"], + "rank_13": ["_rank_13"], + "rank_14": ["_rank_14"], + "rank_15": ["_rank_15"], + "rank_16": ["_rank_16"], + "rank_17": ["_rank_17"], + "rank_18": ["_rank_18"], + "rank_19": ["_rank_19"], + "rank_20": ["_rank_20"], + "rank_21": ["_rank_21"], + "ranmodek_22": ["_rank_22"], + "rank_23": ["_rank_23"], + "rank_24": ["_rank_24"], + } + + mode_dict = { + "alphafold2": "AlphaFold2", + "alphafold3": "AlphaFold3", + "colabfold": "ColabFold", + "esmfold": "ESMFold", + "rosettafold2na": "RoseTTAFold2-Nucleic-Acids", + "rosettafold_all_atom": "RoseTTAFold-All-Atom", + "helixfold3": "HelixFold3", + "boltz": "Boltz", + } + + self.proteinfold_data: Dict[str, Any] = {} + # I want to enable sample grouping: https://docs.seqera.io/multiqc/reports/customisation#sample-grouping + + for f in self.find_log_files("proteinfold"): + self.add_data_source(f) + + raw_samplename = f["s_name"].split("_")[0] + filepath = Path(f["root"]) / f["fn"] + mode = "UNKNOWN" + for parent in filepath.parents: + if parent.name in mode_dict: # traverse up the filepath until you hit a mode labelled dir + mode = mode_dict[parent.name] + break + + mode_samplename = f"{raw_samplename}_{mode}" + samplename = self.clean_s_name(mode_samplename, f) + print(filepath) + print(samplename) + + self.proteinfold_data.setdefault(samplename, {}) # Set default creates if doesn't already exist + + if f["fn"].endswith("_msa.tsv"): + self.proteinfold_data[samplename]["msa_depth"] = f["f"].count("\n") + + if f["fn"].endswith("_plddt.tsv"): + df = pd.read_csv(filepath, sep="\t") + rank_cols = [col for col in 
df.columns if col.startswith("rank_")] + + # Full plddt data frame for plotting purposes + plddt_data = {col: df.set_index("Positions")[col].to_dict() for col in rank_cols} + self.proteinfold_data[samplename]["plddt"] = plddt_data + + rank_means = df[rank_cols].mean().to_dict() + # Parent sample entry should still have the top ranked value + self.proteinfold_data[samplename]["mean_plddt"] = rank_means.get("rank_0") + + for rank in rank_cols: + rank_num = rank.split("rank_")[1] + subsample = f"{samplename}_rank_{rank_num}" + + self.proteinfold_data.setdefault(subsample, {}) + self.proteinfold_data[subsample]["mean_plddt"] = rank_means[rank] + + if f["fn"].endswith("_iptm.tsv") and not f["fn"].endswith("_chainwise_iptm.tsv"): + iptm_series = cast( + pd.Series, pd.read_csv(filepath, sep="\t", header=None, index_col=0).squeeze(axis=1) + ) # Squeeze makes a series accessible on index label + if ( + 0 in iptm_series.index + ): # Since pandas infers the index as an int this is an exact int match not a greedy string match + self.proteinfold_data[samplename]["iptm"] = iptm_series.loc[ + 0 + ] # Remember loc is an *int* match on rank 0 index + + for rank_num in iptm_series.index: + subsample = f"{samplename}_rank_{rank_num}" + self.proteinfold_data.setdefault(subsample, {})["iptm"] = iptm_series.loc[rank_num] + + if f["fn"].endswith("_ptm.tsv") and not f["fn"].endswith("_chainwise_ptm.tsv"): + ptm_series = cast( + pd.Series, pd.read_csv(filepath, sep="\t", header=None, index_col=0).squeeze(axis=1) + ) # Squeeze on axis avoids int conversion for single entries + if 0 in ptm_series.index: + self.proteinfold_data[samplename]["ptm"] = ptm_series.loc[0] + + for rank_num in ptm_series.index: + subsample = f"{samplename}_rank_{rank_num}" + self.proteinfold_data.setdefault(subsample, {})["ptm"] = ptm_series.loc[rank_num] + + self.write_data_file( + self.proteinfold_data, "proteinfold_data" + ) # I want to structure and rename from avg_plDDT to summary_stats + 
self.general_stats_table() + # Togglable plDDT by residue plots of all ranks + self.plddt_line_plot() + + def general_stats_table(self): + """ + Put protein structure prediction metrics into a general table for all different Deep Learning methods + """ + # Check for empy metrics to drop those columns where not appropriate + + has_iptm = any( + sample_data.get("iptm") and sample_data.get("iptm") != 0.0 for sample_data in self.proteinfold_data.values() + ) + has_ptm = any( + sample_data.get("ptm") and sample_data.get("ptm") != 0.0 for sample_data in self.proteinfold_data.values() + ) + + headers: Dict[str, ColumnDict] = { + "msa_depth": { + "title": "Related sequence depth (MSA)", + "description": "The number of related sequences (across the whole protein) that could be retrieved from the MSA (Multiple Sequence Alignment) stage", + }, + "mean_plddt": { + "title": "Structure confidence (average pLDDT)", + "description": "Structure prediction confidence score across all residues in the top ranked protein structure - from the mean pLDDT (predicted Local Distance Difference Test) value", + "max": 100, + "min": 0, + "cond_formatting_rules": { + "very-low": [{"lt": 50}], + "low": [{"gt": 50}, {"lt": 70}], + "high": [{"gt": 70}, {"lt": 90}], + "very-high": [{"gt": 90}], + }, + "cond_formatting_colours": [ + {"very-low": "#f0743e"}, + {"low": "#f9d613"}, + {"high": "#60c2e8"}, + {"very-high": "#014ecc"}, + ], + }, + } + + if has_iptm: + headers["iptm"] = { + "title": "Interface accuracy (ipTM)", + "description": "Accuracy of the relative positions of two protein subunits from a mulitmer calcuation - from the ipTM (interface predicted Template Modelling) score", + "max": 1, + "min": 0, + "format": "{:,.2f}", + "scale": "Purples", + } + + if has_ptm: + headers["ptm"] = { + "title": "Global accuracy (TM)", + "description": "Global accuracy of the protein folded, less sensitive to localised inaccuracies than raw 3D atomic deviations (RMSD) - from the pTM (predicted Template 
Modelling) score", + "max": 1, + "min": 0, + "format": "{:,.2f}", + "scale": "Blues", + } + + self.general_stats_addcols(self.proteinfold_data, headers) + + def plddt_line_plot(self): + """Line plot showing pLDDT confidence across residue position of selected sample, for all ranks""" + + parent_samples = {} + + for sample, metrics in self.proteinfold_data.items(): + if "plddt" in metrics: + if "_rank_" not in sample: # The parent sample already has the plddt data for all ranks + parent_samples[sample] = metrics["plddt"] + + data_labels = [] # Need a data_labels list for the sample switcher + plot_data_list = [] + + # The populated data_labels plot config section is what the switcher uses + for parent_sample, rank_data in parent_samples.items(): + data_labels.append({"name": parent_sample, "ylab": "pLDDT score"}) + plot_data_list.append(rank_data) + + pconfig = { + "id": "proteinfold_plddt_lineplot", + "title": "ProteinFold: pLDDT by Position", + "xlab": "Residue Position", + "ylab": "pLDDT Score", + "ymin": 0, + "ymax": 100, + "data_labels": data_labels, + } + + plot_html = linegraph.plot(plot_data_list, pconfig) + + self.add_section( + name="pLDDT Confidence by residue", + anchor="proteinfold-plddt-per-res", + description="Per-residue confidence scores across all predicted ranks", + plot=plot_html, + ) diff --git a/setup.py b/setup.py new file mode 100644 index 000000000..c0402302f --- /dev/null +++ b/setup.py @@ -0,0 +1,28 @@ +from setuptools import setup, find_packages + +setup( + name='multiqc-proteinfold', + version='0.1.0', + author='Keiran Rowell', + author_email='k.rowell@unsw.edu.au', + description='MultiQC plugin for proteinfold pipeline metric tsv outputs', + url='https://github.com/nf-core/proteinfold', + packages=find_packages(), + include_package_data=True, + entry_points={ + 'multiqc.modules.v1': [ + 'proteinfold = multiqc_proteinfold.proteinfold:MultiqcModule', + ], + }, + install_requires=[ + 'multiqc>=1.15', + 'pandas', + ], + classifiers=[ + 
'Development Status :: 4 - Beta', + 'Intended Audience :: Science/Research', + 'License :: OSI Approved :: MIT License', + 'Programming Language :: Python :: 3', + ], + python_requires='>=3.8', +) From 74337a52ab3016891a8337ca909abe16a7a72aeb Mon Sep 17 00:00:00 2001 From: "keiran.rowell" Date: Wed, 28 Jan 2026 14:09:41 +1100 Subject: [PATCH 02/19] Line which install multiqc from setup.py module --- modules/nf-core/multiqc/main.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 8f2086b4f..d2862ffc1 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -32,6 +32,7 @@ process MULTIQC { def replace = replace_names ? "--replace-names ${replace_names}" : '' def samples = sample_names ? "--sample-names ${sample_names}" : '' """ + pip install ${workflow.projectDir} multiqc \\ --force \\ $args \\ From 8d3bf30b4a08d7e6609b983b84f0e5bd6e1ddee0 Mon Sep 17 00:00:00 2001 From: "keiran.rowell" Date: Wed, 28 Jan 2026 14:13:48 +1100 Subject: [PATCH 03/19] Make prettier happy... 
--- multiqc_proteinfold/multiqc_config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/multiqc_proteinfold/multiqc_config.yaml b/multiqc_proteinfold/multiqc_config.yaml index 78e16e57b..fd662497b 100644 --- a/multiqc_proteinfold/multiqc_config.yaml +++ b/multiqc_proteinfold/multiqc_config.yaml @@ -1,6 +1,6 @@ sp: proteinfold: - fn: '*_{plddt,msa,ptm,iptm,pae}.tsv' + fn: "*_{plddt,msa,ptm,iptm,pae}.tsv" custom_logo: "/srv/scratch/z3374843/MultiQC/multiqc/logo/SBF_logo.png" custom_logo_url: "https://doi.org/10.26190/4KQF-M552" From 652b877970e27ae5ce49fd433ef8991889e24900 Mon Sep 17 00:00:00 2001 From: "keiran.rowell" Date: Wed, 28 Jan 2026 16:21:32 +1100 Subject: [PATCH 04/19] Add the multiqc proteinfold extra config channel --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 9c95ae0f1..9c72b965a 100644 --- a/main.nf +++ b/main.nf @@ -578,7 +578,7 @@ workflow NFCORE_PROTEINFOLD { } ch_multiqc_config = channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true).first() - ch_multiqc_custom_config = params.multiqc_config ? channel.fromPath( params.multiqc_config ).first() : channel.empty() + ch_multiqc_custom_config = params.multiqc_config ? channel.fromPath( params.multiqc_config ).first() : channel.fromPath("${projectDir}/multiqc_proteinfold/multiqc_proteinfold_config.yml") ch_multiqc_logo = params.multiqc_logo ? channel.fromPath( params.multiqc_logo ).first() : channel.empty() ch_multiqc_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) ch_report_template = channel.value(file("$projectDir/assets/report_template.html", checkIfExists: true)) From 429ee78fe6fe8d9940fba2e90ffd4002081c24f2 Mon Sep 17 00:00:00 2001 From: Keiran Rowell <54380465+keiran-rowell-unsw@users.noreply.github.com> Date: Mon, 30 Mar 2026 15:08:23 +1100 Subject: [PATCH 05/19] rank typo Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- multiqc_proteinfold/proteinfold.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/multiqc_proteinfold/proteinfold.py b/multiqc_proteinfold/proteinfold.py index c196b5200..598529c22 100644 --- a/multiqc_proteinfold/proteinfold.py +++ b/multiqc_proteinfold/proteinfold.py @@ -81,7 +81,7 @@ def __init__(self): "rank_19": ["_rank_19"], "rank_20": ["_rank_20"], "rank_21": ["_rank_21"], - "ranmodek_22": ["_rank_22"], + "rank_22": ["_rank_22"], "rank_23": ["_rank_23"], "rank_24": ["_rank_24"], } From b98d5c76dee85f7d654774692f8e27ae668fb6a8 Mon Sep 17 00:00:00 2001 From: Keiran Rowell <54380465+keiran-rowell-unsw@users.noreply.github.com> Date: Mon, 30 Mar 2026 15:08:48 +1100 Subject: [PATCH 06/19] use logger not print Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- multiqc_proteinfold/proteinfold.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/multiqc_proteinfold/proteinfold.py b/multiqc_proteinfold/proteinfold.py index 598529c22..e5026500a 100644 --- a/multiqc_proteinfold/proteinfold.py +++ b/multiqc_proteinfold/proteinfold.py @@ -113,8 +113,8 @@ def __init__(self): mode_samplename = f"{raw_samplename}_{mode}" samplename = self.clean_s_name(mode_samplename, f) - print(filepath) - print(samplename) + self.log.debug(filepath) + self.log.debug(samplename) self.proteinfold_data.setdefault(samplename, {}) # Set default creates if doesn't already exist From 
3e2c9946329c69c0871915bbbaabeeb274589452 Mon Sep 17 00:00:00 2001 From: Keiran Rowell <54380465+keiran-rowell-unsw@users.noreply.github.com> Date: Mon, 30 Mar 2026 15:09:43 +1100 Subject: [PATCH 07/19] remove unused multiqc modules Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- multiqc_proteinfold/proteinfold.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/multiqc_proteinfold/proteinfold.py b/multiqc_proteinfold/proteinfold.py index e5026500a..1e2eb3cd3 100644 --- a/multiqc_proteinfold/proteinfold.py +++ b/multiqc_proteinfold/proteinfold.py @@ -1,8 +1,8 @@ from pathlib import Path import pandas as pd -from multiqc.base_module import BaseMultiqcModule, ModuleNoSamplesFound -from multiqc.plots import bargraph, linegraph +from multiqc.base_module import BaseMultiqcModule +from multiqc.plots import linegraph from multiqc import config # I want the ranks to merge by default, not a user problem from multiqc.plots.table_object import ColumnDict From 9cc19118fa78bb835fd8009e699e925c8e1cde6c Mon Sep 17 00:00:00 2001 From: Keiran Rowell <54380465+keiran-rowell-unsw@users.noreply.github.com> Date: Mon, 30 Mar 2026 15:10:30 +1100 Subject: [PATCH 08/19] typos Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- multiqc_proteinfold/proteinfold.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/multiqc_proteinfold/proteinfold.py b/multiqc_proteinfold/proteinfold.py index 1e2eb3cd3..48f61926b 100644 --- a/multiqc_proteinfold/proteinfold.py +++ b/multiqc_proteinfold/proteinfold.py @@ -22,8 +22,8 @@ class MultiqcModule(BaseMultiqcModule): - [HelixFold3](https://github.com/PaddlePaddle/PaddleHelix/tree/dev/apps/protein_folding/helixfold3) - [Boltz](https://github.com/jwohlwend/boltz) - This is intended to provide a summary of useful metrics for mass 'folding' a large set of proteins, either in terms of fishing for mulitmer interactions or comparing methods across whole proteomes. 
- It provides a visual 'at-a-glance' report of relevant metrics (average pLDDT, ipTM, *etc*) and does not replace the per-protein interactive plot from GENEREATE_REPORT in nfcore/proteinfold + This is intended to provide a summary of useful metrics for mass 'folding' a large set of proteins, either in terms of fishing for multimer interactions or comparing methods across whole proteomes. + It provides a visual 'at-a-glance' report of relevant metrics (average pLDDT, ipTM, *etc*) and does not replace the per-protein interactive plot from GENERATE_REPORT in nfcore/proteinfold """ def __init__(self): From 07d32a54b64d83870e10dd06700dcb1bd05f399f Mon Sep 17 00:00:00 2001 From: Keiran Rowell <54380465+keiran-rowell-unsw@users.noreply.github.com> Date: Mon, 30 Mar 2026 15:11:36 +1100 Subject: [PATCH 09/19] --user in pip install for containers in docker etc Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- modules/nf-core/multiqc/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index d2862ffc1..1d1a9ae0d 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -32,7 +32,7 @@ process MULTIQC { def replace = replace_names ? "--replace-names ${replace_names}" : '' def samples = sample_names ? 
"--sample-names ${sample_names}" : '' """ - pip install ${workflow.projectDir} + PYTHONUSERBASE="\$PWD/.multiqc_plugins" pip install --user ${workflow.projectDir} multiqc \\ --force \\ $args \\ From 5e31fd979a98866ce921b03430d0c55d3f20f70f Mon Sep 17 00:00:00 2001 From: Keiran Rowell <54380465+keiran-rowell-unsw@users.noreply.github.com> Date: Mon, 30 Mar 2026 15:11:58 +1100 Subject: [PATCH 10/19] Remove facility logo for codebase PR Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- multiqc_proteinfold/multiqc_config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/multiqc_proteinfold/multiqc_config.yaml b/multiqc_proteinfold/multiqc_config.yaml index fd662497b..a3d6bde1c 100644 --- a/multiqc_proteinfold/multiqc_config.yaml +++ b/multiqc_proteinfold/multiqc_config.yaml @@ -2,6 +2,6 @@ sp: proteinfold: fn: "*_{plddt,msa,ptm,iptm,pae}.tsv" -custom_logo: "/srv/scratch/z3374843/MultiQC/multiqc/logo/SBF_logo.png" +custom_logo: "path/to/custom/logo.png" custom_logo_url: "https://doi.org/10.26190/4KQF-M552" custom_logo_title: "Structural Biology Facility - Mark Wainwright Analytical Centre - University of New South Wales" From e9fdc14f72e8589cdf8f73c4bd452b773ddd19fe Mon Sep 17 00:00:00 2001 From: Keiran Rowell <54380465+keiran-rowell-unsw@users.noreply.github.com> Date: Mon, 30 Mar 2026 15:13:09 +1100 Subject: [PATCH 11/19] typos Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- multiqc_proteinfold/proteinfold.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/multiqc_proteinfold/proteinfold.py b/multiqc_proteinfold/proteinfold.py index 48f61926b..40aeba041 100644 --- a/multiqc_proteinfold/proteinfold.py +++ b/multiqc_proteinfold/proteinfold.py @@ -214,7 +214,7 @@ def general_stats_table(self): if has_iptm: headers["iptm"] = { "title": "Interface accuracy (ipTM)", - "description": "Accuracy of the relative positions of two protein subunits from a mulitmer calcuation - from the ipTM 
(interface predicted Template Modelling) score", + "description": "Accuracy of the relative positions of two protein subunits from a multimer calculation - from the ipTM (interface predicted Template Modelling) score", "max": 1, "min": 0, "format": "{:,.2f}", From dc729086ac10db9e8be60914635e7421020993a6 Mon Sep 17 00:00:00 2001 From: "keiran.rowell" Date: Mon, 30 Mar 2026 15:30:11 +1100 Subject: [PATCH 12/19] makesure user can specify sample merge rules --- multiqc_proteinfold/proteinfold.py | 35 +++++------------------------- 1 file changed, 6 insertions(+), 29 deletions(-) diff --git a/multiqc_proteinfold/proteinfold.py b/multiqc_proteinfold/proteinfold.py index 40aeba041..94b2b7caa 100644 --- a/multiqc_proteinfold/proteinfold.py +++ b/multiqc_proteinfold/proteinfold.py @@ -58,34 +58,11 @@ def __init__(self): config.table_sample_merge = {} # Some codes generated 5 inferences for 5 models and all 25 are processed. If a user sets more they're an expert and can custom handle - config.table_sample_merge = { - "rank_0": ["_rank_0"], - "rank_1": ["_rank_1"], - "rank_2": ["_rank_2"], - "rank_3": ["_rank_3"], - "rank_4": ["_rank_4"], - "rank_5": ["_rank_5"], - "rank_6": ["_rank_6"], - "rank_7": ["_rank_7"], - "rank_8": ["_rank_8"], - "rank_9": ["_rank_9"], - "rank_10": ["_rank_10"], - "rank_11": ["_rank_11"], - "rank_12": ["_rank_12"], - "rank_13": ["_rank_13"], - "rank_14": ["_rank_14"], - "rank_15": ["_rank_15"], - "rank_16": ["_rank_16"], - "rank_17": ["_rank_17"], - "rank_18": ["_rank_18"], - "rank_19": ["_rank_19"], - "rank_20": ["_rank_20"], - "rank_21": ["_rank_21"], - "rank_22": ["_rank_22"], - "rank_23": ["_rank_23"], - "rank_24": ["_rank_24"], - } - + rank_merge = {f"rank_{i}": [f"_rank_{i}"] for i in range(25)} + + # Load user sample merge config in preference to rank_N default ranking, if user config exists + config.table_sample_merge = {**rank_merge, **getattr(config, "table_sample_merge", {})} + mode_dict = { "alphafold2": "AlphaFold2", "alphafold3": 
"AlphaFold3", @@ -103,7 +80,7 @@ def __init__(self): for f in self.find_log_files("proteinfold"): self.add_data_source(f) - raw_samplename = f["s_name"].split("_")[0] + raw_samplename = f["s_name"].split("rank_")[0] filepath = Path(f["root"]) / f["fn"] mode = "UNKNOWN" for parent in filepath.parents: From a9a894e31b22ff47c3fefbd953f07ee2eb2e6f96 Mon Sep 17 00:00:00 2001 From: "keiran.rowell" Date: Mon, 30 Mar 2026 15:30:50 +1100 Subject: [PATCH 13/19] Find multiqc yml packages, pip install -e was doing this for me silently --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index c0402302f..9cf7ec497 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ author_email='k.rowell@unsw.edu.au', description='MultiQC plugin for proteinfold pipeline metric tsv outputs', url='https://github.com/nf-core/proteinfold', - packages=find_packages(), + packages={"multiqc_proteinfold": ["*.yaml"]}, include_package_data=True, entry_points={ 'multiqc.modules.v1': [ From d96418e332ad117f3b750ac37aedda91b77ba6fe Mon Sep 17 00:00:00 2001 From: "keiran.rowell" Date: Mon, 30 Mar 2026 15:33:15 +1100 Subject: [PATCH 14/19] trailling whitespace........... --- multiqc_proteinfold/proteinfold.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/multiqc_proteinfold/proteinfold.py b/multiqc_proteinfold/proteinfold.py index 94b2b7caa..90fbc0302 100644 --- a/multiqc_proteinfold/proteinfold.py +++ b/multiqc_proteinfold/proteinfold.py @@ -59,10 +59,10 @@ def __init__(self): # Some codes generated 5 inferences for 5 models and all 25 are processed. 
If a user sets more they're an expert and can custom handle rank_merge = {f"rank_{i}": [f"_rank_{i}"] for i in range(25)} - + # Load user sample merge config in preference to rank_N default ranking, if user config exists config.table_sample_merge = {**rank_merge, **getattr(config, "table_sample_merge", {})} - + mode_dict = { "alphafold2": "AlphaFold2", "alphafold3": "AlphaFold3", From cbb27d2854b419e4a273c1628ed0d01d1e2694c8 Mon Sep 17 00:00:00 2001 From: "keiran.rowell" Date: Mon, 30 Mar 2026 15:39:02 +1100 Subject: [PATCH 15/19] remove SBF logo and custom links --- multiqc_proteinfold/multiqc_config.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/multiqc_proteinfold/multiqc_config.yaml b/multiqc_proteinfold/multiqc_config.yaml index a3d6bde1c..8324880ec 100644 --- a/multiqc_proteinfold/multiqc_config.yaml +++ b/multiqc_proteinfold/multiqc_config.yaml @@ -1,7 +1,3 @@ sp: proteinfold: fn: "*_{plddt,msa,ptm,iptm,pae}.tsv" - -custom_logo: "path/to/custom/logo.png" -custom_logo_url: "https://doi.org/10.26190/4KQF-M552" -custom_logo_title: "Structural Biology Facility - Mark Wainwright Analytical Centre - University of New South Wales" From d98df4830f57211320e3faf5df02249b7ca80cfa Mon Sep 17 00:00:00 2001 From: "keiran.rowell" Date: Mon, 30 Mar 2026 16:01:56 +1100 Subject: [PATCH 16/19] PYTHONPATH with pip install target --- modules/nf-core/multiqc/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 1d1a9ae0d..f17c8bbbe 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -32,8 +32,8 @@ process MULTIQC { def replace = replace_names ? "--replace-names ${replace_names}" : '' def samples = sample_names ? 
"--sample-names ${sample_names}" : '' """ - PYTHONUSERBASE="\$PWD/.multiqc_plugins" pip install --user ${workflow.projectDir} - multiqc \\ + pip install --target "\$PWD/.multiqc_plugins" ${workflow.projectDir} + PYTHONPATH="\$PWD/.multiqc_plugins" multiqc \\ --force \\ $args \\ $config \\ From 67ae996f426aaca2774b3477a34fb446668e40eb Mon Sep 17 00:00:00 2001 From: "keiran.rowell" Date: Mon, 30 Mar 2026 16:02:59 +1100 Subject: [PATCH 17/19] pip install of the proteinfold multiqc version happens in the pipeline main.nf itself --- modules/nf-core/multiqc/environment.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml index 757dd1bc2..d02016a00 100644 --- a/modules/nf-core/multiqc/environment.yml +++ b/modules/nf-core/multiqc/environment.yml @@ -5,5 +5,3 @@ channels: - bioconda dependencies: - bioconda::multiqc=1.32 - - pip - - ${projectDir} # Install proteinfold_multiqc as a local plugin From 2337df18bbf55eda38cdb2b11b2368c6f0e15e74 Mon Sep 17 00:00:00 2001 From: "keiran.rowell" Date: Mon, 30 Mar 2026 16:03:24 +1100 Subject: [PATCH 18/19] more robust multiqc install --- main.nf | 2 +- multiqc_proteinfold/proteinfold.py | 2 +- setup.py | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/main.nf b/main.nf index 9c72b965a..1d3ea5bbd 100644 --- a/main.nf +++ b/main.nf @@ -578,7 +578,7 @@ workflow NFCORE_PROTEINFOLD { } ch_multiqc_config = channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true).first() - ch_multiqc_custom_config = params.multiqc_config ? channel.fromPath( params.multiqc_config ).first() : channel.fromPath("${projectDir}/multiqc_proteinfold/multiqc_proteinfold_config.yml") + ch_multiqc_custom_config = params.multiqc_config ? channel.fromPath( params.multiqc_config ).first() : channel.fromPath("${projectDir}/multiqc_proteinfold/multiqc_config.yml") ch_multiqc_logo = params.multiqc_logo ? 
channel.fromPath( params.multiqc_logo ).first() : channel.empty() ch_multiqc_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) ch_report_template = channel.value(file("$projectDir/assets/report_template.html", checkIfExists: true)) diff --git a/multiqc_proteinfold/proteinfold.py b/multiqc_proteinfold/proteinfold.py index 90fbc0302..b8693b14c 100644 --- a/multiqc_proteinfold/proteinfold.py +++ b/multiqc_proteinfold/proteinfold.py @@ -154,7 +154,7 @@ def general_stats_table(self): """ Put protein structure prediction metrics into a general table for all different Deep Learning methods """ - # Check for empy metrics to drop those columns where not appropriate + # Check for empty metrics to drop those columns where not appropriate has_iptm = any( sample_data.get("iptm") and sample_data.get("iptm") != 0.0 for sample_data in self.proteinfold_data.values() diff --git a/setup.py b/setup.py index 9cf7ec497..23bf31f9a 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,8 @@ author_email='k.rowell@unsw.edu.au', description='MultiQC plugin for proteinfold pipeline metric tsv outputs', url='https://github.com/nf-core/proteinfold', - packages={"multiqc_proteinfold": ["*.yaml"]}, + packages=["multiqc_proteinfold"], + package_data={"multiqc_proteinfold": ["*.yaml", "*.yml"]}, include_package_data=True, entry_points={ 'multiqc.modules.v1': [ From 6ccf05952740f80be3307660ce87447ab9dc96cc Mon Sep 17 00:00:00 2001 From: "keiran.rowell" Date: Mon, 30 Mar 2026 16:15:17 +1100 Subject: [PATCH 19/19] match import .yml naming while avoiding double multiqc collision --- main.nf | 2 +- .../{multiqc_config.yaml => multiqc_proteinfold_config.yml} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename multiqc_proteinfold/{multiqc_config.yaml => multiqc_proteinfold_config.yml} (100%) diff --git a/main.nf b/main.nf index 1d3ea5bbd..9c72b965a 100644 --- 
a/main.nf +++ b/main.nf @@ -578,7 +578,7 @@ workflow NFCORE_PROTEINFOLD { } ch_multiqc_config = channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true).first() - ch_multiqc_custom_config = params.multiqc_config ? channel.fromPath( params.multiqc_config ).first() : channel.fromPath("${projectDir}/multiqc_proteinfold/multiqc_config.yml") + ch_multiqc_custom_config = params.multiqc_config ? channel.fromPath( params.multiqc_config ).first() : channel.fromPath("${projectDir}/multiqc_proteinfold/multiqc_proteinfold_config.yml") ch_multiqc_logo = params.multiqc_logo ? channel.fromPath( params.multiqc_logo ).first() : channel.empty() ch_multiqc_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) ch_report_template = channel.value(file("$projectDir/assets/report_template.html", checkIfExists: true)) diff --git a/multiqc_proteinfold/multiqc_config.yaml b/multiqc_proteinfold/multiqc_proteinfold_config.yml similarity index 100% rename from multiqc_proteinfold/multiqc_config.yaml rename to multiqc_proteinfold/multiqc_proteinfold_config.yml